{
  "best_metric": 0.11676687747240067,
  "best_model_checkpoint": "output/single/quirky_sciq_raw/checkpoint-2048",
  "epoch": 6.540518962075848,
  "eval_steps": 10000000000,
  "global_step": 2048,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "eval_val_acc_stderr": 0.012937023555103245,
      "eval_val_accuracy": 0.8282352941176471,
      "eval_val_loss": 1.4692819118499756,
      "eval_val_runtime": 32.1876,
      "eval_val_samples_per_second": 26.408,
      "eval_val_steps_per_second": 3.324,
      "step": 1
    },
    {
      "epoch": 0.0,
      "eval_val_alice_acc_stderr": 0.01508756447132745,
      "eval_val_alice_accuracy": 0.8891454965357968,
      "eval_val_alice_loss": 1.4051198959350586,
      "eval_val_alice_runtime": 16.4546,
      "eval_val_alice_samples_per_second": 26.315,
      "eval_val_alice_steps_per_second": 3.343,
      "step": 1
    },
    {
      "epoch": 0.0,
      "eval_val_bob_acc_stderr": 0.02083661056330264,
      "eval_val_bob_accuracy": 0.762589928057554,
      "eval_val_bob_loss": 1.537455677986145,
      "eval_val_bob_runtime": 16.6246,
      "eval_val_bob_samples_per_second": 25.083,
      "eval_val_bob_steps_per_second": 3.188,
      "step": 1
    },
    {
      "epoch": 0.0,
      "eval_val_bob_gt_acc_stderr": 0.015341286771526373,
      "eval_val_bob_gt_accuracy": 0.8896882494004796,
      "eval_val_bob_gt_loss": 1.4268440008163452,
      "eval_val_bob_gt_runtime": 16.763,
      "eval_val_bob_gt_samples_per_second": 24.876,
      "eval_val_bob_gt_steps_per_second": 3.162,
      "step": 1
    },
    {
      "epoch": 0.01,
      "eval_val_acc_stderr": 0.012937023555103245,
      "eval_val_accuracy": 0.8282352941176471,
      "eval_val_loss": 1.4688091278076172,
      "eval_val_runtime": 33.5257,
      "eval_val_samples_per_second": 25.354,
      "eval_val_steps_per_second": 3.192,
      "step": 2
    },
    {
      "epoch": 0.01,
      "eval_val_alice_acc_stderr": 0.014948951634060766,
      "eval_val_alice_accuracy": 0.8914549653579676,
      "eval_val_alice_loss": 1.4036588668823242,
      "eval_val_alice_runtime": 16.9829,
      "eval_val_alice_samples_per_second": 25.496,
      "eval_val_alice_steps_per_second": 3.239,
      "step": 2
    },
    {
      "epoch": 0.01,
      "eval_val_bob_acc_stderr": 0.020979742126541488,
      "eval_val_bob_accuracy": 0.7577937649880095,
      "eval_val_bob_loss": 1.5361874103546143,
      "eval_val_bob_runtime": 17.0578,
      "eval_val_bob_samples_per_second": 24.446,
      "eval_val_bob_steps_per_second": 3.107,
      "step": 2
    },
    {
      "epoch": 0.01,
      "eval_val_bob_gt_acc_stderr": 0.015628947031815964,
      "eval_val_bob_gt_accuracy": 0.8848920863309353,
      "eval_val_bob_gt_loss": 1.4273743629455566,
      "eval_val_bob_gt_runtime": 17.0684,
      "eval_val_bob_gt_samples_per_second": 24.431,
      "eval_val_bob_gt_steps_per_second": 3.105,
      "step": 2
    },
    {
      "epoch": 0.01,
      "eval_val_acc_stderr": 0.012937023555103245,
      "eval_val_accuracy": 0.8282352941176471,
      "eval_val_loss": 1.469063401222229,
      "eval_val_runtime": 33.7146,
      "eval_val_samples_per_second": 25.212,
      "eval_val_steps_per_second": 3.174,
      "step": 4
    },
    {
      "epoch": 0.01,
      "eval_val_alice_acc_stderr": 0.014948951634060766,
      "eval_val_alice_accuracy": 0.8914549653579676,
      "eval_val_alice_loss": 1.4030324220657349,
      "eval_val_alice_runtime": 17.0492,
      "eval_val_alice_samples_per_second": 25.397,
      "eval_val_alice_steps_per_second": 3.226,
      "step": 4
    },
    {
      "epoch": 0.01,
      "eval_val_bob_acc_stderr": 0.020908628616120924,
      "eval_val_bob_accuracy": 0.7601918465227818,
      "eval_val_bob_loss": 1.5368480682373047,
      "eval_val_bob_runtime": 17.0771,
      "eval_val_bob_samples_per_second": 24.419,
      "eval_val_bob_steps_per_second": 3.104,
      "step": 4
    },
    {
      "epoch": 0.01,
      "eval_val_bob_gt_acc_stderr": 0.015486230123570196,
      "eval_val_bob_gt_accuracy": 0.8872901678657075,
      "eval_val_bob_gt_loss": 1.4271358251571655,
      "eval_val_bob_gt_runtime": 17.0847,
      "eval_val_bob_gt_samples_per_second": 24.408,
      "eval_val_bob_gt_steps_per_second": 3.102,
      "step": 4
    },
    {
      "epoch": 0.03,
      "eval_val_acc_stderr": 0.012901796011470488,
      "eval_val_accuracy": 0.8294117647058824,
      "eval_val_loss": 1.469651699066162,
      "eval_val_runtime": 33.6873,
      "eval_val_samples_per_second": 25.232,
      "eval_val_steps_per_second": 3.176,
      "step": 8
    },
    {
      "epoch": 0.03,
      "eval_val_alice_acc_stderr": 0.01480820963044188,
      "eval_val_alice_accuracy": 0.8937644341801386,
      "eval_val_alice_loss": 1.4056396484375,
      "eval_val_alice_runtime": 17.0533,
      "eval_val_alice_samples_per_second": 25.391,
      "eval_val_alice_steps_per_second": 3.225,
      "step": 8
    },
    {
      "epoch": 0.03,
      "eval_val_bob_acc_stderr": 0.02083661056330264,
      "eval_val_bob_accuracy": 0.762589928057554,
      "eval_val_bob_loss": 1.5359561443328857,
      "eval_val_bob_runtime": 17.0852,
      "eval_val_bob_samples_per_second": 24.407,
      "eval_val_bob_steps_per_second": 3.102,
      "step": 8
    },
    {
      "epoch": 0.03,
      "eval_val_bob_gt_acc_stderr": 0.015341286771526373,
      "eval_val_bob_gt_accuracy": 0.8896882494004796,
      "eval_val_bob_gt_loss": 1.4263936281204224,
      "eval_val_bob_gt_runtime": 17.0936,
      "eval_val_bob_gt_samples_per_second": 24.395,
      "eval_val_bob_gt_steps_per_second": 3.101,
      "step": 8
    },
    {
      "epoch": 0.05,
      "eval_val_acc_stderr": 0.01297202990834543,
      "eval_val_accuracy": 0.8270588235294117,
      "eval_val_loss": 1.467901587486267,
      "eval_val_runtime": 33.707,
      "eval_val_samples_per_second": 25.217,
      "eval_val_steps_per_second": 3.174,
      "step": 16
    },
    {
      "epoch": 0.05,
      "eval_val_alice_acc_stderr": 0.014948951634060766,
      "eval_val_alice_accuracy": 0.8914549653579676,
      "eval_val_alice_loss": 1.4035195112228394,
      "eval_val_alice_runtime": 17.0398,
      "eval_val_alice_samples_per_second": 25.411,
      "eval_val_alice_steps_per_second": 3.228,
      "step": 16
    },
    {
      "epoch": 0.05,
      "eval_val_bob_acc_stderr": 0.020908628616120924,
      "eval_val_bob_accuracy": 0.7601918465227818,
      "eval_val_bob_loss": 1.535442590713501,
      "eval_val_bob_runtime": 17.0913,
      "eval_val_bob_samples_per_second": 24.398,
      "eval_val_bob_steps_per_second": 3.101,
      "step": 16
    },
    {
      "epoch": 0.05,
      "eval_val_bob_gt_acc_stderr": 0.015486230123570196,
      "eval_val_bob_gt_accuracy": 0.8872901678657075,
      "eval_val_bob_gt_loss": 1.4257303476333618,
      "eval_val_bob_gt_runtime": 17.0928,
      "eval_val_bob_gt_samples_per_second": 24.396,
      "eval_val_bob_gt_steps_per_second": 3.101,
      "step": 16
    },
    {
      "epoch": 0.1,
      "eval_val_acc_stderr": 0.012937023555103245,
      "eval_val_accuracy": 0.8282352941176471,
      "eval_val_loss": 1.4608973264694214,
      "eval_val_runtime": 33.7015,
      "eval_val_samples_per_second": 25.221,
      "eval_val_steps_per_second": 3.175,
      "step": 32
    },
    {
      "epoch": 0.1,
      "eval_val_alice_acc_stderr": 0.014948951634060766,
      "eval_val_alice_accuracy": 0.8914549653579676,
      "eval_val_alice_loss": 1.396742343902588,
      "eval_val_alice_runtime": 17.0628,
      "eval_val_alice_samples_per_second": 25.377,
      "eval_val_alice_steps_per_second": 3.223,
      "step": 32
    },
    {
      "epoch": 0.1,
      "eval_val_bob_acc_stderr": 0.02083661056330264,
      "eval_val_bob_accuracy": 0.762589928057554,
      "eval_val_bob_loss": 1.5283899307250977,
      "eval_val_bob_runtime": 17.0927,
      "eval_val_bob_samples_per_second": 24.396,
      "eval_val_bob_steps_per_second": 3.101,
      "step": 32
    },
    {
      "epoch": 0.1,
      "eval_val_bob_gt_acc_stderr": 0.015341286771526373,
      "eval_val_bob_gt_accuracy": 0.8896882494004796,
      "eval_val_bob_gt_loss": 1.4182281494140625,
      "eval_val_bob_gt_runtime": 17.1142,
      "eval_val_bob_gt_samples_per_second": 24.366,
      "eval_val_bob_gt_steps_per_second": 3.097,
      "step": 32
    },
    {
      "epoch": 0.16,
      "learning_rate": 7.102272727272729e-07,
      "loss": 1.4159,
      "step": 50
    },
    {
      "epoch": 0.2,
      "eval_val_acc_stderr": 0.012937023555103245,
      "eval_val_accuracy": 0.8282352941176471,
      "eval_val_loss": 1.4041904211044312,
      "eval_val_runtime": 33.716,
      "eval_val_samples_per_second": 25.211,
      "eval_val_steps_per_second": 3.174,
      "step": 64
    },
    {
      "epoch": 0.2,
      "eval_val_alice_acc_stderr": 0.01480820963044188,
      "eval_val_alice_accuracy": 0.8937644341801386,
      "eval_val_alice_loss": 1.3375937938690186,
      "eval_val_alice_runtime": 17.0702,
      "eval_val_alice_samples_per_second": 25.366,
      "eval_val_alice_steps_per_second": 3.222,
      "step": 64
    },
    {
      "epoch": 0.2,
      "eval_val_bob_acc_stderr": 0.020908628616120924,
      "eval_val_bob_accuracy": 0.7601918465227818,
      "eval_val_bob_loss": 1.472985863685608,
      "eval_val_bob_runtime": 17.1363,
      "eval_val_bob_samples_per_second": 24.334,
      "eval_val_bob_steps_per_second": 3.093,
      "step": 64
    },
    {
      "epoch": 0.2,
      "eval_val_bob_gt_acc_stderr": 0.015194053258476493,
      "eval_val_bob_gt_accuracy": 0.8920863309352518,
      "eval_val_bob_gt_loss": 1.3589271306991577,
      "eval_val_bob_gt_runtime": 17.1528,
      "eval_val_bob_gt_samples_per_second": 24.311,
      "eval_val_bob_gt_steps_per_second": 3.09,
      "step": 64
    },
    {
      "epoch": 0.32,
      "learning_rate": 1.4204545454545458e-06,
      "loss": 1.2902,
      "step": 100
    },
    {
      "epoch": 0.41,
      "eval_val_acc_stderr": 0.013041386157171642,
      "eval_val_accuracy": 0.8247058823529412,
      "eval_val_loss": 0.819312572479248,
      "eval_val_runtime": 33.8241,
      "eval_val_samples_per_second": 25.13,
      "eval_val_steps_per_second": 3.163,
      "step": 128
    },
    {
      "epoch": 0.41,
      "eval_val_alice_acc_stderr": 0.014070292224264426,
      "eval_val_alice_accuracy": 0.9053117782909931,
      "eval_val_alice_loss": 0.73760586977005,
      "eval_val_alice_runtime": 17.1095,
      "eval_val_alice_samples_per_second": 25.308,
      "eval_val_alice_steps_per_second": 3.215,
      "step": 128
    },
    {
      "epoch": 0.41,
      "eval_val_bob_acc_stderr": 0.021322054283776665,
      "eval_val_bob_accuracy": 0.7458033573141487,
      "eval_val_bob_loss": 0.9024503231048584,
      "eval_val_bob_runtime": 17.1531,
      "eval_val_bob_samples_per_second": 24.311,
      "eval_val_bob_steps_per_second": 3.09,
      "step": 128
    },
    {
      "epoch": 0.41,
      "eval_val_bob_gt_acc_stderr": 0.015486230123570196,
      "eval_val_bob_gt_accuracy": 0.8872901678657075,
      "eval_val_bob_gt_loss": 0.7549682855606079,
      "eval_val_bob_gt_runtime": 17.1491,
      "eval_val_bob_gt_samples_per_second": 24.316,
      "eval_val_bob_gt_steps_per_second": 3.091,
      "step": 128
    },
    {
      "epoch": 0.48,
      "learning_rate": 2.1306818181818183e-06,
      "loss": 0.8289,
      "step": 150
    },
    {
      "epoch": 0.64,
      "learning_rate": 2.8409090909090916e-06,
      "loss": 0.412,
      "step": 200
    },
    {
      "epoch": 0.8,
      "learning_rate": 3.5511363636363636e-06,
      "loss": 0.3014,
      "step": 250
    },
    {
      "epoch": 0.82,
      "eval_val_acc_stderr": 0.010954525472743861,
      "eval_val_accuracy": 0.8847058823529412,
      "eval_val_loss": 0.30577030777931213,
      "eval_val_runtime": 33.8568,
      "eval_val_samples_per_second": 25.106,
      "eval_val_steps_per_second": 3.16,
      "step": 256
    },
    {
      "epoch": 0.82,
      "eval_val_alice_acc_stderr": 0.012572317234488177,
      "eval_val_alice_accuracy": 0.9260969976905312,
      "eval_val_alice_loss": 0.21393465995788574,
      "eval_val_alice_runtime": 17.1088,
      "eval_val_alice_samples_per_second": 25.309,
      "eval_val_alice_steps_per_second": 3.215,
      "step": 256
    },
    {
      "epoch": 0.82,
      "eval_val_bob_acc_stderr": 0.017983215186550393,
      "eval_val_bob_accuracy": 0.8393285371702638,
      "eval_val_bob_loss": 0.40229618549346924,
      "eval_val_bob_runtime": 17.1516,
      "eval_val_bob_samples_per_second": 24.313,
      "eval_val_bob_steps_per_second": 3.09,
      "step": 256
    },
    {
      "epoch": 0.82,
      "eval_val_bob_gt_acc_stderr": 0.012256027700828325,
      "eval_val_bob_gt_accuracy": 0.9328537170263789,
      "eval_val_bob_gt_loss": 0.20910073816776276,
      "eval_val_bob_gt_runtime": 17.1417,
      "eval_val_bob_gt_samples_per_second": 24.327,
      "eval_val_bob_gt_steps_per_second": 3.092,
      "step": 256
    },
    {
      "epoch": 0.96,
      "learning_rate": 4.2613636363636365e-06,
      "loss": 0.3062,
      "step": 300
    },
    {
      "epoch": 1.12,
      "learning_rate": 4.9715909090909094e-06,
      "loss": 0.2271,
      "step": 350
    },
    {
      "epoch": 1.28,
      "learning_rate": 5.681818181818183e-06,
      "loss": 0.2346,
      "step": 400
    },
    {
      "epoch": 1.44,
      "learning_rate": 6.392045454545454e-06,
      "loss": 0.2203,
      "step": 450
    },
    {
      "epoch": 1.6,
      "learning_rate": 7.102272727272727e-06,
      "loss": 0.1968,
      "step": 500
    },
    {
      "epoch": 1.64,
      "eval_val_acc_stderr": 0.008648647548423583,
      "eval_val_accuracy": 0.9317647058823529,
      "eval_val_loss": 0.18704643845558167,
      "eval_val_runtime": 33.6882,
      "eval_val_samples_per_second": 25.231,
      "eval_val_steps_per_second": 3.176,
      "step": 512
    },
    {
      "epoch": 1.64,
      "eval_val_alice_acc_stderr": 0.010323486896101533,
      "eval_val_alice_accuracy": 0.9515011547344111,
      "eval_val_alice_loss": 0.12077159434556961,
      "eval_val_alice_runtime": 17.0206,
      "eval_val_alice_samples_per_second": 25.44,
      "eval_val_alice_steps_per_second": 3.231,
      "step": 512
    },
    {
      "epoch": 1.64,
      "eval_val_bob_acc_stderr": 0.014093125547753297,
      "eval_val_bob_accuracy": 0.9088729016786571,
      "eval_val_bob_loss": 0.25665926933288574,
      "eval_val_bob_runtime": 17.0565,
      "eval_val_bob_samples_per_second": 24.448,
      "eval_val_bob_steps_per_second": 3.107,
      "step": 512
    },
    {
      "epoch": 1.64,
      "eval_val_bob_gt_acc_stderr": 0.01604432860757207,
      "eval_val_bob_gt_accuracy": 0.8776978417266187,
      "eval_val_bob_gt_loss": 0.31840986013412476,
      "eval_val_bob_gt_runtime": 17.0564,
      "eval_val_bob_gt_samples_per_second": 24.448,
      "eval_val_bob_gt_steps_per_second": 3.107,
      "step": 512
    },
    {
      "epoch": 1.76,
      "learning_rate": 7.8125e-06,
      "loss": 0.2178,
      "step": 550
    },
    {
      "epoch": 1.92,
      "learning_rate": 8.522727272727273e-06,
      "loss": 0.1919,
      "step": 600
    },
    {
      "epoch": 2.08,
      "learning_rate": 9.232954545454546e-06,
      "loss": 0.1539,
      "step": 650
    },
    {
      "epoch": 2.24,
      "learning_rate": 9.943181818181819e-06,
      "loss": 0.15,
      "step": 700
    },
    {
      "epoch": 2.4,
      "learning_rate": 1.0653409090909092e-05,
      "loss": 0.176,
      "step": 750
    },
    {
      "epoch": 2.55,
      "learning_rate": 1.1363636363636366e-05,
      "loss": 0.1293,
      "step": 800
    },
    {
      "epoch": 2.71,
      "learning_rate": 1.2073863636363636e-05,
      "loss": 0.1577,
      "step": 850
    },
    {
      "epoch": 2.87,
      "learning_rate": 1.2784090909090909e-05,
      "loss": 0.135,
      "step": 900
    },
    {
      "epoch": 3.03,
      "learning_rate": 1.3494318181818182e-05,
      "loss": 0.1246,
      "step": 950
    },
    {
      "epoch": 3.19,
      "learning_rate": 1.4204545454545455e-05,
      "loss": 0.0965,
      "step": 1000
    },
    {
      "epoch": 3.27,
      "eval_val_acc_stderr": 0.006907725389665332,
      "eval_val_accuracy": 0.9576470588235294,
      "eval_val_loss": 0.14458392560482025,
      "eval_val_runtime": 33.6709,
      "eval_val_samples_per_second": 25.244,
      "eval_val_steps_per_second": 3.178,
      "step": 1024
    },
    {
      "epoch": 3.27,
      "eval_val_alice_acc_stderr": 0.009333387164702351,
      "eval_val_alice_accuracy": 0.9607390300230947,
      "eval_val_alice_loss": 0.129893496632576,
      "eval_val_alice_runtime": 17.0239,
      "eval_val_alice_samples_per_second": 25.435,
      "eval_val_alice_steps_per_second": 3.231,
      "step": 1024
    },
    {
      "epoch": 3.27,
      "eval_val_bob_acc_stderr": 0.010212081074718785,
      "eval_val_bob_accuracy": 0.9544364508393285,
      "eval_val_bob_loss": 0.16120219230651855,
      "eval_val_bob_runtime": 17.0579,
      "eval_val_bob_samples_per_second": 24.446,
      "eval_val_bob_steps_per_second": 3.107,
      "step": 1024
    },
    {
      "epoch": 3.27,
      "eval_val_bob_gt_acc_stderr": 0.016441676917357297,
      "eval_val_bob_gt_accuracy": 0.8705035971223022,
      "eval_val_bob_gt_loss": 0.5936062932014465,
      "eval_val_bob_gt_runtime": 17.0652,
      "eval_val_bob_gt_samples_per_second": 24.436,
      "eval_val_bob_gt_steps_per_second": 3.106,
      "step": 1024
    },
    {
      "epoch": 3.35,
      "learning_rate": 1.4914772727272729e-05,
      "loss": 0.1054,
      "step": 1050
    },
    {
      "epoch": 3.51,
      "learning_rate": 1.5625e-05,
      "loss": 0.1164,
      "step": 1100
    },
    {
      "epoch": 3.67,
      "learning_rate": 1.6335227272727275e-05,
      "loss": 0.0701,
      "step": 1150
    },
    {
      "epoch": 3.83,
      "learning_rate": 1.7045454545454546e-05,
      "loss": 0.1118,
      "step": 1200
    },
    {
      "epoch": 3.99,
      "learning_rate": 1.775568181818182e-05,
      "loss": 0.088,
      "step": 1250
    },
    {
      "epoch": 4.15,
      "learning_rate": 1.8465909090909092e-05,
      "loss": 0.0578,
      "step": 1300
    },
    {
      "epoch": 4.31,
      "learning_rate": 1.9176136363636366e-05,
      "loss": 0.0737,
      "step": 1350
    },
    {
      "epoch": 4.47,
      "learning_rate": 1.9886363636363638e-05,
      "loss": 0.0777,
      "step": 1400
    },
    {
      "epoch": 4.63,
      "learning_rate": 1.9894763217238787e-05,
      "loss": 0.0562,
      "step": 1450
    },
    {
      "epoch": 4.79,
      "learning_rate": 1.976948133299925e-05,
      "loss": 0.0741,
      "step": 1500
    },
    {
      "epoch": 4.95,
      "learning_rate": 1.964419944875971e-05,
      "loss": 0.0504,
      "step": 1550
    },
    {
      "epoch": 5.11,
      "learning_rate": 1.951891756452017e-05,
      "loss": 0.0508,
      "step": 1600
    },
    {
      "epoch": 5.27,
      "learning_rate": 1.9393635680280633e-05,
      "loss": 0.0489,
      "step": 1650
    },
    {
      "epoch": 5.43,
      "learning_rate": 1.9268353796041094e-05,
      "loss": 0.0314,
      "step": 1700
    },
    {
      "epoch": 5.59,
      "learning_rate": 1.9143071911801552e-05,
      "loss": 0.041,
      "step": 1750
    },
    {
      "epoch": 5.75,
      "learning_rate": 1.9017790027562014e-05,
      "loss": 0.0348,
      "step": 1800
    },
    {
      "epoch": 5.91,
      "learning_rate": 1.8892508143322475e-05,
      "loss": 0.0404,
      "step": 1850
    },
    {
      "epoch": 6.07,
      "learning_rate": 1.8767226259082937e-05,
      "loss": 0.0406,
      "step": 1900
    },
    {
      "epoch": 6.23,
      "learning_rate": 1.8641944374843398e-05,
      "loss": 0.0287,
      "step": 1950
    },
    {
      "epoch": 6.39,
      "learning_rate": 1.851666249060386e-05,
      "loss": 0.0382,
      "step": 2000
    },
    {
      "epoch": 6.54,
      "eval_val_acc_stderr": 0.004801960383990247,
      "eval_val_accuracy": 0.98,
      "eval_val_loss": 0.11676687747240067,
      "eval_val_runtime": 33.5165,
      "eval_val_samples_per_second": 25.361,
      "eval_val_steps_per_second": 3.192,
      "step": 2048
    },
    {
      "epoch": 6.54,
      "eval_val_alice_acc_stderr": 0.008200955905749383,
      "eval_val_alice_accuracy": 0.9699769053117783,
      "eval_val_alice_loss": 0.17065930366516113,
      "eval_val_alice_runtime": 16.9426,
      "eval_val_alice_samples_per_second": 25.557,
      "eval_val_alice_steps_per_second": 3.246,
      "step": 2048
    },
    {
      "epoch": 6.54,
      "eval_val_bob_acc_stderr": 0.004138631085700285,
      "eval_val_bob_accuracy": 0.9928057553956835,
      "eval_val_bob_loss": 0.06056825444102287,
      "eval_val_bob_runtime": 16.9895,
      "eval_val_bob_samples_per_second": 24.545,
      "eval_val_bob_steps_per_second": 3.12,
      "step": 2048
    },
    {
      "epoch": 6.54,
      "eval_val_bob_gt_acc_stderr": 0.01787398723923547,
      "eval_val_bob_gt_accuracy": 0.841726618705036,
      "eval_val_bob_gt_loss": 1.0665634870529175,
      "eval_val_bob_gt_runtime": 16.987,
      "eval_val_bob_gt_samples_per_second": 24.548,
      "eval_val_bob_gt_steps_per_second": 3.12,
      "step": 2048
    }
  ],
  "logging_steps": 50,
  "max_steps": 9390,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 30,
  "save_steps": 10000000000,
  "total_flos": 7.4063386395648e+17,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}