{ "best_metric": 0.11676687747240067, "best_model_checkpoint": "output/single/quirky_sciq_raw/checkpoint-2048", "epoch": 6.540518962075848, "eval_steps": 10000000000, "global_step": 2048, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "eval_val_acc_stderr": 0.012937023555103245, "eval_val_accuracy": 0.8282352941176471, "eval_val_loss": 1.4692819118499756, "eval_val_runtime": 32.1876, "eval_val_samples_per_second": 26.408, "eval_val_steps_per_second": 3.324, "step": 1 }, { "epoch": 0.0, "eval_val_alice_acc_stderr": 0.01508756447132745, "eval_val_alice_accuracy": 0.8891454965357968, "eval_val_alice_loss": 1.4051198959350586, "eval_val_alice_runtime": 16.4546, "eval_val_alice_samples_per_second": 26.315, "eval_val_alice_steps_per_second": 3.343, "step": 1 }, { "epoch": 0.0, "eval_val_bob_acc_stderr": 0.02083661056330264, "eval_val_bob_accuracy": 0.762589928057554, "eval_val_bob_loss": 1.537455677986145, "eval_val_bob_runtime": 16.6246, "eval_val_bob_samples_per_second": 25.083, "eval_val_bob_steps_per_second": 3.188, "step": 1 }, { "epoch": 0.0, "eval_val_bob_gt_acc_stderr": 0.015341286771526373, "eval_val_bob_gt_accuracy": 0.8896882494004796, "eval_val_bob_gt_loss": 1.4268440008163452, "eval_val_bob_gt_runtime": 16.763, "eval_val_bob_gt_samples_per_second": 24.876, "eval_val_bob_gt_steps_per_second": 3.162, "step": 1 }, { "epoch": 0.01, "eval_val_acc_stderr": 0.012937023555103245, "eval_val_accuracy": 0.8282352941176471, "eval_val_loss": 1.4688091278076172, "eval_val_runtime": 33.5257, "eval_val_samples_per_second": 25.354, "eval_val_steps_per_second": 3.192, "step": 2 }, { "epoch": 0.01, "eval_val_alice_acc_stderr": 0.014948951634060766, "eval_val_alice_accuracy": 0.8914549653579676, "eval_val_alice_loss": 1.4036588668823242, "eval_val_alice_runtime": 16.9829, "eval_val_alice_samples_per_second": 25.496, "eval_val_alice_steps_per_second": 3.239, "step": 2 }, { "epoch": 0.01, "eval_val_bob_acc_stderr": 0.020979742126541488, "eval_val_bob_accuracy": 0.7577937649880095, "eval_val_bob_loss": 1.5361874103546143, "eval_val_bob_runtime": 17.0578, "eval_val_bob_samples_per_second": 24.446, "eval_val_bob_steps_per_second": 3.107, "step": 2 }, { "epoch": 0.01, "eval_val_bob_gt_acc_stderr": 0.015628947031815964, "eval_val_bob_gt_accuracy": 0.8848920863309353, "eval_val_bob_gt_loss": 1.4273743629455566, "eval_val_bob_gt_runtime": 17.0684, "eval_val_bob_gt_samples_per_second": 24.431, "eval_val_bob_gt_steps_per_second": 3.105, "step": 2 }, { "epoch": 0.01, "eval_val_acc_stderr": 0.012937023555103245, "eval_val_accuracy": 0.8282352941176471, "eval_val_loss": 1.469063401222229, "eval_val_runtime": 33.7146, "eval_val_samples_per_second": 25.212, "eval_val_steps_per_second": 3.174, "step": 4 }, { "epoch": 0.01, "eval_val_alice_acc_stderr": 0.014948951634060766, "eval_val_alice_accuracy": 0.8914549653579676, "eval_val_alice_loss": 1.4030324220657349, "eval_val_alice_runtime": 17.0492, "eval_val_alice_samples_per_second": 25.397, "eval_val_alice_steps_per_second": 3.226, "step": 4 }, { "epoch": 0.01, "eval_val_bob_acc_stderr": 0.020908628616120924, "eval_val_bob_accuracy": 0.7601918465227818, "eval_val_bob_loss": 1.5368480682373047, "eval_val_bob_runtime": 17.0771, "eval_val_bob_samples_per_second": 24.419, "eval_val_bob_steps_per_second": 3.104, "step": 4 }, { "epoch": 0.01, "eval_val_bob_gt_acc_stderr": 0.015486230123570196, "eval_val_bob_gt_accuracy": 0.8872901678657075, "eval_val_bob_gt_loss": 1.4271358251571655, "eval_val_bob_gt_runtime": 17.0847, "eval_val_bob_gt_samples_per_second": 24.408, "eval_val_bob_gt_steps_per_second": 3.102, "step": 4 }, { "epoch": 0.03, "eval_val_acc_stderr": 0.012901796011470488, "eval_val_accuracy": 0.8294117647058824, "eval_val_loss": 1.469651699066162, "eval_val_runtime": 33.6873, "eval_val_samples_per_second": 25.232, "eval_val_steps_per_second": 3.176, "step": 8 }, { "epoch": 0.03, "eval_val_alice_acc_stderr": 0.01480820963044188, "eval_val_alice_accuracy": 0.8937644341801386, "eval_val_alice_loss": 1.4056396484375, "eval_val_alice_runtime": 17.0533, "eval_val_alice_samples_per_second": 25.391, "eval_val_alice_steps_per_second": 3.225, "step": 8 }, { "epoch": 0.03, "eval_val_bob_acc_stderr": 0.02083661056330264, "eval_val_bob_accuracy": 0.762589928057554, "eval_val_bob_loss": 1.5359561443328857, "eval_val_bob_runtime": 17.0852, "eval_val_bob_samples_per_second": 24.407, "eval_val_bob_steps_per_second": 3.102, "step": 8 }, { "epoch": 0.03, "eval_val_bob_gt_acc_stderr": 0.015341286771526373, "eval_val_bob_gt_accuracy": 0.8896882494004796, "eval_val_bob_gt_loss": 1.4263936281204224, "eval_val_bob_gt_runtime": 17.0936, "eval_val_bob_gt_samples_per_second": 24.395, "eval_val_bob_gt_steps_per_second": 3.101, "step": 8 }, { "epoch": 0.05, "eval_val_acc_stderr": 0.01297202990834543, "eval_val_accuracy": 0.8270588235294117, "eval_val_loss": 1.467901587486267, "eval_val_runtime": 33.707, "eval_val_samples_per_second": 25.217, "eval_val_steps_per_second": 3.174, "step": 16 }, { "epoch": 0.05, "eval_val_alice_acc_stderr": 0.014948951634060766, "eval_val_alice_accuracy": 0.8914549653579676, "eval_val_alice_loss": 1.4035195112228394, "eval_val_alice_runtime": 17.0398, "eval_val_alice_samples_per_second": 25.411, "eval_val_alice_steps_per_second": 3.228, "step": 16 }, { "epoch": 0.05, "eval_val_bob_acc_stderr": 0.020908628616120924, "eval_val_bob_accuracy": 0.7601918465227818, "eval_val_bob_loss": 1.535442590713501, "eval_val_bob_runtime": 17.0913, "eval_val_bob_samples_per_second": 24.398, "eval_val_bob_steps_per_second": 3.101, "step": 16 }, { "epoch": 0.05, "eval_val_bob_gt_acc_stderr": 0.015486230123570196, "eval_val_bob_gt_accuracy": 0.8872901678657075, "eval_val_bob_gt_loss": 1.4257303476333618, "eval_val_bob_gt_runtime": 17.0928, "eval_val_bob_gt_samples_per_second": 24.396, "eval_val_bob_gt_steps_per_second": 3.101, "step": 16 }, { "epoch": 0.1, "eval_val_acc_stderr": 0.012937023555103245, "eval_val_accuracy": 0.8282352941176471, "eval_val_loss": 1.4608973264694214, "eval_val_runtime": 33.7015, "eval_val_samples_per_second": 25.221, "eval_val_steps_per_second": 3.175, "step": 32 }, { "epoch": 0.1, "eval_val_alice_acc_stderr": 0.014948951634060766, "eval_val_alice_accuracy": 0.8914549653579676, "eval_val_alice_loss": 1.396742343902588, "eval_val_alice_runtime": 17.0628, "eval_val_alice_samples_per_second": 25.377, "eval_val_alice_steps_per_second": 3.223, "step": 32 }, { "epoch": 0.1, "eval_val_bob_acc_stderr": 0.02083661056330264, "eval_val_bob_accuracy": 0.762589928057554, "eval_val_bob_loss": 1.5283899307250977, "eval_val_bob_runtime": 17.0927, "eval_val_bob_samples_per_second": 24.396, "eval_val_bob_steps_per_second": 3.101, "step": 32 }, { "epoch": 0.1, "eval_val_bob_gt_acc_stderr": 0.015341286771526373, "eval_val_bob_gt_accuracy": 0.8896882494004796, "eval_val_bob_gt_loss": 1.4182281494140625, "eval_val_bob_gt_runtime": 17.1142, "eval_val_bob_gt_samples_per_second": 24.366, "eval_val_bob_gt_steps_per_second": 3.097, "step": 32 }, { "epoch": 0.16, "learning_rate": 7.102272727272729e-07, "loss": 1.4159, "step": 50 }, { "epoch": 0.2, "eval_val_acc_stderr": 0.012937023555103245, "eval_val_accuracy": 0.8282352941176471, "eval_val_loss": 1.4041904211044312, "eval_val_runtime": 33.716, "eval_val_samples_per_second": 25.211, "eval_val_steps_per_second": 3.174, "step": 64 }, { "epoch": 0.2, "eval_val_alice_acc_stderr": 0.01480820963044188, "eval_val_alice_accuracy": 0.8937644341801386, "eval_val_alice_loss": 1.3375937938690186, "eval_val_alice_runtime": 17.0702, "eval_val_alice_samples_per_second": 25.366, "eval_val_alice_steps_per_second": 3.222, "step": 64 }, { "epoch": 0.2, "eval_val_bob_acc_stderr": 0.020908628616120924, "eval_val_bob_accuracy": 0.7601918465227818, "eval_val_bob_loss": 1.472985863685608, "eval_val_bob_runtime": 17.1363, "eval_val_bob_samples_per_second": 24.334, "eval_val_bob_steps_per_second": 3.093, "step": 64 }, { "epoch": 0.2, "eval_val_bob_gt_acc_stderr": 0.015194053258476493, "eval_val_bob_gt_accuracy": 0.8920863309352518, "eval_val_bob_gt_loss": 1.3589271306991577, "eval_val_bob_gt_runtime": 17.1528, "eval_val_bob_gt_samples_per_second": 24.311, "eval_val_bob_gt_steps_per_second": 3.09, "step": 64 }, { "epoch": 0.32, "learning_rate": 1.4204545454545458e-06, "loss": 1.2902, "step": 100 }, { "epoch": 0.41, "eval_val_acc_stderr": 0.013041386157171642, "eval_val_accuracy": 0.8247058823529412, "eval_val_loss": 0.819312572479248, "eval_val_runtime": 33.8241, "eval_val_samples_per_second": 25.13, "eval_val_steps_per_second": 3.163, "step": 128 }, { "epoch": 0.41, "eval_val_alice_acc_stderr": 0.014070292224264426, "eval_val_alice_accuracy": 0.9053117782909931, "eval_val_alice_loss": 0.73760586977005, "eval_val_alice_runtime": 17.1095, "eval_val_alice_samples_per_second": 25.308, "eval_val_alice_steps_per_second": 3.215, "step": 128 }, { "epoch": 0.41, "eval_val_bob_acc_stderr": 0.021322054283776665, "eval_val_bob_accuracy": 0.7458033573141487, "eval_val_bob_loss": 0.9024503231048584, "eval_val_bob_runtime": 17.1531, "eval_val_bob_samples_per_second": 24.311, "eval_val_bob_steps_per_second": 3.09, "step": 128 }, { "epoch": 0.41, "eval_val_bob_gt_acc_stderr": 0.015486230123570196, "eval_val_bob_gt_accuracy": 0.8872901678657075, "eval_val_bob_gt_loss": 0.7549682855606079, "eval_val_bob_gt_runtime": 17.1491, "eval_val_bob_gt_samples_per_second": 24.316, "eval_val_bob_gt_steps_per_second": 3.091, "step": 128 }, { "epoch": 0.48, "learning_rate": 2.1306818181818183e-06, "loss": 0.8289, "step": 150 }, { "epoch": 0.64, "learning_rate": 2.8409090909090916e-06, "loss": 0.412, "step": 200 }, { "epoch": 0.8, "learning_rate": 3.5511363636363636e-06, "loss": 0.3014, "step": 250 }, { "epoch": 0.82, "eval_val_acc_stderr": 0.010954525472743861, "eval_val_accuracy": 0.8847058823529412, "eval_val_loss": 0.30577030777931213, "eval_val_runtime": 33.8568, "eval_val_samples_per_second": 25.106, "eval_val_steps_per_second": 3.16, "step": 256 }, { "epoch": 0.82, "eval_val_alice_acc_stderr": 0.012572317234488177, "eval_val_alice_accuracy": 0.9260969976905312, "eval_val_alice_loss": 0.21393465995788574, "eval_val_alice_runtime": 17.1088, "eval_val_alice_samples_per_second": 25.309, "eval_val_alice_steps_per_second": 3.215, "step": 256 }, { "epoch": 0.82, "eval_val_bob_acc_stderr": 0.017983215186550393, "eval_val_bob_accuracy": 0.8393285371702638, "eval_val_bob_loss": 0.40229618549346924, "eval_val_bob_runtime": 17.1516, "eval_val_bob_samples_per_second": 24.313, "eval_val_bob_steps_per_second": 3.09, "step": 256 }, { "epoch": 0.82, "eval_val_bob_gt_acc_stderr": 0.012256027700828325, "eval_val_bob_gt_accuracy": 0.9328537170263789, "eval_val_bob_gt_loss": 0.20910073816776276, "eval_val_bob_gt_runtime": 17.1417, "eval_val_bob_gt_samples_per_second": 24.327, "eval_val_bob_gt_steps_per_second": 3.092, "step": 256 }, { "epoch": 0.96, "learning_rate": 4.2613636363636365e-06, "loss": 0.3062, "step": 300 }, { "epoch": 1.12, "learning_rate": 4.9715909090909094e-06, "loss": 0.2271, "step": 350 }, { "epoch": 1.28, "learning_rate": 5.681818181818183e-06, "loss": 0.2346, "step": 400 }, { "epoch": 1.44, "learning_rate": 6.392045454545454e-06, "loss": 0.2203, "step": 450 }, { "epoch": 1.6, "learning_rate": 7.102272727272727e-06, "loss": 0.1968, "step": 500 }, { "epoch": 1.64, "eval_val_acc_stderr": 0.008648647548423583, "eval_val_accuracy": 0.9317647058823529, "eval_val_loss": 0.18704643845558167, "eval_val_runtime": 33.6882, "eval_val_samples_per_second": 25.231, "eval_val_steps_per_second": 3.176, "step": 512 }, { "epoch": 1.64, "eval_val_alice_acc_stderr": 0.010323486896101533, "eval_val_alice_accuracy": 0.9515011547344111, "eval_val_alice_loss": 0.12077159434556961, "eval_val_alice_runtime": 17.0206, "eval_val_alice_samples_per_second": 25.44, "eval_val_alice_steps_per_second": 3.231, "step": 512 }, { "epoch": 1.64, "eval_val_bob_acc_stderr": 0.014093125547753297, "eval_val_bob_accuracy": 0.9088729016786571, "eval_val_bob_loss": 0.25665926933288574, "eval_val_bob_runtime": 17.0565, "eval_val_bob_samples_per_second": 24.448, "eval_val_bob_steps_per_second": 3.107, "step": 512 }, { "epoch": 1.64, "eval_val_bob_gt_acc_stderr": 0.01604432860757207, "eval_val_bob_gt_accuracy": 0.8776978417266187, "eval_val_bob_gt_loss": 0.31840986013412476, "eval_val_bob_gt_runtime": 17.0564, "eval_val_bob_gt_samples_per_second": 24.448, "eval_val_bob_gt_steps_per_second": 3.107, "step": 512 }, { "epoch": 1.76, "learning_rate": 7.8125e-06, "loss": 0.2178, "step": 550 }, { "epoch": 1.92, "learning_rate": 8.522727272727273e-06, "loss": 0.1919, "step": 600 }, { "epoch": 2.08, "learning_rate": 9.232954545454546e-06, "loss": 0.1539, "step": 650 }, { "epoch": 2.24, "learning_rate": 9.943181818181819e-06, "loss": 0.15, "step": 700 }, { "epoch": 2.4, "learning_rate": 1.0653409090909092e-05, "loss": 0.176, "step": 750 }, { "epoch": 2.55, "learning_rate": 1.1363636363636366e-05, "loss": 0.1293, "step": 800 }, { "epoch": 2.71, "learning_rate": 1.2073863636363636e-05, "loss": 0.1577, "step": 850 }, { "epoch": 2.87, "learning_rate": 1.2784090909090909e-05, "loss": 0.135, "step": 900 }, { "epoch": 3.03, "learning_rate": 1.3494318181818182e-05, "loss": 0.1246, "step": 950 }, { "epoch": 3.19, "learning_rate": 1.4204545454545455e-05, "loss": 0.0965, "step": 1000 }, { "epoch": 3.27, "eval_val_acc_stderr": 0.006907725389665332, "eval_val_accuracy": 0.9576470588235294, "eval_val_loss": 0.14458392560482025, "eval_val_runtime": 33.6709, "eval_val_samples_per_second": 25.244, "eval_val_steps_per_second": 3.178, "step": 1024 }, { "epoch": 3.27, "eval_val_alice_acc_stderr": 0.009333387164702351, "eval_val_alice_accuracy": 0.9607390300230947, "eval_val_alice_loss": 0.129893496632576, "eval_val_alice_runtime": 17.0239, "eval_val_alice_samples_per_second": 25.435, "eval_val_alice_steps_per_second": 3.231, "step": 1024 }, { "epoch": 3.27, "eval_val_bob_acc_stderr": 0.010212081074718785, "eval_val_bob_accuracy": 0.9544364508393285, "eval_val_bob_loss": 0.16120219230651855, "eval_val_bob_runtime": 17.0579, "eval_val_bob_samples_per_second": 24.446, "eval_val_bob_steps_per_second": 3.107, "step": 1024 }, { "epoch": 3.27, "eval_val_bob_gt_acc_stderr": 0.016441676917357297, "eval_val_bob_gt_accuracy": 0.8705035971223022, "eval_val_bob_gt_loss": 0.5936062932014465, "eval_val_bob_gt_runtime": 17.0652, "eval_val_bob_gt_samples_per_second": 24.436, "eval_val_bob_gt_steps_per_second": 3.106, "step": 1024 }, { "epoch": 3.35, "learning_rate": 1.4914772727272729e-05, "loss": 0.1054, "step": 1050 }, { "epoch": 3.51, "learning_rate": 1.5625e-05, "loss": 0.1164, "step": 1100 }, { "epoch": 3.67, "learning_rate": 1.6335227272727275e-05, "loss": 0.0701, "step": 1150 }, { "epoch": 3.83, "learning_rate": 1.7045454545454546e-05, "loss": 0.1118, "step": 1200 }, { "epoch": 3.99, "learning_rate": 1.775568181818182e-05, "loss": 0.088, "step": 1250 }, { "epoch": 4.15, "learning_rate": 1.8465909090909092e-05, "loss": 0.0578, "step": 1300 }, { "epoch": 4.31, "learning_rate": 1.9176136363636366e-05, "loss": 0.0737, "step": 1350 }, { "epoch": 4.47, "learning_rate": 1.9886363636363638e-05, "loss": 0.0777, "step": 1400 }, { "epoch": 4.63, "learning_rate": 1.9894763217238787e-05, "loss": 0.0562, "step": 1450 }, { "epoch": 4.79, "learning_rate": 1.976948133299925e-05, "loss": 0.0741, "step": 1500 }, { "epoch": 4.95, "learning_rate": 1.964419944875971e-05, "loss": 0.0504, "step": 1550 }, { "epoch": 5.11, "learning_rate": 1.951891756452017e-05, "loss": 0.0508, "step": 1600 }, { "epoch": 5.27, "learning_rate": 1.9393635680280633e-05, "loss": 0.0489, "step": 1650 }, { "epoch": 5.43, "learning_rate": 1.9268353796041094e-05, "loss": 0.0314, "step": 1700 }, { "epoch": 5.59, "learning_rate": 1.9143071911801552e-05, "loss": 0.041, "step": 1750 }, { "epoch": 5.75, "learning_rate": 1.9017790027562014e-05, "loss": 0.0348, "step": 1800 }, { "epoch": 5.91, "learning_rate": 1.8892508143322475e-05, "loss": 0.0404, "step": 1850 }, { "epoch": 6.07, "learning_rate": 1.8767226259082937e-05, "loss": 0.0406, "step": 1900 }, { "epoch": 6.23, "learning_rate": 1.8641944374843398e-05, "loss": 0.0287, "step": 1950 }, { "epoch": 6.39, "learning_rate": 1.851666249060386e-05, "loss": 0.0382, "step": 2000 }, { "epoch": 6.54, "eval_val_acc_stderr": 0.004801960383990247, "eval_val_accuracy": 0.98, "eval_val_loss": 0.11676687747240067, "eval_val_runtime": 33.5165, "eval_val_samples_per_second": 25.361, "eval_val_steps_per_second": 3.192, "step": 2048 }, { "epoch": 6.54, "eval_val_alice_acc_stderr": 0.008200955905749383, "eval_val_alice_accuracy": 0.9699769053117783, "eval_val_alice_loss": 0.17065930366516113, "eval_val_alice_runtime": 16.9426, "eval_val_alice_samples_per_second": 25.557, "eval_val_alice_steps_per_second": 3.246, "step": 2048 }, { "epoch": 6.54, "eval_val_bob_acc_stderr": 0.004138631085700285, "eval_val_bob_accuracy": 0.9928057553956835, "eval_val_bob_loss": 0.06056825444102287, "eval_val_bob_runtime": 16.9895, "eval_val_bob_samples_per_second": 24.545, "eval_val_bob_steps_per_second": 3.12, "step": 2048 }, { "epoch": 6.54, "eval_val_bob_gt_acc_stderr": 0.01787398723923547, "eval_val_bob_gt_accuracy": 0.841726618705036, "eval_val_bob_gt_loss": 1.0665634870529175, "eval_val_bob_gt_runtime": 16.987, "eval_val_bob_gt_samples_per_second": 24.548, "eval_val_bob_gt_steps_per_second": 3.12, "step": 2048 } ], "logging_steps": 50, "max_steps": 9390, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 10000000000, "total_flos": 7.4063386395648e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }