ejenner's picture
Upload folder using huggingface_hub
ca7bd2f verified
{
"best_metric": 0.11676687747240067,
"best_model_checkpoint": "output/single/quirky_sciq_raw/checkpoint-2048",
"epoch": 6.540518962075848,
"eval_steps": 10000000000,
"global_step": 2048,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"eval_val_acc_stderr": 0.012937023555103245,
"eval_val_accuracy": 0.8282352941176471,
"eval_val_loss": 1.4692819118499756,
"eval_val_runtime": 32.1876,
"eval_val_samples_per_second": 26.408,
"eval_val_steps_per_second": 3.324,
"step": 1
},
{
"epoch": 0.0,
"eval_val_alice_acc_stderr": 0.01508756447132745,
"eval_val_alice_accuracy": 0.8891454965357968,
"eval_val_alice_loss": 1.4051198959350586,
"eval_val_alice_runtime": 16.4546,
"eval_val_alice_samples_per_second": 26.315,
"eval_val_alice_steps_per_second": 3.343,
"step": 1
},
{
"epoch": 0.0,
"eval_val_bob_acc_stderr": 0.02083661056330264,
"eval_val_bob_accuracy": 0.762589928057554,
"eval_val_bob_loss": 1.537455677986145,
"eval_val_bob_runtime": 16.6246,
"eval_val_bob_samples_per_second": 25.083,
"eval_val_bob_steps_per_second": 3.188,
"step": 1
},
{
"epoch": 0.0,
"eval_val_bob_gt_acc_stderr": 0.015341286771526373,
"eval_val_bob_gt_accuracy": 0.8896882494004796,
"eval_val_bob_gt_loss": 1.4268440008163452,
"eval_val_bob_gt_runtime": 16.763,
"eval_val_bob_gt_samples_per_second": 24.876,
"eval_val_bob_gt_steps_per_second": 3.162,
"step": 1
},
{
"epoch": 0.01,
"eval_val_acc_stderr": 0.012937023555103245,
"eval_val_accuracy": 0.8282352941176471,
"eval_val_loss": 1.4688091278076172,
"eval_val_runtime": 33.5257,
"eval_val_samples_per_second": 25.354,
"eval_val_steps_per_second": 3.192,
"step": 2
},
{
"epoch": 0.01,
"eval_val_alice_acc_stderr": 0.014948951634060766,
"eval_val_alice_accuracy": 0.8914549653579676,
"eval_val_alice_loss": 1.4036588668823242,
"eval_val_alice_runtime": 16.9829,
"eval_val_alice_samples_per_second": 25.496,
"eval_val_alice_steps_per_second": 3.239,
"step": 2
},
{
"epoch": 0.01,
"eval_val_bob_acc_stderr": 0.020979742126541488,
"eval_val_bob_accuracy": 0.7577937649880095,
"eval_val_bob_loss": 1.5361874103546143,
"eval_val_bob_runtime": 17.0578,
"eval_val_bob_samples_per_second": 24.446,
"eval_val_bob_steps_per_second": 3.107,
"step": 2
},
{
"epoch": 0.01,
"eval_val_bob_gt_acc_stderr": 0.015628947031815964,
"eval_val_bob_gt_accuracy": 0.8848920863309353,
"eval_val_bob_gt_loss": 1.4273743629455566,
"eval_val_bob_gt_runtime": 17.0684,
"eval_val_bob_gt_samples_per_second": 24.431,
"eval_val_bob_gt_steps_per_second": 3.105,
"step": 2
},
{
"epoch": 0.01,
"eval_val_acc_stderr": 0.012937023555103245,
"eval_val_accuracy": 0.8282352941176471,
"eval_val_loss": 1.469063401222229,
"eval_val_runtime": 33.7146,
"eval_val_samples_per_second": 25.212,
"eval_val_steps_per_second": 3.174,
"step": 4
},
{
"epoch": 0.01,
"eval_val_alice_acc_stderr": 0.014948951634060766,
"eval_val_alice_accuracy": 0.8914549653579676,
"eval_val_alice_loss": 1.4030324220657349,
"eval_val_alice_runtime": 17.0492,
"eval_val_alice_samples_per_second": 25.397,
"eval_val_alice_steps_per_second": 3.226,
"step": 4
},
{
"epoch": 0.01,
"eval_val_bob_acc_stderr": 0.020908628616120924,
"eval_val_bob_accuracy": 0.7601918465227818,
"eval_val_bob_loss": 1.5368480682373047,
"eval_val_bob_runtime": 17.0771,
"eval_val_bob_samples_per_second": 24.419,
"eval_val_bob_steps_per_second": 3.104,
"step": 4
},
{
"epoch": 0.01,
"eval_val_bob_gt_acc_stderr": 0.015486230123570196,
"eval_val_bob_gt_accuracy": 0.8872901678657075,
"eval_val_bob_gt_loss": 1.4271358251571655,
"eval_val_bob_gt_runtime": 17.0847,
"eval_val_bob_gt_samples_per_second": 24.408,
"eval_val_bob_gt_steps_per_second": 3.102,
"step": 4
},
{
"epoch": 0.03,
"eval_val_acc_stderr": 0.012901796011470488,
"eval_val_accuracy": 0.8294117647058824,
"eval_val_loss": 1.469651699066162,
"eval_val_runtime": 33.6873,
"eval_val_samples_per_second": 25.232,
"eval_val_steps_per_second": 3.176,
"step": 8
},
{
"epoch": 0.03,
"eval_val_alice_acc_stderr": 0.01480820963044188,
"eval_val_alice_accuracy": 0.8937644341801386,
"eval_val_alice_loss": 1.4056396484375,
"eval_val_alice_runtime": 17.0533,
"eval_val_alice_samples_per_second": 25.391,
"eval_val_alice_steps_per_second": 3.225,
"step": 8
},
{
"epoch": 0.03,
"eval_val_bob_acc_stderr": 0.02083661056330264,
"eval_val_bob_accuracy": 0.762589928057554,
"eval_val_bob_loss": 1.5359561443328857,
"eval_val_bob_runtime": 17.0852,
"eval_val_bob_samples_per_second": 24.407,
"eval_val_bob_steps_per_second": 3.102,
"step": 8
},
{
"epoch": 0.03,
"eval_val_bob_gt_acc_stderr": 0.015341286771526373,
"eval_val_bob_gt_accuracy": 0.8896882494004796,
"eval_val_bob_gt_loss": 1.4263936281204224,
"eval_val_bob_gt_runtime": 17.0936,
"eval_val_bob_gt_samples_per_second": 24.395,
"eval_val_bob_gt_steps_per_second": 3.101,
"step": 8
},
{
"epoch": 0.05,
"eval_val_acc_stderr": 0.01297202990834543,
"eval_val_accuracy": 0.8270588235294117,
"eval_val_loss": 1.467901587486267,
"eval_val_runtime": 33.707,
"eval_val_samples_per_second": 25.217,
"eval_val_steps_per_second": 3.174,
"step": 16
},
{
"epoch": 0.05,
"eval_val_alice_acc_stderr": 0.014948951634060766,
"eval_val_alice_accuracy": 0.8914549653579676,
"eval_val_alice_loss": 1.4035195112228394,
"eval_val_alice_runtime": 17.0398,
"eval_val_alice_samples_per_second": 25.411,
"eval_val_alice_steps_per_second": 3.228,
"step": 16
},
{
"epoch": 0.05,
"eval_val_bob_acc_stderr": 0.020908628616120924,
"eval_val_bob_accuracy": 0.7601918465227818,
"eval_val_bob_loss": 1.535442590713501,
"eval_val_bob_runtime": 17.0913,
"eval_val_bob_samples_per_second": 24.398,
"eval_val_bob_steps_per_second": 3.101,
"step": 16
},
{
"epoch": 0.05,
"eval_val_bob_gt_acc_stderr": 0.015486230123570196,
"eval_val_bob_gt_accuracy": 0.8872901678657075,
"eval_val_bob_gt_loss": 1.4257303476333618,
"eval_val_bob_gt_runtime": 17.0928,
"eval_val_bob_gt_samples_per_second": 24.396,
"eval_val_bob_gt_steps_per_second": 3.101,
"step": 16
},
{
"epoch": 0.1,
"eval_val_acc_stderr": 0.012937023555103245,
"eval_val_accuracy": 0.8282352941176471,
"eval_val_loss": 1.4608973264694214,
"eval_val_runtime": 33.7015,
"eval_val_samples_per_second": 25.221,
"eval_val_steps_per_second": 3.175,
"step": 32
},
{
"epoch": 0.1,
"eval_val_alice_acc_stderr": 0.014948951634060766,
"eval_val_alice_accuracy": 0.8914549653579676,
"eval_val_alice_loss": 1.396742343902588,
"eval_val_alice_runtime": 17.0628,
"eval_val_alice_samples_per_second": 25.377,
"eval_val_alice_steps_per_second": 3.223,
"step": 32
},
{
"epoch": 0.1,
"eval_val_bob_acc_stderr": 0.02083661056330264,
"eval_val_bob_accuracy": 0.762589928057554,
"eval_val_bob_loss": 1.5283899307250977,
"eval_val_bob_runtime": 17.0927,
"eval_val_bob_samples_per_second": 24.396,
"eval_val_bob_steps_per_second": 3.101,
"step": 32
},
{
"epoch": 0.1,
"eval_val_bob_gt_acc_stderr": 0.015341286771526373,
"eval_val_bob_gt_accuracy": 0.8896882494004796,
"eval_val_bob_gt_loss": 1.4182281494140625,
"eval_val_bob_gt_runtime": 17.1142,
"eval_val_bob_gt_samples_per_second": 24.366,
"eval_val_bob_gt_steps_per_second": 3.097,
"step": 32
},
{
"epoch": 0.16,
"learning_rate": 7.102272727272729e-07,
"loss": 1.4159,
"step": 50
},
{
"epoch": 0.2,
"eval_val_acc_stderr": 0.012937023555103245,
"eval_val_accuracy": 0.8282352941176471,
"eval_val_loss": 1.4041904211044312,
"eval_val_runtime": 33.716,
"eval_val_samples_per_second": 25.211,
"eval_val_steps_per_second": 3.174,
"step": 64
},
{
"epoch": 0.2,
"eval_val_alice_acc_stderr": 0.01480820963044188,
"eval_val_alice_accuracy": 0.8937644341801386,
"eval_val_alice_loss": 1.3375937938690186,
"eval_val_alice_runtime": 17.0702,
"eval_val_alice_samples_per_second": 25.366,
"eval_val_alice_steps_per_second": 3.222,
"step": 64
},
{
"epoch": 0.2,
"eval_val_bob_acc_stderr": 0.020908628616120924,
"eval_val_bob_accuracy": 0.7601918465227818,
"eval_val_bob_loss": 1.472985863685608,
"eval_val_bob_runtime": 17.1363,
"eval_val_bob_samples_per_second": 24.334,
"eval_val_bob_steps_per_second": 3.093,
"step": 64
},
{
"epoch": 0.2,
"eval_val_bob_gt_acc_stderr": 0.015194053258476493,
"eval_val_bob_gt_accuracy": 0.8920863309352518,
"eval_val_bob_gt_loss": 1.3589271306991577,
"eval_val_bob_gt_runtime": 17.1528,
"eval_val_bob_gt_samples_per_second": 24.311,
"eval_val_bob_gt_steps_per_second": 3.09,
"step": 64
},
{
"epoch": 0.32,
"learning_rate": 1.4204545454545458e-06,
"loss": 1.2902,
"step": 100
},
{
"epoch": 0.41,
"eval_val_acc_stderr": 0.013041386157171642,
"eval_val_accuracy": 0.8247058823529412,
"eval_val_loss": 0.819312572479248,
"eval_val_runtime": 33.8241,
"eval_val_samples_per_second": 25.13,
"eval_val_steps_per_second": 3.163,
"step": 128
},
{
"epoch": 0.41,
"eval_val_alice_acc_stderr": 0.014070292224264426,
"eval_val_alice_accuracy": 0.9053117782909931,
"eval_val_alice_loss": 0.73760586977005,
"eval_val_alice_runtime": 17.1095,
"eval_val_alice_samples_per_second": 25.308,
"eval_val_alice_steps_per_second": 3.215,
"step": 128
},
{
"epoch": 0.41,
"eval_val_bob_acc_stderr": 0.021322054283776665,
"eval_val_bob_accuracy": 0.7458033573141487,
"eval_val_bob_loss": 0.9024503231048584,
"eval_val_bob_runtime": 17.1531,
"eval_val_bob_samples_per_second": 24.311,
"eval_val_bob_steps_per_second": 3.09,
"step": 128
},
{
"epoch": 0.41,
"eval_val_bob_gt_acc_stderr": 0.015486230123570196,
"eval_val_bob_gt_accuracy": 0.8872901678657075,
"eval_val_bob_gt_loss": 0.7549682855606079,
"eval_val_bob_gt_runtime": 17.1491,
"eval_val_bob_gt_samples_per_second": 24.316,
"eval_val_bob_gt_steps_per_second": 3.091,
"step": 128
},
{
"epoch": 0.48,
"learning_rate": 2.1306818181818183e-06,
"loss": 0.8289,
"step": 150
},
{
"epoch": 0.64,
"learning_rate": 2.8409090909090916e-06,
"loss": 0.412,
"step": 200
},
{
"epoch": 0.8,
"learning_rate": 3.5511363636363636e-06,
"loss": 0.3014,
"step": 250
},
{
"epoch": 0.82,
"eval_val_acc_stderr": 0.010954525472743861,
"eval_val_accuracy": 0.8847058823529412,
"eval_val_loss": 0.30577030777931213,
"eval_val_runtime": 33.8568,
"eval_val_samples_per_second": 25.106,
"eval_val_steps_per_second": 3.16,
"step": 256
},
{
"epoch": 0.82,
"eval_val_alice_acc_stderr": 0.012572317234488177,
"eval_val_alice_accuracy": 0.9260969976905312,
"eval_val_alice_loss": 0.21393465995788574,
"eval_val_alice_runtime": 17.1088,
"eval_val_alice_samples_per_second": 25.309,
"eval_val_alice_steps_per_second": 3.215,
"step": 256
},
{
"epoch": 0.82,
"eval_val_bob_acc_stderr": 0.017983215186550393,
"eval_val_bob_accuracy": 0.8393285371702638,
"eval_val_bob_loss": 0.40229618549346924,
"eval_val_bob_runtime": 17.1516,
"eval_val_bob_samples_per_second": 24.313,
"eval_val_bob_steps_per_second": 3.09,
"step": 256
},
{
"epoch": 0.82,
"eval_val_bob_gt_acc_stderr": 0.012256027700828325,
"eval_val_bob_gt_accuracy": 0.9328537170263789,
"eval_val_bob_gt_loss": 0.20910073816776276,
"eval_val_bob_gt_runtime": 17.1417,
"eval_val_bob_gt_samples_per_second": 24.327,
"eval_val_bob_gt_steps_per_second": 3.092,
"step": 256
},
{
"epoch": 0.96,
"learning_rate": 4.2613636363636365e-06,
"loss": 0.3062,
"step": 300
},
{
"epoch": 1.12,
"learning_rate": 4.9715909090909094e-06,
"loss": 0.2271,
"step": 350
},
{
"epoch": 1.28,
"learning_rate": 5.681818181818183e-06,
"loss": 0.2346,
"step": 400
},
{
"epoch": 1.44,
"learning_rate": 6.392045454545454e-06,
"loss": 0.2203,
"step": 450
},
{
"epoch": 1.6,
"learning_rate": 7.102272727272727e-06,
"loss": 0.1968,
"step": 500
},
{
"epoch": 1.64,
"eval_val_acc_stderr": 0.008648647548423583,
"eval_val_accuracy": 0.9317647058823529,
"eval_val_loss": 0.18704643845558167,
"eval_val_runtime": 33.6882,
"eval_val_samples_per_second": 25.231,
"eval_val_steps_per_second": 3.176,
"step": 512
},
{
"epoch": 1.64,
"eval_val_alice_acc_stderr": 0.010323486896101533,
"eval_val_alice_accuracy": 0.9515011547344111,
"eval_val_alice_loss": 0.12077159434556961,
"eval_val_alice_runtime": 17.0206,
"eval_val_alice_samples_per_second": 25.44,
"eval_val_alice_steps_per_second": 3.231,
"step": 512
},
{
"epoch": 1.64,
"eval_val_bob_acc_stderr": 0.014093125547753297,
"eval_val_bob_accuracy": 0.9088729016786571,
"eval_val_bob_loss": 0.25665926933288574,
"eval_val_bob_runtime": 17.0565,
"eval_val_bob_samples_per_second": 24.448,
"eval_val_bob_steps_per_second": 3.107,
"step": 512
},
{
"epoch": 1.64,
"eval_val_bob_gt_acc_stderr": 0.01604432860757207,
"eval_val_bob_gt_accuracy": 0.8776978417266187,
"eval_val_bob_gt_loss": 0.31840986013412476,
"eval_val_bob_gt_runtime": 17.0564,
"eval_val_bob_gt_samples_per_second": 24.448,
"eval_val_bob_gt_steps_per_second": 3.107,
"step": 512
},
{
"epoch": 1.76,
"learning_rate": 7.8125e-06,
"loss": 0.2178,
"step": 550
},
{
"epoch": 1.92,
"learning_rate": 8.522727272727273e-06,
"loss": 0.1919,
"step": 600
},
{
"epoch": 2.08,
"learning_rate": 9.232954545454546e-06,
"loss": 0.1539,
"step": 650
},
{
"epoch": 2.24,
"learning_rate": 9.943181818181819e-06,
"loss": 0.15,
"step": 700
},
{
"epoch": 2.4,
"learning_rate": 1.0653409090909092e-05,
"loss": 0.176,
"step": 750
},
{
"epoch": 2.55,
"learning_rate": 1.1363636363636366e-05,
"loss": 0.1293,
"step": 800
},
{
"epoch": 2.71,
"learning_rate": 1.2073863636363636e-05,
"loss": 0.1577,
"step": 850
},
{
"epoch": 2.87,
"learning_rate": 1.2784090909090909e-05,
"loss": 0.135,
"step": 900
},
{
"epoch": 3.03,
"learning_rate": 1.3494318181818182e-05,
"loss": 0.1246,
"step": 950
},
{
"epoch": 3.19,
"learning_rate": 1.4204545454545455e-05,
"loss": 0.0965,
"step": 1000
},
{
"epoch": 3.27,
"eval_val_acc_stderr": 0.006907725389665332,
"eval_val_accuracy": 0.9576470588235294,
"eval_val_loss": 0.14458392560482025,
"eval_val_runtime": 33.6709,
"eval_val_samples_per_second": 25.244,
"eval_val_steps_per_second": 3.178,
"step": 1024
},
{
"epoch": 3.27,
"eval_val_alice_acc_stderr": 0.009333387164702351,
"eval_val_alice_accuracy": 0.9607390300230947,
"eval_val_alice_loss": 0.129893496632576,
"eval_val_alice_runtime": 17.0239,
"eval_val_alice_samples_per_second": 25.435,
"eval_val_alice_steps_per_second": 3.231,
"step": 1024
},
{
"epoch": 3.27,
"eval_val_bob_acc_stderr": 0.010212081074718785,
"eval_val_bob_accuracy": 0.9544364508393285,
"eval_val_bob_loss": 0.16120219230651855,
"eval_val_bob_runtime": 17.0579,
"eval_val_bob_samples_per_second": 24.446,
"eval_val_bob_steps_per_second": 3.107,
"step": 1024
},
{
"epoch": 3.27,
"eval_val_bob_gt_acc_stderr": 0.016441676917357297,
"eval_val_bob_gt_accuracy": 0.8705035971223022,
"eval_val_bob_gt_loss": 0.5936062932014465,
"eval_val_bob_gt_runtime": 17.0652,
"eval_val_bob_gt_samples_per_second": 24.436,
"eval_val_bob_gt_steps_per_second": 3.106,
"step": 1024
},
{
"epoch": 3.35,
"learning_rate": 1.4914772727272729e-05,
"loss": 0.1054,
"step": 1050
},
{
"epoch": 3.51,
"learning_rate": 1.5625e-05,
"loss": 0.1164,
"step": 1100
},
{
"epoch": 3.67,
"learning_rate": 1.6335227272727275e-05,
"loss": 0.0701,
"step": 1150
},
{
"epoch": 3.83,
"learning_rate": 1.7045454545454546e-05,
"loss": 0.1118,
"step": 1200
},
{
"epoch": 3.99,
"learning_rate": 1.775568181818182e-05,
"loss": 0.088,
"step": 1250
},
{
"epoch": 4.15,
"learning_rate": 1.8465909090909092e-05,
"loss": 0.0578,
"step": 1300
},
{
"epoch": 4.31,
"learning_rate": 1.9176136363636366e-05,
"loss": 0.0737,
"step": 1350
},
{
"epoch": 4.47,
"learning_rate": 1.9886363636363638e-05,
"loss": 0.0777,
"step": 1400
},
{
"epoch": 4.63,
"learning_rate": 1.9894763217238787e-05,
"loss": 0.0562,
"step": 1450
},
{
"epoch": 4.79,
"learning_rate": 1.976948133299925e-05,
"loss": 0.0741,
"step": 1500
},
{
"epoch": 4.95,
"learning_rate": 1.964419944875971e-05,
"loss": 0.0504,
"step": 1550
},
{
"epoch": 5.11,
"learning_rate": 1.951891756452017e-05,
"loss": 0.0508,
"step": 1600
},
{
"epoch": 5.27,
"learning_rate": 1.9393635680280633e-05,
"loss": 0.0489,
"step": 1650
},
{
"epoch": 5.43,
"learning_rate": 1.9268353796041094e-05,
"loss": 0.0314,
"step": 1700
},
{
"epoch": 5.59,
"learning_rate": 1.9143071911801552e-05,
"loss": 0.041,
"step": 1750
},
{
"epoch": 5.75,
"learning_rate": 1.9017790027562014e-05,
"loss": 0.0348,
"step": 1800
},
{
"epoch": 5.91,
"learning_rate": 1.8892508143322475e-05,
"loss": 0.0404,
"step": 1850
},
{
"epoch": 6.07,
"learning_rate": 1.8767226259082937e-05,
"loss": 0.0406,
"step": 1900
},
{
"epoch": 6.23,
"learning_rate": 1.8641944374843398e-05,
"loss": 0.0287,
"step": 1950
},
{
"epoch": 6.39,
"learning_rate": 1.851666249060386e-05,
"loss": 0.0382,
"step": 2000
},
{
"epoch": 6.54,
"eval_val_acc_stderr": 0.004801960383990247,
"eval_val_accuracy": 0.98,
"eval_val_loss": 0.11676687747240067,
"eval_val_runtime": 33.5165,
"eval_val_samples_per_second": 25.361,
"eval_val_steps_per_second": 3.192,
"step": 2048
},
{
"epoch": 6.54,
"eval_val_alice_acc_stderr": 0.008200955905749383,
"eval_val_alice_accuracy": 0.9699769053117783,
"eval_val_alice_loss": 0.17065930366516113,
"eval_val_alice_runtime": 16.9426,
"eval_val_alice_samples_per_second": 25.557,
"eval_val_alice_steps_per_second": 3.246,
"step": 2048
},
{
"epoch": 6.54,
"eval_val_bob_acc_stderr": 0.004138631085700285,
"eval_val_bob_accuracy": 0.9928057553956835,
"eval_val_bob_loss": 0.06056825444102287,
"eval_val_bob_runtime": 16.9895,
"eval_val_bob_samples_per_second": 24.545,
"eval_val_bob_steps_per_second": 3.12,
"step": 2048
},
{
"epoch": 6.54,
"eval_val_bob_gt_acc_stderr": 0.01787398723923547,
"eval_val_bob_gt_accuracy": 0.841726618705036,
"eval_val_bob_gt_loss": 1.0665634870529175,
"eval_val_bob_gt_runtime": 16.987,
"eval_val_bob_gt_samples_per_second": 24.548,
"eval_val_bob_gt_steps_per_second": 3.12,
"step": 2048
}
],
"logging_steps": 50,
"max_steps": 9390,
"num_input_tokens_seen": 0,
"num_train_epochs": 30,
"save_steps": 10000000000,
"total_flos": 7.4063386395648e+17,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}