{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.6157872455066774,
  "eval_steps": 200,
  "global_step": 2000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0003078936227533387,
      "grad_norm": 0.567386619418904,
      "learning_rate": 6.153846153846154e-07,
      "loss": 1.3715,
      "step": 1
    },
    {
      "epoch": 0.0015394681137666935,
      "grad_norm": 0.5288856739094128,
      "learning_rate": 3.0769230769230774e-06,
      "loss": 1.3257,
      "step": 5
    },
    {
      "epoch": 0.003078936227533387,
      "grad_norm": 0.514193298068005,
      "learning_rate": 6.153846153846155e-06,
      "loss": 1.3509,
      "step": 10
    },
    {
      "epoch": 0.004618404341300081,
      "grad_norm": 0.5439189819234317,
      "learning_rate": 9.230769230769232e-06,
      "loss": 1.3301,
      "step": 15
    },
    {
      "epoch": 0.006157872455066774,
      "grad_norm": 0.40780463269778033,
      "learning_rate": 1.230769230769231e-05,
      "loss": 1.3432,
      "step": 20
    },
    {
      "epoch": 0.007697340568833468,
      "grad_norm": 0.21241926749939638,
      "learning_rate": 1.5384615384615387e-05,
      "loss": 1.2848,
      "step": 25
    },
    {
      "epoch": 0.009236808682600161,
      "grad_norm": 0.22229732202041577,
      "learning_rate": 1.8461538461538465e-05,
      "loss": 1.2157,
      "step": 30
    },
    {
      "epoch": 0.010776276796366856,
      "grad_norm": 0.22180484192931016,
      "learning_rate": 2.1538461538461542e-05,
      "loss": 1.2437,
      "step": 35
    },
    {
      "epoch": 0.012315744910133548,
      "grad_norm": 0.26148958772269565,
      "learning_rate": 2.461538461538462e-05,
      "loss": 1.1774,
      "step": 40
    },
    {
      "epoch": 0.013855213023900243,
      "grad_norm": 0.1381120764969843,
      "learning_rate": 2.7692307692307694e-05,
      "loss": 1.202,
      "step": 45
    },
    {
      "epoch": 0.015394681137666935,
      "grad_norm": 0.11244566836575712,
      "learning_rate": 3.0769230769230774e-05,
      "loss": 1.147,
      "step": 50
    },
    {
      "epoch": 0.01693414925143363,
      "grad_norm": 0.11021185664583819,
      "learning_rate": 3.384615384615385e-05,
      "loss": 1.1643,
      "step": 55
    },
    {
      "epoch": 0.018473617365200323,
      "grad_norm": 0.09699197885691936,
      "learning_rate": 3.692307692307693e-05,
      "loss": 1.1375,
      "step": 60
    },
    {
      "epoch": 0.020013085478967015,
      "grad_norm": 0.08432503700919332,
      "learning_rate": 4e-05,
      "loss": 1.157,
      "step": 65
    },
    {
      "epoch": 0.02155255359273371,
      "grad_norm": 0.08841813904259435,
      "learning_rate": 4.3076923076923084e-05,
      "loss": 1.1193,
      "step": 70
    },
    {
      "epoch": 0.023092021706500404,
      "grad_norm": 0.09260059592301743,
      "learning_rate": 4.615384615384616e-05,
      "loss": 1.1143,
      "step": 75
    },
    {
      "epoch": 0.024631489820267097,
      "grad_norm": 0.11247544113589666,
      "learning_rate": 4.923076923076924e-05,
      "loss": 1.1422,
      "step": 80
    },
    {
      "epoch": 0.026170957934033793,
      "grad_norm": 0.10305516893175011,
      "learning_rate": 5.230769230769231e-05,
      "loss": 1.1301,
      "step": 85
    },
    {
      "epoch": 0.027710426047800486,
      "grad_norm": 0.10094078438604197,
      "learning_rate": 5.538461538461539e-05,
      "loss": 1.07,
      "step": 90
    },
    {
      "epoch": 0.02924989416156718,
      "grad_norm": 0.09878535861697932,
      "learning_rate": 5.846153846153847e-05,
      "loss": 1.1062,
      "step": 95
    },
    {
      "epoch": 0.03078936227533387,
      "grad_norm": 0.0880914200143609,
      "learning_rate": 6.153846153846155e-05,
      "loss": 1.0792,
      "step": 100
    },
    {
      "epoch": 0.03232883038910057,
      "grad_norm": 0.09739295977826301,
      "learning_rate": 6.461538461538462e-05,
      "loss": 1.0886,
      "step": 105
    },
    {
      "epoch": 0.03386829850286726,
      "grad_norm": 0.12352134648547326,
      "learning_rate": 6.76923076923077e-05,
      "loss": 1.0846,
      "step": 110
    },
    {
      "epoch": 0.03540776661663395,
      "grad_norm": 0.09422623480770391,
      "learning_rate": 7.076923076923078e-05,
      "loss": 1.0892,
      "step": 115
    },
    {
      "epoch": 0.036947234730400645,
      "grad_norm": 0.11191294618136459,
      "learning_rate": 7.384615384615386e-05,
      "loss": 1.0709,
      "step": 120
    },
    {
      "epoch": 0.03848670284416734,
      "grad_norm": 0.10765812088608237,
      "learning_rate": 7.692307692307693e-05,
      "loss": 1.084,
      "step": 125
    },
    {
      "epoch": 0.04002617095793403,
      "grad_norm": 0.0922143630736113,
      "learning_rate": 8e-05,
      "loss": 1.1205,
      "step": 130
    },
    {
      "epoch": 0.04156563907170073,
      "grad_norm": 0.12839655709773654,
      "learning_rate": 8.307692307692309e-05,
      "loss": 1.0738,
      "step": 135
    },
    {
      "epoch": 0.04310510718546742,
      "grad_norm": 0.11121867345439279,
      "learning_rate": 8.615384615384617e-05,
      "loss": 1.0821,
      "step": 140
    },
    {
      "epoch": 0.044644575299234115,
      "grad_norm": 0.0928933843755614,
      "learning_rate": 8.923076923076924e-05,
      "loss": 1.0604,
      "step": 145
    },
    {
      "epoch": 0.04618404341300081,
      "grad_norm": 0.09586858247024442,
      "learning_rate": 9.230769230769232e-05,
      "loss": 1.0644,
      "step": 150
    },
    {
      "epoch": 0.0477235115267675,
      "grad_norm": 0.10507553798424621,
      "learning_rate": 9.53846153846154e-05,
      "loss": 1.0617,
      "step": 155
    },
    {
      "epoch": 0.049262979640534194,
      "grad_norm": 0.11036562639685599,
      "learning_rate": 9.846153846153848e-05,
      "loss": 1.0732,
      "step": 160
    },
    {
      "epoch": 0.050802447754300886,
      "grad_norm": 0.11309124416557308,
      "learning_rate": 0.00010153846153846153,
      "loss": 1.0557,
      "step": 165
    },
    {
      "epoch": 0.052341915868067586,
      "grad_norm": 0.15677878221994676,
      "learning_rate": 0.00010461538461538463,
      "loss": 1.0706,
      "step": 170
    },
    {
      "epoch": 0.05388138398183428,
      "grad_norm": 0.11661519431178777,
      "learning_rate": 0.0001076923076923077,
      "loss": 1.098,
      "step": 175
    },
    {
      "epoch": 0.05542085209560097,
      "grad_norm": 0.12109397309650635,
      "learning_rate": 0.00011076923076923077,
      "loss": 1.0311,
      "step": 180
    },
    {
      "epoch": 0.056960320209367664,
      "grad_norm": 0.11093871974267784,
      "learning_rate": 0.00011384615384615384,
      "loss": 1.0707,
      "step": 185
    },
    {
      "epoch": 0.05849978832313436,
      "grad_norm": 0.11357908911113367,
      "learning_rate": 0.00011692307692307694,
      "loss": 1.0845,
      "step": 190
    },
    {
      "epoch": 0.06003925643690105,
      "grad_norm": 0.12394460027791813,
      "learning_rate": 0.00012,
      "loss": 1.0495,
      "step": 195
    },
    {
      "epoch": 0.06157872455066774,
      "grad_norm": 0.11194042323004598,
      "learning_rate": 0.0001230769230769231,
      "loss": 1.0614,
      "step": 200
    },
    {
      "epoch": 0.06157872455066774,
      "eval_loss": 1.0632128715515137,
      "eval_runtime": 3818.978,
      "eval_samples_per_second": 6.051,
      "eval_steps_per_second": 0.378,
      "step": 200
    },
    {
      "epoch": 0.06311819266443444,
      "grad_norm": 0.1033181877601895,
      "learning_rate": 0.00012615384615384615,
      "loss": 1.0433,
      "step": 205
    },
    {
      "epoch": 0.06465766077820113,
      "grad_norm": 0.12063646484450094,
      "learning_rate": 0.00012923076923076923,
      "loss": 1.0714,
      "step": 210
    },
    {
      "epoch": 0.06619712889196783,
      "grad_norm": 0.11588361936604014,
      "learning_rate": 0.0001323076923076923,
      "loss": 1.057,
      "step": 215
    },
    {
      "epoch": 0.06773659700573452,
      "grad_norm": 0.11616868785038624,
      "learning_rate": 0.0001353846153846154,
      "loss": 1.0553,
      "step": 220
    },
    {
      "epoch": 0.06927606511950121,
      "grad_norm": 0.09562740234655313,
      "learning_rate": 0.00013846153846153847,
      "loss": 1.0257,
      "step": 225
    },
    {
      "epoch": 0.0708155332332679,
      "grad_norm": 0.09393900612118493,
      "learning_rate": 0.00014153846153846156,
      "loss": 1.0445,
      "step": 230
    },
    {
      "epoch": 0.0723550013470346,
      "grad_norm": 0.10841095433316549,
      "learning_rate": 0.0001446153846153846,
      "loss": 1.0103,
      "step": 235
    },
    {
      "epoch": 0.07389446946080129,
      "grad_norm": 0.10223643461806235,
      "learning_rate": 0.00014769230769230772,
      "loss": 1.0717,
      "step": 240
    },
    {
      "epoch": 0.07543393757456798,
      "grad_norm": 0.11204179743813408,
      "learning_rate": 0.00015076923076923077,
      "loss": 1.0724,
      "step": 245
    },
    {
      "epoch": 0.07697340568833468,
      "grad_norm": 0.09200458344532117,
      "learning_rate": 0.00015384615384615385,
      "loss": 1.0636,
      "step": 250
    },
    {
      "epoch": 0.07851287380210137,
      "grad_norm": 0.11400449644867688,
      "learning_rate": 0.00015692307692307693,
      "loss": 0.9971,
      "step": 255
    },
    {
      "epoch": 0.08005234191586806,
      "grad_norm": 0.11612263416102313,
      "learning_rate": 0.00016,
      "loss": 1.0694,
      "step": 260
    },
    {
      "epoch": 0.08159181002963477,
      "grad_norm": 0.09082765987050115,
      "learning_rate": 0.0001630769230769231,
      "loss": 1.0467,
      "step": 265
    },
    {
      "epoch": 0.08313127814340146,
      "grad_norm": 0.091051842252486,
      "learning_rate": 0.00016615384615384617,
      "loss": 1.0909,
      "step": 270
    },
    {
      "epoch": 0.08467074625716815,
      "grad_norm": 0.09996524510558816,
      "learning_rate": 0.00016923076923076923,
      "loss": 1.038,
      "step": 275
    },
    {
      "epoch": 0.08621021437093485,
      "grad_norm": 0.08559129966794987,
      "learning_rate": 0.00017230769230769234,
      "loss": 1.055,
      "step": 280
    },
    {
      "epoch": 0.08774968248470154,
      "grad_norm": 0.09417658234399054,
      "learning_rate": 0.0001753846153846154,
      "loss": 1.0628,
      "step": 285
    },
    {
      "epoch": 0.08928915059846823,
      "grad_norm": 0.09367818742833312,
      "learning_rate": 0.00017846153846153847,
      "loss": 1.0698,
      "step": 290
    },
    {
      "epoch": 0.09082861871223492,
      "grad_norm": 0.091480252033389,
      "learning_rate": 0.00018153846153846155,
      "loss": 1.065,
      "step": 295
    },
    {
      "epoch": 0.09236808682600162,
      "grad_norm": 0.0938796736597296,
      "learning_rate": 0.00018461538461538463,
      "loss": 1.0532,
      "step": 300
    },
    {
      "epoch": 0.09390755493976831,
      "grad_norm": 0.09576286670666531,
      "learning_rate": 0.0001876923076923077,
      "loss": 1.0496,
      "step": 305
    },
    {
      "epoch": 0.095447023053535,
      "grad_norm": 0.09248254454681183,
      "learning_rate": 0.0001907692307692308,
      "loss": 1.0636,
      "step": 310
    },
    {
      "epoch": 0.0969864911673017,
      "grad_norm": 0.08693274686688061,
      "learning_rate": 0.00019384615384615385,
      "loss": 1.0838,
      "step": 315
    },
    {
      "epoch": 0.09852595928106839,
      "grad_norm": 0.07975001335214404,
      "learning_rate": 0.00019692307692307696,
      "loss": 1.037,
      "step": 320
    },
    {
      "epoch": 0.10006542739483508,
      "grad_norm": 0.09900399369206245,
      "learning_rate": 0.0002,
      "loss": 1.0476,
      "step": 325
    },
    {
      "epoch": 0.10160489550860177,
      "grad_norm": 0.0883635278339313,
      "learning_rate": 0.00019999855506507185,
      "loss": 1.0765,
      "step": 330
    },
    {
      "epoch": 0.10314436362236847,
      "grad_norm": 0.08877039836306448,
      "learning_rate": 0.00019999422030204418,
      "loss": 1.059,
      "step": 335
    },
    {
      "epoch": 0.10468383173613517,
      "grad_norm": 0.07996176059015918,
      "learning_rate": 0.00019998699583618593,
      "loss": 1.0418,
      "step": 340
    },
    {
      "epoch": 0.10622329984990186,
      "grad_norm": 0.0870003860102271,
      "learning_rate": 0.00019997688187627482,
      "loss": 1.0557,
      "step": 345
    },
    {
      "epoch": 0.10776276796366856,
      "grad_norm": 0.09383600372135154,
      "learning_rate": 0.0001999638787145911,
      "loss": 1.0735,
      "step": 350
    },
    {
      "epoch": 0.10930223607743525,
      "grad_norm": 0.09891188283344632,
      "learning_rate": 0.0001999479867269092,
      "loss": 1.0584,
      "step": 355
    },
    {
      "epoch": 0.11084170419120194,
      "grad_norm": 0.12102740620306172,
      "learning_rate": 0.00019992920637248697,
      "loss": 1.0489,
      "step": 360
    },
    {
      "epoch": 0.11238117230496864,
      "grad_norm": 0.08137547822372986,
      "learning_rate": 0.00019990753819405213,
      "loss": 1.0277,
      "step": 365
    },
    {
      "epoch": 0.11392064041873533,
      "grad_norm": 0.08421354630258387,
      "learning_rate": 0.00019988298281778684,
      "loss": 1.062,
      "step": 370
    },
    {
      "epoch": 0.11546010853250202,
      "grad_norm": 0.08071712029036497,
      "learning_rate": 0.00019985554095330955,
      "loss": 1.0215,
      "step": 375
    },
    {
      "epoch": 0.11699957664626871,
      "grad_norm": 0.0928078752872806,
      "learning_rate": 0.0001998252133936544,
      "loss": 1.053,
      "step": 380
    },
    {
      "epoch": 0.1185390447600354,
      "grad_norm": 0.08562676553948985,
      "learning_rate": 0.00019979200101524845,
      "loss": 1.0701,
      "step": 385
    },
    {
      "epoch": 0.1200785128738021,
      "grad_norm": 0.08403649770517699,
      "learning_rate": 0.00019975590477788613,
      "loss": 1.0514,
      "step": 390
    },
    {
      "epoch": 0.12161798098756879,
      "grad_norm": 0.08183724425629081,
      "learning_rate": 0.0001997169257247018,
      "loss": 1.0095,
      "step": 395
    },
    {
      "epoch": 0.12315744910133548,
      "grad_norm": 0.08039658119705519,
      "learning_rate": 0.00019967506498213931,
      "loss": 1.0689,
      "step": 400
    },
    {
      "epoch": 0.12315744910133548,
      "eval_loss": 1.0475565195083618,
      "eval_runtime": 3802.3739,
      "eval_samples_per_second": 6.078,
      "eval_steps_per_second": 0.38,
      "step": 400
    },
    {
      "epoch": 0.12469691721510218,
      "grad_norm": 0.08379276105126061,
      "learning_rate": 0.00019963032375991966,
      "loss": 1.0782,
      "step": 405
    },
    {
      "epoch": 0.12623638532886888,
      "grad_norm": 0.07908083743105887,
      "learning_rate": 0.00019958270335100595,
      "loss": 1.0235,
      "step": 410
    },
    {
      "epoch": 0.12777585344263556,
      "grad_norm": 0.08493671539476158,
      "learning_rate": 0.00019953220513156602,
      "loss": 1.0907,
      "step": 415
    },
    {
      "epoch": 0.12931532155640227,
      "grad_norm": 0.08337456151040325,
      "learning_rate": 0.0001994788305609327,
      "loss": 1.0326,
      "step": 420
    },
    {
      "epoch": 0.13085478967016895,
      "grad_norm": 0.08547228164895211,
      "learning_rate": 0.00019942258118156163,
      "loss": 1.0442,
      "step": 425
    },
    {
      "epoch": 0.13239425778393565,
      "grad_norm": 0.0750158452168834,
      "learning_rate": 0.00019936345861898663,
      "loss": 1.0684,
      "step": 430
    },
    {
      "epoch": 0.13393372589770233,
      "grad_norm": 0.07579154319260911,
      "learning_rate": 0.0001993014645817728,
      "loss": 1.0547,
      "step": 435
    },
    {
      "epoch": 0.13547319401146904,
      "grad_norm": 0.07323307416728322,
      "learning_rate": 0.00019923660086146723,
      "loss": 1.0491,
      "step": 440
    },
    {
      "epoch": 0.13701266212523572,
      "grad_norm": 0.08937561723880813,
      "learning_rate": 0.0001991688693325469,
      "loss": 1.0484,
      "step": 445
    },
    {
      "epoch": 0.13855213023900242,
      "grad_norm": 0.08089791643667314,
      "learning_rate": 0.00019909827195236493,
      "loss": 1.0422,
      "step": 450
    },
    {
      "epoch": 0.14009159835276913,
      "grad_norm": 0.07004371269562998,
      "learning_rate": 0.00019902481076109372,
      "loss": 1.0779,
      "step": 455
    },
    {
      "epoch": 0.1416310664665358,
      "grad_norm": 0.07219709532645062,
      "learning_rate": 0.00019894848788166604,
      "loss": 1.0578,
      "step": 460
    },
    {
      "epoch": 0.14317053458030252,
      "grad_norm": 0.08319824295039546,
      "learning_rate": 0.00019886930551971387,
      "loss": 1.0776,
      "step": 465
    },
    {
      "epoch": 0.1447100026940692,
      "grad_norm": 0.08461977605731112,
      "learning_rate": 0.0001987872659635043,
      "loss": 1.0398,
      "step": 470
    },
    {
      "epoch": 0.1462494708078359,
      "grad_norm": 0.07913296391073374,
      "learning_rate": 0.00019870237158387384,
      "loss": 1.0496,
      "step": 475
    },
    {
      "epoch": 0.14778893892160258,
      "grad_norm": 0.07884194841699313,
      "learning_rate": 0.00019861462483415952,
      "loss": 1.0401,
      "step": 480
    },
    {
      "epoch": 0.1493284070353693,
      "grad_norm": 0.08715608209779739,
      "learning_rate": 0.0001985240282501282,
      "loss": 1.0458,
      "step": 485
    },
    {
      "epoch": 0.15086787514913597,
      "grad_norm": 0.07255994722623947,
      "learning_rate": 0.0001984305844499033,
      "loss": 1.023,
      "step": 490
    },
    {
      "epoch": 0.15240734326290267,
      "grad_norm": 0.0769318406138446,
      "learning_rate": 0.00019833429613388902,
      "loss": 1.0476,
      "step": 495
    },
    {
      "epoch": 0.15394681137666935,
      "grad_norm": 0.07401383185768683,
      "learning_rate": 0.0001982351660846924,
      "loss": 1.0415,
      "step": 500
    },
    {
      "epoch": 0.15548627949043606,
      "grad_norm": 0.07649017525208568,
      "learning_rate": 0.00019813319716704278,
      "loss": 1.0427,
      "step": 505
    },
    {
      "epoch": 0.15702574760420274,
      "grad_norm": 0.07708900519770054,
      "learning_rate": 0.00019802839232770921,
      "loss": 1.0855,
      "step": 510
    },
    {
      "epoch": 0.15856521571796944,
      "grad_norm": 0.07439539455159229,
      "learning_rate": 0.00019792075459541518,
      "loss": 1.0224,
      "step": 515
    },
    {
      "epoch": 0.16010468383173612,
      "grad_norm": 0.08182572188820655,
      "learning_rate": 0.00019781028708075102,
      "loss": 1.0302,
      "step": 520
    },
    {
      "epoch": 0.16164415194550283,
      "grad_norm": 0.08161041915734439,
      "learning_rate": 0.00019769699297608417,
      "loss": 1.0245,
      "step": 525
    },
    {
      "epoch": 0.16318362005926954,
      "grad_norm": 0.08888339173595923,
      "learning_rate": 0.00019758087555546682,
      "loss": 1.0747,
      "step": 530
    },
    {
      "epoch": 0.16472308817303621,
      "grad_norm": 0.07764918511303621,
      "learning_rate": 0.0001974619381745413,
      "loss": 1.0696,
      "step": 535
    },
    {
      "epoch": 0.16626255628680292,
      "grad_norm": 0.07999928337629646,
      "learning_rate": 0.00019734018427044307,
      "loss": 1.0351,
      "step": 540
    },
    {
      "epoch": 0.1678020244005696,
      "grad_norm": 0.07656288262788609,
      "learning_rate": 0.0001972156173617016,
      "loss": 1.0399,
      "step": 545
    },
    {
      "epoch": 0.1693414925143363,
      "grad_norm": 0.07902096762825829,
      "learning_rate": 0.00019708824104813837,
      "loss": 1.0397,
      "step": 550
    },
    {
      "epoch": 0.17088096062810298,
      "grad_norm": 0.08100109284928467,
      "learning_rate": 0.00019695805901076308,
      "loss": 1.0661,
      "step": 555
    },
    {
      "epoch": 0.1724204287418697,
      "grad_norm": 0.07617725821540045,
      "learning_rate": 0.00019682507501166718,
      "loss": 1.088,
      "step": 560
    },
    {
      "epoch": 0.17395989685563637,
      "grad_norm": 0.0781772918354948,
      "learning_rate": 0.00019668929289391523,
      "loss": 1.0325,
      "step": 565
    },
    {
      "epoch": 0.17549936496940308,
      "grad_norm": 0.07602400296386462,
      "learning_rate": 0.00019655071658143366,
      "loss": 1.0493,
      "step": 570
    },
    {
      "epoch": 0.17703883308316976,
      "grad_norm": 0.07965301834511823,
      "learning_rate": 0.00019640935007889755,
      "loss": 1.0759,
      "step": 575
    },
    {
      "epoch": 0.17857830119693646,
      "grad_norm": 0.07753220082647658,
      "learning_rate": 0.0001962651974716149,
      "loss": 1.0601,
      "step": 580
    },
    {
      "epoch": 0.18011776931070314,
      "grad_norm": 0.07648415793765183,
      "learning_rate": 0.0001961182629254084,
      "loss": 1.0151,
      "step": 585
    },
    {
      "epoch": 0.18165723742446985,
      "grad_norm": 0.08029992977054808,
      "learning_rate": 0.00019596855068649522,
      "loss": 1.0499,
      "step": 590
    },
    {
      "epoch": 0.18319670553823653,
      "grad_norm": 0.08092320232434004,
      "learning_rate": 0.00019581606508136426,
      "loss": 1.0631,
      "step": 595
    },
    {
      "epoch": 0.18473617365200323,
      "grad_norm": 0.0748434186581261,
      "learning_rate": 0.00019566081051665098,
      "loss": 1.0053,
      "step": 600
    },
    {
      "epoch": 0.18473617365200323,
      "eval_loss": 1.0412589311599731,
      "eval_runtime": 3798.478,
      "eval_samples_per_second": 6.084,
      "eval_steps_per_second": 0.38,
      "step": 600
    },
    {
      "epoch": 0.18627564176576994,
      "grad_norm": 0.07474772922675897,
      "learning_rate": 0.00019550279147901036,
      "loss": 1.0811,
      "step": 605
    },
    {
      "epoch": 0.18781510987953662,
      "grad_norm": 0.0773035541962382,
      "learning_rate": 0.00019534201253498682,
      "loss": 1.0359,
      "step": 610
    },
    {
      "epoch": 0.18935457799330332,
      "grad_norm": 0.08385957791440671,
      "learning_rate": 0.0001951784783308827,
      "loss": 0.9995,
      "step": 615
    },
    {
      "epoch": 0.19089404610707,
      "grad_norm": 0.07841850327107855,
      "learning_rate": 0.0001950121935926236,
      "loss": 1.0417,
      "step": 620
    },
    {
      "epoch": 0.1924335142208367,
      "grad_norm": 0.11766060709484247,
      "learning_rate": 0.00019484316312562205,
      "loss": 1.0227,
      "step": 625
    },
    {
      "epoch": 0.1939729823346034,
      "grad_norm": 0.07110694761741441,
      "learning_rate": 0.00019467139181463862,
      "loss": 1.0652,
      "step": 630
    },
    {
      "epoch": 0.1955124504483701,
      "grad_norm": 0.07470835721419704,
      "learning_rate": 0.00019449688462364056,
      "loss": 1.0299,
      "step": 635
    },
    {
      "epoch": 0.19705191856213677,
      "grad_norm": 0.08102828325973369,
      "learning_rate": 0.00019431964659565867,
      "loss": 1.0488,
      "step": 640
    },
    {
      "epoch": 0.19859138667590348,
      "grad_norm": 0.07603967351010721,
      "learning_rate": 0.0001941396828526412,
      "loss": 1.0459,
      "step": 645
    },
    {
      "epoch": 0.20013085478967016,
      "grad_norm": 0.08273841396400562,
      "learning_rate": 0.00019395699859530623,
      "loss": 1.0194,
      "step": 650
    },
    {
      "epoch": 0.20167032290343687,
      "grad_norm": 0.08236332870987446,
      "learning_rate": 0.00019377159910299093,
      "loss": 1.0307,
      "step": 655
    },
    {
      "epoch": 0.20320979101720354,
      "grad_norm": 0.06867994321607887,
      "learning_rate": 0.00019358348973349943,
      "loss": 1.0098,
      "step": 660
    },
    {
      "epoch": 0.20474925913097025,
      "grad_norm": 0.08144032256455716,
      "learning_rate": 0.00019339267592294763,
      "loss": 1.048,
      "step": 665
    },
    {
      "epoch": 0.20628872724473693,
      "grad_norm": 0.07918355281230142,
      "learning_rate": 0.00019319916318560635,
      "loss": 1.0227,
      "step": 670
    },
    {
      "epoch": 0.20782819535850364,
      "grad_norm": 0.07792694280227995,
      "learning_rate": 0.00019300295711374187,
      "loss": 1.039,
      "step": 675
    },
    {
      "epoch": 0.20936766347227034,
      "grad_norm": 0.07841373321559497,
      "learning_rate": 0.00019280406337745428,
      "loss": 1.0185,
      "step": 680
    },
    {
      "epoch": 0.21090713158603702,
      "grad_norm": 0.12957233088012476,
      "learning_rate": 0.00019260248772451377,
      "loss": 1.0496,
      "step": 685
    },
    {
      "epoch": 0.21244659969980373,
      "grad_norm": 0.07758713390528212,
      "learning_rate": 0.0001923982359801943,
      "loss": 1.0425,
      "step": 690
    },
    {
      "epoch": 0.2139860678135704,
      "grad_norm": 0.08451147321948667,
      "learning_rate": 0.00019219131404710552,
      "loss": 1.0749,
      "step": 695
    },
    {
      "epoch": 0.21552553592733711,
      "grad_norm": 0.08218923027527074,
      "learning_rate": 0.00019198172790502196,
      "loss": 1.0244,
      "step": 700
    },
    {
      "epoch": 0.2170650040411038,
      "grad_norm": 0.07475377041516394,
      "learning_rate": 0.0001917694836107104,
      "loss": 1.0367,
      "step": 705
    },
    {
      "epoch": 0.2186044721548705,
      "grad_norm": 0.06989397102142611,
      "learning_rate": 0.00019155458729775467,
      "loss": 1.049,
      "step": 710
    },
    {
      "epoch": 0.22014394026863718,
      "grad_norm": 0.07403450910939992,
      "learning_rate": 0.0001913370451763786,
      "loss": 1.0135,
      "step": 715
    },
    {
      "epoch": 0.22168340838240388,
      "grad_norm": 0.06736867483748331,
      "learning_rate": 0.00019111686353326631,
      "loss": 1.0213,
      "step": 720
    },
    {
      "epoch": 0.22322287649617056,
      "grad_norm": 0.07406189263799307,
      "learning_rate": 0.00019089404873138082,
      "loss": 1.0521,
      "step": 725
    },
    {
      "epoch": 0.22476234460993727,
      "grad_norm": 0.07355438357203191,
      "learning_rate": 0.00019066860720977986,
      "loss": 1.0483,
      "step": 730
    },
    {
      "epoch": 0.22630181272370395,
      "grad_norm": 0.07568463711454308,
      "learning_rate": 0.00019044054548343002,
      "loss": 1.0289,
      "step": 735
    },
    {
      "epoch": 0.22784128083747066,
      "grad_norm": 0.07229067305689793,
      "learning_rate": 0.0001902098701430184,
      "loss": 1.0694,
      "step": 740
    },
    {
      "epoch": 0.22938074895123733,
      "grad_norm": 0.07530804590739208,
      "learning_rate": 0.00018997658785476214,
      "loss": 1.0651,
      "step": 745
    },
    {
      "epoch": 0.23092021706500404,
      "grad_norm": 0.07259570093477205,
      "learning_rate": 0.00018974070536021572,
      "loss": 1.0685,
      "step": 750
    },
    {
      "epoch": 0.23245968517877075,
      "grad_norm": 0.06991198063848746,
      "learning_rate": 0.00018950222947607625,
      "loss": 1.0524,
      "step": 755
    },
    {
      "epoch": 0.23399915329253743,
      "grad_norm": 0.07071964916232602,
      "learning_rate": 0.0001892611670939865,
      "loss": 0.9967,
      "step": 760
    },
    {
      "epoch": 0.23553862140630413,
      "grad_norm": 0.08069984398117862,
      "learning_rate": 0.00018901752518033548,
      "loss": 1.0503,
      "step": 765
    },
    {
      "epoch": 0.2370780895200708,
      "grad_norm": 0.0719126875966159,
      "learning_rate": 0.0001887713107760575,
      "loss": 1.0497,
      "step": 770
    },
    {
      "epoch": 0.23861755763383752,
      "grad_norm": 0.07933165083127114,
      "learning_rate": 0.00018852253099642833,
      "loss": 1.0163,
      "step": 775
    },
    {
      "epoch": 0.2401570257476042,
      "grad_norm": 0.07529876789807866,
      "learning_rate": 0.0001882711930308599,
      "loss": 1.0503,
      "step": 780
    },
    {
      "epoch": 0.2416964938613709,
      "grad_norm": 0.074705285570636,
      "learning_rate": 0.00018801730414269225,
      "loss": 1.0424,
      "step": 785
    },
    {
      "epoch": 0.24323596197513758,
      "grad_norm": 0.07414239254048278,
      "learning_rate": 0.0001877608716689839,
      "loss": 1.0655,
      "step": 790
    },
    {
      "epoch": 0.2447754300889043,
      "grad_norm": 0.07941506265986978,
      "learning_rate": 0.00018750190302029956,
      "loss": 1.0193,
      "step": 795
    },
    {
      "epoch": 0.24631489820267097,
      "grad_norm": 0.08230667165269098,
      "learning_rate": 0.00018724040568049612,
      "loss": 1.0446,
      "step": 800
    },
    {
      "epoch": 0.24631489820267097,
      "eval_loss": 1.0366028547286987,
      "eval_runtime": 3798.2715,
      "eval_samples_per_second": 6.084,
      "eval_steps_per_second": 0.38,
      "step": 800
    },
    {
      "epoch": 0.24785436631643767,
      "grad_norm": 0.08052061406166201,
      "learning_rate": 0.00018697638720650646,
      "loss": 1.0329,
      "step": 805
    },
    {
      "epoch": 0.24939383443020435,
      "grad_norm": 0.07060612206330524,
      "learning_rate": 0.00018670985522812084,
      "loss": 1.0123,
      "step": 810
    },
    {
      "epoch": 0.25093330254397106,
      "grad_norm": 0.07261155032686553,
      "learning_rate": 0.0001864408174477665,
      "loss": 1.0394,
      "step": 815
    },
    {
      "epoch": 0.25247277065773777,
      "grad_norm": 0.07296759582556935,
      "learning_rate": 0.00018616928164028523,
      "loss": 1.0021,
      "step": 820
    },
    {
      "epoch": 0.2540122387715045,
      "grad_norm": 0.06646733390910516,
      "learning_rate": 0.00018589525565270844,
      "loss": 1.0286,
      "step": 825
    },
    {
      "epoch": 0.2555517068852711,
      "grad_norm": 0.07496596424661404,
      "learning_rate": 0.0001856187474040306,
      "loss": 1.0502,
      "step": 830
    },
    {
      "epoch": 0.25709117499903783,
      "grad_norm": 0.08500360217319118,
      "learning_rate": 0.00018533976488498016,
      "loss": 1.0256,
      "step": 835
    },
    {
      "epoch": 0.25863064311280454,
      "grad_norm": 0.07817756873072405,
      "learning_rate": 0.0001850583161577889,
      "loss": 1.0609,
      "step": 840
    },
    {
      "epoch": 0.26017011122657124,
      "grad_norm": 0.07136612848707545,
      "learning_rate": 0.00018477440935595873,
      "loss": 1.0775,
      "step": 845
    },
    {
      "epoch": 0.2617095793403379,
      "grad_norm": 0.07292608365481835,
      "learning_rate": 0.00018448805268402672,
      "loss": 1.058,
      "step": 850
    },
    {
      "epoch": 0.2632490474541046,
      "grad_norm": 0.07716711803643432,
      "learning_rate": 0.00018419925441732804,
      "loss": 1.0294,
      "step": 855
    },
    {
      "epoch": 0.2647885155678713,
      "grad_norm": 0.07526261921660161,
      "learning_rate": 0.00018390802290175673,
      "loss": 1.0467,
      "step": 860
    },
    {
      "epoch": 0.266327983681638,
      "grad_norm": 0.0735157839737638,
      "learning_rate": 0.00018361436655352456,
      "loss": 1.0278,
      "step": 865
    },
    {
      "epoch": 0.26786745179540467,
      "grad_norm": 0.07101822956411033,
      "learning_rate": 0.00018331829385891783,
      "loss": 1.0188,
      "step": 870
    },
    {
      "epoch": 0.26940691990917137,
      "grad_norm": 0.07406443039738211,
      "learning_rate": 0.00018301981337405212,
      "loss": 1.0476,
      "step": 875
    },
    {
      "epoch": 0.2709463880229381,
      "grad_norm": 0.07470379094242477,
      "learning_rate": 0.00018271893372462497,
      "loss": 1.0468,
      "step": 880
    },
    {
      "epoch": 0.2724858561367048,
      "grad_norm": 0.07458412123750419,
      "learning_rate": 0.00018241566360566665,
      "loss": 1.0279,
      "step": 885
    },
    {
      "epoch": 0.27402532425047144,
      "grad_norm": 0.08164107594170099,
      "learning_rate": 0.00018211001178128892,
      "loss": 1.0472,
      "step": 890
    },
    {
      "epoch": 0.27556479236423814,
      "grad_norm": 0.07748097167228449,
      "learning_rate": 0.00018180198708443173,
      "loss": 1.0534,
      "step": 895
    },
    {
      "epoch": 0.27710426047800485,
      "grad_norm": 0.07485972229218758,
      "learning_rate": 0.00018149159841660795,
      "loss": 1.0419,
      "step": 900
    },
    {
      "epoch": 0.27864372859177156,
      "grad_norm": 0.07553124022662376,
      "learning_rate": 0.00018117885474764613,
      "loss": 1.0836,
      "step": 905
    },
    {
      "epoch": 0.28018319670553826,
      "grad_norm": 0.07966215645919128,
      "learning_rate": 0.00018086376511543126,
      "loss": 1.0642,
      "step": 910
    },
    {
      "epoch": 0.2817226648193049,
      "grad_norm": 0.08376456009997757,
      "learning_rate": 0.00018054633862564368,
      "loss": 1.0398,
      "step": 915
    },
    {
      "epoch": 0.2832621329330716,
      "grad_norm": 0.075508959246266,
      "learning_rate": 0.0001802265844514958,
      "loss": 0.9996,
      "step": 920
    },
    {
      "epoch": 0.2848016010468383,
      "grad_norm": 0.07358158800850821,
      "learning_rate": 0.0001799045118334671,
      "loss": 1.0542,
      "step": 925
    },
    {
      "epoch": 0.28634106916060503,
      "grad_norm": 0.08094264187967125,
      "learning_rate": 0.00017958013007903713,
      "loss": 1.0563,
      "step": 930
    },
    {
      "epoch": 0.2878805372743717,
      "grad_norm": 0.07424176124118159,
      "learning_rate": 0.0001792534485624164,
      "loss": 1.0405,
      "step": 935
    },
    {
      "epoch": 0.2894200053881384,
      "grad_norm": 0.07418414794842867,
      "learning_rate": 0.00017892447672427563,
      "loss": 1.0391,
      "step": 940
    },
    {
      "epoch": 0.2909594735019051,
      "grad_norm": 0.0704593007549167,
      "learning_rate": 0.00017859322407147272,
      "loss": 1.0543,
      "step": 945
    },
    {
      "epoch": 0.2924989416156718,
      "grad_norm": 0.07058098730245323,
      "learning_rate": 0.00017825970017677832,
      "loss": 1.0693,
      "step": 950
    },
    {
      "epoch": 0.29403840972943845,
      "grad_norm": 0.07100077379863531,
      "learning_rate": 0.00017792391467859886,
      "loss": 1.0157,
      "step": 955
    },
    {
      "epoch": 0.29557787784320516,
      "grad_norm": 0.07264965656385536,
      "learning_rate": 0.0001775858772806983,
      "loss": 1.0669,
      "step": 960
    },
    {
      "epoch": 0.29711734595697187,
      "grad_norm": 0.06945646756969821,
      "learning_rate": 0.00017724559775191744,
      "loss": 1.0282,
      "step": 965
    },
    {
      "epoch": 0.2986568140707386,
      "grad_norm": 0.0800750187488917,
      "learning_rate": 0.00017690308592589182,
      "loss": 1.0424,
      "step": 970
    },
    {
      "epoch": 0.3001962821845053,
      "grad_norm": 0.07826578070698212,
      "learning_rate": 0.0001765583517007675,
      "loss": 0.994,
      "step": 975
    },
    {
      "epoch": 0.30173575029827193,
      "grad_norm": 0.07185624380063993,
      "learning_rate": 0.00017621140503891488,
      "loss": 1.0117,
      "step": 980
    },
    {
      "epoch": 0.30327521841203864,
      "grad_norm": 0.07770724836361542,
      "learning_rate": 0.00017586225596664102,
      "loss": 1.0282,
      "step": 985
    },
    {
      "epoch": 0.30481468652580535,
      "grad_norm": 0.07425549788358596,
      "learning_rate": 0.00017551091457389966,
      "loss": 1.0332,
      "step": 990
    },
    {
      "epoch": 0.30635415463957205,
      "grad_norm": 0.07157671192234144,
      "learning_rate": 0.00017515739101399983,
      "loss": 1.0202,
      "step": 995
    },
    {
      "epoch": 0.3078936227533387,
      "grad_norm": 0.07195148099166214,
      "learning_rate": 0.00017480169550331231,
      "loss": 1.0091,
      "step": 1000
    },
    {
      "epoch": 0.3078936227533387,
      "eval_loss": 1.033624291419983,
      "eval_runtime": 3799.3073,
      "eval_samples_per_second": 6.082,
      "eval_steps_per_second": 0.38,
      "step": 1000
    },
    {
      "epoch": 0.3094330908671054,
      "grad_norm": 0.0709967222808181,
      "learning_rate": 0.00017444383832097442,
      "loss": 1.0306,
      "step": 1005
    },
    {
      "epoch": 0.3109725589808721,
      "grad_norm": 0.08017250953363526,
      "learning_rate": 0.00017408382980859305,
      "loss": 1.0335,
      "step": 1010
    },
    {
      "epoch": 0.3125120270946388,
      "grad_norm": 0.0763005407159528,
      "learning_rate": 0.00017372168036994566,
      "loss": 1.0155,
      "step": 1015
    },
    {
      "epoch": 0.3140514952084055,
      "grad_norm": 0.068090767981409,
      "learning_rate": 0.00017335740047067972,
      "loss": 1.0226,
      "step": 1020
    },
    {
      "epoch": 0.3155909633221722,
      "grad_norm": 0.07053765308848822,
      "learning_rate": 0.0001729910006380102,
      "loss": 1.0455,
      "step": 1025
    },
    {
      "epoch": 0.3171304314359389,
      "grad_norm": 0.07639366775520491,
      "learning_rate": 0.00017262249146041546,
      "loss": 1.0737,
      "step": 1030
    },
    {
      "epoch": 0.3186698995497056,
      "grad_norm": 0.07414091472835294,
      "learning_rate": 0.00017225188358733107,
      "loss": 1.0159,
      "step": 1035
    },
    {
      "epoch": 0.32020936766347224,
      "grad_norm": 0.07840200264036183,
      "learning_rate": 0.00017187918772884232,
      "loss": 1.0605,
      "step": 1040
    },
    {
      "epoch": 0.32174883577723895,
      "grad_norm": 0.06946548404139283,
      "learning_rate": 0.00017150441465537447,
      "loss": 1.0549,
      "step": 1045
    },
    {
      "epoch": 0.32328830389100566,
      "grad_norm": 0.0726329779508538,
      "learning_rate": 0.00017112757519738154,
      "loss": 1.0294,
      "step": 1050
    },
    {
      "epoch": 0.32482777200477236,
      "grad_norm": 0.07366641465053547,
      "learning_rate": 0.0001707486802450335,
      "loss": 1.0439,
      "step": 1055
    },
    {
      "epoch": 0.32636724011853907,
      "grad_norm": 0.07461023494546891,
      "learning_rate": 0.00017036774074790132,
      "loss": 1.0036,
      "step": 1060
    },
    {
      "epoch": 0.3279067082323057,
      "grad_norm": 0.07745841056330656,
      "learning_rate": 0.00016998476771464072,
      "loss": 1.039,
      "step": 1065
    },
    {
      "epoch": 0.32944617634607243,
      "grad_norm": 0.07562279638819498,
      "learning_rate": 0.00016959977221267392,
      "loss": 1.0136,
      "step": 1070
    },
    {
      "epoch": 0.33098564445983913,
      "grad_norm": 0.07269409212200949,
      "learning_rate": 0.0001692127653678699,
      "loss": 1.0447,
      "step": 1075
    },
    {
      "epoch": 0.33252511257360584,
      "grad_norm": 0.07863977410900856,
      "learning_rate": 0.00016882375836422284,
      "loss": 1.032,
      "step": 1080
    },
    {
      "epoch": 0.3340645806873725,
      "grad_norm": 0.08154682576838618,
      "learning_rate": 0.00016843276244352885,
      "loss": 1.0576,
      "step": 1085
    },
    {
      "epoch": 0.3356040488011392,
      "grad_norm": 0.07324914224304953,
      "learning_rate": 0.00016803978890506113,
      "loss": 1.0677,
      "step": 1090
    },
    {
      "epoch": 0.3371435169149059,
      "grad_norm": 0.08330706239189462,
      "learning_rate": 0.00016764484910524358,
      "loss": 1.0244,
      "step": 1095
    },
    {
      "epoch": 0.3386829850286726,
      "grad_norm": 0.07527643648007623,
      "learning_rate": 0.00016724795445732243,
      "loss": 0.9977,
      "step": 1100
    },
    {
      "epoch": 0.34022245314243926,
      "grad_norm": 0.07895912028160554,
      "learning_rate": 0.00016684911643103642,
      "loss": 1.0575,
      "step": 1105
    },
    {
      "epoch": 0.34176192125620597,
      "grad_norm": 0.073939133015858,
      "learning_rate": 0.0001664483465522855,
      "loss": 1.0337,
      "step": 1110
    },
    {
      "epoch": 0.3433013893699727,
      "grad_norm": 0.07648599682491888,
      "learning_rate": 0.00016604565640279754,
      "loss": 1.0462,
      "step": 1115
    },
    {
      "epoch": 0.3448408574837394,
      "grad_norm": 0.07375239907970622,
      "learning_rate": 0.0001656410576197938,
      "loss": 1.0537,
      "step": 1120
    },
    {
      "epoch": 0.3463803255975061,
      "grad_norm": 0.07218952828382294,
      "learning_rate": 0.0001652345618956526,
      "loss": 1.0702,
      "step": 1125
    },
    {
      "epoch": 0.34791979371127274,
      "grad_norm": 0.07501734343767677,
      "learning_rate": 0.00016482618097757122,
      "loss": 1.045,
      "step": 1130
    },
    {
      "epoch": 0.34945926182503945,
      "grad_norm": 0.07478505250167114,
      "learning_rate": 0.00016441592666722684,
      "loss": 1.0356,
      "step": 1135
    },
    {
      "epoch": 0.35099872993880615,
      "grad_norm": 0.07035501241737965,
      "learning_rate": 0.00016400381082043507,
      "loss": 1.0819,
      "step": 1140
    },
    {
      "epoch": 0.35253819805257286,
      "grad_norm": 0.07713003380587562,
      "learning_rate": 0.00016358984534680748,
      "loss": 1.0494,
      "step": 1145
    },
    {
      "epoch": 0.3540776661663395,
      "grad_norm": 0.07012091124270008,
      "learning_rate": 0.00016317404220940758,
      "loss": 1.022,
      "step": 1150
    },
    {
      "epoch": 0.3556171342801062,
      "grad_norm": 0.06697708347109951,
      "learning_rate": 0.00016275641342440483,
      "loss": 1.0589,
      "step": 1155
    },
    {
      "epoch": 0.3571566023938729,
      "grad_norm": 0.07573896521834783,
      "learning_rate": 0.0001623369710607277,
      "loss": 1.0044,
      "step": 1160
    },
    {
      "epoch": 0.35869607050763963,
      "grad_norm": 0.06946529088193742,
      "learning_rate": 0.00016191572723971455,
      "loss": 1.0652,
      "step": 1165
    },
    {
      "epoch": 0.3602355386214063,
      "grad_norm": 0.0727340465476027,
      "learning_rate": 0.00016149269413476353,
      "loss": 1.0057,
      "step": 1170
    },
    {
      "epoch": 0.361775006735173,
      "grad_norm": 0.08017843239666048,
      "learning_rate": 0.00016106788397098095,
      "loss": 0.9942,
      "step": 1175
    },
    {
      "epoch": 0.3633144748489397,
      "grad_norm": 0.06899110462149576,
      "learning_rate": 0.0001606413090248276,
      "loss": 0.9958,
      "step": 1180
    },
    {
      "epoch": 0.3648539429627064,
      "grad_norm": 0.07737025459856088,
      "learning_rate": 0.00016021298162376428,
      "loss": 1.0211,
      "step": 1185
    },
    {
      "epoch": 0.36639341107647305,
      "grad_norm": 0.07807131065906221,
      "learning_rate": 0.00015978291414589542,
      "loss": 1.039,
      "step": 1190
    },
    {
      "epoch": 0.36793287919023976,
      "grad_norm": 0.07059155235596021,
      "learning_rate": 0.0001593511190196115,
      "loss": 1.0513,
      "step": 1195
    },
    {
      "epoch": 0.36947234730400647,
      "grad_norm": 0.07422722970665956,
      "learning_rate": 0.00015891760872322963,
      "loss": 1.0093,
      "step": 1200
    },
    {
      "epoch": 0.36947234730400647,
      "eval_loss": 1.0309594869613647,
      "eval_runtime": 3796.5579,
      "eval_samples_per_second": 6.087,
      "eval_steps_per_second": 0.381,
      "step": 1200
    },
    {
      "epoch": 0.37101181541777317,
      "grad_norm": 0.06806084338199529,
      "learning_rate": 0.00015848239578463325,
      "loss": 1.0504,
      "step": 1205
    },
    {
      "epoch": 0.3725512835315399,
      "grad_norm": 0.07638211255486586,
      "learning_rate": 0.00015804549278090982,
      "loss": 1.0145,
      "step": 1210
    },
    {
      "epoch": 0.37409075164530653,
      "grad_norm": 0.07232165039483601,
      "learning_rate": 0.00015760691233798757,
      "loss": 1.011,
      "step": 1215
    },
    {
      "epoch": 0.37563021975907324,
      "grad_norm": 0.0725477342684882,
      "learning_rate": 0.00015716666713027055,
      "loss": 1.0338,
      "step": 1220
    },
    {
      "epoch": 0.37716968787283994,
      "grad_norm": 0.08448404468374969,
      "learning_rate": 0.00015672476988027228,
      "loss": 1.0388,
      "step": 1225
    },
    {
      "epoch": 0.37870915598660665,
      "grad_norm": 0.08451055602238913,
      "learning_rate": 0.0001562812333582482,
      "loss": 1.0041,
      "step": 1230
    },
    {
      "epoch": 0.3802486241003733,
      "grad_norm": 0.07357435195090126,
      "learning_rate": 0.00015583607038182655,
      "loss": 1.0286,
      "step": 1235
    },
    {
      "epoch": 0.38178809221414,
      "grad_norm": 0.07981414373807207,
      "learning_rate": 0.000155389293815638,
      "loss": 1.0293,
      "step": 1240
    },
    {
      "epoch": 0.3833275603279067,
      "grad_norm": 0.074241776686085,
      "learning_rate": 0.00015494091657094385,
      "loss": 1.033,
      "step": 1245
    },
    {
      "epoch": 0.3848670284416734,
      "grad_norm": 0.07517760872068341,
      "learning_rate": 0.00015449095160526292,
      "loss": 1.0559,
      "step": 1250
    },
    {
      "epoch": 0.38640649655544007,
      "grad_norm": 0.07476423646372729,
      "learning_rate": 0.00015403941192199718,
      "loss": 1.0343,
      "step": 1255
    },
    {
      "epoch": 0.3879459646692068,
      "grad_norm": 0.07214431115898451,
      "learning_rate": 0.0001535863105700558,
      "loss": 1.0467,
      "step": 1260
    },
    {
      "epoch": 0.3894854327829735,
      "grad_norm": 0.07644175139453621,
      "learning_rate": 0.00015313166064347814,
      "loss": 1.0188,
      "step": 1265
    },
    {
      "epoch": 0.3910249008967402,
      "grad_norm": 0.06984113294468933,
      "learning_rate": 0.00015267547528105538,
      "loss": 1.0341,
      "step": 1270
    },
    {
      "epoch": 0.3925643690105069,
      "grad_norm": 0.06690863213166448,
      "learning_rate": 0.0001522177676659508,
      "loss": 1.0625,
      "step": 1275
    },
    {
      "epoch": 0.39410383712427355,
      "grad_norm": 0.06950059946334636,
      "learning_rate": 0.00015175855102531887,
      "loss": 1.0123,
      "step": 1280
    },
    {
      "epoch": 0.39564330523804025,
      "grad_norm": 0.06964516306819979,
      "learning_rate": 0.00015129783862992283,
      "loss": 1.0201,
      "step": 1285
    },
    {
      "epoch": 0.39718277335180696,
      "grad_norm": 0.080130428292661,
      "learning_rate": 0.0001508356437937512,
      "loss": 1.0448,
      "step": 1290
    },
    {
      "epoch": 0.39872224146557367,
      "grad_norm": 0.07373600496196321,
      "learning_rate": 0.00015037197987363338,
      "loss": 1.0272,
      "step": 1295
    },
    {
      "epoch": 0.4002617095793403,
      "grad_norm": 0.07444079143838864,
      "learning_rate": 0.0001499068602688532,
      "loss": 1.0625,
      "step": 1300
    },
    {
      "epoch": 0.401801177693107,
      "grad_norm": 0.07495218470810172,
      "learning_rate": 0.00014944029842076185,
      "loss": 1.0277,
      "step": 1305
    },
    {
      "epoch": 0.40334064580687373,
      "grad_norm": 0.07148138370454796,
      "learning_rate": 0.0001489723078123896,
      "loss": 1.0393,
      "step": 1310
    },
    {
      "epoch": 0.40488011392064044,
      "grad_norm": 0.07184439296288066,
      "learning_rate": 0.00014850290196805594,
      "loss": 1.0413,
      "step": 1315
    },
    {
      "epoch": 0.4064195820344071,
      "grad_norm": 0.07052670997188848,
      "learning_rate": 0.00014803209445297887,
      "loss": 1.0056,
      "step": 1320
    },
    {
      "epoch": 0.4079590501481738,
      "grad_norm": 0.07344695167875763,
      "learning_rate": 0.00014755989887288285,
      "loss": 1.0411,
      "step": 1325
    },
    {
      "epoch": 0.4094985182619405,
      "grad_norm": 0.0769706853052285,
      "learning_rate": 0.00014708632887360564,
      "loss": 1.0387,
      "step": 1330
    },
    {
      "epoch": 0.4110379863757072,
      "grad_norm": 0.0739404718972198,
      "learning_rate": 0.0001466113981407039,
      "loss": 1.0452,
      "step": 1335
    },
    {
      "epoch": 0.41257745448947386,
      "grad_norm": 0.08213823024505344,
      "learning_rate": 0.00014613512039905765,
      "loss": 1.0339,
      "step": 1340
    },
    {
      "epoch": 0.41411692260324057,
      "grad_norm": 0.07328616522330499,
      "learning_rate": 0.00014565750941247386,
      "loss": 1.0133,
      "step": 1345
    },
    {
      "epoch": 0.4156563907170073,
      "grad_norm": 0.07420350570178859,
      "learning_rate": 0.0001451785789832884,
      "loss": 1.0186,
      "step": 1350
    },
    {
      "epoch": 0.417195858830774,
      "grad_norm": 0.07745166588825841,
      "learning_rate": 0.00014469834295196743,
      "loss": 1.0498,
      "step": 1355
    },
    {
      "epoch": 0.4187353269445407,
      "grad_norm": 0.07355335272924506,
      "learning_rate": 0.00014421681519670722,
      "loss": 1.0435,
      "step": 1360
    },
    {
      "epoch": 0.42027479505830734,
      "grad_norm": 0.07280563497532423,
      "learning_rate": 0.0001437340096330332,
      "loss": 1.0503,
      "step": 1365
    },
    {
      "epoch": 0.42181426317207404,
      "grad_norm": 0.08984865578017236,
      "learning_rate": 0.0001432499402133979,
      "loss": 1.0373,
      "step": 1370
    },
    {
      "epoch": 0.42335373128584075,
      "grad_norm": 0.07403634652711334,
      "learning_rate": 0.0001427646209267775,
      "loss": 1.0296,
      "step": 1375
    },
    {
      "epoch": 0.42489319939960746,
      "grad_norm": 0.06941364422871579,
      "learning_rate": 0.00014227806579826774,
      "loss": 1.0097,
      "step": 1380
    },
    {
      "epoch": 0.4264326675133741,
      "grad_norm": 0.07802992325990128,
      "learning_rate": 0.00014179028888867867,
      "loss": 1.0745,
      "step": 1385
    },
    {
      "epoch": 0.4279721356271408,
      "grad_norm": 0.08282848213775869,
      "learning_rate": 0.00014130130429412815,
      "loss": 1.0273,
      "step": 1390
    },
    {
      "epoch": 0.4295116037409075,
      "grad_norm": 0.08153105885218227,
      "learning_rate": 0.0001408111261456346,
      "loss": 1.0099,
      "step": 1395
    },
    {
      "epoch": 0.43105107185467423,
      "grad_norm": 0.0676196363646441,
      "learning_rate": 0.00014031976860870855,
      "loss": 1.0086,
      "step": 1400
    },
    {
      "epoch": 0.43105107185467423,
      "eval_loss": 1.0290647745132446,
      "eval_runtime": 3812.7057,
      "eval_samples_per_second": 6.061,
      "eval_steps_per_second": 0.379,
      "step": 1400
    },
    {
      "epoch": 0.4325905399684409,
      "grad_norm": 0.07464451118448104,
      "learning_rate": 0.00013982724588294335,
      "loss": 1.0198,
      "step": 1405
    },
    {
      "epoch": 0.4341300080822076,
      "grad_norm": 0.07512676303409935,
      "learning_rate": 0.00013933357220160476,
      "loss": 1.0591,
      "step": 1410
    },
    {
      "epoch": 0.4356694761959743,
      "grad_norm": 0.06938767156943278,
      "learning_rate": 0.00013883876183121973,
      "loss": 1.0523,
      "step": 1415
    },
    {
      "epoch": 0.437208944309741,
      "grad_norm": 0.0777955845768148,
      "learning_rate": 0.000138342829071164,
      "loss": 1.0436,
      "step": 1420
    },
    {
      "epoch": 0.4387484124235077,
      "grad_norm": 0.076768877519892,
      "learning_rate": 0.00013784578825324885,
      "loss": 1.0098,
      "step": 1425
    },
    {
      "epoch": 0.44028788053727436,
      "grad_norm": 0.07138945295655025,
      "learning_rate": 0.00013734765374130717,
      "loss": 1.0262,
      "step": 1430
    },
    {
      "epoch": 0.44182734865104106,
      "grad_norm": 0.07543031342117723,
      "learning_rate": 0.00013684843993077788,
      "loss": 1.0124,
      "step": 1435
    },
    {
      "epoch": 0.44336681676480777,
      "grad_norm": 0.07202890380872994,
      "learning_rate": 0.00013634816124829063,
      "loss": 1.0183,
      "step": 1440
    },
    {
      "epoch": 0.4449062848785745,
      "grad_norm": 0.06782846978809214,
      "learning_rate": 0.0001358468321512481,
      "loss": 1.0552,
      "step": 1445
    },
    {
      "epoch": 0.4464457529923411,
      "grad_norm": 0.07176237407539487,
      "learning_rate": 0.00013534446712740877,
      "loss": 1.025,
      "step": 1450
    },
    {
      "epoch": 0.44798522110610783,
      "grad_norm": 0.07601715009456655,
      "learning_rate": 0.0001348410806944681,
      "loss": 1.0153,
      "step": 1455
    },
    {
      "epoch": 0.44952468921987454,
      "grad_norm": 0.0761359028970367,
      "learning_rate": 0.00013433668739963882,
      "loss": 1.0244,
      "step": 1460
    },
    {
      "epoch": 0.45106415733364125,
      "grad_norm": 0.07165181931814346,
      "learning_rate": 0.00013383130181923071,
      "loss": 1.0237,
      "step": 1465
    },
    {
      "epoch": 0.4526036254474079,
      "grad_norm": 0.07219427827224394,
      "learning_rate": 0.00013332493855822936,
      "loss": 1.0064,
      "step": 1470
    },
    {
      "epoch": 0.4541430935611746,
      "grad_norm": 0.07315240256522645,
      "learning_rate": 0.00013281761224987398,
      "loss": 1.0049,
      "step": 1475
    },
    {
      "epoch": 0.4556825616749413,
      "grad_norm": 0.07283831171004836,
      "learning_rate": 0.00013230933755523466,
      "loss": 1.028,
      "step": 1480
    },
    {
      "epoch": 0.457222029788708,
      "grad_norm": 0.08277958377037488,
      "learning_rate": 0.00013180012916278854,
      "loss": 1.0402,
      "step": 1485
    },
    {
      "epoch": 0.45876149790247467,
      "grad_norm": 0.0732834274712129,
      "learning_rate": 0.00013129000178799548,
      "loss": 1.0366,
      "step": 1490
    },
    {
      "epoch": 0.4603009660162414,
      "grad_norm": 0.07270925231246442,
      "learning_rate": 0.00013077897017287272,
      "loss": 1.0006,
      "step": 1495
    },
    {
      "epoch": 0.4618404341300081,
      "grad_norm": 0.07601545515982518,
      "learning_rate": 0.00013026704908556888,
      "loss": 1.0555,
      "step": 1500
    },
    {
      "epoch": 0.4633799022437748,
      "grad_norm": 0.0775749000511019,
      "learning_rate": 0.0001297542533199371,
      "loss": 1.0409,
      "step": 1505
    },
    {
      "epoch": 0.4649193703575415,
      "grad_norm": 0.07378648062711159,
      "learning_rate": 0.00012924059769510768,
      "loss": 1.0314,
      "step": 1510
    },
    {
      "epoch": 0.46645883847130815,
      "grad_norm": 0.07321573368492998,
      "learning_rate": 0.00012872609705505964,
      "loss": 1.0502,
      "step": 1515
    },
    {
      "epoch": 0.46799830658507485,
      "grad_norm": 0.07930733821420928,
      "learning_rate": 0.00012821076626819196,
      "loss": 1.0414,
      "step": 1520
    },
    {
      "epoch": 0.46953777469884156,
      "grad_norm": 0.07511260278964532,
      "learning_rate": 0.00012769462022689363,
      "loss": 1.0205,
      "step": 1525
    },
    {
      "epoch": 0.47107724281260827,
      "grad_norm": 0.06779778370699593,
      "learning_rate": 0.0001271776738471136,
      "loss": 1.0274,
      "step": 1530
    },
    {
      "epoch": 0.4726167109263749,
      "grad_norm": 0.07717323700425802,
      "learning_rate": 0.00012665994206792938,
      "loss": 1.0589,
      "step": 1535
    },
    {
      "epoch": 0.4741561790401416,
      "grad_norm": 0.06842508429114769,
      "learning_rate": 0.00012614143985111565,
      "loss": 1.0987,
      "step": 1540
    },
    {
      "epoch": 0.47569564715390833,
      "grad_norm": 0.07643849238067586,
      "learning_rate": 0.00012562218218071164,
      "loss": 1.0218,
      "step": 1545
    },
    {
      "epoch": 0.47723511526767504,
      "grad_norm": 0.07406016562514833,
      "learning_rate": 0.0001251021840625883,
      "loss": 1.0182,
      "step": 1550
    },
    {
      "epoch": 0.4787745833814417,
      "grad_norm": 0.07954321360596633,
      "learning_rate": 0.00012458146052401442,
      "loss": 1.0283,
      "step": 1555
    },
    {
      "epoch": 0.4803140514952084,
      "grad_norm": 0.07374926807557698,
      "learning_rate": 0.00012406002661322264,
      "loss": 1.0165,
      "step": 1560
    },
    {
      "epoch": 0.4818535196089751,
      "grad_norm": 0.07376676091481264,
      "learning_rate": 0.00012353789739897437,
      "loss": 1.0503,
      "step": 1565
    },
    {
      "epoch": 0.4833929877227418,
      "grad_norm": 0.07439474348790363,
      "learning_rate": 0.00012301508797012432,
      "loss": 1.0292,
      "step": 1570
    },
    {
      "epoch": 0.4849324558365085,
      "grad_norm": 0.07661999249880341,
      "learning_rate": 0.00012249161343518466,
      "loss": 1.0111,
      "step": 1575
    },
    {
      "epoch": 0.48647192395027516,
      "grad_norm": 0.07208564187421422,
      "learning_rate": 0.00012196748892188816,
      "loss": 1.0441,
      "step": 1580
    },
    {
      "epoch": 0.48801139206404187,
      "grad_norm": 0.07849547483606649,
      "learning_rate": 0.00012144272957675108,
      "loss": 1.0235,
      "step": 1585
    },
    {
      "epoch": 0.4895508601778086,
      "grad_norm": 0.07505211623162304,
      "learning_rate": 0.00012091735056463562,
      "loss": 1.0032,
      "step": 1590
    },
    {
      "epoch": 0.4910903282915753,
      "grad_norm": 0.08312558481401704,
      "learning_rate": 0.00012039136706831145,
      "loss": 1.059,
      "step": 1595
    },
    {
      "epoch": 0.49262979640534194,
      "grad_norm": 0.07454335234650318,
      "learning_rate": 0.00011986479428801709,
      "loss": 1.0362,
      "step": 1600
    },
    {
      "epoch": 0.49262979640534194,
      "eval_loss": 1.0269535779953003,
      "eval_runtime": 3800.036,
      "eval_samples_per_second": 6.081,
      "eval_steps_per_second": 0.38,
      "step": 1600
    },
    {
      "epoch": 0.49416926451910864,
      "grad_norm": 0.07456362557265384,
      "learning_rate": 0.00011933764744102058,
      "loss": 1.016,
      "step": 1605
    },
    {
      "epoch": 0.49570873263287535,
      "grad_norm": 0.08008662438079932,
      "learning_rate": 0.00011880994176117976,
      "loss": 1.0392,
      "step": 1610
    },
    {
      "epoch": 0.49724820074664206,
      "grad_norm": 0.07176749751196013,
      "learning_rate": 0.00011828169249850201,
      "loss": 1.0392,
      "step": 1615
    },
    {
      "epoch": 0.4987876688604087,
      "grad_norm": 0.07655608798136061,
      "learning_rate": 0.00011775291491870351,
      "loss": 1.0212,
      "step": 1620
    },
    {
      "epoch": 0.5003271369741754,
      "grad_norm": 0.07786227626103659,
      "learning_rate": 0.00011722362430276816,
      "loss": 1.03,
      "step": 1625
    },
    {
      "epoch": 0.5018666050879421,
      "grad_norm": 0.07799113973393568,
      "learning_rate": 0.00011669383594650593,
      "loss": 1.0589,
      "step": 1630
    },
    {
      "epoch": 0.5034060732017088,
      "grad_norm": 0.06547994125184468,
      "learning_rate": 0.00011616356516011083,
      "loss": 1.0084,
      "step": 1635
    },
    {
      "epoch": 0.5049455413154755,
      "grad_norm": 0.07784862670275924,
      "learning_rate": 0.00011563282726771847,
      "loss": 1.0449,
      "step": 1640
    },
    {
      "epoch": 0.5064850094292422,
      "grad_norm": 0.0771399540024009,
      "learning_rate": 0.0001151016376069632,
      "loss": 1.0634,
      "step": 1645
    },
    {
      "epoch": 0.508024477543009,
      "grad_norm": 0.07334720494239291,
      "learning_rate": 0.00011457001152853493,
      "loss": 1.0142,
      "step": 1650
    },
    {
      "epoch": 0.5095639456567755,
      "grad_norm": 0.07439128068501075,
      "learning_rate": 0.00011403796439573544,
      "loss": 1.0309,
      "step": 1655
    },
    {
      "epoch": 0.5111034137705422,
      "grad_norm": 0.0708288260968639,
      "learning_rate": 0.00011350551158403442,
      "loss": 1.0531,
      "step": 1660
    },
    {
      "epoch": 0.512642881884309,
      "grad_norm": 0.06763171945470464,
      "learning_rate": 0.0001129726684806252,
      "loss": 1.0086,
      "step": 1665
    },
    {
      "epoch": 0.5141823499980757,
      "grad_norm": 0.07768921401375369,
      "learning_rate": 0.00011243945048398003,
      "loss": 1.0148,
      "step": 1670
    },
    {
      "epoch": 0.5157218181118424,
      "grad_norm": 0.06896327791840266,
      "learning_rate": 0.000111905873003405,
      "loss": 1.0261,
      "step": 1675
    },
    {
      "epoch": 0.5172612862256091,
      "grad_norm": 0.07842199537412599,
      "learning_rate": 0.00011137195145859494,
      "loss": 0.999,
      "step": 1680
    },
    {
      "epoch": 0.5188007543393758,
      "grad_norm": 0.06865343546929636,
      "learning_rate": 0.00011083770127918762,
      "loss": 0.9982,
      "step": 1685
    },
    {
      "epoch": 0.5203402224531425,
      "grad_norm": 0.08103281697737574,
      "learning_rate": 0.00011030313790431788,
      "loss": 1.042,
      "step": 1690
    },
    {
      "epoch": 0.5218796905669091,
      "grad_norm": 0.07974961051333619,
      "learning_rate": 0.00010976827678217161,
      "loss": 1.0039,
      "step": 1695
    },
    {
      "epoch": 0.5234191586806758,
      "grad_norm": 0.06625737159764002,
      "learning_rate": 0.00010923313336953913,
      "loss": 1.0115,
      "step": 1700
    },
    {
      "epoch": 0.5249586267944425,
      "grad_norm": 0.07000163587644152,
      "learning_rate": 0.00010869772313136861,
      "loss": 1.0223,
      "step": 1705
    },
    {
      "epoch": 0.5264980949082092,
      "grad_norm": 0.06839426065828255,
      "learning_rate": 0.00010816206154031916,
      "loss": 1.0088,
      "step": 1710
    },
    {
      "epoch": 0.5280375630219759,
      "grad_norm": 0.07949491269796151,
      "learning_rate": 0.00010762616407631356,
      "loss": 1.071,
      "step": 1715
    },
    {
      "epoch": 0.5295770311357426,
      "grad_norm": 0.07557511886462906,
      "learning_rate": 0.00010709004622609116,
      "loss": 1.0676,
      "step": 1720
    },
    {
      "epoch": 0.5311164992495093,
      "grad_norm": 0.08195884945191133,
      "learning_rate": 0.00010655372348276006,
      "loss": 1.0198,
      "step": 1725
    },
    {
      "epoch": 0.532655967363276,
      "grad_norm": 0.0781242359342465,
      "learning_rate": 0.00010601721134534959,
      "loss": 1.0314,
      "step": 1730
    },
    {
      "epoch": 0.5341954354770427,
      "grad_norm": 0.07628144377233338,
      "learning_rate": 0.00010548052531836223,
      "loss": 1.0299,
      "step": 1735
    },
    {
      "epoch": 0.5357349035908093,
      "grad_norm": 0.07983920659956817,
      "learning_rate": 0.00010494368091132576,
      "loss": 1.0317,
      "step": 1740
    },
    {
      "epoch": 0.537274371704576,
      "grad_norm": 0.07418551402340584,
      "learning_rate": 0.00010440669363834483,
      "loss": 1.0129,
      "step": 1745
    },
    {
      "epoch": 0.5388138398183427,
      "grad_norm": 0.07002417164492032,
      "learning_rate": 0.00010386957901765277,
      "loss": 1.0278,
      "step": 1750
    },
    {
      "epoch": 0.5403533079321095,
      "grad_norm": 0.0707377171946109,
      "learning_rate": 0.00010333235257116313,
      "loss": 0.9727,
      "step": 1755
    },
    {
      "epoch": 0.5418927760458762,
      "grad_norm": 0.0737915692626489,
      "learning_rate": 0.00010279502982402103,
      "loss": 1.0433,
      "step": 1760
    },
    {
      "epoch": 0.5434322441596429,
      "grad_norm": 0.07512990163556856,
      "learning_rate": 0.00010225762630415457,
      "loss": 1.0111,
      "step": 1765
    },
    {
      "epoch": 0.5449717122734096,
      "grad_norm": 0.0753662165245646,
|
"learning_rate": 0.00010172015754182607, |
|
"loss": 1.037, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.5465111803871763, |
|
"grad_norm": 0.1349186814580228, |
|
"learning_rate": 0.00010118263906918331, |
|
"loss": 1.0381, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 0.5480506485009429, |
|
"grad_norm": 0.07557478098317172, |
|
"learning_rate": 0.00010064508641981054, |
|
"loss": 0.9955, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.5495901166147096, |
|
"grad_norm": 0.07668998832423247, |
|
"learning_rate": 0.0001001075151282798, |
|
"loss": 1.051, |
|
"step": 1785 |
|
}, |
|
{ |
|
"epoch": 0.5511295847284763, |
|
"grad_norm": 0.07585620860956059, |
|
"learning_rate": 9.956994072970179e-05, |
|
"loss": 1.0272, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.552669052842243, |
|
"grad_norm": 0.07008056728318604, |
|
"learning_rate": 9.903237875927698e-05, |
|
"loss": 1.0653, |
|
"step": 1795 |
|
}, |
|
{ |
|
"epoch": 0.5542085209560097, |
|
"grad_norm": 0.07151388140558161, |
|
"learning_rate": 9.849484475184672e-05, |
|
"loss": 1.0155, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.5542085209560097, |
|
"eval_loss": 1.0255825519561768, |
|
"eval_runtime": 3798.3842, |
|
"eval_samples_per_second": 6.084, |
|
"eval_steps_per_second": 0.38, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.5557479890697764, |
|
"grad_norm": 0.08081651371496165, |
|
"learning_rate": 9.795735424144428e-05, |
|
"loss": 1.0102, |
|
"step": 1805 |
|
}, |
|
{ |
|
"epoch": 0.5572874571835431, |
|
"grad_norm": 0.11914760104172627, |
|
"learning_rate": 9.74199227608459e-05, |
|
"loss": 1.0316, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.5588269252973098, |
|
"grad_norm": 0.07607697309485299, |
|
"learning_rate": 9.688256584112192e-05, |
|
"loss": 1.0158, |
|
"step": 1815 |
|
}, |
|
{ |
|
"epoch": 0.5603663934110765, |
|
"grad_norm": 0.07962971403841683, |
|
"learning_rate": 9.634529901118799e-05, |
|
"loss": 1.0243, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.5619058615248431, |
|
"grad_norm": 0.0715309251551143, |
|
"learning_rate": 9.580813779735624e-05, |
|
"loss": 1.0354, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 0.5634453296386098, |
|
"grad_norm": 0.0752110141233035, |
|
"learning_rate": 9.52710977228867e-05, |
|
"loss": 1.0291, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.5649847977523765, |
|
"grad_norm": 0.07768363081585787, |
|
"learning_rate": 9.473419430753864e-05, |
|
"loss": 0.9735, |
|
"step": 1835 |
|
}, |
|
{ |
|
"epoch": 0.5665242658661432, |
|
"grad_norm": 0.07642437297948991, |
|
"learning_rate": 9.419744306712197e-05, |
|
"loss": 1.0035, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.56806373397991, |
|
"grad_norm": 0.0725535618760288, |
|
"learning_rate": 9.3660859513049e-05, |
|
"loss": 1.0624, |
|
"step": 1845 |
|
}, |
|
{ |
|
"epoch": 0.5696032020936767, |
|
"grad_norm": 0.075838714661654, |
|
"learning_rate": 9.312445915188609e-05, |
|
"loss": 1.0273, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.5711426702074434, |
|
"grad_norm": 0.07494133610203221, |
|
"learning_rate": 9.258825748490558e-05, |
|
"loss": 1.043, |
|
"step": 1855 |
|
}, |
|
{ |
|
"epoch": 0.5726821383212101, |
|
"grad_norm": 0.0774699889487116, |
|
"learning_rate": 9.205227000763788e-05, |
|
"loss": 1.0386, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.5742216064349767, |
|
"grad_norm": 0.0767041878914581, |
|
"learning_rate": 9.151651220942349e-05, |
|
"loss": 1.0475, |
|
"step": 1865 |
|
}, |
|
{ |
|
"epoch": 0.5757610745487434, |
|
"grad_norm": 0.07367320031783889, |
|
"learning_rate": 9.098099957296552e-05, |
|
"loss": 1.0356, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.5773005426625101, |
|
"grad_norm": 0.07237560796588688, |
|
"learning_rate": 9.044574757388224e-05, |
|
"loss": 1.0291, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.5788400107762768, |
|
"grad_norm": 0.0733597412062657, |
|
"learning_rate": 8.991077168025976e-05, |
|
"loss": 1.0289, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.5803794788900435, |
|
"grad_norm": 0.0781082222836356, |
|
"learning_rate": 8.937608735220527e-05, |
|
"loss": 1.0411, |
|
"step": 1885 |
|
}, |
|
{ |
|
"epoch": 0.5819189470038102, |
|
"grad_norm": 0.0787222190348577, |
|
"learning_rate": 8.884171004139996e-05, |
|
"loss": 1.0176, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.5834584151175769, |
|
"grad_norm": 0.07114468470409495, |
|
"learning_rate": 8.830765519065262e-05, |
|
"loss": 0.9838, |
|
"step": 1895 |
|
}, |
|
{ |
|
"epoch": 0.5849978832313436, |
|
"grad_norm": 0.08086984212119945, |
|
"learning_rate": 8.777393823345343e-05, |
|
"loss": 1.0438, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.5865373513451103, |
|
"grad_norm": 0.0703832911854875, |
|
"learning_rate": 8.724057459352784e-05, |
|
"loss": 0.9889, |
|
"step": 1905 |
|
}, |
|
{ |
|
"epoch": 0.5880768194588769, |
|
"grad_norm": 0.06956832590218136, |
|
"learning_rate": 8.670757968439086e-05, |
|
"loss": 1.0573, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.5896162875726436, |
|
"grad_norm": 0.07709995404199901, |
|
"learning_rate": 8.617496890890179e-05, |
|
"loss": 1.0277, |
|
"step": 1915 |
|
}, |
|
{ |
|
"epoch": 0.5911557556864103, |
|
"grad_norm": 0.07724046895867277, |
|
"learning_rate": 8.564275765881887e-05, |
|
"loss": 1.0349, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.592695223800177, |
|
"grad_norm": 0.0743764979962109, |
|
"learning_rate": 8.511096131435454e-05, |
|
"loss": 1.0117, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 0.5942346919139437, |
|
"grad_norm": 0.07389820884013973, |
|
"learning_rate": 8.457959524373109e-05, |
|
"loss": 1.025, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.5957741600277104, |
|
"grad_norm": 0.07448602446253362, |
|
"learning_rate": 8.404867480273636e-05, |
|
"loss": 1.0524, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 0.5973136281414771, |
|
"grad_norm": 0.07016834564183058, |
|
"learning_rate": 8.351821533428023e-05, |
|
"loss": 1.0253, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.5988530962552439, |
|
"grad_norm": 0.07324470240300807, |
|
"learning_rate": 8.298823216795093e-05, |
|
"loss": 1.0454, |
|
"step": 1945 |
|
}, |
|
{ |
|
"epoch": 0.6003925643690106, |
|
"grad_norm": 0.07179143590884257, |
|
"learning_rate": 8.245874061957224e-05, |
|
"loss": 1.0349, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.6019320324827772, |
|
"grad_norm": 0.07332238543813964, |
|
"learning_rate": 8.192975599076078e-05, |
|
"loss": 1.0112, |
|
"step": 1955 |
|
}, |
|
{ |
|
"epoch": 0.6034715005965439, |
|
"grad_norm": 0.06832929405392676, |
|
"learning_rate": 8.140129356848387e-05, |
|
"loss": 1.0159, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.6050109687103106, |
|
"grad_norm": 0.07163605823180465, |
|
"learning_rate": 8.087336862461783e-05, |
|
"loss": 1.0064, |
|
"step": 1965 |
|
}, |
|
{ |
|
"epoch": 0.6065504368240773, |
|
"grad_norm": 0.07827638868021947, |
|
"learning_rate": 8.034599641550642e-05, |
|
"loss": 1.0431, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.608089904937844, |
|
"grad_norm": 0.07793183715870357, |
|
"learning_rate": 7.981919218152016e-05, |
|
"loss": 0.9968, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 0.6096293730516107, |
|
"grad_norm": 0.07306876363104758, |
|
"learning_rate": 7.929297114661581e-05, |
|
"loss": 1.0114, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.6111688411653774, |
|
"grad_norm": 0.07438965485093788, |
|
"learning_rate": 7.876734851789643e-05, |
|
"loss": 1.042, |
|
"step": 1985 |
|
}, |
|
{ |
|
"epoch": 0.6127083092791441, |
|
"grad_norm": 0.07813162760393345, |
|
"learning_rate": 7.824233948517185e-05, |
|
"loss": 1.0437, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.6142477773929107, |
|
"grad_norm": 0.07126718370690863, |
|
"learning_rate": 7.771795922051999e-05, |
|
"loss": 1.0444, |
|
"step": 1995 |
|
}, |
|
{ |
|
"epoch": 0.6157872455066774, |
|
"grad_norm": 0.0756263745043903, |
|
"learning_rate": 7.719422287784798e-05, |
|
"loss": 1.0138, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.6157872455066774, |
|
"eval_loss": 1.02396821975708, |
|
"eval_runtime": 3800.6654, |
|
"eval_samples_per_second": 6.08, |
|
"eval_steps_per_second": 0.38, |
|
"step": 2000 |
|
} |
|
], |
  "logging_steps": 5,
  "max_steps": 3247,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 2.3416021014544384e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}
|