|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9997305930800908, |
|
"eval_steps": 500, |
|
"global_step": 3247, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0003078936227533387, |
|
"grad_norm": 42.473334342863225, |
|
"learning_rate": 6.153846153846154e-07, |
|
"loss": 1.3715, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0015394681137666935, |
|
"grad_norm": 39.584233835268385, |
|
"learning_rate": 3.0769230769230774e-06, |
|
"loss": 1.3252, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.003078936227533387, |
|
"grad_norm": 38.124176994124966, |
|
"learning_rate": 6.153846153846155e-06, |
|
"loss": 1.3472, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.004618404341300081, |
|
"grad_norm": 33.949332930378546, |
|
"learning_rate": 9.230769230769232e-06, |
|
"loss": 1.3191, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.006157872455066774, |
|
"grad_norm": 13.263339730204837, |
|
"learning_rate": 1.230769230769231e-05, |
|
"loss": 1.3273, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.007697340568833468, |
|
"grad_norm": 10.03432034420483, |
|
"learning_rate": 1.5384615384615387e-05, |
|
"loss": 1.2756, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.009236808682600161, |
|
"grad_norm": 10.404219206269893, |
|
"learning_rate": 1.8461538461538465e-05, |
|
"loss": 1.2, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.010776276796366856, |
|
"grad_norm": 4.904063566508077, |
|
"learning_rate": 2.1538461538461542e-05, |
|
"loss": 1.2194, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.012315744910133548, |
|
"grad_norm": 2.625318230724936, |
|
"learning_rate": 2.461538461538462e-05, |
|
"loss": 1.1508, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.013855213023900243, |
|
"grad_norm": 1.554813766822268, |
|
"learning_rate": 2.7692307692307694e-05, |
|
"loss": 1.1786, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.015394681137666935, |
|
"grad_norm": 1.1809148162862095, |
|
"learning_rate": 3.0769230769230774e-05, |
|
"loss": 1.1277, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01693414925143363, |
|
"grad_norm": 1.203464354806506, |
|
"learning_rate": 3.384615384615385e-05, |
|
"loss": 1.1463, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.018473617365200323, |
|
"grad_norm": 1.1383524365220588, |
|
"learning_rate": 3.692307692307693e-05, |
|
"loss": 1.1188, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.020013085478967015, |
|
"grad_norm": 0.9857720112400227, |
|
"learning_rate": 4e-05, |
|
"loss": 1.136, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.02155255359273371, |
|
"grad_norm": 0.8036894302534616, |
|
"learning_rate": 4.3076923076923084e-05, |
|
"loss": 1.0966, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.023092021706500404, |
|
"grad_norm": 1.3013628925636782, |
|
"learning_rate": 4.615384615384616e-05, |
|
"loss": 1.092, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.024631489820267097, |
|
"grad_norm": 1.2268436983757836, |
|
"learning_rate": 4.923076923076924e-05, |
|
"loss": 1.1211, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.026170957934033793, |
|
"grad_norm": 1.1230597630357015, |
|
"learning_rate": 5.230769230769231e-05, |
|
"loss": 1.1099, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.027710426047800486, |
|
"grad_norm": 1.1223875634038776, |
|
"learning_rate": 5.538461538461539e-05, |
|
"loss": 1.051, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.02924989416156718, |
|
"grad_norm": 1.3366126407920835, |
|
"learning_rate": 5.846153846153847e-05, |
|
"loss": 1.0889, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.03078936227533387, |
|
"grad_norm": 0.8876804012745447, |
|
"learning_rate": 6.153846153846155e-05, |
|
"loss": 1.0635, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03232883038910057, |
|
"grad_norm": 0.985567117532096, |
|
"learning_rate": 6.461538461538462e-05, |
|
"loss": 1.0729, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.03386829850286726, |
|
"grad_norm": 1.3857701900950476, |
|
"learning_rate": 6.76923076923077e-05, |
|
"loss": 1.0687, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.03540776661663395, |
|
"grad_norm": 1.0307077006740535, |
|
"learning_rate": 7.076923076923078e-05, |
|
"loss": 1.0756, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.036947234730400645, |
|
"grad_norm": 0.915309617477138, |
|
"learning_rate": 7.384615384615386e-05, |
|
"loss": 1.0578, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.03848670284416734, |
|
"grad_norm": 1.1416205921187619, |
|
"learning_rate": 7.692307692307693e-05, |
|
"loss": 1.0722, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.04002617095793403, |
|
"grad_norm": 0.7855309627413886, |
|
"learning_rate": 8e-05, |
|
"loss": 1.1091, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.04156563907170073, |
|
"grad_norm": 1.027162314274905, |
|
"learning_rate": 8.307692307692309e-05, |
|
"loss": 1.0632, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.04310510718546742, |
|
"grad_norm": 1.1386693099418572, |
|
"learning_rate": 8.615384615384617e-05, |
|
"loss": 1.0736, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.044644575299234115, |
|
"grad_norm": 0.7717462620986457, |
|
"learning_rate": 8.923076923076924e-05, |
|
"loss": 1.0523, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.04618404341300081, |
|
"grad_norm": 0.7657994075849279, |
|
"learning_rate": 9.230769230769232e-05, |
|
"loss": 1.0557, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.0477235115267675, |
|
"grad_norm": 1.1680871949004308, |
|
"learning_rate": 9.53846153846154e-05, |
|
"loss": 1.0548, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.049262979640534194, |
|
"grad_norm": 0.8996216400147768, |
|
"learning_rate": 9.846153846153848e-05, |
|
"loss": 1.0665, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.050802447754300886, |
|
"grad_norm": 0.700328592160364, |
|
"learning_rate": 0.00010153846153846153, |
|
"loss": 1.0496, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.052341915868067586, |
|
"grad_norm": 1.0558734188354055, |
|
"learning_rate": 0.00010461538461538463, |
|
"loss": 1.065, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.05388138398183428, |
|
"grad_norm": 0.8578829636967044, |
|
"learning_rate": 0.0001076923076923077, |
|
"loss": 1.0923, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.05542085209560097, |
|
"grad_norm": 0.8831349656869951, |
|
"learning_rate": 0.00011076923076923077, |
|
"loss": 1.0259, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.056960320209367664, |
|
"grad_norm": 0.9573775567280769, |
|
"learning_rate": 0.00011384615384615384, |
|
"loss": 1.0661, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.05849978832313436, |
|
"grad_norm": 0.9495008375470986, |
|
"learning_rate": 0.00011692307692307694, |
|
"loss": 1.0811, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.06003925643690105, |
|
"grad_norm": 1.1061782824597675, |
|
"learning_rate": 0.00012, |
|
"loss": 1.0466, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.06157872455066774, |
|
"grad_norm": 1.1202640249643756, |
|
"learning_rate": 0.0001230769230769231, |
|
"loss": 1.0589, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.06311819266443444, |
|
"grad_norm": 1.0125076015545247, |
|
"learning_rate": 0.00012615384615384615, |
|
"loss": 1.0407, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.06465766077820113, |
|
"grad_norm": 1.0060078228523262, |
|
"learning_rate": 0.00012923076923076923, |
|
"loss": 1.0688, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.06619712889196783, |
|
"grad_norm": 0.7760894033200619, |
|
"learning_rate": 0.0001323076923076923, |
|
"loss": 1.0549, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.06773659700573452, |
|
"grad_norm": 0.8990085149589899, |
|
"learning_rate": 0.0001353846153846154, |
|
"loss": 1.0537, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.06927606511950121, |
|
"grad_norm": 0.8549275834093875, |
|
"learning_rate": 0.00013846153846153847, |
|
"loss": 1.0251, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.0708155332332679, |
|
"grad_norm": 0.8075081750437678, |
|
"learning_rate": 0.00014153846153846156, |
|
"loss": 1.0443, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.0723550013470346, |
|
"grad_norm": 0.9259388698536392, |
|
"learning_rate": 0.0001446153846153846, |
|
"loss": 1.0134, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.07389446946080129, |
|
"grad_norm": 1.7409427946151614, |
|
"learning_rate": 0.00014769230769230772, |
|
"loss": 1.073, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.07543393757456798, |
|
"grad_norm": 0.9864593980213632, |
|
"learning_rate": 0.00015076923076923077, |
|
"loss": 1.0735, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.07697340568833468, |
|
"grad_norm": 0.7479355733124677, |
|
"learning_rate": 0.00015384615384615385, |
|
"loss": 1.0642, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.07851287380210137, |
|
"grad_norm": 0.9485409237720248, |
|
"learning_rate": 0.00015692307692307693, |
|
"loss": 1.0, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.08005234191586806, |
|
"grad_norm": 1.0842298527942793, |
|
"learning_rate": 0.00016, |
|
"loss": 1.0726, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.08159181002963477, |
|
"grad_norm": 1.4490484618864288, |
|
"learning_rate": 0.0001630769230769231, |
|
"loss": 1.0506, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.08313127814340146, |
|
"grad_norm": 0.7322203679384404, |
|
"learning_rate": 0.00016615384615384617, |
|
"loss": 1.0934, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.08467074625716815, |
|
"grad_norm": 1.0463212758166829, |
|
"learning_rate": 0.00016923076923076923, |
|
"loss": 1.0414, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.08621021437093485, |
|
"grad_norm": 0.7534147115742311, |
|
"learning_rate": 0.00017230769230769234, |
|
"loss": 1.0582, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.08774968248470154, |
|
"grad_norm": 0.8938192794309777, |
|
"learning_rate": 0.0001753846153846154, |
|
"loss": 1.0676, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.08928915059846823, |
|
"grad_norm": 0.7350477942859435, |
|
"learning_rate": 0.00017846153846153847, |
|
"loss": 1.0727, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.09082861871223492, |
|
"grad_norm": 0.7547431024386564, |
|
"learning_rate": 0.00018153846153846155, |
|
"loss": 1.069, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.09236808682600162, |
|
"grad_norm": 1.0017027549794293, |
|
"learning_rate": 0.00018461538461538463, |
|
"loss": 1.0586, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.09390755493976831, |
|
"grad_norm": 0.7906132895743292, |
|
"learning_rate": 0.0001876923076923077, |
|
"loss": 1.0546, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.095447023053535, |
|
"grad_norm": 1.401442713529287, |
|
"learning_rate": 0.0001907692307692308, |
|
"loss": 1.0698, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.0969864911673017, |
|
"grad_norm": 0.5922407946201297, |
|
"learning_rate": 0.00019384615384615385, |
|
"loss": 1.0901, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.09852595928106839, |
|
"grad_norm": 0.6300597452052293, |
|
"learning_rate": 0.00019692307692307696, |
|
"loss": 1.0429, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.10006542739483508, |
|
"grad_norm": 0.8806436669088872, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0552, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.10160489550860177, |
|
"grad_norm": 1.0983869005820532, |
|
"learning_rate": 0.00019999855506507185, |
|
"loss": 1.0851, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.10314436362236847, |
|
"grad_norm": 0.9815804026010979, |
|
"learning_rate": 0.00019999422030204418, |
|
"loss": 1.0673, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.10468383173613517, |
|
"grad_norm": 0.7301517719864844, |
|
"learning_rate": 0.00019998699583618593, |
|
"loss": 1.0516, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.10622329984990186, |
|
"grad_norm": 0.7786167269205654, |
|
"learning_rate": 0.00019997688187627482, |
|
"loss": 1.064, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.10776276796366856, |
|
"grad_norm": 0.7511697290569181, |
|
"learning_rate": 0.0001999638787145911, |
|
"loss": 1.0836, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.10930223607743525, |
|
"grad_norm": 0.8111968485885569, |
|
"learning_rate": 0.0001999479867269092, |
|
"loss": 1.0681, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.11084170419120194, |
|
"grad_norm": 1.1816993244319927, |
|
"learning_rate": 0.00019992920637248697, |
|
"loss": 1.0571, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.11238117230496864, |
|
"grad_norm": 0.7021483137095024, |
|
"learning_rate": 0.00019990753819405213, |
|
"loss": 1.0366, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.11392064041873533, |
|
"grad_norm": 0.786152904921158, |
|
"learning_rate": 0.00019988298281778684, |
|
"loss": 1.0745, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.11546010853250202, |
|
"grad_norm": 0.8625505891808654, |
|
"learning_rate": 0.00019985554095330955, |
|
"loss": 1.0309, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.11699957664626871, |
|
"grad_norm": 0.7101475350198745, |
|
"learning_rate": 0.0001998252133936544, |
|
"loss": 1.0623, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.1185390447600354, |
|
"grad_norm": 0.7943463609732394, |
|
"learning_rate": 0.00019979200101524845, |
|
"loss": 1.0798, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.1200785128738021, |
|
"grad_norm": 1.1298857584764006, |
|
"learning_rate": 0.00019975590477788613, |
|
"loss": 1.0619, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.12161798098756879, |
|
"grad_norm": 0.6519227994726357, |
|
"learning_rate": 0.0001997169257247018, |
|
"loss": 1.0198, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.12315744910133548, |
|
"grad_norm": 0.7712100700155368, |
|
"learning_rate": 0.00019967506498213931, |
|
"loss": 1.08, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.12469691721510218, |
|
"grad_norm": 0.785301798573464, |
|
"learning_rate": 0.00019963032375991966, |
|
"loss": 1.0889, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.12623638532886888, |
|
"grad_norm": 0.7950130962836747, |
|
"learning_rate": 0.00019958270335100595, |
|
"loss": 1.0349, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.12777585344263556, |
|
"grad_norm": 0.6411989099588487, |
|
"learning_rate": 0.00019953220513156602, |
|
"loss": 1.1014, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.12931532155640227, |
|
"grad_norm": 0.6881556293517902, |
|
"learning_rate": 0.0001994788305609327, |
|
"loss": 1.0433, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.13085478967016895, |
|
"grad_norm": 0.8822396645970323, |
|
"learning_rate": 0.00019942258118156163, |
|
"loss": 1.0555, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.13239425778393565, |
|
"grad_norm": 0.8566501292249419, |
|
"learning_rate": 0.00019936345861898663, |
|
"loss": 1.0791, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.13393372589770233, |
|
"grad_norm": 0.6000374004935582, |
|
"learning_rate": 0.0001993014645817728, |
|
"loss": 1.0644, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.13547319401146904, |
|
"grad_norm": 0.7094811645126697, |
|
"learning_rate": 0.00019923660086146723, |
|
"loss": 1.0588, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.13701266212523572, |
|
"grad_norm": 0.7299783423452573, |
|
"learning_rate": 0.0001991688693325469, |
|
"loss": 1.0606, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.13855213023900242, |
|
"grad_norm": 0.7793849224097605, |
|
"learning_rate": 0.00019909827195236493, |
|
"loss": 1.0535, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.14009159835276913, |
|
"grad_norm": 0.8123995612581718, |
|
"learning_rate": 0.00019902481076109372, |
|
"loss": 1.0903, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.1416310664665358, |
|
"grad_norm": 0.8269490106548164, |
|
"learning_rate": 0.00019894848788166604, |
|
"loss": 1.0682, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.14317053458030252, |
|
"grad_norm": 0.6945769324601003, |
|
"learning_rate": 0.00019886930551971387, |
|
"loss": 1.0918, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.1447100026940692, |
|
"grad_norm": 1.0743052082703004, |
|
"learning_rate": 0.0001987872659635043, |
|
"loss": 1.0512, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.1462494708078359, |
|
"grad_norm": 0.7274051186012023, |
|
"learning_rate": 0.00019870237158387384, |
|
"loss": 1.0609, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.14778893892160258, |
|
"grad_norm": 0.8436743181685329, |
|
"learning_rate": 0.00019861462483415952, |
|
"loss": 1.0534, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.1493284070353693, |
|
"grad_norm": 0.5539587236696554, |
|
"learning_rate": 0.0001985240282501282, |
|
"loss": 1.0562, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.15086787514913597, |
|
"grad_norm": 0.6673041088797687, |
|
"learning_rate": 0.0001984305844499033, |
|
"loss": 1.0343, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.15240734326290267, |
|
"grad_norm": 0.6860635549461462, |
|
"learning_rate": 0.00019833429613388902, |
|
"loss": 1.0602, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.15394681137666935, |
|
"grad_norm": 1.0779978346564654, |
|
"learning_rate": 0.0001982351660846924, |
|
"loss": 1.0539, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.15548627949043606, |
|
"grad_norm": 0.6737414679607289, |
|
"learning_rate": 0.00019813319716704278, |
|
"loss": 1.0559, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.15702574760420274, |
|
"grad_norm": 0.7252765201034201, |
|
"learning_rate": 0.00019802839232770921, |
|
"loss": 1.0986, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.15856521571796944, |
|
"grad_norm": 1.3250469121293895, |
|
"learning_rate": 0.00019792075459541518, |
|
"loss": 1.0327, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.16010468383173612, |
|
"grad_norm": 0.6408679533216792, |
|
"learning_rate": 0.00019781028708075102, |
|
"loss": 1.0415, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.16164415194550283, |
|
"grad_norm": 0.9808936518837947, |
|
"learning_rate": 0.00019769699297608417, |
|
"loss": 1.0386, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.16318362005926954, |
|
"grad_norm": 0.7892295118178237, |
|
"learning_rate": 0.00019758087555546682, |
|
"loss": 1.0866, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.16472308817303621, |
|
"grad_norm": 0.6508163256108649, |
|
"learning_rate": 0.0001974619381745413, |
|
"loss": 1.0804, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.16626255628680292, |
|
"grad_norm": 0.7619370139690409, |
|
"learning_rate": 0.00019734018427044307, |
|
"loss": 1.0471, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.1678020244005696, |
|
"grad_norm": 1.3124132163956248, |
|
"learning_rate": 0.0001972156173617016, |
|
"loss": 1.0517, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.1693414925143363, |
|
"grad_norm": 0.969795077947931, |
|
"learning_rate": 0.00019708824104813837, |
|
"loss": 1.0518, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.17088096062810298, |
|
"grad_norm": 0.7315912579640327, |
|
"learning_rate": 0.00019695805901076308, |
|
"loss": 1.0784, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.1724204287418697, |
|
"grad_norm": 0.7357250477683509, |
|
"learning_rate": 0.00019682507501166718, |
|
"loss": 1.1008, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.17395989685563637, |
|
"grad_norm": 0.96782095170125, |
|
"learning_rate": 0.00019668929289391523, |
|
"loss": 1.0444, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.17549936496940308, |
|
"grad_norm": 0.7600182365968516, |
|
"learning_rate": 0.00019655071658143366, |
|
"loss": 1.0625, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.17703883308316976, |
|
"grad_norm": 0.6161742360584987, |
|
"learning_rate": 0.00019640935007889755, |
|
"loss": 1.091, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.17857830119693646, |
|
"grad_norm": 0.5777577463068309, |
|
"learning_rate": 0.0001962651974716149, |
|
"loss": 1.072, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.18011776931070314, |
|
"grad_norm": 0.644094996294978, |
|
"learning_rate": 0.0001961182629254084, |
|
"loss": 1.0275, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.18165723742446985, |
|
"grad_norm": 0.8589876413667922, |
|
"learning_rate": 0.00019596855068649522, |
|
"loss": 1.0605, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.18319670553823653, |
|
"grad_norm": 0.7740247212495535, |
|
"learning_rate": 0.00019581606508136426, |
|
"loss": 1.0764, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.18473617365200323, |
|
"grad_norm": 0.6540083386801736, |
|
"learning_rate": 0.00019566081051665098, |
|
"loss": 1.0174, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.18627564176576994, |
|
"grad_norm": 0.7815089251045078, |
|
"learning_rate": 0.00019550279147901036, |
|
"loss": 1.0952, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.18781510987953662, |
|
"grad_norm": 0.6670730533500357, |
|
"learning_rate": 0.00019534201253498682, |
|
"loss": 1.0484, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.18935457799330332, |
|
"grad_norm": 0.7552112484078745, |
|
"learning_rate": 0.0001951784783308827, |
|
"loss": 1.0104, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.19089404610707, |
|
"grad_norm": 0.6343514261370218, |
|
"learning_rate": 0.0001950121935926236, |
|
"loss": 1.0525, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.1924335142208367, |
|
"grad_norm": 1.4473777235442007, |
|
"learning_rate": 0.00019484316312562205, |
|
"loss": 1.033, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.1939729823346034, |
|
"grad_norm": 0.5653781957358222, |
|
"learning_rate": 0.00019467139181463862, |
|
"loss": 1.078, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.1955124504483701, |
|
"grad_norm": 0.6315352985318332, |
|
"learning_rate": 0.00019449688462364056, |
|
"loss": 1.042, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.19705191856213677, |
|
"grad_norm": 0.8308310132176421, |
|
"learning_rate": 0.00019431964659565867, |
|
"loss": 1.0604, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.19859138667590348, |
|
"grad_norm": 0.5915623011666878, |
|
"learning_rate": 0.0001941396828526412, |
|
"loss": 1.058, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.20013085478967016, |
|
"grad_norm": 0.9623739180076736, |
|
"learning_rate": 0.00019395699859530623, |
|
"loss": 1.0314, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.20167032290343687, |
|
"grad_norm": 0.7770406923743924, |
|
"learning_rate": 0.00019377159910299093, |
|
"loss": 1.0418, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.20320979101720354, |
|
"grad_norm": 0.582853981181253, |
|
"learning_rate": 0.00019358348973349943, |
|
"loss": 1.0228, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.20474925913097025, |
|
"grad_norm": 0.6672404317903096, |
|
"learning_rate": 0.00019339267592294763, |
|
"loss": 1.0602, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.20628872724473693, |
|
"grad_norm": 0.7929111958910604, |
|
"learning_rate": 0.00019319916318560635, |
|
"loss": 1.0359, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.20782819535850364, |
|
"grad_norm": 0.6853640952957439, |
|
"learning_rate": 0.00019300295711374187, |
|
"loss": 1.0519, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.20936766347227034, |
|
"grad_norm": 0.6773550735079633, |
|
"learning_rate": 0.00019280406337745428, |
|
"loss": 1.0295, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.21090713158603702, |
|
"grad_norm": 0.7456356464144906, |
|
"learning_rate": 0.00019260248772451377, |
|
"loss": 1.0614, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.21244659969980373, |
|
"grad_norm": 0.7051829760237421, |
|
"learning_rate": 0.0001923982359801943, |
|
"loss": 1.0556, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.2139860678135704, |
|
"grad_norm": 0.6567867383663277, |
|
"learning_rate": 0.00019219131404710552, |
|
"loss": 1.0854, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.21552553592733711, |
|
"grad_norm": 0.6369210418492607, |
|
"learning_rate": 0.00019198172790502196, |
|
"loss": 1.0368, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.2170650040411038, |
|
"grad_norm": 0.6342900260792401, |
|
"learning_rate": 0.0001917694836107104, |
|
"loss": 1.0484, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.2186044721548705, |
|
"grad_norm": 0.5622570224326242, |
|
"learning_rate": 0.00019155458729775467, |
|
"loss": 1.0595, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.22014394026863718, |
|
"grad_norm": 0.809911467681193, |
|
"learning_rate": 0.0001913370451763786, |
|
"loss": 1.0278, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.22168340838240388, |
|
"grad_norm": 0.6267743698514823, |
|
"learning_rate": 0.00019111686353326631, |
|
"loss": 1.0309, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.22322287649617056, |
|
"grad_norm": 0.5838744010816593, |
|
"learning_rate": 0.00019089404873138082, |
|
"loss": 1.0637, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.22476234460993727, |
|
"grad_norm": 0.5837090263106021, |
|
"learning_rate": 0.00019066860720977986, |
|
"loss": 1.059, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.22630181272370395, |
|
"grad_norm": 0.7696380706231972, |
|
"learning_rate": 0.00019044054548343002, |
|
"loss": 1.0403, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.22784128083747066, |
|
"grad_norm": 0.5699826758505713, |
|
"learning_rate": 0.0001902098701430184, |
|
"loss": 1.0799, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.22938074895123733, |
|
"grad_norm": 0.6868169075444956, |
|
"learning_rate": 0.00018997658785476214, |
|
"loss": 1.0781, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.23092021706500404, |
|
"grad_norm": 0.583006345831051, |
|
"learning_rate": 0.00018974070536021572, |
|
"loss": 1.0814, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.23245968517877075, |
|
"grad_norm": 0.6006905084344202, |
|
"learning_rate": 0.00018950222947607625, |
|
"loss": 1.0624, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.23399915329253743, |
|
"grad_norm": 0.5697764314148909, |
|
"learning_rate": 0.0001892611670939865, |
|
"loss": 1.008, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.23553862140630413, |
|
"grad_norm": 0.7413320445202513, |
|
"learning_rate": 0.00018901752518033548, |
|
"loss": 1.0612, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.2370780895200708, |
|
"grad_norm": 0.5363151044675402, |
|
"learning_rate": 0.0001887713107760575, |
|
"loss": 1.0605, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.23861755763383752, |
|
"grad_norm": 0.8682202097559629, |
|
"learning_rate": 0.00018852253099642833, |
|
"loss": 1.031, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.2401570257476042, |
|
"grad_norm": 0.5831373382248602, |
|
"learning_rate": 0.0001882711930308599, |
|
"loss": 1.06, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.2416964938613709, |
|
"grad_norm": 0.5590693033565884, |
|
"learning_rate": 0.00018801730414269225, |
|
"loss": 1.0533, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.24323596197513758, |
|
"grad_norm": 0.6205945960602542, |
|
"learning_rate": 0.0001877608716689839, |
|
"loss": 1.0757, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.2447754300889043, |
|
"grad_norm": 0.7900938502871934, |
|
"learning_rate": 0.00018750190302029956, |
|
"loss": 1.0301, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.24631489820267097, |
|
"grad_norm": 1.0979738402724484, |
|
"learning_rate": 0.00018724040568049612, |
|
"loss": 1.0547, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.24785436631643767, |
|
"grad_norm": 0.7255960650743382, |
|
"learning_rate": 0.00018697638720650646, |
|
"loss": 1.0454, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.24939383443020435, |
|
"grad_norm": 0.6620553781722808, |
|
"learning_rate": 0.00018670985522812084, |
|
"loss": 1.0219, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.25093330254397106, |
|
"grad_norm": 0.6854793724956825, |
|
"learning_rate": 0.0001864408174477665, |
|
"loss": 1.0509, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.25247277065773777, |
|
"grad_norm": 0.5492317189334932, |
|
"learning_rate": 0.00018616928164028523, |
|
"loss": 1.0159, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.2540122387715045, |
|
"grad_norm": 0.8404726657300783, |
|
"learning_rate": 0.00018589525565270844, |
|
"loss": 1.0411, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.2555517068852711, |
|
"grad_norm": 0.6623737864891829, |
|
"learning_rate": 0.0001856187474040306, |
|
"loss": 1.0602, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.25709117499903783, |
|
"grad_norm": 0.5418949047693957, |
|
"learning_rate": 0.00018533976488498016, |
|
"loss": 1.037, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.25863064311280454, |
|
"grad_norm": 0.7761449291018582, |
|
"learning_rate": 0.0001850583161577889, |
|
"loss": 1.0738, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.26017011122657124, |
|
"grad_norm": 0.5847223837062915, |
|
"learning_rate": 0.00018477440935595873, |
|
"loss": 1.0875, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.2617095793403379, |
|
"grad_norm": 0.5106913164341648, |
|
"learning_rate": 0.00018448805268402672, |
|
"loss": 1.0685, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.2632490474541046, |
|
"grad_norm": 0.7444912623137734, |
|
"learning_rate": 0.00018419925441732804, |
|
"loss": 1.0407, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.2647885155678713, |
|
"grad_norm": 0.5183580101131431, |
|
"learning_rate": 0.00018390802290175673, |
|
"loss": 1.0572, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.266327983681638, |
|
"grad_norm": 0.6844054776521512, |
|
"learning_rate": 0.00018361436655352456, |
|
"loss": 1.0383, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.26786745179540467, |
|
"grad_norm": 1.0067719874632055, |
|
"learning_rate": 0.00018331829385891783, |
|
"loss": 1.031, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.26940691990917137, |
|
"grad_norm": 0.5484350200540726, |
|
"learning_rate": 0.00018301981337405212, |
|
"loss": 1.0585, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.2709463880229381, |
|
"grad_norm": 0.6056460900910511, |
|
"learning_rate": 0.00018271893372462497, |
|
"loss": 1.0585, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.2724858561367048, |
|
"grad_norm": 0.48416296519569474, |
|
"learning_rate": 0.00018241566360566665, |
|
"loss": 1.039, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.27402532425047144, |
|
"grad_norm": 0.6050871964307601, |
|
"learning_rate": 0.00018211001178128892, |
|
"loss": 1.0571, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.27556479236423814, |
|
"grad_norm": 0.5969541614482573, |
|
"learning_rate": 0.00018180198708443173, |
|
"loss": 1.0653, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.27710426047800485, |
|
"grad_norm": 0.7541885423123031, |
|
"learning_rate": 0.00018149159841660795, |
|
"loss": 1.0521, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.27864372859177156, |
|
"grad_norm": 0.49528417781944445, |
|
"learning_rate": 0.00018117885474764613, |
|
"loss": 1.0943, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.28018319670553826, |
|
"grad_norm": 0.6918705895372237, |
|
"learning_rate": 0.00018086376511543126, |
|
"loss": 1.0747, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.2817226648193049, |
|
"grad_norm": 0.7195296276082791, |
|
"learning_rate": 0.00018054633862564368, |
|
"loss": 1.0507, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.2832621329330716, |
|
"grad_norm": 0.5709551372136472, |
|
"learning_rate": 0.0001802265844514958, |
|
"loss": 1.0094, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.2848016010468383, |
|
"grad_norm": 0.43690572218401663, |
|
"learning_rate": 0.0001799045118334671, |
|
"loss": 1.064, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.28634106916060503, |
|
"grad_norm": 0.5329066679717666, |
|
"learning_rate": 0.00017958013007903713, |
|
"loss": 1.067, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.2878805372743717, |
|
"grad_norm": 0.53800354086698, |
|
"learning_rate": 0.0001792534485624164, |
|
"loss": 1.0491, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.2894200053881384, |
|
"grad_norm": 0.6415976681925623, |
|
"learning_rate": 0.00017892447672427563, |
|
"loss": 1.0496, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.2909594735019051, |
|
"grad_norm": 0.5366936452063369, |
|
"learning_rate": 0.00017859322407147272, |
|
"loss": 1.0657, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.2924989416156718, |
|
"grad_norm": 0.5700114756555418, |
|
"learning_rate": 0.00017825970017677832, |
|
"loss": 1.0808, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.29403840972943845, |
|
"grad_norm": 0.5602975678360493, |
|
"learning_rate": 0.00017792391467859886, |
|
"loss": 1.0255, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.29557787784320516, |
|
"grad_norm": 0.5222669096259323, |
|
"learning_rate": 0.0001775858772806983, |
|
"loss": 1.0762, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.29711734595697187, |
|
"grad_norm": 0.6950722858238849, |
|
"learning_rate": 0.00017724559775191744, |
|
"loss": 1.0382, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.2986568140707386, |
|
"grad_norm": 1.0357933657324396, |
|
"learning_rate": 0.00017690308592589182, |
|
"loss": 1.0541, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.3001962821845053, |
|
"grad_norm": 0.5871650345246843, |
|
"learning_rate": 0.0001765583517007675, |
|
"loss": 1.0035, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.30173575029827193, |
|
"grad_norm": 0.5209052634359257, |
|
"learning_rate": 0.00017621140503891488, |
|
"loss": 1.0206, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.30327521841203864, |
|
"grad_norm": 0.6487211604010458, |
|
"learning_rate": 0.00017586225596664102, |
|
"loss": 1.0381, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.30481468652580535, |
|
"grad_norm": 0.5629565123905481, |
|
"learning_rate": 0.00017551091457389966, |
|
"loss": 1.0434, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.30635415463957205, |
|
"grad_norm": 0.8112166900825726, |
|
"learning_rate": 0.00017515739101399983, |
|
"loss": 1.0287, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.3078936227533387, |
|
"grad_norm": 0.6366943000992968, |
|
"learning_rate": 0.00017480169550331231, |
|
"loss": 1.0193, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.3094330908671054, |
|
"grad_norm": 0.5040648629606902, |
|
"learning_rate": 0.00017444383832097442, |
|
"loss": 1.0411, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.3109725589808721, |
|
"grad_norm": 0.8283443113345634, |
|
"learning_rate": 0.00017408382980859305, |
|
"loss": 1.045, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.3125120270946388, |
|
"grad_norm": 0.6777410855607223, |
|
"learning_rate": 0.00017372168036994566, |
|
"loss": 1.0263, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.3140514952084055, |
|
"grad_norm": 0.5530544091806374, |
|
"learning_rate": 0.00017335740047067972, |
|
"loss": 1.0307, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.3155909633221722, |
|
"grad_norm": 0.45671110148384275, |
|
"learning_rate": 0.0001729910006380102, |
|
"loss": 1.055, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.3171304314359389, |
|
"grad_norm": 0.6812995390238387, |
|
"learning_rate": 0.00017262249146041546, |
|
"loss": 1.0838, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.3186698995497056, |
|
"grad_norm": 0.568521042510064, |
|
"learning_rate": 0.00017225188358733107, |
|
"loss": 1.0265, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.32020936766347224, |
|
"grad_norm": 0.8025629235004524, |
|
"learning_rate": 0.00017187918772884232, |
|
"loss": 1.0721, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.32174883577723895, |
|
"grad_norm": 0.6004044742833335, |
|
"learning_rate": 0.00017150441465537447, |
|
"loss": 1.0655, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.32328830389100566, |
|
"grad_norm": 0.5635765966408569, |
|
"learning_rate": 0.00017112757519738154, |
|
"loss": 1.0396, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.32482777200477236, |
|
"grad_norm": 0.5043399412628455, |
|
"learning_rate": 0.0001707486802450335, |
|
"loss": 1.0536, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.32636724011853907, |
|
"grad_norm": 0.5106615244506089, |
|
"learning_rate": 0.00017036774074790132, |
|
"loss": 1.0135, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.3279067082323057, |
|
"grad_norm": 0.6069038358467338, |
|
"learning_rate": 0.00016998476771464072, |
|
"loss": 1.0488, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.32944617634607243, |
|
"grad_norm": 0.5481314050474333, |
|
"learning_rate": 0.00016959977221267392, |
|
"loss": 1.0226, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.33098564445983913, |
|
"grad_norm": 0.5692030442740773, |
|
"learning_rate": 0.0001692127653678699, |
|
"loss": 1.0545, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.33252511257360584, |
|
"grad_norm": 0.5422422595812953, |
|
"learning_rate": 0.00016882375836422284, |
|
"loss": 1.04, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.3340645806873725, |
|
"grad_norm": 0.8241400248029749, |
|
"learning_rate": 0.00016843276244352885, |
|
"loss": 1.0667, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.3356040488011392, |
|
"grad_norm": 0.5205208851186217, |
|
"learning_rate": 0.00016803978890506113, |
|
"loss": 1.0783, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.3371435169149059, |
|
"grad_norm": 0.6117364442740371, |
|
"learning_rate": 0.00016764484910524358, |
|
"loss": 1.0339, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.3386829850286726, |
|
"grad_norm": 0.6490741063813862, |
|
"learning_rate": 0.00016724795445732243, |
|
"loss": 1.0068, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.34022245314243926, |
|
"grad_norm": 0.6013736527570045, |
|
"learning_rate": 0.00016684911643103642, |
|
"loss": 1.0665, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.34176192125620597, |
|
"grad_norm": 0.6118721687785333, |
|
"learning_rate": 0.0001664483465522855, |
|
"loss": 1.0423, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.3433013893699727, |
|
"grad_norm": 0.5419797734028425, |
|
"learning_rate": 0.00016604565640279754, |
|
"loss": 1.0564, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.3448408574837394, |
|
"grad_norm": 0.5495541176613125, |
|
"learning_rate": 0.0001656410576197938, |
|
"loss": 1.0636, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.3463803255975061, |
|
"grad_norm": 0.4477184453911253, |
|
"learning_rate": 0.0001652345618956526, |
|
"loss": 1.0797, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.34791979371127274, |
|
"grad_norm": 0.5912654457171957, |
|
"learning_rate": 0.00016482618097757122, |
|
"loss": 1.0553, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.34945926182503945, |
|
"grad_norm": 0.5561325327054591, |
|
"learning_rate": 0.00016441592666722684, |
|
"loss": 1.0459, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.35099872993880615, |
|
"grad_norm": 0.6091265051455126, |
|
"learning_rate": 0.00016400381082043507, |
|
"loss": 1.0916, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.35253819805257286, |
|
"grad_norm": 0.7050858387246796, |
|
"learning_rate": 0.00016358984534680748, |
|
"loss": 1.0605, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.3540776661663395, |
|
"grad_norm": 0.592579238142578, |
|
"learning_rate": 0.00016317404220940758, |
|
"loss": 1.0319, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.3556171342801062, |
|
"grad_norm": 0.4991456345649348, |
|
"learning_rate": 0.00016275641342440483, |
|
"loss": 1.0671, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.3571566023938729, |
|
"grad_norm": 0.5675708757017103, |
|
"learning_rate": 0.0001623369710607277, |
|
"loss": 1.0136, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.35869607050763963, |
|
"grad_norm": 0.45324587662080057, |
|
"learning_rate": 0.00016191572723971455, |
|
"loss": 1.0752, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.3602355386214063, |
|
"grad_norm": 0.4578223763710829, |
|
"learning_rate": 0.00016149269413476353, |
|
"loss": 1.0162, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.361775006735173, |
|
"grad_norm": 0.4787021798925449, |
|
"learning_rate": 0.00016106788397098095, |
|
"loss": 1.0031, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.3633144748489397, |
|
"grad_norm": 0.49337377711536057, |
|
"learning_rate": 0.0001606413090248276, |
|
"loss": 1.0042, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.3648539429627064, |
|
"grad_norm": 0.549765500927778, |
|
"learning_rate": 0.00016021298162376428, |
|
"loss": 1.0301, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.36639341107647305, |
|
"grad_norm": 0.6910911024000683, |
|
"learning_rate": 0.00015978291414589542, |
|
"loss": 1.049, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.36793287919023976, |
|
"grad_norm": 0.5099057113980399, |
|
"learning_rate": 0.0001593511190196115, |
|
"loss": 1.0613, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.36947234730400647, |
|
"grad_norm": 0.5326314131616275, |
|
"learning_rate": 0.00015891760872322963, |
|
"loss": 1.0177, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.37101181541777317, |
|
"grad_norm": 0.44876418883849256, |
|
"learning_rate": 0.00015848239578463325, |
|
"loss": 1.0594, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.3725512835315399, |
|
"grad_norm": 0.518140271684293, |
|
"learning_rate": 0.00015804549278090982, |
|
"loss": 1.0228, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.37409075164530653, |
|
"grad_norm": 0.5085879960253272, |
|
"learning_rate": 0.00015760691233798757, |
|
"loss": 1.0187, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.37563021975907324, |
|
"grad_norm": 0.5007291726473487, |
|
"learning_rate": 0.00015716666713027055, |
|
"loss": 1.0433, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.37716968787283994, |
|
"grad_norm": 0.739316075498731, |
|
"learning_rate": 0.00015672476988027228, |
|
"loss": 1.0478, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.37870915598660665, |
|
"grad_norm": 0.7639785434460002, |
|
"learning_rate": 0.0001562812333582482, |
|
"loss": 1.0117, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.3802486241003733, |
|
"grad_norm": 0.5823718950453733, |
|
"learning_rate": 0.00015583607038182655, |
|
"loss": 1.0366, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 0.38178809221414, |
|
"grad_norm": 0.6893848055474053, |
|
"learning_rate": 0.000155389293815638, |
|
"loss": 1.0381, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.3833275603279067, |
|
"grad_norm": 0.5063333022122084, |
|
"learning_rate": 0.00015494091657094385, |
|
"loss": 1.0419, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 0.3848670284416734, |
|
"grad_norm": 0.44024803486149566, |
|
"learning_rate": 0.00015449095160526292, |
|
"loss": 1.0645, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.38640649655544007, |
|
"grad_norm": 0.5599556807381372, |
|
"learning_rate": 0.00015403941192199718, |
|
"loss": 1.0443, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 0.3879459646692068, |
|
"grad_norm": 0.4886207710717129, |
|
"learning_rate": 0.0001535863105700558, |
|
"loss": 1.0556, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.3894854327829735, |
|
"grad_norm": 0.5246689722873303, |
|
"learning_rate": 0.00015313166064347814, |
|
"loss": 1.0256, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 0.3910249008967402, |
|
"grad_norm": 0.4204231295176996, |
|
"learning_rate": 0.00015267547528105538, |
|
"loss": 1.0423, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.3925643690105069, |
|
"grad_norm": 0.43365441716522, |
|
"learning_rate": 0.0001522177676659508, |
|
"loss": 1.0706, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.39410383712427355, |
|
"grad_norm": 0.5576831403247644, |
|
"learning_rate": 0.00015175855102531887, |
|
"loss": 1.0199, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.39564330523804025, |
|
"grad_norm": 0.4746796767312794, |
|
"learning_rate": 0.00015129783862992283, |
|
"loss": 1.029, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 0.39718277335180696, |
|
"grad_norm": 0.5397433381233686, |
|
"learning_rate": 0.0001508356437937512, |
|
"loss": 1.0527, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.39872224146557367, |
|
"grad_norm": 0.7141429909374478, |
|
"learning_rate": 0.00015037197987363338, |
|
"loss": 1.0348, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 0.4002617095793403, |
|
"grad_norm": 0.4435301474074775, |
|
"learning_rate": 0.0001499068602688532, |
|
"loss": 1.0702, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.401801177693107, |
|
"grad_norm": 0.5168064407110803, |
|
"learning_rate": 0.00014944029842076185, |
|
"loss": 1.0367, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 0.40334064580687373, |
|
"grad_norm": 0.5215356272036379, |
|
"learning_rate": 0.0001489723078123896, |
|
"loss": 1.0458, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.40488011392064044, |
|
"grad_norm": 0.47530585507691386, |
|
"learning_rate": 0.00014850290196805594, |
|
"loss": 1.0496, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 0.4064195820344071, |
|
"grad_norm": 0.4940892436164985, |
|
"learning_rate": 0.00014803209445297887, |
|
"loss": 1.0142, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.4079590501481738, |
|
"grad_norm": 0.5452384723470196, |
|
"learning_rate": 0.00014755989887288285, |
|
"loss": 1.0494, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.4094985182619405, |
|
"grad_norm": 0.6302906153858793, |
|
"learning_rate": 0.00014708632887360564, |
|
"loss": 1.0474, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.4110379863757072, |
|
"grad_norm": 0.5314488438447993, |
|
"learning_rate": 0.0001466113981407039, |
|
"loss": 1.0539, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 0.41257745448947386, |
|
"grad_norm": 0.5583239486533321, |
|
"learning_rate": 0.00014613512039905765, |
|
"loss": 1.0425, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.41411692260324057, |
|
"grad_norm": 0.5284692971206317, |
|
"learning_rate": 0.00014565750941247386, |
|
"loss": 1.02, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 0.4156563907170073, |
|
"grad_norm": 0.5504329155738187, |
|
"learning_rate": 0.0001451785789832884, |
|
"loss": 1.0266, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.417195858830774, |
|
"grad_norm": 0.7377959516300412, |
|
"learning_rate": 0.00014469834295196743, |
|
"loss": 1.0567, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 0.4187353269445407, |
|
"grad_norm": 0.5133810725829329, |
|
"learning_rate": 0.00014421681519670722, |
|
"loss": 1.0516, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.42027479505830734, |
|
"grad_norm": 0.5092592418186027, |
|
"learning_rate": 0.0001437340096330332, |
|
"loss": 1.058, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 0.42181426317207404, |
|
"grad_norm": 0.6808151766449716, |
|
"learning_rate": 0.0001432499402133979, |
|
"loss": 1.046, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.42335373128584075, |
|
"grad_norm": 0.49715289560889164, |
|
"learning_rate": 0.0001427646209267775, |
|
"loss": 1.0384, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.42489319939960746, |
|
"grad_norm": 0.4073699217300379, |
|
"learning_rate": 0.00014227806579826774, |
|
"loss": 1.0181, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.4264326675133741, |
|
"grad_norm": 0.5386638584236078, |
|
"learning_rate": 0.00014179028888867867, |
|
"loss": 1.0816, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 0.4279721356271408, |
|
"grad_norm": 0.7007555607650736, |
|
"learning_rate": 0.00014130130429412815, |
|
"loss": 1.0364, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.4295116037409075, |
|
"grad_norm": 0.6291522129011135, |
|
"learning_rate": 0.0001408111261456346, |
|
"loss": 1.018, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 0.43105107185467423, |
|
"grad_norm": 0.4183183608173097, |
|
"learning_rate": 0.00014031976860870855, |
|
"loss": 1.0159, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.4325905399684409, |
|
"grad_norm": 0.42462992648226483, |
|
"learning_rate": 0.00013982724588294335, |
|
"loss": 1.026, |
|
"step": 1405 |
|
}, |
|
{ |
|
"epoch": 0.4341300080822076, |
|
"grad_norm": 0.5342525321314037, |
|
"learning_rate": 0.00013933357220160476, |
|
"loss": 1.0668, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.4356694761959743, |
|
"grad_norm": 0.5175732551515752, |
|
"learning_rate": 0.00013883876183121973, |
|
"loss": 1.0613, |
|
"step": 1415 |
|
}, |
|
{ |
|
"epoch": 0.437208944309741, |
|
"grad_norm": 0.5236448642390229, |
|
"learning_rate": 0.000138342829071164, |
|
"loss": 1.0505, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.4387484124235077, |
|
"grad_norm": 0.47885357545447377, |
|
"learning_rate": 0.00013784578825324885, |
|
"loss": 1.0181, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.44028788053727436, |
|
"grad_norm": 0.567561554601715, |
|
"learning_rate": 0.00013734765374130717, |
|
"loss": 1.0337, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.44182734865104106, |
|
"grad_norm": 0.4640525016441619, |
|
"learning_rate": 0.00013684843993077788, |
|
"loss": 1.0195, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 0.44336681676480777, |
|
"grad_norm": 0.6957136092069967, |
|
"learning_rate": 0.00013634816124829063, |
|
"loss": 1.0252, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.4449062848785745, |
|
"grad_norm": 0.414100639900688, |
|
"learning_rate": 0.0001358468321512481, |
|
"loss": 1.0625, |
|
"step": 1445 |
|
}, |
|
{ |
|
"epoch": 0.4464457529923411, |
|
"grad_norm": 0.4358981186515927, |
|
"learning_rate": 0.00013534446712740877, |
|
"loss": 1.0336, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.44798522110610783, |
|
"grad_norm": 0.7000263701281964, |
|
"learning_rate": 0.0001348410806944681, |
|
"loss": 1.0247, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 0.44952468921987454, |
|
"grad_norm": 0.48487218580357255, |
|
"learning_rate": 0.00013433668739963882, |
|
"loss": 1.0306, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.45106415733364125, |
|
"grad_norm": 0.48692168534098523, |
|
"learning_rate": 0.00013383130181923071, |
|
"loss": 1.0311, |
|
"step": 1465 |
|
}, |
|
{ |
|
"epoch": 0.4526036254474079, |
|
"grad_norm": 0.6087744044400206, |
|
"learning_rate": 0.00013332493855822936, |
|
"loss": 1.0138, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.4541430935611746, |
|
"grad_norm": 0.5570901501341506, |
|
"learning_rate": 0.00013281761224987398, |
|
"loss": 1.0121, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.4556825616749413, |
|
"grad_norm": 0.49929406825113404, |
|
"learning_rate": 0.00013230933755523466, |
|
"loss": 1.0345, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.457222029788708, |
|
"grad_norm": 0.7306093657667501, |
|
"learning_rate": 0.00013180012916278854, |
|
"loss": 1.0472, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 0.45876149790247467, |
|
"grad_norm": 0.46118596282419816, |
|
"learning_rate": 0.00013129000178799548, |
|
"loss": 1.0441, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.4603009660162414, |
|
"grad_norm": 0.4542565239229393, |
|
"learning_rate": 0.00013077897017287272, |
|
"loss": 1.0092, |
|
"step": 1495 |
|
}, |
|
{ |
|
"epoch": 0.4618404341300081, |
|
"grad_norm": 0.4921710657691845, |
|
"learning_rate": 0.00013026704908556888, |
|
"loss": 1.0637, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.4633799022437748, |
|
"grad_norm": 0.5001776822700446, |
|
"learning_rate": 0.0001297542533199371, |
|
"loss": 1.0483, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 0.4649193703575415, |
|
"grad_norm": 0.463427010769576, |
|
"learning_rate": 0.00012924059769510768, |
|
"loss": 1.0388, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.46645883847130815, |
|
"grad_norm": 0.5055392500122334, |
|
"learning_rate": 0.00012872609705505964, |
|
"loss": 1.0576, |
|
"step": 1515 |
|
}, |
|
{ |
|
"epoch": 0.46799830658507485, |
|
"grad_norm": 0.4980518480504977, |
|
"learning_rate": 0.00012821076626819196, |
|
"loss": 1.0485, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.46953777469884156, |
|
"grad_norm": 0.4390801115545415, |
|
"learning_rate": 0.00012769462022689363, |
|
"loss": 1.029, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.47107724281260827, |
|
"grad_norm": 0.48684411734981153, |
|
"learning_rate": 0.0001271776738471136, |
|
"loss": 1.0353, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.4726167109263749, |
|
"grad_norm": 0.6254296920632078, |
|
"learning_rate": 0.00012665994206792938, |
|
"loss": 1.0662, |
|
"step": 1535 |
|
}, |
|
{ |
|
"epoch": 0.4741561790401416, |
|
"grad_norm": 0.4828153968193146, |
|
"learning_rate": 0.00012614143985111565, |
|
"loss": 1.1062, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.47569564715390833, |
|
"grad_norm": 0.6105347585674029, |
|
"learning_rate": 0.00012562218218071164, |
|
"loss": 1.0285, |
|
"step": 1545 |
|
}, |
|
{ |
|
"epoch": 0.47723511526767504, |
|
"grad_norm": 0.4580317847852175, |
|
"learning_rate": 0.0001251021840625883, |
|
"loss": 1.0257, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.4787745833814417, |
|
"grad_norm": 0.49671748534126925, |
|
"learning_rate": 0.00012458146052401442, |
|
"loss": 1.0347, |
|
"step": 1555 |
|
}, |
|
{ |
|
"epoch": 0.4803140514952084, |
|
"grad_norm": 0.5504358336120402, |
|
"learning_rate": 0.00012406002661322264, |
|
"loss": 1.0246, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.4818535196089751, |
|
"grad_norm": 0.4998876160614867, |
|
"learning_rate": 0.00012353789739897437, |
|
"loss": 1.0574, |
|
"step": 1565 |
|
}, |
|
{ |
|
"epoch": 0.4833929877227418, |
|
"grad_norm": 0.5729717160065313, |
|
"learning_rate": 0.00012301508797012432, |
|
"loss": 1.0365, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.4849324558365085, |
|
"grad_norm": 0.47052381273276284, |
|
"learning_rate": 0.00012249161343518466, |
|
"loss": 1.0173, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.48647192395027516, |
|
"grad_norm": 0.46095882989505704, |
|
"learning_rate": 0.00012196748892188816, |
|
"loss": 1.0497, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.48801139206404187, |
|
"grad_norm": 0.654430908611628, |
|
"learning_rate": 0.00012144272957675108, |
|
"loss": 1.0312, |
|
"step": 1585 |
|
}, |
|
{ |
|
"epoch": 0.4895508601778086, |
|
"grad_norm": 0.45228923273407706, |
|
"learning_rate": 0.00012091735056463562, |
|
"loss": 1.0093, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.4910903282915753, |
|
"grad_norm": 0.6127718568028115, |
|
"learning_rate": 0.00012039136706831145, |
|
"loss": 1.0671, |
|
"step": 1595 |
|
}, |
|
{ |
|
"epoch": 0.49262979640534194, |
|
"grad_norm": 0.6260979638098746, |
|
"learning_rate": 0.00011986479428801709, |
|
"loss": 1.0436, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.49416926451910864, |
|
"grad_norm": 0.4570968434668229, |
|
"learning_rate": 0.00011933764744102058, |
|
"loss": 1.0229, |
|
"step": 1605 |
|
}, |
|
{ |
|
"epoch": 0.49570873263287535, |
|
"grad_norm": 0.5515514688898091, |
|
"learning_rate": 0.00011880994176117976, |
|
"loss": 1.0449, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.49724820074664206, |
|
"grad_norm": 0.42355848666393986, |
|
"learning_rate": 0.00011828169249850201, |
|
"loss": 1.0453, |
|
"step": 1615 |
|
}, |
|
{ |
|
"epoch": 0.4987876688604087, |
|
"grad_norm": 0.5463051906159438, |
|
"learning_rate": 0.00011775291491870351, |
|
"loss": 1.0271, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.5003271369741754, |
|
"grad_norm": 0.4043160515062089, |
|
"learning_rate": 0.00011722362430276816, |
|
"loss": 1.0377, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.5018666050879421, |
|
"grad_norm": 0.8243302494042759, |
|
"learning_rate": 0.00011669383594650593, |
|
"loss": 1.0663, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.5034060732017088, |
|
"grad_norm": 0.4547900229781827, |
|
"learning_rate": 0.00011616356516011083, |
|
"loss": 1.0159, |
|
"step": 1635 |
|
}, |
|
{ |
|
"epoch": 0.5049455413154755, |
|
"grad_norm": 0.5227895926154218, |
|
"learning_rate": 0.00011563282726771847, |
|
"loss": 1.0515, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.5064850094292422, |
|
"grad_norm": 0.5635603323984416, |
|
"learning_rate": 0.0001151016376069632, |
|
"loss": 1.0701, |
|
"step": 1645 |
|
}, |
|
{ |
|
"epoch": 0.508024477543009, |
|
"grad_norm": 0.47093920101316833, |
|
"learning_rate": 0.00011457001152853493, |
|
"loss": 1.0199, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.5095639456567755, |
|
"grad_norm": 0.4344516158624125, |
|
"learning_rate": 0.00011403796439573544, |
|
"loss": 1.038, |
|
"step": 1655 |
|
}, |
|
{ |
|
"epoch": 0.5111034137705422, |
|
"grad_norm": 0.49922332593006863, |
|
"learning_rate": 0.00011350551158403442, |
|
"loss": 1.0593, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.512642881884309, |
|
"grad_norm": 0.42072128316053065, |
|
"learning_rate": 0.0001129726684806252, |
|
"loss": 1.0147, |
|
"step": 1665 |
|
}, |
|
{ |
|
"epoch": 0.5141823499980757, |
|
"grad_norm": 0.4531404971038023, |
|
"learning_rate": 0.00011243945048398003, |
|
"loss": 1.0215, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.5157218181118424, |
|
"grad_norm": 0.4470522441927525, |
|
"learning_rate": 0.000111905873003405, |
|
"loss": 1.0323, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 0.5172612862256091, |
|
"grad_norm": 0.5867123309400825, |
|
"learning_rate": 0.00011137195145859494, |
|
"loss": 1.0046, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.5188007543393758, |
|
"grad_norm": 0.3962300365929646, |
|
"learning_rate": 0.00011083770127918762, |
|
"loss": 1.0043, |
|
"step": 1685 |
|
}, |
|
{ |
|
"epoch": 0.5203402224531425, |
|
"grad_norm": 0.7925921747531262, |
|
"learning_rate": 0.00011030313790431788, |
|
"loss": 1.049, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.5218796905669091, |
|
"grad_norm": 0.5078998609582865, |
|
"learning_rate": 0.00010976827678217161, |
|
"loss": 1.0109, |
|
"step": 1695 |
|
}, |
|
{ |
|
"epoch": 0.5234191586806758, |
|
"grad_norm": 0.4109138516526712, |
|
"learning_rate": 0.00010923313336953913, |
|
"loss": 1.0172, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.5249586267944425, |
|
"grad_norm": 0.5367265822698736, |
|
"learning_rate": 0.00010869772313136861, |
|
"loss": 1.0285, |
|
"step": 1705 |
|
}, |
|
{ |
|
"epoch": 0.5264980949082092, |
|
"grad_norm": 0.45736224715965773, |
|
"learning_rate": 0.00010816206154031916, |
|
"loss": 1.0146, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.5280375630219759, |
|
"grad_norm": 0.46461493826686345, |
|
"learning_rate": 0.00010762616407631356, |
|
"loss": 1.0783, |
|
"step": 1715 |
|
}, |
|
{ |
|
"epoch": 0.5295770311357426, |
|
"grad_norm": 0.6140868832303202, |
|
"learning_rate": 0.00010709004622609116, |
|
"loss": 1.0748, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.5311164992495093, |
|
"grad_norm": 0.6800071721536182, |
|
"learning_rate": 0.00010655372348276006, |
|
"loss": 1.0272, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 0.532655967363276, |
|
"grad_norm": 0.5202497163301416, |
|
"learning_rate": 0.00010601721134534959, |
|
"loss": 1.0379, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.5341954354770427, |
|
"grad_norm": 0.4826223398674556, |
|
"learning_rate": 0.00010548052531836223, |
|
"loss": 1.0357, |
|
"step": 1735 |
|
}, |
|
{ |
|
"epoch": 0.5357349035908093, |
|
"grad_norm": 1.0784079166978915, |
|
"learning_rate": 0.00010494368091132576, |
|
"loss": 1.0378, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.537274371704576, |
|
"grad_norm": 0.4567747436804465, |
|
"learning_rate": 0.00010440669363834483, |
|
"loss": 1.0188, |
|
"step": 1745 |
|
}, |
|
{ |
|
"epoch": 0.5388138398183427, |
|
"grad_norm": 0.48032495281062815, |
|
"learning_rate": 0.00010386957901765277, |
|
"loss": 1.034, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.5403533079321095, |
|
"grad_norm": 0.5026624164429033, |
|
"learning_rate": 0.00010333235257116313, |
|
"loss": 0.9786, |
|
"step": 1755 |
|
}, |
|
{ |
|
"epoch": 0.5418927760458762, |
|
"grad_norm": 0.48003816426131585, |
|
"learning_rate": 0.00010279502982402103, |
|
"loss": 1.0486, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.5434322441596429, |
|
"grad_norm": 0.5389477916705987, |
|
"learning_rate": 0.00010225762630415457, |
|
"loss": 1.017, |
|
"step": 1765 |
|
}, |
|
{ |
|
"epoch": 0.5449717122734096, |
|
"grad_norm": 0.537004045922141, |
|
"learning_rate": 0.00010172015754182607, |
|
"loss": 1.0444, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.5465111803871763, |
|
"grad_norm": 1.929543100464396, |
|
"learning_rate": 0.00010118263906918331, |
|
"loss": 1.0451, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 0.5480506485009429, |
|
"grad_norm": 0.4940854946649758, |
|
"learning_rate": 0.00010064508641981054, |
|
"loss": 1.0013, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.5495901166147096, |
|
"grad_norm": 0.4816239235355272, |
|
"learning_rate": 0.0001001075151282798, |
|
"loss": 1.0568, |
|
"step": 1785 |
|
}, |
|
{ |
|
"epoch": 0.5511295847284763, |
|
"grad_norm": 0.4644540038274215, |
|
"learning_rate": 9.956994072970179e-05, |
|
"loss": 1.0332, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.552669052842243, |
|
"grad_norm": 0.39735664952249866, |
|
"learning_rate": 9.903237875927698e-05, |
|
"loss": 1.0716, |
|
"step": 1795 |
|
}, |
|
{ |
|
"epoch": 0.5542085209560097, |
|
"grad_norm": 0.42204715257283004, |
|
"learning_rate": 9.849484475184672e-05, |
|
"loss": 1.0204, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.5557479890697764, |
|
"grad_norm": 0.5226725559414837, |
|
"learning_rate": 9.795735424144428e-05, |
|
"loss": 1.0156, |
|
"step": 1805 |
|
}, |
|
{ |
|
"epoch": 0.5572874571835431, |
|
"grad_norm": 0.5721667039150645, |
|
"learning_rate": 9.74199227608459e-05, |
|
"loss": 1.0381, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.5588269252973098, |
|
"grad_norm": 0.6792294345813789, |
|
"learning_rate": 9.688256584112192e-05, |
|
"loss": 1.0216, |
|
"step": 1815 |
|
}, |
|
{ |
|
"epoch": 0.5603663934110765, |
|
"grad_norm": 0.5038747704164618, |
|
"learning_rate": 9.634529901118799e-05, |
|
"loss": 1.0309, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.5619058615248431, |
|
"grad_norm": 1.0905768705043635, |
|
"learning_rate": 9.580813779735624e-05, |
|
"loss": 1.0417, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 0.5634453296386098, |
|
"grad_norm": 0.7259613249848627, |
|
"learning_rate": 9.52710977228867e-05, |
|
"loss": 1.0348, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.5649847977523765, |
|
"grad_norm": 0.48026375265768084, |
|
"learning_rate": 9.473419430753864e-05, |
|
"loss": 0.979, |
|
"step": 1835 |
|
}, |
|
{ |
|
"epoch": 0.5665242658661432, |
|
"grad_norm": 0.5445245479851468, |
|
"learning_rate": 9.419744306712197e-05, |
|
"loss": 1.0099, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.56806373397991, |
|
"grad_norm": 0.4377954309966568, |
|
"learning_rate": 9.3660859513049e-05, |
|
"loss": 1.0681, |
|
"step": 1845 |
|
}, |
|
{ |
|
"epoch": 0.5696032020936767, |
|
"grad_norm": 0.7151357125545018, |
|
"learning_rate": 9.312445915188609e-05, |
|
"loss": 1.033, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.5711426702074434, |
|
"grad_norm": 0.5258015505597993, |
|
"learning_rate": 9.258825748490558e-05, |
|
"loss": 1.0492, |
|
"step": 1855 |
|
}, |
|
{ |
|
"epoch": 0.5726821383212101, |
|
"grad_norm": 0.5535249863969953, |
|
"learning_rate": 9.205227000763788e-05, |
|
"loss": 1.0444, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.5742216064349767, |
|
"grad_norm": 0.5852809671349769, |
|
"learning_rate": 9.151651220942349e-05, |
|
"loss": 1.0532, |
|
"step": 1865 |
|
}, |
|
{ |
|
"epoch": 0.5757610745487434, |
|
"grad_norm": 0.5927067468229194, |
|
"learning_rate": 9.098099957296552e-05, |
|
"loss": 1.0419, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.5773005426625101, |
|
"grad_norm": 0.5038197549382673, |
|
"learning_rate": 9.044574757388224e-05, |
|
"loss": 1.0342, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.5788400107762768, |
|
"grad_norm": 0.45976069318212914, |
|
"learning_rate": 8.991077168025976e-05, |
|
"loss": 1.0346, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.5803794788900435, |
|
"grad_norm": 0.6004992124353027, |
|
"learning_rate": 8.937608735220527e-05, |
|
"loss": 1.0457, |
|
"step": 1885 |
|
}, |
|
{ |
|
"epoch": 0.5819189470038102, |
|
"grad_norm": 0.6603686161055189, |
|
"learning_rate": 8.884171004139996e-05, |
|
"loss": 1.0233, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.5834584151175769, |
|
"grad_norm": 0.502662627180952, |
|
"learning_rate": 8.830765519065262e-05, |
|
"loss": 0.9875, |
|
"step": 1895 |
|
}, |
|
{ |
|
"epoch": 0.5849978832313436, |
|
"grad_norm": 0.7865449254332316, |
|
"learning_rate": 8.777393823345343e-05, |
|
"loss": 1.0493, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.5865373513451103, |
|
"grad_norm": 0.4226972216471069, |
|
"learning_rate": 8.724057459352784e-05, |
|
"loss": 0.9935, |
|
"step": 1905 |
|
}, |
|
{ |
|
"epoch": 0.5880768194588769, |
|
"grad_norm": 0.5958711278623102, |
|
"learning_rate": 8.670757968439086e-05, |
|
"loss": 1.0631, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.5896162875726436, |
|
"grad_norm": 0.5199697414979809, |
|
"learning_rate": 8.617496890890179e-05, |
|
"loss": 1.0343, |
|
"step": 1915 |
|
}, |
|
{ |
|
"epoch": 0.5911557556864103, |
|
"grad_norm": 0.6008327984030076, |
|
"learning_rate": 8.564275765881887e-05, |
|
"loss": 1.0423, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.592695223800177, |
|
"grad_norm": 0.44604940325807296, |
|
"learning_rate": 8.511096131435454e-05, |
|
"loss": 1.0175, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 0.5942346919139437, |
|
"grad_norm": 0.42675758280608317, |
|
"learning_rate": 8.457959524373109e-05, |
|
"loss": 1.0307, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.5957741600277104, |
|
"grad_norm": 0.5597345800622264, |
|
"learning_rate": 8.404867480273636e-05, |
|
"loss": 1.058, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 0.5973136281414771, |
|
"grad_norm": 0.4451211247529823, |
|
"learning_rate": 8.351821533428023e-05, |
|
"loss": 1.0313, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.5988530962552439, |
|
"grad_norm": 0.407092060921353, |
|
"learning_rate": 8.298823216795093e-05, |
|
"loss": 1.0506, |
|
"step": 1945 |
|
}, |
|
{ |
|
"epoch": 0.6003925643690106, |
|
"grad_norm": 0.43348547367002177, |
|
"learning_rate": 8.245874061957224e-05, |
|
"loss": 1.0402, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.6019320324827772, |
|
"grad_norm": 0.4472151967667229, |
|
"learning_rate": 8.192975599076078e-05, |
|
"loss": 1.0168, |
|
"step": 1955 |
|
}, |
|
{ |
|
"epoch": 0.6034715005965439, |
|
"grad_norm": 0.3853019563837129, |
|
"learning_rate": 8.140129356848387e-05, |
|
"loss": 1.0206, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.6050109687103106, |
|
"grad_norm": 0.4721720358418525, |
|
"learning_rate": 8.087336862461783e-05, |
|
"loss": 1.0112, |
|
"step": 1965 |
|
}, |
|
{ |
|
"epoch": 0.6065504368240773, |
|
"grad_norm": 0.46617796491326624, |
|
"learning_rate": 8.034599641550642e-05, |
|
"loss": 1.0484, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.608089904937844, |
|
"grad_norm": 0.5930150370566495, |
|
"learning_rate": 7.981919218152016e-05, |
|
"loss": 1.0019, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 0.6096293730516107, |
|
"grad_norm": 0.515110909779576, |
|
"learning_rate": 7.929297114661581e-05, |
|
"loss": 1.0182, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.6111688411653774, |
|
"grad_norm": 0.45067927700399973, |
|
"learning_rate": 7.876734851789643e-05, |
|
"loss": 1.048, |
|
"step": 1985 |
|
}, |
|
{ |
|
"epoch": 0.6127083092791441, |
|
"grad_norm": 0.48025135310003386, |
|
"learning_rate": 7.824233948517185e-05, |
|
"loss": 1.0499, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.6142477773929107, |
|
"grad_norm": 0.3688608602701041, |
|
"learning_rate": 7.771795922051999e-05, |
|
"loss": 1.0493, |
|
"step": 1995 |
|
}, |
|
{ |
|
"epoch": 0.6157872455066774, |
|
"grad_norm": 0.47692666795291627, |
|
"learning_rate": 7.719422287784798e-05, |
|
"loss": 1.018, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.6173267136204441, |
|
"grad_norm": 0.43950677355297135, |
|
"learning_rate": 7.667114559245451e-05, |
|
"loss": 1.0086, |
|
"step": 2005 |
|
}, |
|
{ |
|
"epoch": 0.6188661817342108, |
|
"grad_norm": 0.5992589739511112, |
|
"learning_rate": 7.614874248059238e-05, |
|
"loss": 1.0286, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.6204056498479775, |
|
"grad_norm": 0.5113558280370261, |
|
"learning_rate": 7.56270286390316e-05, |
|
"loss": 1.0196, |
|
"step": 2015 |
|
}, |
|
{ |
|
"epoch": 0.6219451179617442, |
|
"grad_norm": 0.48971205943506146, |
|
"learning_rate": 7.510601914462331e-05, |
|
"loss": 1.0021, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.6234845860755109, |
|
"grad_norm": 2.031201004984545, |
|
"learning_rate": 7.458572905386381e-05, |
|
"loss": 1.0068, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 0.6250240541892776, |
|
"grad_norm": 0.43332008516546355, |
|
"learning_rate": 7.406617340245957e-05, |
|
"loss": 1.0573, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.6265635223030444, |
|
"grad_norm": 0.40359150277813904, |
|
"learning_rate": 7.354736720489273e-05, |
|
"loss": 1.0114, |
|
"step": 2035 |
|
}, |
|
{ |
|
"epoch": 0.628102990416811, |
|
"grad_norm": 0.5109830787118577, |
|
"learning_rate": 7.302932545398721e-05, |
|
"loss": 1.0325, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.6296424585305777, |
|
"grad_norm": 0.33695770975399714, |
|
"learning_rate": 7.251206312047547e-05, |
|
"loss": 0.9971, |
|
"step": 2045 |
|
}, |
|
{ |
|
"epoch": 0.6311819266443444, |
|
"grad_norm": 0.4478834681612594, |
|
"learning_rate": 7.199559515256573e-05, |
|
"loss": 1.0551, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.6327213947581111, |
|
"grad_norm": 0.49179933979909946, |
|
"learning_rate": 7.14799364755101e-05, |
|
"loss": 1.0181, |
|
"step": 2055 |
|
}, |
|
{ |
|
"epoch": 0.6342608628718778, |
|
"grad_norm": 0.476264555556864, |
|
"learning_rate": 7.096510199117327e-05, |
|
"loss": 1.0346, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.6358003309856445, |
|
"grad_norm": 0.45265771641362673, |
|
"learning_rate": 7.045110657760179e-05, |
|
"loss": 1.0442, |
|
"step": 2065 |
|
}, |
|
{ |
|
"epoch": 0.6373397990994112, |
|
"grad_norm": 0.4988192869036288, |
|
"learning_rate": 6.993796508859418e-05, |
|
"loss": 1.029, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.6388792672131779, |
|
"grad_norm": 0.4886571640531873, |
|
"learning_rate": 6.942569235327167e-05, |
|
"loss": 1.0275, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 0.6404187353269445, |
|
"grad_norm": 0.43831855733672936, |
|
"learning_rate": 6.891430317564964e-05, |
|
"loss": 1.0307, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.6419582034407112, |
|
"grad_norm": 0.5860619060156503, |
|
"learning_rate": 6.840381233420973e-05, |
|
"loss": 1.0265, |
|
"step": 2085 |
|
}, |
|
{ |
|
"epoch": 0.6434976715544779, |
|
"grad_norm": 0.44236844600518777, |
|
"learning_rate": 6.789423458147292e-05, |
|
"loss": 1.0283, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.6450371396682446, |
|
"grad_norm": 0.4876716811387609, |
|
"learning_rate": 6.738558464357305e-05, |
|
"loss": 1.0504, |
|
"step": 2095 |
|
}, |
|
{ |
|
"epoch": 0.6465766077820113, |
|
"grad_norm": 0.41217817376284266, |
|
"learning_rate": 6.687787721983136e-05, |
|
"loss": 1.0725, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.648116075895778, |
|
"grad_norm": 0.5066902107865301, |
|
"learning_rate": 6.63711269823317e-05, |
|
"loss": 1.0168, |
|
"step": 2105 |
|
}, |
|
{ |
|
"epoch": 0.6496555440095447, |
|
"grad_norm": 0.8959396144411056, |
|
"learning_rate": 6.586534857549638e-05, |
|
"loss": 1.0367, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.6511950121233114, |
|
"grad_norm": 0.41740196707008226, |
|
"learning_rate": 6.536055661566312e-05, |
|
"loss": 1.0603, |
|
"step": 2115 |
|
}, |
|
{ |
|
"epoch": 0.6527344802370781, |
|
"grad_norm": 0.4285108933692639, |
|
"learning_rate": 6.485676569066258e-05, |
|
"loss": 1.0701, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.6542739483508447, |
|
"grad_norm": 0.490878565864062, |
|
"learning_rate": 6.43539903593969e-05, |
|
"loss": 1.0274, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 0.6558134164646114, |
|
"grad_norm": 0.4955255588450408, |
|
"learning_rate": 6.385224515141879e-05, |
|
"loss": 1.0213, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.6573528845783781, |
|
"grad_norm": 0.4943547579484523, |
|
"learning_rate": 6.335154456651178e-05, |
|
"loss": 1.0424, |
|
"step": 2135 |
|
}, |
|
{ |
|
"epoch": 0.6588923526921449, |
|
"grad_norm": 0.7266332089943804, |
|
"learning_rate": 6.285190307427114e-05, |
|
"loss": 1.0646, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.6604318208059116, |
|
"grad_norm": 0.4121244632336502, |
|
"learning_rate": 6.235333511368573e-05, |
|
"loss": 1.0555, |
|
"step": 2145 |
|
}, |
|
{ |
|
"epoch": 0.6619712889196783, |
|
"grad_norm": 0.5185060524362, |
|
"learning_rate": 6.185585509272078e-05, |
|
"loss": 1.0469, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.663510757033445, |
|
"grad_norm": 0.4631669349204426, |
|
"learning_rate": 6.135947738790145e-05, |
|
"loss": 1.0567, |
|
"step": 2155 |
|
}, |
|
{ |
|
"epoch": 0.6650502251472117, |
|
"grad_norm": 0.5057075839366066, |
|
"learning_rate": 6.0864216343897365e-05, |
|
"loss": 1.0298, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.6665896932609783, |
|
"grad_norm": 0.5634732740375421, |
|
"learning_rate": 6.0370086273108205e-05, |
|
"loss": 1.0629, |
|
"step": 2165 |
|
}, |
|
{ |
|
"epoch": 0.668129161374745, |
|
"grad_norm": 0.5200825511831939, |
|
"learning_rate": 5.987710145524992e-05, |
|
"loss": 1.0519, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.6696686294885117, |
|
"grad_norm": 0.4345129610373104, |
|
"learning_rate": 5.938527613694214e-05, |
|
"loss": 1.0321, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 0.6712080976022784, |
|
"grad_norm": 0.7454797877228255, |
|
"learning_rate": 5.8894624531296486e-05, |
|
"loss": 0.9871, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.6727475657160451, |
|
"grad_norm": 0.38956096747028734, |
|
"learning_rate": 5.840516081750583e-05, |
|
"loss": 1.0224, |
|
"step": 2185 |
|
}, |
|
{ |
|
"epoch": 0.6742870338298118, |
|
"grad_norm": 0.4637705509612328, |
|
"learning_rate": 5.791689914043447e-05, |
|
"loss": 1.066, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.6758265019435785, |
|
"grad_norm": 0.4296286070627721, |
|
"learning_rate": 5.742985361020945e-05, |
|
"loss": 1.0286, |
|
"step": 2195 |
|
}, |
|
{ |
|
"epoch": 0.6773659700573452, |
|
"grad_norm": 0.4514049815518666, |
|
"learning_rate": 5.69440383018127e-05, |
|
"loss": 1.0443, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.6789054381711119, |
|
"grad_norm": 0.4614392154051119, |
|
"learning_rate": 5.6459467254674435e-05, |
|
"loss": 0.9877, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 0.6804449062848785, |
|
"grad_norm": 0.5704518491798019, |
|
"learning_rate": 5.597615447226724e-05, |
|
"loss": 1.0337, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.6819843743986452, |
|
"grad_norm": 0.39999150350123325, |
|
"learning_rate": 5.549411392170154e-05, |
|
"loss": 1.0088, |
|
"step": 2215 |
|
}, |
|
{ |
|
"epoch": 0.6835238425124119, |
|
"grad_norm": 0.42287100632286095, |
|
"learning_rate": 5.501335953332187e-05, |
|
"loss": 1.0358, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.6850633106261786, |
|
"grad_norm": 0.4444385205318582, |
|
"learning_rate": 5.453390520030439e-05, |
|
"loss": 1.0274, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 0.6866027787399454, |
|
"grad_norm": 0.4681868371661994, |
|
"learning_rate": 5.405576477825538e-05, |
|
"loss": 1.0135, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.6881422468537121, |
|
"grad_norm": 0.3647614554162627, |
|
"learning_rate": 5.3578952084810765e-05, |
|
"loss": 1.0201, |
|
"step": 2235 |
|
}, |
|
{ |
|
"epoch": 0.6896817149674788, |
|
"grad_norm": 0.4675279093656959, |
|
"learning_rate": 5.310348089923681e-05, |
|
"loss": 1.0641, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.6912211830812455, |
|
"grad_norm": 0.5708390560359091, |
|
"learning_rate": 5.2629364962032004e-05, |
|
"loss": 1.0365, |
|
"step": 2245 |
|
}, |
|
{ |
|
"epoch": 0.6927606511950122, |
|
"grad_norm": 5.18192671698193, |
|
"learning_rate": 5.2156617974529886e-05, |
|
"loss": 1.0407, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.6943001193087788, |
|
"grad_norm": 0.4187190501217835, |
|
"learning_rate": 5.1685253598503116e-05, |
|
"loss": 1.0471, |
|
"step": 2255 |
|
}, |
|
{ |
|
"epoch": 0.6958395874225455, |
|
"grad_norm": 0.6110758500884396, |
|
"learning_rate": 5.1215285455768794e-05, |
|
"loss": 1.0067, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.6973790555363122, |
|
"grad_norm": 1.2232875510104708, |
|
"learning_rate": 5.074672712779456e-05, |
|
"loss": 1.0186, |
|
"step": 2265 |
|
}, |
|
{ |
|
"epoch": 0.6989185236500789, |
|
"grad_norm": 0.4924706604640269, |
|
"learning_rate": 5.0279592155306286e-05, |
|
"loss": 1.013, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.7004579917638456, |
|
"grad_norm": 0.5351327628863634, |
|
"learning_rate": 4.9813894037896747e-05, |
|
"loss": 1.0147, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 0.7019974598776123, |
|
"grad_norm": 0.5691385515488996, |
|
"learning_rate": 4.93496462336354e-05, |
|
"loss": 1.0543, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.703536927991379, |
|
"grad_norm": 0.45370659861849166, |
|
"learning_rate": 4.8886862158679714e-05, |
|
"loss": 1.0502, |
|
"step": 2285 |
|
}, |
|
{ |
|
"epoch": 0.7050763961051457, |
|
"grad_norm": 0.39906039100573537, |
|
"learning_rate": 4.8425555186887096e-05, |
|
"loss": 1.027, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.7066158642189123, |
|
"grad_norm": 0.48115067130308187, |
|
"learning_rate": 4.796573864942868e-05, |
|
"loss": 1.0515, |
|
"step": 2295 |
|
}, |
|
{ |
|
"epoch": 0.708155332332679, |
|
"grad_norm": 0.43162848352113553, |
|
"learning_rate": 4.750742583440397e-05, |
|
"loss": 1.0054, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.7096948004464457, |
|
"grad_norm": 0.40152697201685444, |
|
"learning_rate": 4.7050629986456873e-05, |
|
"loss": 1.0279, |
|
"step": 2305 |
|
}, |
|
{ |
|
"epoch": 0.7112342685602124, |
|
"grad_norm": 0.43021780150976113, |
|
"learning_rate": 4.65953643063929e-05, |
|
"loss": 1.0391, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.7127737366739791, |
|
"grad_norm": 0.42088619664607274, |
|
"learning_rate": 4.6141641950797645e-05, |
|
"loss": 0.988, |
|
"step": 2315 |
|
}, |
|
{ |
|
"epoch": 0.7143132047877458, |
|
"grad_norm": 0.4851450074591337, |
|
"learning_rate": 4.5689476031656784e-05, |
|
"loss": 1.0581, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.7158526729015126, |
|
"grad_norm": 0.5755620737371696, |
|
"learning_rate": 4.523887961597688e-05, |
|
"loss": 1.0462, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 0.7173921410152793, |
|
"grad_norm": 0.41663849058693325, |
|
"learning_rate": 4.4789865725407934e-05, |
|
"loss": 1.0329, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.718931609129046, |
|
"grad_norm": 0.47615081033464657, |
|
"learning_rate": 4.434244733586699e-05, |
|
"loss": 1.0076, |
|
"step": 2335 |
|
}, |
|
{ |
|
"epoch": 0.7204710772428126, |
|
"grad_norm": 0.534330404158238, |
|
"learning_rate": 4.389663737716324e-05, |
|
"loss": 1.0224, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.7220105453565793, |
|
"grad_norm": 0.439745239024214, |
|
"learning_rate": 4.3452448732624264e-05, |
|
"loss": 1.0034, |
|
"step": 2345 |
|
}, |
|
{ |
|
"epoch": 0.723550013470346, |
|
"grad_norm": 0.46055944759568124, |
|
"learning_rate": 4.3009894238723856e-05, |
|
"loss": 1.0089, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.7250894815841127, |
|
"grad_norm": 0.5413958198748791, |
|
"learning_rate": 4.256898668471092e-05, |
|
"loss": 1.0401, |
|
"step": 2355 |
|
}, |
|
{ |
|
"epoch": 0.7266289496978794, |
|
"grad_norm": 0.460713247825251, |
|
"learning_rate": 4.212973881223994e-05, |
|
"loss": 1.0205, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.7281684178116461, |
|
"grad_norm": 0.4721070044498834, |
|
"learning_rate": 4.1692163315002784e-05, |
|
"loss": 1.0292, |
|
"step": 2365 |
|
}, |
|
{ |
|
"epoch": 0.7297078859254128, |
|
"grad_norm": 0.448843218109221, |
|
"learning_rate": 4.125627283836184e-05, |
|
"loss": 1.0274, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.7312473540391795, |
|
"grad_norm": 0.4152647845453433, |
|
"learning_rate": 4.082207997898457e-05, |
|
"loss": 0.9903, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 0.7327868221529461, |
|
"grad_norm": 0.386277666412621, |
|
"learning_rate": 4.0389597284479595e-05, |
|
"loss": 1.0103, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.7343262902667128, |
|
"grad_norm": 0.4141845583933384, |
|
"learning_rate": 3.995883725303392e-05, |
|
"loss": 1.0356, |
|
"step": 2385 |
|
}, |
|
{ |
|
"epoch": 0.7358657583804795, |
|
"grad_norm": 0.4038330356544352, |
|
"learning_rate": 3.952981233305183e-05, |
|
"loss": 1.0368, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.7374052264942462, |
|
"grad_norm": 0.4159685288158201, |
|
"learning_rate": 3.9102534922795166e-05, |
|
"loss": 1.0223, |
|
"step": 2395 |
|
}, |
|
{ |
|
"epoch": 0.7389446946080129, |
|
"grad_norm": 0.44365052943948013, |
|
"learning_rate": 3.867701737002502e-05, |
|
"loss": 1.0125, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.7404841627217796, |
|
"grad_norm": 0.40830415825064736, |
|
"learning_rate": 3.825327197164483e-05, |
|
"loss": 1.0027, |
|
"step": 2405 |
|
}, |
|
{ |
|
"epoch": 0.7420236308355463, |
|
"grad_norm": 0.6118151945433313, |
|
"learning_rate": 3.7831310973345216e-05, |
|
"loss": 1.014, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.743563098949313, |
|
"grad_norm": 0.43923835412327034, |
|
"learning_rate": 3.741114656924983e-05, |
|
"loss": 1.0389, |
|
"step": 2415 |
|
}, |
|
{ |
|
"epoch": 0.7451025670630798, |
|
"grad_norm": 0.4961592786809209, |
|
"learning_rate": 3.699279090156315e-05, |
|
"loss": 1.0488, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.7466420351768464, |
|
"grad_norm": 0.5382708745735688, |
|
"learning_rate": 3.6576256060219486e-05, |
|
"loss": 1.0083, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 0.7481815032906131, |
|
"grad_norm": 0.44692774623072656, |
|
"learning_rate": 3.616155408253367e-05, |
|
"loss": 1.0106, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.7497209714043798, |
|
"grad_norm": 0.42729189430033954, |
|
"learning_rate": 3.574869695285315e-05, |
|
"loss": 1.0088, |
|
"step": 2435 |
|
}, |
|
{ |
|
"epoch": 0.7512604395181465, |
|
"grad_norm": 0.4272983775590473, |
|
"learning_rate": 3.5337696602211614e-05, |
|
"loss": 0.9977, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.7527999076319132, |
|
"grad_norm": 0.41113864383958393, |
|
"learning_rate": 3.492856490798439e-05, |
|
"loss": 1.0322, |
|
"step": 2445 |
|
}, |
|
{ |
|
"epoch": 0.7543393757456799, |
|
"grad_norm": 0.44309109594960205, |
|
"learning_rate": 3.4521313693544966e-05, |
|
"loss": 1.0556, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.7558788438594466, |
|
"grad_norm": 0.4196493142920476, |
|
"learning_rate": 3.4115954727923395e-05, |
|
"loss": 1.0295, |
|
"step": 2455 |
|
}, |
|
{ |
|
"epoch": 0.7574183119732133, |
|
"grad_norm": 0.512441618937211, |
|
"learning_rate": 3.371249972546624e-05, |
|
"loss": 1.0165, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.7589577800869799, |
|
"grad_norm": 1.9609700442905864, |
|
"learning_rate": 3.3310960345497974e-05, |
|
"loss": 1.0584, |
|
"step": 2465 |
|
}, |
|
{ |
|
"epoch": 0.7604972482007466, |
|
"grad_norm": 0.4558093754389953, |
|
"learning_rate": 3.291134819198417e-05, |
|
"loss": 1.0328, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.7620367163145133, |
|
"grad_norm": 0.39208217735355977, |
|
"learning_rate": 3.251367481319596e-05, |
|
"loss": 1.039, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 0.76357618442828, |
|
"grad_norm": 0.4433705457387255, |
|
"learning_rate": 3.2117951701376436e-05, |
|
"loss": 1.0065, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.7651156525420467, |
|
"grad_norm": 0.3825862067647675, |
|
"learning_rate": 3.172419029240853e-05, |
|
"loss": 1.0403, |
|
"step": 2485 |
|
}, |
|
{ |
|
"epoch": 0.7666551206558134, |
|
"grad_norm": 0.4572258364515658, |
|
"learning_rate": 3.133240196548447e-05, |
|
"loss": 1.0303, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.7681945887695801, |
|
"grad_norm": 0.84283447972151, |
|
"learning_rate": 3.0942598042777073e-05, |
|
"loss": 1.0669, |
|
"step": 2495 |
|
}, |
|
{ |
|
"epoch": 0.7697340568833468, |
|
"grad_norm": 0.4610127698483277, |
|
"learning_rate": 3.0554789789112385e-05, |
|
"loss": 0.9836, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.7712735249971135, |
|
"grad_norm": 0.4325409044446896, |
|
"learning_rate": 3.0168988411644205e-05, |
|
"loss": 1.0065, |
|
"step": 2505 |
|
}, |
|
{ |
|
"epoch": 0.7728129931108801, |
|
"grad_norm": 0.45733195048796693, |
|
"learning_rate": 2.9785205059530263e-05, |
|
"loss": 1.0286, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.7743524612246468, |
|
"grad_norm": 0.4692226922974537, |
|
"learning_rate": 2.940345082360997e-05, |
|
"loss": 1.0233, |
|
"step": 2515 |
|
}, |
|
{ |
|
"epoch": 0.7758919293384136, |
|
"grad_norm": 0.40703032879120454, |
|
"learning_rate": 2.9023736736083872e-05, |
|
"loss": 1.0503, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.7774313974521803, |
|
"grad_norm": 0.603733310574897, |
|
"learning_rate": 2.864607377019498e-05, |
|
"loss": 1.0381, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 0.778970865565947, |
|
"grad_norm": 0.44557037962518165, |
|
"learning_rate": 2.82704728399115e-05, |
|
"loss": 1.0297, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.7805103336797137, |
|
"grad_norm": 0.4138521931758361, |
|
"learning_rate": 2.789694479961147e-05, |
|
"loss": 1.0195, |
|
"step": 2535 |
|
}, |
|
{ |
|
"epoch": 0.7820498017934804, |
|
"grad_norm": 0.494990799259698, |
|
"learning_rate": 2.7525500443769136e-05, |
|
"loss": 1.0257, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.7835892699072471, |
|
"grad_norm": 0.4425172941594563, |
|
"learning_rate": 2.715615050664294e-05, |
|
"loss": 1.0242, |
|
"step": 2545 |
|
}, |
|
{ |
|
"epoch": 0.7851287380210138, |
|
"grad_norm": 0.3752870060929472, |
|
"learning_rate": 2.6788905661965458e-05, |
|
"loss": 1.0265, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.7866682061347804, |
|
"grad_norm": 0.4116609510103765, |
|
"learning_rate": 2.64237765226347e-05, |
|
"loss": 1.0158, |
|
"step": 2555 |
|
}, |
|
{ |
|
"epoch": 0.7882076742485471, |
|
"grad_norm": 0.46202655193696285, |
|
"learning_rate": 2.606077364040762e-05, |
|
"loss": 1.0167, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.7897471423623138, |
|
"grad_norm": 0.6202191711943372, |
|
"learning_rate": 2.5699907505595068e-05, |
|
"loss": 0.9672, |
|
"step": 2565 |
|
}, |
|
{ |
|
"epoch": 0.7912866104760805, |
|
"grad_norm": 0.480489604669568, |
|
"learning_rate": 2.5341188546758688e-05, |
|
"loss": 0.9945, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.7928260785898472, |
|
"grad_norm": 0.4305543882648283, |
|
"learning_rate": 2.4984627130409577e-05, |
|
"loss": 1.0108, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 0.7943655467036139, |
|
"grad_norm": 0.37712645766524694, |
|
"learning_rate": 2.4630233560708615e-05, |
|
"loss": 0.9991, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.7959050148173806, |
|
"grad_norm": 0.4457882679562566, |
|
"learning_rate": 2.427801807916874e-05, |
|
"loss": 1.0353, |
|
"step": 2585 |
|
}, |
|
{ |
|
"epoch": 0.7974444829311473, |
|
"grad_norm": 0.43372798791794065, |
|
"learning_rate": 2.3927990864358984e-05, |
|
"loss": 1.016, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.7989839510449139, |
|
"grad_norm": 0.5674499912471999, |
|
"learning_rate": 2.358016203161031e-05, |
|
"loss": 1.035, |
|
"step": 2595 |
|
}, |
|
{ |
|
"epoch": 0.8005234191586806, |
|
"grad_norm": 0.5654406491640428, |
|
"learning_rate": 2.3234541632723272e-05, |
|
"loss": 1.018, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.8020628872724473, |
|
"grad_norm": 0.3806698162491372, |
|
"learning_rate": 2.2891139655677673e-05, |
|
"loss": 1.0353, |
|
"step": 2605 |
|
}, |
|
{ |
|
"epoch": 0.803602355386214, |
|
"grad_norm": 0.41395000872395227, |
|
"learning_rate": 2.2549966024343682e-05, |
|
"loss": 1.0198, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.8051418234999808, |
|
"grad_norm": 1.0022245903452656, |
|
"learning_rate": 2.2211030598195247e-05, |
|
"loss": 1.0366, |
|
"step": 2615 |
|
}, |
|
{ |
|
"epoch": 0.8066812916137475, |
|
"grad_norm": 0.37470132686684005, |
|
"learning_rate": 2.187434317202508e-05, |
|
"loss": 1.0154, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.8082207597275142, |
|
"grad_norm": 0.5250486396448188, |
|
"learning_rate": 2.1539913475661576e-05, |
|
"loss": 1.0053, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 0.8097602278412809, |
|
"grad_norm": 0.4181640130676048, |
|
"learning_rate": 2.1207751173687785e-05, |
|
"loss": 1.0723, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.8112996959550476, |
|
"grad_norm": 0.455626609677034, |
|
"learning_rate": 2.0877865865161915e-05, |
|
"loss": 1.0264, |
|
"step": 2635 |
|
}, |
|
{ |
|
"epoch": 0.8128391640688142, |
|
"grad_norm": 0.6557137988990834, |
|
"learning_rate": 2.0550267083340068e-05, |
|
"loss": 1.0192, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.8143786321825809, |
|
"grad_norm": 0.3947380563744468, |
|
"learning_rate": 2.0224964295400682e-05, |
|
"loss": 1.008, |
|
"step": 2645 |
|
}, |
|
{ |
|
"epoch": 0.8159181002963476, |
|
"grad_norm": 0.4190129497380854, |
|
"learning_rate": 1.9901966902170944e-05, |
|
"loss": 1.0024, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.8174575684101143, |
|
"grad_norm": 0.4733124256368494, |
|
"learning_rate": 1.95812842378552e-05, |
|
"loss": 1.0224, |
|
"step": 2655 |
|
}, |
|
{ |
|
"epoch": 0.818997036523881, |
|
"grad_norm": 0.4565926045173295, |
|
"learning_rate": 1.9262925569765087e-05, |
|
"loss": 1.0503, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.8205365046376477, |
|
"grad_norm": 0.4253658988474481, |
|
"learning_rate": 1.8946900098051778e-05, |
|
"loss": 1.0738, |
|
"step": 2665 |
|
}, |
|
{ |
|
"epoch": 0.8220759727514144, |
|
"grad_norm": 0.43002381136104756, |
|
"learning_rate": 1.8633216955440137e-05, |
|
"loss": 1.0504, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.8236154408651811, |
|
"grad_norm": 0.4015805803027289, |
|
"learning_rate": 1.832188520696472e-05, |
|
"loss": 1.0397, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 0.8251549089789477, |
|
"grad_norm": 0.46899175056681264, |
|
"learning_rate": 1.8012913849707868e-05, |
|
"loss": 1.0154, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.8266943770927144, |
|
"grad_norm": 0.4232662691268248, |
|
"learning_rate": 1.7706311812539757e-05, |
|
"loss": 1.0188, |
|
"step": 2685 |
|
}, |
|
{ |
|
"epoch": 0.8282338452064811, |
|
"grad_norm": 0.3588739754281993, |
|
"learning_rate": 1.7402087955860193e-05, |
|
"loss": 1.0213, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.8297733133202478, |
|
"grad_norm": 0.4423853417849867, |
|
"learning_rate": 1.710025107134272e-05, |
|
"loss": 1.0223, |
|
"step": 2695 |
|
}, |
|
{ |
|
"epoch": 0.8313127814340145, |
|
"grad_norm": 0.41804031171029893, |
|
"learning_rate": 1.680080988168049e-05, |
|
"loss": 1.0171, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.8328522495477813, |
|
"grad_norm": 0.42006267775596445, |
|
"learning_rate": 1.6503773040334126e-05, |
|
"loss": 0.9947, |
|
"step": 2705 |
|
}, |
|
{ |
|
"epoch": 0.834391717661548, |
|
"grad_norm": 0.43670340739758357, |
|
"learning_rate": 1.620914913128184e-05, |
|
"loss": 1.0432, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.8359311857753147, |
|
"grad_norm": 0.4821381027661066, |
|
"learning_rate": 1.591694666877114e-05, |
|
"loss": 1.004, |
|
"step": 2715 |
|
}, |
|
{ |
|
"epoch": 0.8374706538890814, |
|
"grad_norm": 0.3935605580571529, |
|
"learning_rate": 1.5627174097072904e-05, |
|
"loss": 0.9947, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.839010122002848, |
|
"grad_norm": 0.5080429213031332, |
|
"learning_rate": 1.533983979023733e-05, |
|
"loss": 1.0572, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 0.8405495901166147, |
|
"grad_norm": 0.4159778200626771, |
|
"learning_rate": 1.5054952051851934e-05, |
|
"loss": 1.0207, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.8420890582303814, |
|
"grad_norm": 0.4033999184219861, |
|
"learning_rate": 1.477251911480162e-05, |
|
"loss": 1.0353, |
|
"step": 2735 |
|
}, |
|
{ |
|
"epoch": 0.8436285263441481, |
|
"grad_norm": 0.4355088544418655, |
|
"learning_rate": 1.4492549141030687e-05, |
|
"loss": 1.0475, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.8451679944579148, |
|
"grad_norm": 0.48537370762792753, |
|
"learning_rate": 1.4215050221307002e-05, |
|
"loss": 1.0206, |
|
"step": 2745 |
|
}, |
|
{ |
|
"epoch": 0.8467074625716815, |
|
"grad_norm": 0.37095462049211186, |
|
"learning_rate": 1.394003037498821e-05, |
|
"loss": 1.025, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.8482469306854482, |
|
"grad_norm": 0.4437528225326038, |
|
"learning_rate": 1.3667497549789932e-05, |
|
"loss": 1.0128, |
|
"step": 2755 |
|
}, |
|
{ |
|
"epoch": 0.8497863987992149, |
|
"grad_norm": 0.4192896798172941, |
|
"learning_rate": 1.339745962155613e-05, |
|
"loss": 1.0333, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.8513258669129815, |
|
"grad_norm": 0.4323329881502678, |
|
"learning_rate": 1.3129924394031535e-05, |
|
"loss": 0.999, |
|
"step": 2765 |
|
}, |
|
{ |
|
"epoch": 0.8528653350267482, |
|
"grad_norm": 0.5312159218939685, |
|
"learning_rate": 1.2864899598636004e-05, |
|
"loss": 1.019, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.8544048031405149, |
|
"grad_norm": 0.37079351875448874, |
|
"learning_rate": 1.2602392894241222e-05, |
|
"loss": 1.0349, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 0.8559442712542816, |
|
"grad_norm": 0.4245080748700514, |
|
"learning_rate": 1.234241186694931e-05, |
|
"loss": 1.0131, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.8574837393680483, |
|
"grad_norm": 0.4340531910995576, |
|
"learning_rate": 1.208496402987358e-05, |
|
"loss": 1.0245, |
|
"step": 2785 |
|
}, |
|
{ |
|
"epoch": 0.859023207481815, |
|
"grad_norm": 0.37626279842019295, |
|
"learning_rate": 1.1830056822921521e-05, |
|
"loss": 1.0043, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.8605626755955817, |
|
"grad_norm": 0.5175737600246689, |
|
"learning_rate": 1.1577697612579641e-05, |
|
"loss": 1.0232, |
|
"step": 2795 |
|
}, |
|
{ |
|
"epoch": 0.8621021437093485, |
|
"grad_norm": 0.44974494562352096, |
|
"learning_rate": 1.1327893691700698e-05, |
|
"loss": 0.9889, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.8636416118231152, |
|
"grad_norm": 0.430436147584834, |
|
"learning_rate": 1.1080652279292891e-05, |
|
"loss": 1.0236, |
|
"step": 2805 |
|
}, |
|
{ |
|
"epoch": 0.8651810799368818, |
|
"grad_norm": 0.4728221888964366, |
|
"learning_rate": 1.0835980520311251e-05, |
|
"loss": 1.0155, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.8667205480506485, |
|
"grad_norm": 0.397220073379638, |
|
"learning_rate": 1.0593885485451237e-05, |
|
"loss": 1.0099, |
|
"step": 2815 |
|
}, |
|
{ |
|
"epoch": 0.8682600161644152, |
|
"grad_norm": 0.5348645223247931, |
|
"learning_rate": 1.0354374170944258e-05, |
|
"loss": 1.0416, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.8697994842781819, |
|
"grad_norm": 0.4043554078765482, |
|
"learning_rate": 1.011745349835559e-05, |
|
"loss": 1.0121, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 0.8713389523919486, |
|
"grad_norm": 0.468593340879125, |
|
"learning_rate": 9.883130314384348e-06, |
|
"loss": 1.0231, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.8728784205057153, |
|
"grad_norm": 0.46465265214459056, |
|
"learning_rate": 9.651411390665577e-06, |
|
"loss": 0.9918, |
|
"step": 2835 |
|
}, |
|
{ |
|
"epoch": 0.874417888619482, |
|
"grad_norm": 0.5605586645440735, |
|
"learning_rate": 9.422303423574596e-06, |
|
"loss": 1.0176, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.8759573567332487, |
|
"grad_norm": 1.775692528658347, |
|
"learning_rate": 9.195813034033508e-06, |
|
"loss": 1.0134, |
|
"step": 2845 |
|
}, |
|
{ |
|
"epoch": 0.8774968248470154, |
|
"grad_norm": 0.44922271095321187, |
|
"learning_rate": 8.971946767319805e-06, |
|
"loss": 0.9981, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.879036292960782, |
|
"grad_norm": 0.5098548100487027, |
|
"learning_rate": 8.75071109287724e-06, |
|
"loss": 1.0052, |
|
"step": 2855 |
|
}, |
|
{ |
|
"epoch": 0.8805757610745487, |
|
"grad_norm": 0.41578211906898727, |
|
"learning_rate": 8.532112404128877e-06, |
|
"loss": 1.0128, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.8821152291883154, |
|
"grad_norm": 0.3496924486384235, |
|
"learning_rate": 8.316157018292326e-06, |
|
"loss": 1.0479, |
|
"step": 2865 |
|
}, |
|
{ |
|
"epoch": 0.8836546973020821, |
|
"grad_norm": 0.3816021309698796, |
|
"learning_rate": 8.102851176197201e-06, |
|
"loss": 1.0103, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.8851941654158488, |
|
"grad_norm": 0.3838865693994203, |
|
"learning_rate": 7.892201042104718e-06, |
|
"loss": 1.0326, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 0.8867336335296155, |
|
"grad_norm": 0.4294597636569908, |
|
"learning_rate": 7.684212703529624e-06, |
|
"loss": 1.0323, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.8882731016433822, |
|
"grad_norm": 0.43018290128127384, |
|
"learning_rate": 7.4788921710642e-06, |
|
"loss": 1.0188, |
|
"step": 2885 |
|
}, |
|
{ |
|
"epoch": 0.889812569757149, |
|
"grad_norm": 0.6183016933222353, |
|
"learning_rate": 7.276245378204616e-06, |
|
"loss": 1.0259, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.8913520378709155, |
|
"grad_norm": 0.46596709036144063, |
|
"learning_rate": 7.076278181179485e-06, |
|
"loss": 1.0316, |
|
"step": 2895 |
|
}, |
|
{ |
|
"epoch": 0.8928915059846823, |
|
"grad_norm": 0.45497909906909056, |
|
"learning_rate": 6.878996358780532e-06, |
|
"loss": 1.0326, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.894430974098449, |
|
"grad_norm": 0.4605589314545624, |
|
"learning_rate": 6.684405612195688e-06, |
|
"loss": 1.0184, |
|
"step": 2905 |
|
}, |
|
{ |
|
"epoch": 0.8959704422122157, |
|
"grad_norm": 0.35212400219047424, |
|
"learning_rate": 6.492511564844273e-06, |
|
"loss": 0.9976, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.8975099103259824, |
|
"grad_norm": 0.431075612216501, |
|
"learning_rate": 6.303319762214499e-06, |
|
"loss": 1.0282, |
|
"step": 2915 |
|
}, |
|
{ |
|
"epoch": 0.8990493784397491, |
|
"grad_norm": 0.49886462139074744, |
|
"learning_rate": 6.11683567170328e-06, |
|
"loss": 1.0453, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.9005888465535158, |
|
"grad_norm": 0.5001791356037993, |
|
"learning_rate": 5.933064682458122e-06, |
|
"loss": 0.9974, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 0.9021283146672825, |
|
"grad_norm": 0.4334035137279178, |
|
"learning_rate": 5.7520121052214275e-06, |
|
"loss": 1.002, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.9036677827810492, |
|
"grad_norm": 0.45835892877048534, |
|
"learning_rate": 5.57368317217708e-06, |
|
"loss": 1.0524, |
|
"step": 2935 |
|
}, |
|
{ |
|
"epoch": 0.9052072508948158, |
|
"grad_norm": 0.3752767008874508, |
|
"learning_rate": 5.398083036799129e-06, |
|
"loss": 0.9691, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.9067467190085825, |
|
"grad_norm": 0.4935328531459081, |
|
"learning_rate": 5.225216773702968e-06, |
|
"loss": 1.0252, |
|
"step": 2945 |
|
}, |
|
{ |
|
"epoch": 0.9082861871223492, |
|
"grad_norm": 0.5210304589005964, |
|
"learning_rate": 5.055089378498634e-06, |
|
"loss": 1.0199, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.9098256552361159, |
|
"grad_norm": 0.4254020055130796, |
|
"learning_rate": 4.887705767646434e-06, |
|
"loss": 0.9989, |
|
"step": 2955 |
|
}, |
|
{ |
|
"epoch": 0.9113651233498826, |
|
"grad_norm": 0.4643941731342843, |
|
"learning_rate": 4.7230707783148864e-06, |
|
"loss": 1.0074, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.9129045914636493, |
|
"grad_norm": 0.3715160286597917, |
|
"learning_rate": 4.561189168240909e-06, |
|
"loss": 1.0682, |
|
"step": 2965 |
|
}, |
|
{ |
|
"epoch": 0.914444059577416, |
|
"grad_norm": 0.36108361076071477, |
|
"learning_rate": 4.402065615592344e-06, |
|
"loss": 0.9939, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.9159835276911827, |
|
"grad_norm": 0.460057449616597, |
|
"learning_rate": 4.245704718832811e-06, |
|
"loss": 1.029, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 0.9175229958049493, |
|
"grad_norm": 0.42151771172138136, |
|
"learning_rate": 4.092110996588705e-06, |
|
"loss": 1.0416, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.919062463918716, |
|
"grad_norm": 0.4208982471816418, |
|
"learning_rate": 3.941288887518713e-06, |
|
"loss": 1.0083, |
|
"step": 2985 |
|
}, |
|
{ |
|
"epoch": 0.9206019320324828, |
|
"grad_norm": 0.3754941722458392, |
|
"learning_rate": 3.7932427501854996e-06, |
|
"loss": 1.0317, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.9221414001462495, |
|
"grad_norm": 0.5370873955203277, |
|
"learning_rate": 3.647976862929747e-06, |
|
"loss": 1.054, |
|
"step": 2995 |
|
}, |
|
{ |
|
"epoch": 0.9236808682600162, |
|
"grad_norm": 0.4426665180986752, |
|
"learning_rate": 3.505495423746574e-06, |
|
"loss": 1.0287, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.9252203363737829, |
|
"grad_norm": 0.39355763646149944, |
|
"learning_rate": 3.3658025501641323e-06, |
|
"loss": 1.0538, |
|
"step": 3005 |
|
}, |
|
{ |
|
"epoch": 0.9267598044875496, |
|
"grad_norm": 0.3926350659174442, |
|
"learning_rate": 3.228902279124657e-06, |
|
"loss": 1.0269, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.9282992726013163, |
|
"grad_norm": 0.5307688367772885, |
|
"learning_rate": 3.094798566867818e-06, |
|
"loss": 1.0098, |
|
"step": 3015 |
|
}, |
|
{ |
|
"epoch": 0.929838740715083, |
|
"grad_norm": 0.5463010628237303, |
|
"learning_rate": 2.963495288816376e-06, |
|
"loss": 1.0698, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.9313782088288496, |
|
"grad_norm": 0.4767217195493804, |
|
"learning_rate": 2.8349962394641605e-06, |
|
"loss": 1.0652, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 0.9329176769426163, |
|
"grad_norm": 0.46383675807120084, |
|
"learning_rate": 2.709305132266493e-06, |
|
"loss": 1.0566, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.934457145056383, |
|
"grad_norm": 0.406764613161873, |
|
"learning_rate": 2.5864255995327936e-06, |
|
"loss": 1.02, |
|
"step": 3035 |
|
}, |
|
{ |
|
"epoch": 0.9359966131701497, |
|
"grad_norm": 0.5667072139952383, |
|
"learning_rate": 2.46636119232162e-06, |
|
"loss": 1.0293, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.9375360812839164, |
|
"grad_norm": 0.4123820408065487, |
|
"learning_rate": 2.349115380338096e-06, |
|
"loss": 1.0528, |
|
"step": 3045 |
|
}, |
|
{ |
|
"epoch": 0.9390755493976831, |
|
"grad_norm": 0.42212082973648146, |
|
"learning_rate": 2.2346915518335786e-06, |
|
"loss": 0.9751, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.9406150175114498, |
|
"grad_norm": 0.44633003035441593, |
|
"learning_rate": 2.1230930135078373e-06, |
|
"loss": 1.0071, |
|
"step": 3055 |
|
}, |
|
{ |
|
"epoch": 0.9421544856252165, |
|
"grad_norm": 0.6009896163488582, |
|
"learning_rate": 2.014322990413353e-06, |
|
"loss": 1.0275, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.9436939537389831, |
|
"grad_norm": 0.3529405684211488, |
|
"learning_rate": 1.9083846258622586e-06, |
|
"loss": 1.0176, |
|
"step": 3065 |
|
}, |
|
{ |
|
"epoch": 0.9452334218527498, |
|
"grad_norm": 0.7896994457794821, |
|
"learning_rate": 1.8052809813354111e-06, |
|
"loss": 1.0063, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.9467728899665165, |
|
"grad_norm": 0.48377625576699135, |
|
"learning_rate": 1.705015036393931e-06, |
|
"loss": 1.057, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 0.9483123580802832, |
|
"grad_norm": 0.43904259295920206, |
|
"learning_rate": 1.6075896885931807e-06, |
|
"loss": 1.0227, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.94985182619405, |
|
"grad_norm": 0.4480774273472164, |
|
"learning_rate": 1.5130077533988873e-06, |
|
"loss": 1.0369, |
|
"step": 3085 |
|
}, |
|
{ |
|
"epoch": 0.9513912943078167, |
|
"grad_norm": 0.4679465739342283, |
|
"learning_rate": 1.421271964105908e-06, |
|
"loss": 1.0623, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.9529307624215834, |
|
"grad_norm": 0.44630980531648395, |
|
"learning_rate": 1.3323849717591376e-06, |
|
"loss": 1.0166, |
|
"step": 3095 |
|
}, |
|
{ |
|
"epoch": 0.9544702305353501, |
|
"grad_norm": 0.5287384335577812, |
|
"learning_rate": 1.2463493450769915e-06, |
|
"loss": 1.0425, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.9560096986491168, |
|
"grad_norm": 0.40230009571762976, |
|
"learning_rate": 1.1631675703771105e-06, |
|
"loss": 1.0385, |
|
"step": 3105 |
|
}, |
|
{ |
|
"epoch": 0.9575491667628834, |
|
"grad_norm": 0.49211013441821094, |
|
"learning_rate": 1.0828420515045178e-06, |
|
"loss": 1.0234, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.9590886348766501, |
|
"grad_norm": 0.48292762219822727, |
|
"learning_rate": 1.0053751097621856e-06, |
|
"loss": 0.9869, |
|
"step": 3115 |
|
}, |
|
{ |
|
"epoch": 0.9606281029904168, |
|
"grad_norm": 0.4431002985495294, |
|
"learning_rate": 9.307689838439104e-07, |
|
"loss": 1.0363, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.9621675711041835, |
|
"grad_norm": 0.3771405426388914, |
|
"learning_rate": 8.590258297696108e-07, |
|
"loss": 0.978, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 0.9637070392179502, |
|
"grad_norm": 0.4242241866712476, |
|
"learning_rate": 7.901477208230979e-07, |
|
"loss": 1.0173, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.9652465073317169, |
|
"grad_norm": 0.42765029098432733, |
|
"learning_rate": 7.241366474920797e-07, |
|
"loss": 1.0021, |
|
"step": 3135 |
|
}, |
|
{ |
|
"epoch": 0.9667859754454836, |
|
"grad_norm": 0.4026446330368183, |
|
"learning_rate": 6.609945174106402e-07, |
|
"loss": 1.0352, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.9683254435592503, |
|
"grad_norm": 0.4586738339921203, |
|
"learning_rate": 6.007231553041837e-07, |
|
"loss": 1.0087, |
|
"step": 3145 |
|
}, |
|
{ |
|
"epoch": 0.969864911673017, |
|
"grad_norm": 0.41482262786645374, |
|
"learning_rate": 5.43324302936643e-07, |
|
"loss": 1.0914, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.9714043797867836, |
|
"grad_norm": 0.42202537226167186, |
|
"learning_rate": 4.887996190601318e-07, |
|
"loss": 1.0419, |
|
"step": 3155 |
|
}, |
|
{ |
|
"epoch": 0.9729438479005503, |
|
"grad_norm": 0.37960996848907147, |
|
"learning_rate": 4.3715067936705987e-07, |
|
"loss": 1.0171, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.974483316014317, |
|
"grad_norm": 0.7530238458576849, |
|
"learning_rate": 3.8837897644457e-07, |
|
"loss": 1.0446, |
|
"step": 3165 |
|
}, |
|
{ |
|
"epoch": 0.9760227841280837, |
|
"grad_norm": 0.3574506914494524, |
|
"learning_rate": 3.4248591973140566e-07, |
|
"loss": 1.0402, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.9775622522418504, |
|
"grad_norm": 0.4403626626769491, |
|
"learning_rate": 2.994728354771659e-07, |
|
"loss": 1.0618, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 0.9791017203556172, |
|
"grad_norm": 0.43643836321504664, |
|
"learning_rate": 2.593409667040247e-07, |
|
"loss": 0.9829, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.9806411884693839, |
|
"grad_norm": 0.44879821672356546, |
|
"learning_rate": 2.2209147317074908e-07, |
|
"loss": 0.9989, |
|
"step": 3185 |
|
}, |
|
{ |
|
"epoch": 0.9821806565831506, |
|
"grad_norm": 0.464478069097993, |
|
"learning_rate": 1.8772543133922515e-07, |
|
"loss": 1.0269, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.9837201246969172, |
|
"grad_norm": 0.3840629750152777, |
|
"learning_rate": 1.5624383434333923e-07, |
|
"loss": 1.0007, |
|
"step": 3195 |
|
}, |
|
{ |
|
"epoch": 0.9852595928106839, |
|
"grad_norm": 0.4819476738232411, |
|
"learning_rate": 1.276475919602671e-07, |
|
"loss": 1.0183, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.9867990609244506, |
|
"grad_norm": 0.4679445982996664, |
|
"learning_rate": 1.019375305842063e-07, |
|
"loss": 1.0093, |
|
"step": 3205 |
|
}, |
|
{ |
|
"epoch": 0.9883385290382173, |
|
"grad_norm": 0.608255287884667, |
|
"learning_rate": 7.911439320247294e-08, |
|
"loss": 1.0133, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.989877997151984, |
|
"grad_norm": 0.49744148279623623, |
|
"learning_rate": 5.9178839374018914e-08, |
|
"loss": 1.0323, |
|
"step": 3215 |
|
}, |
|
{ |
|
"epoch": 0.9914174652657507, |
|
"grad_norm": 0.4874179505714964, |
|
"learning_rate": 4.213144521042489e-08, |
|
"loss": 1.0355, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.9929569333795174, |
|
"grad_norm": 0.42907270925432495, |
|
"learning_rate": 2.797270335916924e-08, |
|
"loss": 1.0004, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 0.9944964014932841, |
|
"grad_norm": 0.36896334930948343, |
|
"learning_rate": 1.6703022989494887e-08, |
|
"loss": 1.0378, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.9960358696070508, |
|
"grad_norm": 0.369776577000758, |
|
"learning_rate": 8.322729780474436e-09, |
|
"loss": 1.0181, |
|
"step": 3235 |
|
}, |
|
{ |
|
"epoch": 0.9975753377208174, |
|
"grad_norm": 0.381178979843484, |
|
"learning_rate": 2.8320659116953806e-09, |
|
"loss": 1.016, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.9991148058345841, |
|
"grad_norm": 1.063968939156568, |
|
"learning_rate": 2.311900561768887e-10, |
|
"loss": 1.008, |
|
"step": 3245 |
|
}, |
|
{ |
|
"epoch": 0.9997305930800908, |
|
"eval_loss": 1.0235061645507812, |
|
"eval_runtime": 3775.83, |
|
"eval_samples_per_second": 6.12, |
|
"eval_steps_per_second": 0.383, |
|
"step": 3247 |
|
}, |
|
{ |
|
"epoch": 0.9997305930800908, |
|
"step": 3247, |
|
"total_flos": 4.15615026158633e+16, |
|
"train_loss": 1.0426253444200522, |
|
"train_runtime": 139956.5138, |
|
"train_samples_per_second": 1.485, |
|
"train_steps_per_second": 0.023 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 3247, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 4.15615026158633e+16, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|