|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.2668889106775174, |
|
"eval_steps": 341, |
|
"global_step": 1361, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00019609765663300324, |
|
"grad_norm": 18.01366424560547, |
|
"learning_rate": 2e-05, |
|
"loss": 3.0843, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00019609765663300324, |
|
"eval_loss": 1.1017773151397705, |
|
"eval_runtime": 79.0856, |
|
"eval_samples_per_second": 27.16, |
|
"eval_steps_per_second": 13.58, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0003921953132660065, |
|
"grad_norm": 16.80318260192871, |
|
"learning_rate": 4e-05, |
|
"loss": 3.2221, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0005882929698990097, |
|
"grad_norm": 16.65929412841797, |
|
"learning_rate": 6e-05, |
|
"loss": 3.8956, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.000784390626532013, |
|
"grad_norm": 37.782188415527344, |
|
"learning_rate": 8e-05, |
|
"loss": 5.185, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0009804882831650162, |
|
"grad_norm": 19.226940155029297, |
|
"learning_rate": 0.0001, |
|
"loss": 3.1542, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0011765859397980193, |
|
"grad_norm": 26.570402145385742, |
|
"learning_rate": 0.00012, |
|
"loss": 4.5356, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0013726835964310226, |
|
"grad_norm": 22.43348503112793, |
|
"learning_rate": 0.00014, |
|
"loss": 3.8177, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.001568781253064026, |
|
"grad_norm": 29.817941665649414, |
|
"learning_rate": 0.00016, |
|
"loss": 4.4013, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.0017648789096970292, |
|
"grad_norm": 18.46044158935547, |
|
"learning_rate": 0.00018, |
|
"loss": 3.374, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0019609765663300325, |
|
"grad_norm": 24.337013244628906, |
|
"learning_rate": 0.0002, |
|
"loss": 3.2509, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0021570742229630358, |
|
"grad_norm": 18.9931640625, |
|
"learning_rate": 0.00019999972962977903, |
|
"loss": 2.933, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.0023531718795960386, |
|
"grad_norm": 17.7039852142334, |
|
"learning_rate": 0.00019999891852057812, |
|
"loss": 3.2867, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.002549269536229042, |
|
"grad_norm": 15.128417015075684, |
|
"learning_rate": 0.0001999975666767833, |
|
"loss": 2.2303, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.002745367192862045, |
|
"grad_norm": 8.747583389282227, |
|
"learning_rate": 0.00019999567410570446, |
|
"loss": 1.7348, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.0029414648494950485, |
|
"grad_norm": 20.570377349853516, |
|
"learning_rate": 0.00019999324081757555, |
|
"loss": 3.9284, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.003137562506128052, |
|
"grad_norm": 7.86672306060791, |
|
"learning_rate": 0.00019999026682555434, |
|
"loss": 1.8121, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.003333660162761055, |
|
"grad_norm": 10.341567039489746, |
|
"learning_rate": 0.0001999867521457224, |
|
"loss": 1.5864, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.0035297578193940584, |
|
"grad_norm": 7.888909339904785, |
|
"learning_rate": 0.00019998269679708504, |
|
"loss": 3.1584, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.0037258554760270617, |
|
"grad_norm": 15.346771240234375, |
|
"learning_rate": 0.00019997810080157113, |
|
"loss": 2.0858, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.003921953132660065, |
|
"grad_norm": 11.417168617248535, |
|
"learning_rate": 0.0001999729641840331, |
|
"loss": 3.5646, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.004118050789293068, |
|
"grad_norm": 12.86220932006836, |
|
"learning_rate": 0.00019996728697224675, |
|
"loss": 3.0594, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.0043141484459260715, |
|
"grad_norm": 7.418736457824707, |
|
"learning_rate": 0.00019996106919691102, |
|
"loss": 2.1349, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.004510246102559075, |
|
"grad_norm": 4.803528308868408, |
|
"learning_rate": 0.00019995431089164795, |
|
"loss": 2.3151, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.004706343759192077, |
|
"grad_norm": 8.667632102966309, |
|
"learning_rate": 0.00019994701209300245, |
|
"loss": 1.6791, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.0049024414158250805, |
|
"grad_norm": 5.3950724601745605, |
|
"learning_rate": 0.00019993917284044202, |
|
"loss": 2.7265, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.005098539072458084, |
|
"grad_norm": 18.16863441467285, |
|
"learning_rate": 0.0001999307931763567, |
|
"loss": 1.7289, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.005294636729091087, |
|
"grad_norm": 10.66887378692627, |
|
"learning_rate": 0.00019992187314605872, |
|
"loss": 2.645, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.00549073438572409, |
|
"grad_norm": 26.42878532409668, |
|
"learning_rate": 0.00019991241279778232, |
|
"loss": 3.4603, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.005686832042357094, |
|
"grad_norm": 10.017987251281738, |
|
"learning_rate": 0.0001999024121826834, |
|
"loss": 1.2001, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.005882929698990097, |
|
"grad_norm": 11.155564308166504, |
|
"learning_rate": 0.00019989187135483933, |
|
"loss": 1.5102, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0060790273556231, |
|
"grad_norm": 3.7590131759643555, |
|
"learning_rate": 0.00019988079037124864, |
|
"loss": 1.0619, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.006275125012256104, |
|
"grad_norm": 8.47985553741455, |
|
"learning_rate": 0.00019986916929183067, |
|
"loss": 2.6256, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.006471222668889107, |
|
"grad_norm": 16.655269622802734, |
|
"learning_rate": 0.00019985700817942533, |
|
"loss": 2.2039, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.00666732032552211, |
|
"grad_norm": 9.003829956054688, |
|
"learning_rate": 0.00019984430709979264, |
|
"loss": 1.7281, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.0068634179821551134, |
|
"grad_norm": 36.33080291748047, |
|
"learning_rate": 0.0001998310661216125, |
|
"loss": 3.0575, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.007059515638788117, |
|
"grad_norm": 11.909663200378418, |
|
"learning_rate": 0.00019981728531648423, |
|
"loss": 1.9623, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.00725561329542112, |
|
"grad_norm": 10.967493057250977, |
|
"learning_rate": 0.00019980296475892616, |
|
"loss": 2.8071, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.007451710952054123, |
|
"grad_norm": 11.050918579101562, |
|
"learning_rate": 0.00019978810452637543, |
|
"loss": 1.8019, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.007647808608687127, |
|
"grad_norm": 17.752105712890625, |
|
"learning_rate": 0.00019977270469918727, |
|
"loss": 3.0332, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.00784390626532013, |
|
"grad_norm": 6.67478609085083, |
|
"learning_rate": 0.0001997567653606348, |
|
"loss": 1.32, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.008040003921953132, |
|
"grad_norm": 7.402947902679443, |
|
"learning_rate": 0.00019974028659690843, |
|
"loss": 1.4442, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.008236101578586136, |
|
"grad_norm": 13.006294250488281, |
|
"learning_rate": 0.00019972326849711553, |
|
"loss": 2.2418, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.008432199235219139, |
|
"grad_norm": 5.940983295440674, |
|
"learning_rate": 0.00019970571115327985, |
|
"loss": 0.9049, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.008628296891852143, |
|
"grad_norm": 9.866477012634277, |
|
"learning_rate": 0.00019968761466034103, |
|
"loss": 2.7203, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.008824394548485145, |
|
"grad_norm": 8.239157676696777, |
|
"learning_rate": 0.00019966897911615416, |
|
"loss": 1.9653, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.00902049220511815, |
|
"grad_norm": 16.142181396484375, |
|
"learning_rate": 0.0001996498046214891, |
|
"loss": 2.7509, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.009216589861751152, |
|
"grad_norm": 9.295431137084961, |
|
"learning_rate": 0.00019963009128003018, |
|
"loss": 2.0133, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.009412687518384154, |
|
"grad_norm": 10.362130165100098, |
|
"learning_rate": 0.00019960983919837535, |
|
"loss": 1.716, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.009608785175017159, |
|
"grad_norm": 28.2889461517334, |
|
"learning_rate": 0.00019958904848603584, |
|
"loss": 2.8961, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.009804882831650161, |
|
"grad_norm": 13.199325561523438, |
|
"learning_rate": 0.0001995677192554354, |
|
"loss": 2.62, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.010000980488283165, |
|
"grad_norm": 11.95132827758789, |
|
"learning_rate": 0.00019954585162190985, |
|
"loss": 2.792, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.010197078144916168, |
|
"grad_norm": 16.581575393676758, |
|
"learning_rate": 0.0001995234457037063, |
|
"loss": 2.7239, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.010393175801549172, |
|
"grad_norm": 12.048559188842773, |
|
"learning_rate": 0.00019950050162198258, |
|
"loss": 1.9892, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.010589273458182174, |
|
"grad_norm": 9.297942161560059, |
|
"learning_rate": 0.00019947701950080672, |
|
"loss": 1.8015, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.010785371114815178, |
|
"grad_norm": 6.09962797164917, |
|
"learning_rate": 0.00019945299946715596, |
|
"loss": 1.1493, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.01098146877144818, |
|
"grad_norm": 6.668224811553955, |
|
"learning_rate": 0.00019942844165091633, |
|
"loss": 0.9968, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.011177566428081185, |
|
"grad_norm": 17.000507354736328, |
|
"learning_rate": 0.00019940334618488194, |
|
"loss": 1.5857, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.011373664084714187, |
|
"grad_norm": 11.42551326751709, |
|
"learning_rate": 0.00019937771320475406, |
|
"loss": 1.5236, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.011569761741347192, |
|
"grad_norm": 13.255271911621094, |
|
"learning_rate": 0.00019935154284914065, |
|
"loss": 1.6174, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.011765859397980194, |
|
"grad_norm": 12.664427757263184, |
|
"learning_rate": 0.00019932483525955533, |
|
"loss": 1.476, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.011961957054613198, |
|
"grad_norm": 19.540660858154297, |
|
"learning_rate": 0.00019929759058041687, |
|
"loss": 1.5251, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.0121580547112462, |
|
"grad_norm": 10.44942855834961, |
|
"learning_rate": 0.0001992698089590483, |
|
"loss": 1.7865, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.012354152367879203, |
|
"grad_norm": 13.294017791748047, |
|
"learning_rate": 0.00019924149054567606, |
|
"loss": 4.1284, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.012550250024512207, |
|
"grad_norm": 13.700861930847168, |
|
"learning_rate": 0.00019921263549342922, |
|
"loss": 2.8987, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.01274634768114521, |
|
"grad_norm": 51.7520866394043, |
|
"learning_rate": 0.00019918324395833877, |
|
"loss": 1.7335, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.012942445337778214, |
|
"grad_norm": 25.415748596191406, |
|
"learning_rate": 0.00019915331609933657, |
|
"loss": 2.6131, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.013138542994411216, |
|
"grad_norm": 10.575088500976562, |
|
"learning_rate": 0.00019912285207825475, |
|
"loss": 1.202, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.01333464065104422, |
|
"grad_norm": 7.0860772132873535, |
|
"learning_rate": 0.00019909185205982453, |
|
"loss": 1.5077, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.013530738307677223, |
|
"grad_norm": 8.169393539428711, |
|
"learning_rate": 0.00019906031621167553, |
|
"loss": 2.4139, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.013726835964310227, |
|
"grad_norm": 15.750587463378906, |
|
"learning_rate": 0.00019902824470433489, |
|
"loss": 2.8999, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.01392293362094323, |
|
"grad_norm": 19.615734100341797, |
|
"learning_rate": 0.00019899563771122618, |
|
"loss": 3.3154, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.014119031277576233, |
|
"grad_norm": 24.015810012817383, |
|
"learning_rate": 0.0001989624954086686, |
|
"loss": 2.6689, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.014315128934209236, |
|
"grad_norm": 7.955759525299072, |
|
"learning_rate": 0.00019892881797587601, |
|
"loss": 1.8847, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.01451122659084224, |
|
"grad_norm": 13.804747581481934, |
|
"learning_rate": 0.00019889460559495588, |
|
"loss": 2.2221, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.014707324247475242, |
|
"grad_norm": 19.74148941040039, |
|
"learning_rate": 0.0001988598584509084, |
|
"loss": 1.9316, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.014903421904108247, |
|
"grad_norm": 7.9701385498046875, |
|
"learning_rate": 0.00019882457673162543, |
|
"loss": 2.1958, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.015099519560741249, |
|
"grad_norm": 4.7594170570373535, |
|
"learning_rate": 0.00019878876062788954, |
|
"loss": 1.3551, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.015295617217374253, |
|
"grad_norm": 7.796854496002197, |
|
"learning_rate": 0.0001987524103333728, |
|
"loss": 2.616, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.015491714874007256, |
|
"grad_norm": 6.997422218322754, |
|
"learning_rate": 0.00019871552604463602, |
|
"loss": 2.908, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.01568781253064026, |
|
"grad_norm": 6.436347484588623, |
|
"learning_rate": 0.00019867810796112744, |
|
"loss": 3.6826, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.01588391018727326, |
|
"grad_norm": 10.243407249450684, |
|
"learning_rate": 0.00019864015628518175, |
|
"loss": 1.3711, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.016080007843906265, |
|
"grad_norm": 11.830530166625977, |
|
"learning_rate": 0.00019860167122201904, |
|
"loss": 2.2325, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.01627610550053927, |
|
"grad_norm": 7.456768989562988, |
|
"learning_rate": 0.0001985626529797436, |
|
"loss": 1.9991, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.016472203157172273, |
|
"grad_norm": 50.91776657104492, |
|
"learning_rate": 0.00019852310176934288, |
|
"loss": 1.294, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.016668300813805274, |
|
"grad_norm": 6.4661054611206055, |
|
"learning_rate": 0.00019848301780468622, |
|
"loss": 2.4052, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.016864398470438278, |
|
"grad_norm": 5.4749674797058105, |
|
"learning_rate": 0.00019844240130252385, |
|
"loss": 2.1508, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.017060496127071282, |
|
"grad_norm": 9.457857131958008, |
|
"learning_rate": 0.00019840125248248564, |
|
"loss": 2.1732, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.017256593783704286, |
|
"grad_norm": 6.932736396789551, |
|
"learning_rate": 0.00019835957156707988, |
|
"loss": 1.0618, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.017452691440337287, |
|
"grad_norm": 10.447488784790039, |
|
"learning_rate": 0.00019831735878169212, |
|
"loss": 1.1214, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.01764878909697029, |
|
"grad_norm": 7.159199237823486, |
|
"learning_rate": 0.000198274614354584, |
|
"loss": 2.495, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.017844886753603295, |
|
"grad_norm": 8.753946304321289, |
|
"learning_rate": 0.00019823133851689187, |
|
"loss": 2.343, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.0180409844102363, |
|
"grad_norm": 9.124870300292969, |
|
"learning_rate": 0.00019818753150262574, |
|
"loss": 1.6556, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.0182370820668693, |
|
"grad_norm": 5.470896244049072, |
|
"learning_rate": 0.00019814319354866786, |
|
"loss": 1.2787, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.018433179723502304, |
|
"grad_norm": 8.39749526977539, |
|
"learning_rate": 0.00019809832489477142, |
|
"loss": 1.6804, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.01862927738013531, |
|
"grad_norm": 6.869524955749512, |
|
"learning_rate": 0.0001980529257835594, |
|
"loss": 1.6563, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.01882537503676831, |
|
"grad_norm": 11.144633293151855, |
|
"learning_rate": 0.0001980069964605232, |
|
"loss": 1.9428, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.019021472693401313, |
|
"grad_norm": 11.564960479736328, |
|
"learning_rate": 0.00019796053717402118, |
|
"loss": 2.2905, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.019217570350034317, |
|
"grad_norm": 4.628671169281006, |
|
"learning_rate": 0.00019791354817527755, |
|
"loss": 1.0654, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.01941366800666732, |
|
"grad_norm": 15.970938682556152, |
|
"learning_rate": 0.00019786602971838074, |
|
"loss": 2.9314, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.019609765663300322, |
|
"grad_norm": 5.425403594970703, |
|
"learning_rate": 0.00019781798206028239, |
|
"loss": 1.7236, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.019805863319933326, |
|
"grad_norm": 5.114929676055908, |
|
"learning_rate": 0.0001977694054607955, |
|
"loss": 2.1674, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.02000196097656633, |
|
"grad_norm": 9.564162254333496, |
|
"learning_rate": 0.0001977203001825935, |
|
"loss": 2.3091, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.020198058633199335, |
|
"grad_norm": 7.184217929840088, |
|
"learning_rate": 0.00019767066649120838, |
|
"loss": 2.5231, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.020394156289832335, |
|
"grad_norm": 8.926065444946289, |
|
"learning_rate": 0.00019762050465502965, |
|
"loss": 1.3343, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.02059025394646534, |
|
"grad_norm": 5.511591911315918, |
|
"learning_rate": 0.0001975698149453026, |
|
"loss": 1.7055, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.020786351603098344, |
|
"grad_norm": 8.730749130249023, |
|
"learning_rate": 0.00019751859763612704, |
|
"loss": 1.7862, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.020982449259731348, |
|
"grad_norm": 10.519305229187012, |
|
"learning_rate": 0.00019746685300445565, |
|
"loss": 1.9938, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.02117854691636435, |
|
"grad_norm": 6.307378768920898, |
|
"learning_rate": 0.00019741458133009258, |
|
"loss": 1.5184, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.021374644572997353, |
|
"grad_norm": 10.66562557220459, |
|
"learning_rate": 0.00019736178289569186, |
|
"loss": 2.0555, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.021570742229630357, |
|
"grad_norm": 6.235299587249756, |
|
"learning_rate": 0.0001973084579867561, |
|
"loss": 1.4547, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.021766839886263357, |
|
"grad_norm": 12.061420440673828, |
|
"learning_rate": 0.00019725460689163455, |
|
"loss": 1.9823, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.02196293754289636, |
|
"grad_norm": 8.891043663024902, |
|
"learning_rate": 0.0001972002299015219, |
|
"loss": 2.4757, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.022159035199529366, |
|
"grad_norm": 8.200632095336914, |
|
"learning_rate": 0.00019714532731045649, |
|
"loss": 1.4906, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.02235513285616237, |
|
"grad_norm": 8.106672286987305, |
|
"learning_rate": 0.00019708989941531887, |
|
"loss": 0.8863, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.02255123051279537, |
|
"grad_norm": 6.451066493988037, |
|
"learning_rate": 0.0001970339465158301, |
|
"loss": 2.1244, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.022747328169428375, |
|
"grad_norm": 6.3670220375061035, |
|
"learning_rate": 0.0001969774689145501, |
|
"loss": 1.3732, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.02294342582606138, |
|
"grad_norm": 6.401678562164307, |
|
"learning_rate": 0.0001969204669168761, |
|
"loss": 1.3963, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.023139523482694383, |
|
"grad_norm": 6.917253494262695, |
|
"learning_rate": 0.00019686294083104094, |
|
"loss": 1.3802, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.023335621139327384, |
|
"grad_norm": 4.852232456207275, |
|
"learning_rate": 0.00019680489096811149, |
|
"loss": 0.7538, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.023531718795960388, |
|
"grad_norm": 5.08411169052124, |
|
"learning_rate": 0.00019674631764198677, |
|
"loss": 1.5128, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.023727816452593392, |
|
"grad_norm": 13.64840316772461, |
|
"learning_rate": 0.00019668722116939649, |
|
"loss": 1.2718, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.023923914109226396, |
|
"grad_norm": 7.075209617614746, |
|
"learning_rate": 0.00019662760186989913, |
|
"loss": 2.4186, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.024120011765859397, |
|
"grad_norm": 12.186111450195312, |
|
"learning_rate": 0.00019656746006588044, |
|
"loss": 2.8495, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.0243161094224924, |
|
"grad_norm": 13.221094131469727, |
|
"learning_rate": 0.00019650679608255138, |
|
"loss": 1.1832, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.024512207079125405, |
|
"grad_norm": 4.765728950500488, |
|
"learning_rate": 0.0001964456102479467, |
|
"loss": 1.1407, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.024708304735758406, |
|
"grad_norm": 4.750761032104492, |
|
"learning_rate": 0.00019638390289292295, |
|
"loss": 1.1096, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.02490440239239141, |
|
"grad_norm": 5.133049011230469, |
|
"learning_rate": 0.0001963216743511567, |
|
"loss": 2.6525, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.025100500049024414, |
|
"grad_norm": 14.541696548461914, |
|
"learning_rate": 0.0001962589249591429, |
|
"loss": 3.3739, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.02529659770565742, |
|
"grad_norm": 7.252123832702637, |
|
"learning_rate": 0.00019619565505619288, |
|
"loss": 2.0899, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.02549269536229042, |
|
"grad_norm": 7.14664888381958, |
|
"learning_rate": 0.00019613186498443257, |
|
"loss": 1.4538, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.025688793018923423, |
|
"grad_norm": 7.936334609985352, |
|
"learning_rate": 0.0001960675550888007, |
|
"loss": 1.351, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.025884890675556427, |
|
"grad_norm": 7.465827465057373, |
|
"learning_rate": 0.00019600272571704687, |
|
"loss": 1.0752, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.02608098833218943, |
|
"grad_norm": 12.85190486907959, |
|
"learning_rate": 0.00019593737721972977, |
|
"loss": 2.5674, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.026277085988822432, |
|
"grad_norm": 8.640382766723633, |
|
"learning_rate": 0.00019587150995021505, |
|
"loss": 2.5631, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.026473183645455436, |
|
"grad_norm": 5.070466995239258, |
|
"learning_rate": 0.00019580512426467376, |
|
"loss": 0.8935, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.02666928130208844, |
|
"grad_norm": 5.741654872894287, |
|
"learning_rate": 0.00019573822052208013, |
|
"loss": 2.1005, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.026865378958721445, |
|
"grad_norm": 8.615095138549805, |
|
"learning_rate": 0.00019567079908420972, |
|
"loss": 2.7478, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.027061476615354445, |
|
"grad_norm": 15.559739112854004, |
|
"learning_rate": 0.00019560286031563754, |
|
"loss": 1.8455, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.02725757427198745, |
|
"grad_norm": 5.683060169219971, |
|
"learning_rate": 0.000195534404583736, |
|
"loss": 1.6896, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.027453671928620454, |
|
"grad_norm": 5.791153430938721, |
|
"learning_rate": 0.00019546543225867292, |
|
"loss": 1.9291, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.027649769585253458, |
|
"grad_norm": 6.800760269165039, |
|
"learning_rate": 0.0001953959437134095, |
|
"loss": 1.789, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.02784586724188646, |
|
"grad_norm": 6.912458896636963, |
|
"learning_rate": 0.00019532593932369849, |
|
"loss": 2.2544, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.028041964898519463, |
|
"grad_norm": 6.928767681121826, |
|
"learning_rate": 0.00019525541946808188, |
|
"loss": 1.2531, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.028238062555152467, |
|
"grad_norm": 8.96179485321045, |
|
"learning_rate": 0.00019518438452788907, |
|
"loss": 2.3403, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.028434160211785468, |
|
"grad_norm": 7.004507064819336, |
|
"learning_rate": 0.00019511283488723473, |
|
"loss": 1.1211, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.028630257868418472, |
|
"grad_norm": 8.655360221862793, |
|
"learning_rate": 0.00019504077093301665, |
|
"loss": 1.6074, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.028826355525051476, |
|
"grad_norm": 7.188142776489258, |
|
"learning_rate": 0.00019496819305491383, |
|
"loss": 1.3564, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.02902245318168448, |
|
"grad_norm": 5.618204116821289, |
|
"learning_rate": 0.00019489510164538416, |
|
"loss": 2.5936, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.02921855083831748, |
|
"grad_norm": 6.578945159912109, |
|
"learning_rate": 0.00019482149709966246, |
|
"loss": 0.9577, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.029414648494950485, |
|
"grad_norm": 11.468759536743164, |
|
"learning_rate": 0.00019474737981575832, |
|
"loss": 2.1746, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.02961074615158349, |
|
"grad_norm": 5.663360595703125, |
|
"learning_rate": 0.00019467275019445385, |
|
"loss": 1.2751, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.029806843808216493, |
|
"grad_norm": 6.297888278961182, |
|
"learning_rate": 0.00019459760863930155, |
|
"loss": 1.574, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.030002941464849494, |
|
"grad_norm": 8.760517120361328, |
|
"learning_rate": 0.00019452195555662224, |
|
"loss": 1.1148, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.030199039121482498, |
|
"grad_norm": 8.745152473449707, |
|
"learning_rate": 0.00019444579135550273, |
|
"loss": 1.4212, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.030395136778115502, |
|
"grad_norm": 7.594636917114258, |
|
"learning_rate": 0.00019436911644779366, |
|
"loss": 0.9161, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.030591234434748506, |
|
"grad_norm": 8.563089370727539, |
|
"learning_rate": 0.00019429193124810725, |
|
"loss": 1.5844, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.030787332091381507, |
|
"grad_norm": 16.4445743560791, |
|
"learning_rate": 0.00019421423617381508, |
|
"loss": 1.4798, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.03098342974801451, |
|
"grad_norm": 6.86074161529541, |
|
"learning_rate": 0.00019413603164504588, |
|
"loss": 2.3323, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.031179527404647515, |
|
"grad_norm": 11.293970108032227, |
|
"learning_rate": 0.0001940573180846831, |
|
"loss": 2.0304, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.03137562506128052, |
|
"grad_norm": 9.772544860839844, |
|
"learning_rate": 0.00019397809591836286, |
|
"loss": 3.1622, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.031571722717913524, |
|
"grad_norm": 8.489032745361328, |
|
"learning_rate": 0.00019389836557447143, |
|
"loss": 1.0113, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.03176782037454652, |
|
"grad_norm": 12.992219924926758, |
|
"learning_rate": 0.000193818127484143, |
|
"loss": 2.5478, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.031963918031179525, |
|
"grad_norm": 6.758875846862793, |
|
"learning_rate": 0.0001937373820812574, |
|
"loss": 1.8935, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.03216001568781253, |
|
"grad_norm": 11.88957405090332, |
|
"learning_rate": 0.0001936561298024377, |
|
"loss": 3.1899, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.03235611334444553, |
|
"grad_norm": 8.777862548828125, |
|
"learning_rate": 0.00019357437108704777, |
|
"loss": 2.7038, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.03255221100107854, |
|
"grad_norm": 5.135364532470703, |
|
"learning_rate": 0.0001934921063771901, |
|
"loss": 0.7644, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.03274830865771154, |
|
"grad_norm": 9.942065238952637, |
|
"learning_rate": 0.00019340933611770321, |
|
"loss": 1.4148, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.032944406314344546, |
|
"grad_norm": 7.881731033325195, |
|
"learning_rate": 0.0001933260607561594, |
|
"loss": 1.7402, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.03314050397097755, |
|
"grad_norm": 6.189711570739746, |
|
"learning_rate": 0.00019324228074286222, |
|
"loss": 1.642, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.03333660162761055, |
|
"grad_norm": 4.979406356811523, |
|
"learning_rate": 0.00019315799653084404, |
|
"loss": 2.425, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.03353269928424355, |
|
"grad_norm": 8.461871147155762, |
|
"learning_rate": 0.00019307320857586376, |
|
"loss": 2.6563, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.033728796940876556, |
|
"grad_norm": 8.992694854736328, |
|
"learning_rate": 0.00019298791733640406, |
|
"loss": 1.7962, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.03392489459750956, |
|
"grad_norm": 8.074629783630371, |
|
"learning_rate": 0.00019290212327366924, |
|
"loss": 1.5342, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.034120992254142564, |
|
"grad_norm": 6.5658111572265625, |
|
"learning_rate": 0.00019281582685158247, |
|
"loss": 0.7919, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.03431708991077557, |
|
"grad_norm": 9.946451187133789, |
|
"learning_rate": 0.00019272902853678336, |
|
"loss": 1.5664, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.03451318756740857, |
|
"grad_norm": 6.764862060546875, |
|
"learning_rate": 0.00019264172879862552, |
|
"loss": 2.1083, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.03470928522404157, |
|
"grad_norm": 11.195146560668945, |
|
"learning_rate": 0.000192553928109174, |
|
"loss": 1.3371, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.034905382880674574, |
|
"grad_norm": 6.352316856384277, |
|
"learning_rate": 0.00019246562694320255, |
|
"loss": 2.543, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.03510148053730758, |
|
"grad_norm": 4.030996799468994, |
|
"learning_rate": 0.00019237682577819137, |
|
"loss": 0.9273, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.03529757819394058, |
|
"grad_norm": 6.009339809417725, |
|
"learning_rate": 0.00019228752509432417, |
|
"loss": 2.1444, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.035493675850573586, |
|
"grad_norm": 14.911331176757812, |
|
"learning_rate": 0.00019219772537448597, |
|
"loss": 1.5989, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.03568977350720659, |
|
"grad_norm": 6.454592227935791, |
|
"learning_rate": 0.00019210742710426012, |
|
"loss": 1.0608, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.035885871163839594, |
|
"grad_norm": 11.354242324829102, |
|
"learning_rate": 0.00019201663077192586, |
|
"loss": 1.7558, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.0360819688204726, |
|
"grad_norm": 5.804329872131348, |
|
"learning_rate": 0.0001919253368684557, |
|
"loss": 1.411, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.036278066477105596, |
|
"grad_norm": 6.735583305358887, |
|
"learning_rate": 0.00019183354588751271, |
|
"loss": 2.4473, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.0364741641337386, |
|
"grad_norm": 9.169321060180664, |
|
"learning_rate": 0.00019174125832544786, |
|
"loss": 1.8947, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.036670261790371604, |
|
"grad_norm": 3.465175151824951, |
|
"learning_rate": 0.0001916484746812973, |
|
"loss": 1.1306, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.03686635944700461, |
|
"grad_norm": 10.71740436553955, |
|
"learning_rate": 0.0001915551954567797, |
|
"loss": 1.0832, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.03706245710363761, |
|
"grad_norm": 10.946560859680176, |
|
"learning_rate": 0.0001914614211562936, |
|
"loss": 1.9391, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.03725855476027062, |
|
"grad_norm": 6.847762584686279, |
|
"learning_rate": 0.0001913671522869145, |
|
"loss": 2.197, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.03745465241690362, |
|
"grad_norm": 13.330089569091797, |
|
"learning_rate": 0.00019127238935839235, |
|
"loss": 2.3539, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.03765075007353662, |
|
"grad_norm": 22.94158172607422, |
|
"learning_rate": 0.00019117713288314863, |
|
"loss": 2.868, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.03784684773016962, |
|
"grad_norm": 7.080881595611572, |
|
"learning_rate": 0.00019108138337627358, |
|
"loss": 1.7925, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.038042945386802626, |
|
"grad_norm": 4.726489067077637, |
|
"learning_rate": 0.00019098514135552357, |
|
"loss": 1.008, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.03823904304343563, |
|
"grad_norm": 5.7414870262146, |
|
"learning_rate": 0.00019088840734131807, |
|
"loss": 0.8934, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.038435140700068635, |
|
"grad_norm": 6.781312465667725, |
|
"learning_rate": 0.00019079118185673705, |
|
"loss": 1.6637, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.03863123835670164, |
|
"grad_norm": 12.264861106872559, |
|
"learning_rate": 0.00019069346542751803, |
|
"loss": 1.6055, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.03882733601333464, |
|
"grad_norm": 8.975689888000488, |
|
"learning_rate": 0.00019059525858205323, |
|
"loss": 2.6467, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.03902343366996765, |
|
"grad_norm": 9.027484893798828, |
|
"learning_rate": 0.0001904965618513868, |
|
"loss": 1.9813, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.039219531326600644, |
|
"grad_norm": 9.491011619567871, |
|
"learning_rate": 0.0001903973757692119, |
|
"loss": 1.4225, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.03941562898323365, |
|
"grad_norm": 5.202075481414795, |
|
"learning_rate": 0.00019029770087186773, |
|
"loss": 1.3524, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.03961172663986665, |
|
"grad_norm": 11.139556884765625, |
|
"learning_rate": 0.00019019753769833678, |
|
"loss": 2.0723, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.03980782429649966, |
|
"grad_norm": 11.383284568786621, |
|
"learning_rate": 0.0001900968867902419, |
|
"loss": 1.5843, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.04000392195313266, |
|
"grad_norm": 7.112687110900879, |
|
"learning_rate": 0.00018999574869184324, |
|
"loss": 1.3899, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.040200019609765665, |
|
"grad_norm": 13.50672721862793, |
|
"learning_rate": 0.00018989412395003537, |
|
"loss": 1.7484, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.04039611726639867, |
|
"grad_norm": 13.107057571411133, |
|
"learning_rate": 0.00018979201311434434, |
|
"loss": 1.6412, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.040592214923031666, |
|
"grad_norm": 11.221402168273926, |
|
"learning_rate": 0.0001896894167369248, |
|
"loss": 2.608, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.04078831257966467, |
|
"grad_norm": 4.945010662078857, |
|
"learning_rate": 0.0001895863353725568, |
|
"loss": 1.3582, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.040984410236297675, |
|
"grad_norm": 15.066801071166992, |
|
"learning_rate": 0.00018948276957864299, |
|
"loss": 1.7296, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.04118050789293068, |
|
"grad_norm": 7.497617244720459, |
|
"learning_rate": 0.0001893787199152055, |
|
"loss": 1.4961, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.04137660554956368, |
|
"grad_norm": 4.299473762512207, |
|
"learning_rate": 0.00018927418694488296, |
|
"loss": 1.7403, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.04157270320619669, |
|
"grad_norm": 6.988886833190918, |
|
"learning_rate": 0.00018916917123292738, |
|
"loss": 2.6546, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.04176880086282969, |
|
"grad_norm": 9.461721420288086, |
|
"learning_rate": 0.00018906367334720124, |
|
"loss": 2.067, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.041964898519462696, |
|
"grad_norm": 4.021213054656982, |
|
"learning_rate": 0.0001889576938581742, |
|
"loss": 1.0274, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.04216099617609569, |
|
"grad_norm": 7.254751205444336, |
|
"learning_rate": 0.00018885123333892026, |
|
"loss": 1.7091, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.0423570938327287, |
|
"grad_norm": 10.783350944519043, |
|
"learning_rate": 0.00018874429236511448, |
|
"loss": 1.3779, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.0425531914893617, |
|
"grad_norm": 16.38484764099121, |
|
"learning_rate": 0.00018863687151503, |
|
"loss": 2.8516, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.042749289145994705, |
|
"grad_norm": 5.848017692565918, |
|
"learning_rate": 0.00018852897136953473, |
|
"loss": 1.6814, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.04294538680262771, |
|
"grad_norm": 5.781267166137695, |
|
"learning_rate": 0.00018842059251208845, |
|
"loss": 2.1672, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.043141484459260714, |
|
"grad_norm": 6.244543552398682, |
|
"learning_rate": 0.00018831173552873946, |
|
"loss": 0.6934, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.04333758211589372, |
|
"grad_norm": 6.526815414428711, |
|
"learning_rate": 0.0001882024010081215, |
|
"loss": 1.6071, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.043533679772526715, |
|
"grad_norm": 6.064664363861084, |
|
"learning_rate": 0.00018809258954145052, |
|
"loss": 1.8964, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.04372977742915972, |
|
"grad_norm": 6.710343837738037, |
|
"learning_rate": 0.0001879823017225215, |
|
"loss": 2.063, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.04392587508579272, |
|
"grad_norm": 9.060622215270996, |
|
"learning_rate": 0.00018787153814770537, |
|
"loss": 1.568, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.04412197274242573, |
|
"grad_norm": 4.686062335968018, |
|
"learning_rate": 0.00018776029941594552, |
|
"loss": 1.1178, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.04431807039905873, |
|
"grad_norm": 6.257881164550781, |
|
"learning_rate": 0.00018764858612875472, |
|
"loss": 2.1195, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.044514168055691736, |
|
"grad_norm": 9.814234733581543, |
|
"learning_rate": 0.00018753639889021196, |
|
"loss": 1.1679, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.04471026571232474, |
|
"grad_norm": 13.946937561035156, |
|
"learning_rate": 0.00018742373830695898, |
|
"loss": 1.6899, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.044906363368957744, |
|
"grad_norm": 6.277964115142822, |
|
"learning_rate": 0.0001873106049881971, |
|
"loss": 1.5076, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.04510246102559074, |
|
"grad_norm": 14.092109680175781, |
|
"learning_rate": 0.00018719699954568398, |
|
"loss": 1.9726, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.045298558682223745, |
|
"grad_norm": 6.5708231925964355, |
|
"learning_rate": 0.00018708292259373015, |
|
"loss": 0.6623, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.04549465633885675, |
|
"grad_norm": 6.839974880218506, |
|
"learning_rate": 0.00018696837474919582, |
|
"loss": 1.8836, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.045690753995489754, |
|
"grad_norm": 17.919775009155273, |
|
"learning_rate": 0.00018685335663148753, |
|
"loss": 2.2343, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.04588685165212276, |
|
"grad_norm": 7.140262603759766, |
|
"learning_rate": 0.00018673786886255476, |
|
"loss": 1.9653, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.04608294930875576, |
|
"grad_norm": 11.049707412719727, |
|
"learning_rate": 0.00018662191206688658, |
|
"loss": 1.8658, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.046279046965388766, |
|
"grad_norm": 5.617869853973389, |
|
"learning_rate": 0.00018650548687150823, |
|
"loss": 1.862, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.04647514462202176, |
|
"grad_norm": 6.494004726409912, |
|
"learning_rate": 0.00018638859390597792, |
|
"loss": 3.787, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.04667124227865477, |
|
"grad_norm": 17.628082275390625, |
|
"learning_rate": 0.00018627123380238314, |
|
"loss": 1.9129, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.04686733993528777, |
|
"grad_norm": 5.634758949279785, |
|
"learning_rate": 0.0001861534071953374, |
|
"loss": 1.4702, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.047063437591920776, |
|
"grad_norm": 20.99329948425293, |
|
"learning_rate": 0.00018603511472197685, |
|
"loss": 2.2196, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.04725953524855378, |
|
"grad_norm": 6.650235652923584, |
|
"learning_rate": 0.00018591635702195673, |
|
"loss": 0.986, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.047455632905186784, |
|
"grad_norm": 4.837536334991455, |
|
"learning_rate": 0.00018579713473744795, |
|
"loss": 1.2033, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.04765173056181979, |
|
"grad_norm": 9.390655517578125, |
|
"learning_rate": 0.00018567744851313362, |
|
"loss": 1.7356, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.04784782821845279, |
|
"grad_norm": 8.881529808044434, |
|
"learning_rate": 0.0001855572989962056, |
|
"loss": 1.0063, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.04804392587508579, |
|
"grad_norm": 6.325936317443848, |
|
"learning_rate": 0.00018543668683636085, |
|
"loss": 1.5957, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.048240023531718794, |
|
"grad_norm": 10.866408348083496, |
|
"learning_rate": 0.0001853156126857981, |
|
"loss": 2.0472, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.0484361211883518, |
|
"grad_norm": 6.741912364959717, |
|
"learning_rate": 0.00018519407719921427, |
|
"loss": 1.6029, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.0486322188449848, |
|
"grad_norm": 5.979978561401367, |
|
"learning_rate": 0.00018507208103380092, |
|
"loss": 0.9353, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.048828316501617806, |
|
"grad_norm": 6.375998020172119, |
|
"learning_rate": 0.00018494962484924058, |
|
"loss": 2.5973, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.04902441415825081, |
|
"grad_norm": 8.528829574584961, |
|
"learning_rate": 0.00018482670930770342, |
|
"loss": 1.1618, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.049220511814883815, |
|
"grad_norm": 7.874660491943359, |
|
"learning_rate": 0.0001847033350738435, |
|
"loss": 1.3043, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.04941660947151681, |
|
"grad_norm": 4.421433925628662, |
|
"learning_rate": 0.00018457950281479513, |
|
"loss": 2.0768, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.049612707128149816, |
|
"grad_norm": 8.347919464111328, |
|
"learning_rate": 0.00018445521320016944, |
|
"loss": 1.0983, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.04980880478478282, |
|
"grad_norm": 6.713651657104492, |
|
"learning_rate": 0.00018433046690205068, |
|
"loss": 0.9891, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.050004902441415824, |
|
"grad_norm": 12.359843254089355, |
|
"learning_rate": 0.0001842052645949925, |
|
"loss": 2.5271, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.05020100009804883, |
|
"grad_norm": 4.7271199226379395, |
|
"learning_rate": 0.00018407960695601442, |
|
"loss": 1.394, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.05039709775468183, |
|
"grad_norm": 5.810708522796631, |
|
"learning_rate": 0.0001839534946645981, |
|
"loss": 1.7354, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.05059319541131484, |
|
"grad_norm": 28.575908660888672, |
|
"learning_rate": 0.00018382692840268367, |
|
"loss": 3.4793, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.05078929306794784, |
|
"grad_norm": 5.775376319885254, |
|
"learning_rate": 0.00018369990885466617, |
|
"loss": 1.4695, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.05098539072458084, |
|
"grad_norm": 7.531515121459961, |
|
"learning_rate": 0.0001835724367073916, |
|
"loss": 2.0772, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.05118148838121384, |
|
"grad_norm": 5.099686145782471, |
|
"learning_rate": 0.00018344451265015348, |
|
"loss": 3.2733, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.05137758603784685, |
|
"grad_norm": 5.558218479156494, |
|
"learning_rate": 0.00018331613737468887, |
|
"loss": 1.7578, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.05157368369447985, |
|
"grad_norm": 7.837512016296387, |
|
"learning_rate": 0.00018318731157517478, |
|
"loss": 2.7413, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.051769781351112855, |
|
"grad_norm": 9.013374328613281, |
|
"learning_rate": 0.00018305803594822448, |
|
"loss": 1.7722, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.05196587900774586, |
|
"grad_norm": 7.108828067779541, |
|
"learning_rate": 0.00018292831119288348, |
|
"loss": 1.31, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.05216197666437886, |
|
"grad_norm": 6.387202739715576, |
|
"learning_rate": 0.0001827981380106261, |
|
"loss": 1.0588, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.05235807432101187, |
|
"grad_norm": 13.306784629821777, |
|
"learning_rate": 0.00018266751710535131, |
|
"loss": 2.6092, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.052554171977644865, |
|
"grad_norm": 7.774232864379883, |
|
"learning_rate": 0.00018253644918337915, |
|
"loss": 1.2318, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.05275026963427787, |
|
"grad_norm": 5.396208763122559, |
|
"learning_rate": 0.00018240493495344694, |
|
"loss": 1.2144, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.05294636729091087, |
|
"grad_norm": 13.393839836120605, |
|
"learning_rate": 0.0001822729751267053, |
|
"loss": 2.4278, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.05314246494754388, |
|
"grad_norm": 14.873156547546387, |
|
"learning_rate": 0.00018214057041671434, |
|
"loss": 2.4015, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.05333856260417688, |
|
"grad_norm": 7.224187850952148, |
|
"learning_rate": 0.00018200772153943988, |
|
"loss": 1.578, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.053534660260809885, |
|
"grad_norm": 16.03417205810547, |
|
"learning_rate": 0.00018187442921324958, |
|
"loss": 2.4634, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.05373075791744289, |
|
"grad_norm": 6.284254550933838, |
|
"learning_rate": 0.00018174069415890888, |
|
"loss": 1.1995, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.05392685557407589, |
|
"grad_norm": 5.986271858215332, |
|
"learning_rate": 0.00018160651709957736, |
|
"loss": 1.8718, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.05412295323070889, |
|
"grad_norm": 13.663339614868164, |
|
"learning_rate": 0.00018147189876080463, |
|
"loss": 1.8664, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.054319050887341895, |
|
"grad_norm": 8.843401908874512, |
|
"learning_rate": 0.0001813368398705265, |
|
"loss": 4.1059, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.0545151485439749, |
|
"grad_norm": 6.658843994140625, |
|
"learning_rate": 0.00018120134115906096, |
|
"loss": 0.8269, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.0547112462006079, |
|
"grad_norm": 14.153685569763184, |
|
"learning_rate": 0.0001810654033591044, |
|
"loss": 1.9537, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.05490734385724091, |
|
"grad_norm": 5.585552215576172, |
|
"learning_rate": 0.00018092902720572745, |
|
"loss": 1.2531, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.05510344151387391, |
|
"grad_norm": 6.414156436920166, |
|
"learning_rate": 0.00018079221343637113, |
|
"loss": 0.9456, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.055299539170506916, |
|
"grad_norm": 6.674518585205078, |
|
"learning_rate": 0.00018065496279084283, |
|
"loss": 1.1899, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.05549563682713991, |
|
"grad_norm": 16.878883361816406, |
|
"learning_rate": 0.00018051727601131227, |
|
"loss": 0.8761, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.05569173448377292, |
|
"grad_norm": 31.99020004272461, |
|
"learning_rate": 0.0001803791538423076, |
|
"loss": 1.7235, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.05588783214040592, |
|
"grad_norm": 4.821253776550293, |
|
"learning_rate": 0.0001802405970307112, |
|
"loss": 1.2902, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.056083929797038926, |
|
"grad_norm": 7.2293267250061035, |
|
"learning_rate": 0.00018010160632575577, |
|
"loss": 1.0864, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.05628002745367193, |
|
"grad_norm": 8.262434959411621, |
|
"learning_rate": 0.00017996218247902035, |
|
"loss": 1.6246, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.056476125110304934, |
|
"grad_norm": 7.845222473144531, |
|
"learning_rate": 0.00017982232624442595, |
|
"loss": 1.7249, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.05667222276693794, |
|
"grad_norm": 9.781503677368164, |
|
"learning_rate": 0.0001796820383782319, |
|
"loss": 1.9911, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.056868320423570935, |
|
"grad_norm": 10.7435884475708, |
|
"learning_rate": 0.00017954131963903133, |
|
"loss": 1.4689, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.05706441808020394, |
|
"grad_norm": 8.709835052490234, |
|
"learning_rate": 0.00017940017078774747, |
|
"loss": 2.3939, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.057260515736836944, |
|
"grad_norm": 6.140249252319336, |
|
"learning_rate": 0.00017925859258762915, |
|
"loss": 2.0753, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.05745661339346995, |
|
"grad_norm": 9.725993156433105, |
|
"learning_rate": 0.00017911658580424704, |
|
"loss": 1.5315, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.05765271105010295, |
|
"grad_norm": 20.413393020629883, |
|
"learning_rate": 0.00017897415120548917, |
|
"loss": 2.4083, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.057848808706735956, |
|
"grad_norm": 7.881735324859619, |
|
"learning_rate": 0.00017883128956155706, |
|
"loss": 3.1099, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.05804490636336896, |
|
"grad_norm": 8.856921195983887, |
|
"learning_rate": 0.0001786880016449614, |
|
"loss": 0.9566, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.058241004020001964, |
|
"grad_norm": 25.80061912536621, |
|
"learning_rate": 0.0001785442882305179, |
|
"loss": 2.6205, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.05843710167663496, |
|
"grad_norm": 13.293787956237793, |
|
"learning_rate": 0.00017840015009534308, |
|
"loss": 1.7317, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.058633199333267966, |
|
"grad_norm": 11.313176155090332, |
|
"learning_rate": 0.00017825558801885016, |
|
"loss": 1.5122, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.05882929698990097, |
|
"grad_norm": 9.636054039001465, |
|
"learning_rate": 0.00017811060278274474, |
|
"loss": 2.1976, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.059025394646533974, |
|
"grad_norm": 11.10985279083252, |
|
"learning_rate": 0.00017796519517102066, |
|
"loss": 2.5926, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.05922149230316698, |
|
"grad_norm": 6.27458381652832, |
|
"learning_rate": 0.00017781936596995563, |
|
"loss": 2.5326, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.05941758995979998, |
|
"grad_norm": 10.134029388427734, |
|
"learning_rate": 0.00017767311596810715, |
|
"loss": 1.4142, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.059613687616432987, |
|
"grad_norm": 10.580477714538574, |
|
"learning_rate": 0.0001775264459563081, |
|
"loss": 2.3389, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.059809785273065984, |
|
"grad_norm": 7.545969009399414, |
|
"learning_rate": 0.00017737935672766257, |
|
"loss": 2.3728, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.06000588292969899, |
|
"grad_norm": 6.5107622146606445, |
|
"learning_rate": 0.00017723184907754154, |
|
"loss": 1.0087, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.06020198058633199, |
|
"grad_norm": 10.663747787475586, |
|
"learning_rate": 0.00017708392380357845, |
|
"loss": 2.2001, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.060398078242964996, |
|
"grad_norm": 10.211285591125488, |
|
"learning_rate": 0.0001769355817056651, |
|
"loss": 2.2537, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.060594175899598, |
|
"grad_norm": 8.512606620788574, |
|
"learning_rate": 0.00017678682358594728, |
|
"loss": 1.0687, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.060790273556231005, |
|
"grad_norm": 10.64330768585205, |
|
"learning_rate": 0.0001766376502488202, |
|
"loss": 2.4672, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.06098637121286401, |
|
"grad_norm": 9.972363471984863, |
|
"learning_rate": 0.0001764880625009245, |
|
"loss": 2.2277, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.06118246886949701, |
|
"grad_norm": 10.01198959350586, |
|
"learning_rate": 0.0001763380611511416, |
|
"loss": 1.906, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.06137856652613001, |
|
"grad_norm": 11.719582557678223, |
|
"learning_rate": 0.00017618764701058949, |
|
"loss": 2.4849, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.061574664182763014, |
|
"grad_norm": 9.08901309967041, |
|
"learning_rate": 0.0001760368208926182, |
|
"loss": 1.4476, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.06177076183939602, |
|
"grad_norm": 11.74946403503418, |
|
"learning_rate": 0.00017588558361280557, |
|
"loss": 1.3607, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.06196685949602902, |
|
"grad_norm": 6.549604892730713, |
|
"learning_rate": 0.00017573393598895276, |
|
"loss": 2.066, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.06216295715266203, |
|
"grad_norm": 5.160909652709961, |
|
"learning_rate": 0.00017558187884107978, |
|
"loss": 0.9799, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.06235905480929503, |
|
"grad_norm": 11.00747299194336, |
|
"learning_rate": 0.00017542941299142112, |
|
"loss": 2.3193, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.06255515246592804, |
|
"grad_norm": 4.467062473297119, |
|
"learning_rate": 0.00017527653926442135, |
|
"loss": 1.9208, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.06275125012256104, |
|
"grad_norm": 11.554845809936523, |
|
"learning_rate": 0.00017512325848673043, |
|
"loss": 1.5197, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.06294734777919404, |
|
"grad_norm": 6.626307964324951, |
|
"learning_rate": 0.0001749695714871996, |
|
"loss": 2.0222, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.06314344543582705, |
|
"grad_norm": 5.357675075531006, |
|
"learning_rate": 0.00017481547909687658, |
|
"loss": 2.3264, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.06333954309246005, |
|
"grad_norm": 9.451851844787598, |
|
"learning_rate": 0.00017466098214900124, |
|
"loss": 1.5699, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.06353564074909304, |
|
"grad_norm": 5.937804698944092, |
|
"learning_rate": 0.00017450608147900106, |
|
"loss": 1.832, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.06373173840572605, |
|
"grad_norm": 10.744606971740723, |
|
"learning_rate": 0.00017435077792448664, |
|
"loss": 1.4665, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.06392783606235905, |
|
"grad_norm": 9.900199890136719, |
|
"learning_rate": 0.0001741950723252471, |
|
"loss": 1.4785, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.06412393371899205, |
|
"grad_norm": 13.663531303405762, |
|
"learning_rate": 0.00017403896552324553, |
|
"loss": 0.9564, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.06432003137562506, |
|
"grad_norm": 4.708930015563965, |
|
"learning_rate": 0.00017388245836261464, |
|
"loss": 0.8293, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.06451612903225806, |
|
"grad_norm": 6.656357288360596, |
|
"learning_rate": 0.00017372555168965184, |
|
"loss": 2.2177, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.06471222668889107, |
|
"grad_norm": 6.349363803863525, |
|
"learning_rate": 0.00017356824635281502, |
|
"loss": 3.3319, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.06490832434552407, |
|
"grad_norm": 10.561055183410645, |
|
"learning_rate": 0.00017341054320271776, |
|
"loss": 1.919, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.06510442200215708, |
|
"grad_norm": 10.805617332458496, |
|
"learning_rate": 0.00017325244309212475, |
|
"loss": 2.9898, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.06530051965879008, |
|
"grad_norm": 9.67844009399414, |
|
"learning_rate": 0.0001730939468759472, |
|
"loss": 3.0727, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.06549661731542308, |
|
"grad_norm": 9.487295150756836, |
|
"learning_rate": 0.00017293505541123833, |
|
"loss": 1.2741, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.06569271497205609, |
|
"grad_norm": 5.027645111083984, |
|
"learning_rate": 0.00017277576955718847, |
|
"loss": 1.6962, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.06588881262868909, |
|
"grad_norm": 6.2696685791015625, |
|
"learning_rate": 0.0001726160901751207, |
|
"loss": 1.4388, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.0660849102853221, |
|
"grad_norm": 9.509906768798828, |
|
"learning_rate": 0.000172456018128486, |
|
"loss": 2.3173, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.0662810079419551, |
|
"grad_norm": 5.882188320159912, |
|
"learning_rate": 0.00017229555428285864, |
|
"loss": 1.1832, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.06647710559858809, |
|
"grad_norm": 8.857211112976074, |
|
"learning_rate": 0.00017213469950593156, |
|
"loss": 2.484, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.0666732032552211, |
|
"grad_norm": 10.660289764404297, |
|
"learning_rate": 0.00017197345466751158, |
|
"loss": 2.5169, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.0668693009118541, |
|
"grad_norm": 6.141770839691162, |
|
"learning_rate": 0.00017181182063951474, |
|
"loss": 1.601, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.0668693009118541, |
|
"eval_loss": 0.4308730363845825, |
|
"eval_runtime": 77.758, |
|
"eval_samples_per_second": 27.624, |
|
"eval_steps_per_second": 13.812, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.0670653985684871, |
|
"grad_norm": 9.668785095214844, |
|
"learning_rate": 0.00017164979829596165, |
|
"loss": 3.3758, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.06726149622512011, |
|
"grad_norm": 5.6129255294799805, |
|
"learning_rate": 0.00017148738851297256, |
|
"loss": 1.3369, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.06745759388175311, |
|
"grad_norm": 8.042420387268066, |
|
"learning_rate": 0.0001713245921687629, |
|
"loss": 1.918, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.06765369153838612, |
|
"grad_norm": 6.536214351654053, |
|
"learning_rate": 0.00017116141014363837, |
|
"loss": 0.97, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.06784978919501912, |
|
"grad_norm": 10.576717376708984, |
|
"learning_rate": 0.0001709978433199901, |
|
"loss": 2.0315, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.06804588685165212, |
|
"grad_norm": 13.645353317260742, |
|
"learning_rate": 0.00017083389258229013, |
|
"loss": 2.3226, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.06824198450828513, |
|
"grad_norm": 10.135254859924316, |
|
"learning_rate": 0.00017066955881708636, |
|
"loss": 2.4686, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.06843808216491813, |
|
"grad_norm": 5.7091569900512695, |
|
"learning_rate": 0.0001705048429129979, |
|
"loss": 1.633, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.06863417982155114, |
|
"grad_norm": 7.3178935050964355, |
|
"learning_rate": 0.0001703397457607103, |
|
"loss": 1.5411, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.06883027747818414, |
|
"grad_norm": 10.689863204956055, |
|
"learning_rate": 0.0001701742682529706, |
|
"loss": 2.8238, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.06902637513481714, |
|
"grad_norm": 4.083006381988525, |
|
"learning_rate": 0.00017000841128458265, |
|
"loss": 1.7831, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.06922247279145015, |
|
"grad_norm": 4.592381477355957, |
|
"learning_rate": 0.0001698421757524021, |
|
"loss": 1.1135, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.06941857044808314, |
|
"grad_norm": 9.44848346710205, |
|
"learning_rate": 0.00016967556255533174, |
|
"loss": 1.6301, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.06961466810471614, |
|
"grad_norm": 7.35884428024292, |
|
"learning_rate": 0.0001695085725943165, |
|
"loss": 1.6776, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.06981076576134915, |
|
"grad_norm": 7.0129475593566895, |
|
"learning_rate": 0.00016934120677233863, |
|
"loss": 1.1665, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.07000686341798215, |
|
"grad_norm": 11.612259864807129, |
|
"learning_rate": 0.00016917346599441276, |
|
"loss": 0.9044, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.07020296107461516, |
|
"grad_norm": 5.861693382263184, |
|
"learning_rate": 0.00016900535116758125, |
|
"loss": 1.5738, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.07039905873124816, |
|
"grad_norm": 4.579985618591309, |
|
"learning_rate": 0.00016883686320090884, |
|
"loss": 1.6305, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.07059515638788116, |
|
"grad_norm": 6.142041206359863, |
|
"learning_rate": 0.00016866800300547813, |
|
"loss": 0.6206, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.07079125404451417, |
|
"grad_norm": 3.1343085765838623, |
|
"learning_rate": 0.0001684987714943845, |
|
"loss": 0.7649, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.07098735170114717, |
|
"grad_norm": 5.473539352416992, |
|
"learning_rate": 0.00016832916958273118, |
|
"loss": 1.4839, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.07118344935778018, |
|
"grad_norm": 8.52957534790039, |
|
"learning_rate": 0.00016815919818762427, |
|
"loss": 2.5437, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.07137954701441318, |
|
"grad_norm": 6.140387058258057, |
|
"learning_rate": 0.00016798885822816786, |
|
"loss": 1.2282, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.07157564467104618, |
|
"grad_norm": 9.70067024230957, |
|
"learning_rate": 0.000167818150625459, |
|
"loss": 2.3286, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.07177174232767919, |
|
"grad_norm": 6.14666223526001, |
|
"learning_rate": 0.0001676470763025827, |
|
"loss": 1.3825, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.07196783998431219, |
|
"grad_norm": 10.377222061157227, |
|
"learning_rate": 0.0001674756361846071, |
|
"loss": 1.9278, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.0721639376409452, |
|
"grad_norm": 7.09204626083374, |
|
"learning_rate": 0.00016730383119857817, |
|
"loss": 1.2413, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.07236003529757819, |
|
"grad_norm": 12.448009490966797, |
|
"learning_rate": 0.00016713166227351497, |
|
"loss": 1.7415, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.07255613295421119, |
|
"grad_norm": 4.5050811767578125, |
|
"learning_rate": 0.00016695913034040454, |
|
"loss": 1.7608, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.0727522306108442, |
|
"grad_norm": 8.113517761230469, |
|
"learning_rate": 0.00016678623633219677, |
|
"loss": 1.8336, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.0729483282674772, |
|
"grad_norm": 7.662284851074219, |
|
"learning_rate": 0.00016661298118379948, |
|
"loss": 2.5928, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.0731444259241102, |
|
"grad_norm": 7.647708892822266, |
|
"learning_rate": 0.00016643936583207337, |
|
"loss": 3.006, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.07334052358074321, |
|
"grad_norm": 7.049048900604248, |
|
"learning_rate": 0.00016626539121582685, |
|
"loss": 1.9476, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.07353662123737621, |
|
"grad_norm": 4.551039218902588, |
|
"learning_rate": 0.000166091058275811, |
|
"loss": 2.2753, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.07373271889400922, |
|
"grad_norm": 8.680680274963379, |
|
"learning_rate": 0.00016591636795471455, |
|
"loss": 2.0624, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.07392881655064222, |
|
"grad_norm": 10.324525833129883, |
|
"learning_rate": 0.0001657413211971587, |
|
"loss": 1.6293, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.07412491420727522, |
|
"grad_norm": 5.324687957763672, |
|
"learning_rate": 0.0001655659189496921, |
|
"loss": 1.4987, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.07432101186390823, |
|
"grad_norm": 11.023552894592285, |
|
"learning_rate": 0.00016539016216078557, |
|
"loss": 1.9108, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.07451710952054123, |
|
"grad_norm": 6.6586594581604, |
|
"learning_rate": 0.00016521405178082722, |
|
"loss": 2.4198, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.07471320717717424, |
|
"grad_norm": 9.818254470825195, |
|
"learning_rate": 0.0001650375887621171, |
|
"loss": 2.162, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.07490930483380724, |
|
"grad_norm": 6.547168254852295, |
|
"learning_rate": 0.000164860774058862, |
|
"loss": 1.8307, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.07510540249044025, |
|
"grad_norm": 7.256173610687256, |
|
"learning_rate": 0.00016468360862717066, |
|
"loss": 0.9432, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.07530150014707324, |
|
"grad_norm": 10.348817825317383, |
|
"learning_rate": 0.00016450609342504813, |
|
"loss": 1.7824, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.07549759780370624, |
|
"grad_norm": 14.728826522827148, |
|
"learning_rate": 0.00016432822941239096, |
|
"loss": 3.1737, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.07569369546033924, |
|
"grad_norm": 7.894331932067871, |
|
"learning_rate": 0.00016415001755098175, |
|
"loss": 2.0515, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.07588979311697225, |
|
"grad_norm": 7.474819183349609, |
|
"learning_rate": 0.00016397145880448416, |
|
"loss": 3.1276, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.07608589077360525, |
|
"grad_norm": 6.2442626953125, |
|
"learning_rate": 0.00016379255413843754, |
|
"loss": 0.9294, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.07628198843023826, |
|
"grad_norm": 8.229086875915527, |
|
"learning_rate": 0.0001636133045202517, |
|
"loss": 2.122, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.07647808608687126, |
|
"grad_norm": 5.9484100341796875, |
|
"learning_rate": 0.0001634337109192019, |
|
"loss": 1.3164, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.07667418374350427, |
|
"grad_norm": 6.490477561950684, |
|
"learning_rate": 0.0001632537743064233, |
|
"loss": 1.1598, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.07687028140013727, |
|
"grad_norm": 36.34891128540039, |
|
"learning_rate": 0.000163073495654906, |
|
"loss": 2.4203, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.07706637905677027, |
|
"grad_norm": 5.949688911437988, |
|
"learning_rate": 0.00016289287593948952, |
|
"loss": 2.0893, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.07726247671340328, |
|
"grad_norm": 5.93861198425293, |
|
"learning_rate": 0.00016271191613685776, |
|
"loss": 1.829, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.07745857437003628, |
|
"grad_norm": 8.41703987121582, |
|
"learning_rate": 0.00016253061722553355, |
|
"loss": 2.1544, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.07765467202666929, |
|
"grad_norm": 8.92647647857666, |
|
"learning_rate": 0.00016234898018587337, |
|
"loss": 1.3316, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.07785076968330229, |
|
"grad_norm": 4.4784698486328125, |
|
"learning_rate": 0.0001621670060000622, |
|
"loss": 1.0305, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.0780468673399353, |
|
"grad_norm": 10.080864906311035, |
|
"learning_rate": 0.00016198469565210805, |
|
"loss": 2.1012, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.07824296499656828, |
|
"grad_norm": 8.505258560180664, |
|
"learning_rate": 0.0001618020501278367, |
|
"loss": 2.2348, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.07843906265320129, |
|
"grad_norm": 6.3116559982299805, |
|
"learning_rate": 0.00016161907041488635, |
|
"loss": 1.3726, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.07863516030983429, |
|
"grad_norm": 7.111060619354248, |
|
"learning_rate": 0.00016143575750270233, |
|
"loss": 1.5926, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.0788312579664673, |
|
"grad_norm": 5.481679916381836, |
|
"learning_rate": 0.0001612521123825317, |
|
"loss": 0.8062, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.0790273556231003, |
|
"grad_norm": 9.399847984313965, |
|
"learning_rate": 0.00016106813604741782, |
|
"loss": 1.7107, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.0792234532797333, |
|
"grad_norm": 7.784383296966553, |
|
"learning_rate": 0.00016088382949219524, |
|
"loss": 1.4449, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.07941955093636631, |
|
"grad_norm": 10.075730323791504, |
|
"learning_rate": 0.000160699193713484, |
|
"loss": 2.3277, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.07961564859299931, |
|
"grad_norm": 4.376564979553223, |
|
"learning_rate": 0.00016051422970968438, |
|
"loss": 1.3489, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.07981174624963232, |
|
"grad_norm": 28.959577560424805, |
|
"learning_rate": 0.00016032893848097165, |
|
"loss": 0.9703, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.08000784390626532, |
|
"grad_norm": 9.604166030883789, |
|
"learning_rate": 0.00016014332102929027, |
|
"loss": 2.195, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.08020394156289833, |
|
"grad_norm": 5.949897289276123, |
|
"learning_rate": 0.00015995737835834906, |
|
"loss": 0.8828, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.08040003921953133, |
|
"grad_norm": 9.921558380126953, |
|
"learning_rate": 0.00015977111147361507, |
|
"loss": 2.1569, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.08059613687616433, |
|
"grad_norm": 9.299111366271973, |
|
"learning_rate": 0.00015958452138230877, |
|
"loss": 1.4095, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.08079223453279734, |
|
"grad_norm": 8.806063652038574, |
|
"learning_rate": 0.00015939760909339823, |
|
"loss": 1.7785, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.08098833218943034, |
|
"grad_norm": 5.1091108322143555, |
|
"learning_rate": 0.00015921037561759377, |
|
"loss": 1.8415, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.08118442984606333, |
|
"grad_norm": 12.836872100830078, |
|
"learning_rate": 0.0001590228219673425, |
|
"loss": 1.8644, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.08138052750269634, |
|
"grad_norm": 6.567033767700195, |
|
"learning_rate": 0.00015883494915682289, |
|
"loss": 1.9557, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.08157662515932934, |
|
"grad_norm": 8.996990203857422, |
|
"learning_rate": 0.00015864675820193922, |
|
"loss": 1.888, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.08177272281596235, |
|
"grad_norm": 7.494692802429199, |
|
"learning_rate": 0.00015845825012031605, |
|
"loss": 2.4514, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.08196882047259535, |
|
"grad_norm": 5.421525478363037, |
|
"learning_rate": 0.0001582694259312928, |
|
"loss": 1.0314, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.08216491812922835, |
|
"grad_norm": 25.4990291595459, |
|
"learning_rate": 0.0001580802866559183, |
|
"loss": 0.9981, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.08236101578586136, |
|
"grad_norm": 7.055636405944824, |
|
"learning_rate": 0.00015789083331694506, |
|
"loss": 1.718, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.08255711344249436, |
|
"grad_norm": 8.986098289489746, |
|
"learning_rate": 0.00015770106693882387, |
|
"loss": 1.3962, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.08275321109912737, |
|
"grad_norm": 4.7463788986206055, |
|
"learning_rate": 0.0001575109885476983, |
|
"loss": 1.4039, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.08294930875576037, |
|
"grad_norm": 6.555906295776367, |
|
"learning_rate": 0.00015732059917139912, |
|
"loss": 1.0908, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.08314540641239337, |
|
"grad_norm": 7.233346462249756, |
|
"learning_rate": 0.00015712989983943863, |
|
"loss": 1.9737, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.08334150406902638, |
|
"grad_norm": 4.797744274139404, |
|
"learning_rate": 0.0001569388915830053, |
|
"loss": 1.2191, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.08353760172565938, |
|
"grad_norm": 6.303066253662109, |
|
"learning_rate": 0.000156747575434958, |
|
"loss": 1.8937, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.08373369938229239, |
|
"grad_norm": 12.17996597290039, |
|
"learning_rate": 0.00015655595242982048, |
|
"loss": 4.1888, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.08392979703892539, |
|
"grad_norm": 7.345923900604248, |
|
"learning_rate": 0.00015636402360377587, |
|
"loss": 1.0776, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.08412589469555838, |
|
"grad_norm": 7.688333034515381, |
|
"learning_rate": 0.0001561717899946609, |
|
"loss": 0.9205, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.08432199235219139, |
|
"grad_norm": 5.487595558166504, |
|
"learning_rate": 0.00015597925264196049, |
|
"loss": 1.4256, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.08451809000882439, |
|
"grad_norm": 7.871713161468506, |
|
"learning_rate": 0.0001557864125868019, |
|
"loss": 2.3536, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.0847141876654574, |
|
"grad_norm": 5.0042033195495605, |
|
"learning_rate": 0.00015559327087194942, |
|
"loss": 3.0954, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.0849102853220904, |
|
"grad_norm": 6.770071506500244, |
|
"learning_rate": 0.0001553998285417983, |
|
"loss": 1.5016, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.0851063829787234, |
|
"grad_norm": 6.884315490722656, |
|
"learning_rate": 0.00015520608664236949, |
|
"loss": 0.9995, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.0853024806353564, |
|
"grad_norm": 7.716054439544678, |
|
"learning_rate": 0.00015501204622130377, |
|
"loss": 1.525, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.08549857829198941, |
|
"grad_norm": 9.22014045715332, |
|
"learning_rate": 0.0001548177083278562, |
|
"loss": 1.7368, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.08569467594862241, |
|
"grad_norm": 11.081851959228516, |
|
"learning_rate": 0.0001546230740128904, |
|
"loss": 2.5292, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.08589077360525542, |
|
"grad_norm": 3.8471319675445557, |
|
"learning_rate": 0.0001544281443288728, |
|
"loss": 1.8473, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.08608687126188842, |
|
"grad_norm": 7.554915904998779, |
|
"learning_rate": 0.00015423292032986695, |
|
"loss": 1.4886, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.08628296891852143, |
|
"grad_norm": 5.459715843200684, |
|
"learning_rate": 0.00015403740307152805, |
|
"loss": 2.7721, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.08647906657515443, |
|
"grad_norm": 5.094114780426025, |
|
"learning_rate": 0.00015384159361109698, |
|
"loss": 0.6022, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.08667516423178744, |
|
"grad_norm": 4.084317684173584, |
|
"learning_rate": 0.00015364549300739467, |
|
"loss": 0.8871, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.08687126188842044, |
|
"grad_norm": 3.810145378112793, |
|
"learning_rate": 0.0001534491023208164, |
|
"loss": 2.2058, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.08706735954505343, |
|
"grad_norm": 9.405007362365723, |
|
"learning_rate": 0.000153252422613326, |
|
"loss": 2.414, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.08726345720168643, |
|
"grad_norm": 8.687947273254395, |
|
"learning_rate": 0.00015305545494845023, |
|
"loss": 0.8809, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.08745955485831944, |
|
"grad_norm": 6.396414756774902, |
|
"learning_rate": 0.00015285820039127293, |
|
"loss": 1.488, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.08765565251495244, |
|
"grad_norm": 4.447728157043457, |
|
"learning_rate": 0.00015266066000842917, |
|
"loss": 1.1921, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.08785175017158545, |
|
"grad_norm": 9.093881607055664, |
|
"learning_rate": 0.00015246283486809977, |
|
"loss": 1.6693, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.08804784782821845, |
|
"grad_norm": 8.160088539123535, |
|
"learning_rate": 0.0001522647260400053, |
|
"loss": 2.9597, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.08824394548485145, |
|
"grad_norm": 4.759898662567139, |
|
"learning_rate": 0.00015206633459540023, |
|
"loss": 1.121, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.08844004314148446, |
|
"grad_norm": 6.537527561187744, |
|
"learning_rate": 0.0001518676616070674, |
|
"loss": 1.1842, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.08863614079811746, |
|
"grad_norm": 3.6689343452453613, |
|
"learning_rate": 0.0001516687081493119, |
|
"loss": 1.3464, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.08883223845475047, |
|
"grad_norm": 13.292744636535645, |
|
"learning_rate": 0.00015146947529795567, |
|
"loss": 2.8284, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.08902833611138347, |
|
"grad_norm": 3.7040257453918457, |
|
"learning_rate": 0.0001512699641303312, |
|
"loss": 1.0223, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.08922443376801648, |
|
"grad_norm": 8.529545783996582, |
|
"learning_rate": 0.00015107017572527616, |
|
"loss": 1.9594, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.08942053142464948, |
|
"grad_norm": 11.990189552307129, |
|
"learning_rate": 0.00015087011116312718, |
|
"loss": 1.2631, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.08961662908128248, |
|
"grad_norm": 5.5952534675598145, |
|
"learning_rate": 0.00015066977152571428, |
|
"loss": 0.9935, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.08981272673791549, |
|
"grad_norm": 6.168736457824707, |
|
"learning_rate": 0.0001504691578963549, |
|
"loss": 0.9699, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.09000882439454848, |
|
"grad_norm": 6.674266338348389, |
|
"learning_rate": 0.0001502682713598481, |
|
"loss": 1.3238, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.09020492205118148, |
|
"grad_norm": 7.915739059448242, |
|
"learning_rate": 0.00015006711300246853, |
|
"loss": 1.4752, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.09040101970781449, |
|
"grad_norm": 9.858468055725098, |
|
"learning_rate": 0.00014986568391196092, |
|
"loss": 1.7595, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.09059711736444749, |
|
"grad_norm": 7.993055820465088, |
|
"learning_rate": 0.0001496639851775337, |
|
"loss": 1.6935, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.0907932150210805, |
|
"grad_norm": 8.133615493774414, |
|
"learning_rate": 0.00014946201788985358, |
|
"loss": 1.3022, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.0909893126777135, |
|
"grad_norm": 23.21516990661621, |
|
"learning_rate": 0.0001492597831410393, |
|
"loss": 1.9148, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.0911854103343465, |
|
"grad_norm": 5.873632431030273, |
|
"learning_rate": 0.00014905728202465595, |
|
"loss": 0.8053, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.09138150799097951, |
|
"grad_norm": 9.700281143188477, |
|
"learning_rate": 0.000148854515635709, |
|
"loss": 1.8187, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.09157760564761251, |
|
"grad_norm": 9.361727714538574, |
|
"learning_rate": 0.00014865148507063833, |
|
"loss": 1.5871, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.09177370330424552, |
|
"grad_norm": 6.021685600280762, |
|
"learning_rate": 0.00014844819142731223, |
|
"loss": 0.6632, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.09196980096087852, |
|
"grad_norm": 8.407185554504395, |
|
"learning_rate": 0.0001482446358050217, |
|
"loss": 2.1806, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.09216589861751152, |
|
"grad_norm": 6.5181450843811035, |
|
"learning_rate": 0.00014804081930447433, |
|
"loss": 1.1511, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.09236199627414453, |
|
"grad_norm": 6.047727584838867, |
|
"learning_rate": 0.00014783674302778832, |
|
"loss": 1.4668, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.09255809393077753, |
|
"grad_norm": 5.636353969573975, |
|
"learning_rate": 0.00014763240807848666, |
|
"loss": 1.2796, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.09275419158741054, |
|
"grad_norm": 10.02710247039795, |
|
"learning_rate": 0.00014742781556149103, |
|
"loss": 1.3822, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.09295028924404353, |
|
"grad_norm": 5.736359596252441, |
|
"learning_rate": 0.00014722296658311595, |
|
"loss": 1.6941, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.09314638690067653, |
|
"grad_norm": 4.80971622467041, |
|
"learning_rate": 0.0001470178622510627, |
|
"loss": 2.3505, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.09334248455730954, |
|
"grad_norm": 6.357244491577148, |
|
"learning_rate": 0.00014681250367441328, |
|
"loss": 3.0606, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.09353858221394254, |
|
"grad_norm": 6.115423202514648, |
|
"learning_rate": 0.00014660689196362462, |
|
"loss": 1.4211, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.09373467987057554, |
|
"grad_norm": 10.913143157958984, |
|
"learning_rate": 0.0001464010282305224, |
|
"loss": 1.6531, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.09393077752720855, |
|
"grad_norm": 7.621870517730713, |
|
"learning_rate": 0.000146194913588295, |
|
"loss": 1.964, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.09412687518384155, |
|
"grad_norm": 11.227121353149414, |
|
"learning_rate": 0.0001459885491514878, |
|
"loss": 1.639, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.09432297284047456, |
|
"grad_norm": 4.943283557891846, |
|
"learning_rate": 0.00014578193603599662, |
|
"loss": 0.5043, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.09451907049710756, |
|
"grad_norm": 13.006143569946289, |
|
"learning_rate": 0.0001455750753590622, |
|
"loss": 3.0382, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.09471516815374056, |
|
"grad_norm": 6.181257247924805, |
|
"learning_rate": 0.00014536796823926386, |
|
"loss": 1.3281, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.09491126581037357, |
|
"grad_norm": 4.05950403213501, |
|
"learning_rate": 0.0001451606157965136, |
|
"loss": 1.5701, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.09510736346700657, |
|
"grad_norm": 7.532302379608154, |
|
"learning_rate": 0.0001449530191520499, |
|
"loss": 1.811, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.09530346112363958, |
|
"grad_norm": 7.403939247131348, |
|
"learning_rate": 0.00014474517942843175, |
|
"loss": 1.8269, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.09549955878027258, |
|
"grad_norm": 18.15042495727539, |
|
"learning_rate": 0.0001445370977495326, |
|
"loss": 2.4586, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.09569565643690559, |
|
"grad_norm": 13.836175918579102, |
|
"learning_rate": 0.00014432877524053427, |
|
"loss": 1.7828, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.09589175409353858, |
|
"grad_norm": 5.9050822257995605, |
|
"learning_rate": 0.0001441202130279208, |
|
"loss": 2.7068, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.09608785175017158, |
|
"grad_norm": 7.173941612243652, |
|
"learning_rate": 0.0001439114122394724, |
|
"loss": 2.7424, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.09628394940680458, |
|
"grad_norm": 17.888187408447266, |
|
"learning_rate": 0.0001437023740042594, |
|
"loss": 2.0253, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.09648004706343759, |
|
"grad_norm": 6.492856025695801, |
|
"learning_rate": 0.00014349309945263606, |
|
"loss": 2.1604, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.09667614472007059, |
|
"grad_norm": 11.6004638671875, |
|
"learning_rate": 0.00014328358971623455, |
|
"loss": 2.1492, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.0968722423767036, |
|
"grad_norm": 6.262972354888916, |
|
"learning_rate": 0.00014307384592795872, |
|
"loss": 2.3007, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.0970683400333366, |
|
"grad_norm": 6.768704891204834, |
|
"learning_rate": 0.00014286386922197805, |
|
"loss": 0.6546, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.0972644376899696, |
|
"grad_norm": 4.372698783874512, |
|
"learning_rate": 0.0001426536607337215, |
|
"loss": 2.3647, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.09746053534660261, |
|
"grad_norm": 12.042864799499512, |
|
"learning_rate": 0.00014244322159987145, |
|
"loss": 2.1466, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.09765663300323561, |
|
"grad_norm": 5.374109745025635, |
|
"learning_rate": 0.0001422325529583573, |
|
"loss": 0.8599, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.09785273065986862, |
|
"grad_norm": 7.459904193878174, |
|
"learning_rate": 0.00014202165594834963, |
|
"loss": 1.7119, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.09804882831650162, |
|
"grad_norm": 8.25338363647461, |
|
"learning_rate": 0.00014181053171025392, |
|
"loss": 1.3014, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.09824492597313463, |
|
"grad_norm": 8.594246864318848, |
|
"learning_rate": 0.00014159918138570424, |
|
"loss": 1.9332, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.09844102362976763, |
|
"grad_norm": 4.802338600158691, |
|
"learning_rate": 0.00014138760611755727, |
|
"loss": 1.6591, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.09863712128640063, |
|
"grad_norm": 5.049933433532715, |
|
"learning_rate": 0.00014117580704988612, |
|
"loss": 1.3722, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.09883321894303362, |
|
"grad_norm": 9.438610076904297, |
|
"learning_rate": 0.00014096378532797393, |
|
"loss": 2.1888, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.09902931659966663, |
|
"grad_norm": 11.144261360168457, |
|
"learning_rate": 0.00014075154209830792, |
|
"loss": 1.2526, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.09922541425629963, |
|
"grad_norm": 12.456216812133789, |
|
"learning_rate": 0.0001405390785085731, |
|
"loss": 2.8251, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.09942151191293264, |
|
"grad_norm": 13.78912353515625, |
|
"learning_rate": 0.00014032639570764593, |
|
"loss": 2.3312, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.09961760956956564, |
|
"grad_norm": 22.588031768798828, |
|
"learning_rate": 0.00014011349484558847, |
|
"loss": 1.8865, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.09981370722619864, |
|
"grad_norm": 5.357258319854736, |
|
"learning_rate": 0.00013990037707364166, |
|
"loss": 2.1296, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.10000980488283165, |
|
"grad_norm": 13.293752670288086, |
|
"learning_rate": 0.00013968704354421952, |
|
"loss": 2.0479, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.10020590253946465, |
|
"grad_norm": 6.764191150665283, |
|
"learning_rate": 0.00013947349541090274, |
|
"loss": 1.3908, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.10040200019609766, |
|
"grad_norm": 5.701320648193359, |
|
"learning_rate": 0.00013925973382843246, |
|
"loss": 1.1343, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.10059809785273066, |
|
"grad_norm": 6.728299140930176, |
|
"learning_rate": 0.0001390457599527039, |
|
"loss": 1.3219, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.10079419550936367, |
|
"grad_norm": 9.094908714294434, |
|
"learning_rate": 0.00013883157494076046, |
|
"loss": 1.1282, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.10099029316599667, |
|
"grad_norm": 7.321430683135986, |
|
"learning_rate": 0.00013861717995078708, |
|
"loss": 1.0628, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.10118639082262967, |
|
"grad_norm": 5.976141452789307, |
|
"learning_rate": 0.00013840257614210414, |
|
"loss": 1.5372, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.10138248847926268, |
|
"grad_norm": 7.5356926918029785, |
|
"learning_rate": 0.00013818776467516125, |
|
"loss": 2.0415, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.10157858613589568, |
|
"grad_norm": 5.656033992767334, |
|
"learning_rate": 0.00013797274671153092, |
|
"loss": 1.9171, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.10177468379252867, |
|
"grad_norm": 4.822179317474365, |
|
"learning_rate": 0.0001377575234139022, |
|
"loss": 1.1379, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.10197078144916168, |
|
"grad_norm": 2.5650954246520996, |
|
"learning_rate": 0.0001375420959460745, |
|
"loss": 0.3984, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.10216687910579468, |
|
"grad_norm": 6.997866630554199, |
|
"learning_rate": 0.00013732646547295126, |
|
"loss": 1.2149, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.10236297676242768, |
|
"grad_norm": 9.925884246826172, |
|
"learning_rate": 0.00013711063316053368, |
|
"loss": 1.9542, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.10255907441906069, |
|
"grad_norm": 7.913006782531738, |
|
"learning_rate": 0.00013689460017591432, |
|
"loss": 1.3162, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.1027551720756937, |
|
"grad_norm": 8.868382453918457, |
|
"learning_rate": 0.00013667836768727092, |
|
"loss": 2.8749, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.1029512697323267, |
|
"grad_norm": 7.418759346008301, |
|
"learning_rate": 0.00013646193686386, |
|
"loss": 1.2697, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.1031473673889597, |
|
"grad_norm": 9.501752853393555, |
|
"learning_rate": 0.00013624530887601055, |
|
"loss": 1.1473, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.1033434650455927, |
|
"grad_norm": 6.225274562835693, |
|
"learning_rate": 0.0001360284848951177, |
|
"loss": 1.0762, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.10353956270222571, |
|
"grad_norm": 6.502389907836914, |
|
"learning_rate": 0.0001358114660936364, |
|
"loss": 1.383, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.10373566035885871, |
|
"grad_norm": 17.53512954711914, |
|
"learning_rate": 0.00013559425364507508, |
|
"loss": 1.7968, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.10393175801549172, |
|
"grad_norm": 6.0248122215271, |
|
"learning_rate": 0.00013537684872398927, |
|
"loss": 1.2982, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.10412785567212472, |
|
"grad_norm": 4.621517658233643, |
|
"learning_rate": 0.00013515925250597537, |
|
"loss": 1.5811, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.10432395332875773, |
|
"grad_norm": 6.690089702606201, |
|
"learning_rate": 0.00013494146616766406, |
|
"loss": 1.3376, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.10452005098539073, |
|
"grad_norm": 6.418732643127441, |
|
"learning_rate": 0.00013472349088671418, |
|
"loss": 1.507, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.10471614864202373, |
|
"grad_norm": 12.271800994873047, |
|
"learning_rate": 0.0001345053278418062, |
|
"loss": 1.6329, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.10491224629865673, |
|
"grad_norm": 10.394608497619629, |
|
"learning_rate": 0.000134286978212636, |
|
"loss": 1.8793, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.10510834395528973, |
|
"grad_norm": 5.690524101257324, |
|
"learning_rate": 0.00013406844317990826, |
|
"loss": 2.2874, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.10530444161192273, |
|
"grad_norm": 4.313036918640137, |
|
"learning_rate": 0.00013384972392533034, |
|
"loss": 2.2905, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.10550053926855574, |
|
"grad_norm": 6.115211009979248, |
|
"learning_rate": 0.0001336308216316056, |
|
"loss": 2.3146, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.10569663692518874, |
|
"grad_norm": 7.43934965133667, |
|
"learning_rate": 0.0001334117374824274, |
|
"loss": 2.5837, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.10589273458182175, |
|
"grad_norm": 8.397297859191895, |
|
"learning_rate": 0.00013319247266247225, |
|
"loss": 2.1631, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.10608883223845475, |
|
"grad_norm": 5.273848056793213, |
|
"learning_rate": 0.00013297302835739376, |
|
"loss": 1.261, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.10628492989508775, |
|
"grad_norm": 5.789434909820557, |
|
"learning_rate": 0.00013275340575381598, |
|
"loss": 1.8465, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.10648102755172076, |
|
"grad_norm": 8.769668579101562, |
|
"learning_rate": 0.0001325336060393272, |
|
"loss": 1.3243, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.10667712520835376, |
|
"grad_norm": 7.573061943054199, |
|
"learning_rate": 0.00013231363040247334, |
|
"loss": 1.243, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.10687322286498677, |
|
"grad_norm": 9.284170150756836, |
|
"learning_rate": 0.00013209348003275165, |
|
"loss": 1.6408, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.10706932052161977, |
|
"grad_norm": 4.322136878967285, |
|
"learning_rate": 0.00013187315612060415, |
|
"loss": 3.2532, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.10726541817825277, |
|
"grad_norm": 7.865033149719238, |
|
"learning_rate": 0.00013165265985741142, |
|
"loss": 3.5798, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.10746151583488578, |
|
"grad_norm": 3.1747426986694336, |
|
"learning_rate": 0.00013143199243548587, |
|
"loss": 1.1817, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.10765761349151878, |
|
"grad_norm": 6.890042781829834, |
|
"learning_rate": 0.00013121115504806553, |
|
"loss": 2.0318, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.10785371114815177, |
|
"grad_norm": 7.632523059844971, |
|
"learning_rate": 0.00013099014888930748, |
|
"loss": 1.3925, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.10804980880478478, |
|
"grad_norm": 4.939361095428467, |
|
"learning_rate": 0.00013076897515428132, |
|
"loss": 1.3661, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.10824590646141778, |
|
"grad_norm": 6.433002948760986, |
|
"learning_rate": 0.00013054763503896294, |
|
"loss": 0.9917, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.10844200411805079, |
|
"grad_norm": 4.507363319396973, |
|
"learning_rate": 0.00013032612974022784, |
|
"loss": 0.9992, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.10863810177468379, |
|
"grad_norm": 9.477548599243164, |
|
"learning_rate": 0.00013010446045584479, |
|
"loss": 1.1149, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.1088341994313168, |
|
"grad_norm": 5.909473419189453, |
|
"learning_rate": 0.00012988262838446922, |
|
"loss": 2.0005, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.1090302970879498, |
|
"grad_norm": 8.154961585998535, |
|
"learning_rate": 0.00012966063472563685, |
|
"loss": 2.1173, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.1092263947445828, |
|
"grad_norm": 4.8719868659973145, |
|
"learning_rate": 0.00012943848067975718, |
|
"loss": 2.925, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.1094224924012158, |
|
"grad_norm": 5.895204544067383, |
|
"learning_rate": 0.00012921616744810698, |
|
"loss": 2.0375, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.10961859005784881, |
|
"grad_norm": 5.050942897796631, |
|
"learning_rate": 0.0001289936962328238, |
|
"loss": 1.6125, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.10981468771448182, |
|
"grad_norm": 5.890807151794434, |
|
"learning_rate": 0.0001287710682368995, |
|
"loss": 0.991, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.11001078537111482, |
|
"grad_norm": 5.316340446472168, |
|
"learning_rate": 0.00012854828466417364, |
|
"loss": 1.8846, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.11020688302774782, |
|
"grad_norm": 14.81187629699707, |
|
"learning_rate": 0.00012832534671932715, |
|
"loss": 2.141, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.11040298068438083, |
|
"grad_norm": 11.379783630371094, |
|
"learning_rate": 0.0001281022556078756, |
|
"loss": 2.9432, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.11059907834101383, |
|
"grad_norm": 17.28523826599121, |
|
"learning_rate": 0.0001278790125361629, |
|
"loss": 2.4028, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.11079517599764682, |
|
"grad_norm": 6.626383304595947, |
|
"learning_rate": 0.0001276556187113546, |
|
"loss": 1.1534, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.11099127365427983, |
|
"grad_norm": 7.6215128898620605, |
|
"learning_rate": 0.00012743207534143144, |
|
"loss": 1.6464, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.11118737131091283, |
|
"grad_norm": 8.348710060119629, |
|
"learning_rate": 0.00012720838363518286, |
|
"loss": 1.1823, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.11138346896754583, |
|
"grad_norm": 5.307929039001465, |
|
"learning_rate": 0.00012698454480220039, |
|
"loss": 2.3759, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.11157956662417884, |
|
"grad_norm": 4.690614223480225, |
|
"learning_rate": 0.00012676056005287106, |
|
"loss": 1.3698, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.11177566428081184, |
|
"grad_norm": 5.7534871101379395, |
|
"learning_rate": 0.00012653643059837107, |
|
"loss": 1.9587, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.11197176193744485, |
|
"grad_norm": 5.088260650634766, |
|
"learning_rate": 0.00012631215765065897, |
|
"loss": 1.0195, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.11216785959407785, |
|
"grad_norm": 5.987957000732422, |
|
"learning_rate": 0.0001260877424224693, |
|
"loss": 2.3612, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.11236395725071086, |
|
"grad_norm": 5.895928859710693, |
|
"learning_rate": 0.000125863186127306, |
|
"loss": 2.0393, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.11256005490734386, |
|
"grad_norm": 6.654543876647949, |
|
"learning_rate": 0.00012563848997943568, |
|
"loss": 1.7165, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.11275615256397686, |
|
"grad_norm": 13.193924903869629, |
|
"learning_rate": 0.00012541365519388138, |
|
"loss": 1.3969, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.11295225022060987, |
|
"grad_norm": 5.471423149108887, |
|
"learning_rate": 0.00012518868298641564, |
|
"loss": 1.091, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.11314834787724287, |
|
"grad_norm": 9.188228607177734, |
|
"learning_rate": 0.00012496357457355422, |
|
"loss": 2.0812, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.11334444553387588, |
|
"grad_norm": 11.622628211975098, |
|
"learning_rate": 0.00012473833117254931, |
|
"loss": 2.1117, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.11354054319050888, |
|
"grad_norm": 4.972383975982666, |
|
"learning_rate": 0.00012451295400138314, |
|
"loss": 1.3126, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.11373664084714187, |
|
"grad_norm": 7.87300968170166, |
|
"learning_rate": 0.0001242874442787611, |
|
"loss": 3.2112, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.11393273850377487, |
|
"grad_norm": 5.145363807678223, |
|
"learning_rate": 0.0001240618032241056, |
|
"loss": 1.2819, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.11412883616040788, |
|
"grad_norm": 12.539934158325195, |
|
"learning_rate": 0.00012383603205754894, |
|
"loss": 2.2262, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.11432493381704088, |
|
"grad_norm": 5.802661895751953, |
|
"learning_rate": 0.00012361013199992726, |
|
"loss": 1.9663, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.11452103147367389, |
|
"grad_norm": 16.773561477661133, |
|
"learning_rate": 0.0001233841042727734, |
|
"loss": 2.342, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.11471712913030689, |
|
"grad_norm": 6.163604259490967, |
|
"learning_rate": 0.0001231579500983108, |
|
"loss": 1.3009, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.1149132267869399, |
|
"grad_norm": 7.877258777618408, |
|
"learning_rate": 0.00012293167069944645, |
|
"loss": 1.9388, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.1151093244435729, |
|
"grad_norm": 13.269306182861328, |
|
"learning_rate": 0.00012270526729976465, |
|
"loss": 2.3028, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.1153054221002059, |
|
"grad_norm": 9.610801696777344, |
|
"learning_rate": 0.00012247874112352009, |
|
"loss": 1.0249, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.11550151975683891, |
|
"grad_norm": 11.20408821105957, |
|
"learning_rate": 0.00012225209339563145, |
|
"loss": 2.2807, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.11569761741347191, |
|
"grad_norm": 13.390026092529297, |
|
"learning_rate": 0.00012202532534167463, |
|
"loss": 1.7141, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.11589371507010492, |
|
"grad_norm": 4.4530229568481445, |
|
"learning_rate": 0.00012179843818787624, |
|
"loss": 1.4108, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.11608981272673792, |
|
"grad_norm": 12.671486854553223, |
|
"learning_rate": 0.00012157143316110684, |
|
"loss": 1.6154, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.11628591038337092, |
|
"grad_norm": 6.195436477661133, |
|
"learning_rate": 0.00012134431148887442, |
|
"loss": 3.4713, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.11648200804000393, |
|
"grad_norm": 5.860586643218994, |
|
"learning_rate": 0.00012111707439931776, |
|
"loss": 2.0938, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.11667810569663692, |
|
"grad_norm": 7.4623260498046875, |
|
"learning_rate": 0.00012088972312119964, |
|
"loss": 1.9674, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.11687420335326992, |
|
"grad_norm": 7.368410110473633, |
|
"learning_rate": 0.00012066225888390042, |
|
"loss": 1.3425, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.11707030100990293, |
|
"grad_norm": 7.527665138244629, |
|
"learning_rate": 0.00012043468291741116, |
|
"loss": 1.6366, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.11726639866653593, |
|
"grad_norm": 6.0890793800354, |
|
"learning_rate": 0.00012020699645232721, |
|
"loss": 1.9652, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.11746249632316894, |
|
"grad_norm": 6.350461959838867, |
|
"learning_rate": 0.00011997920071984133, |
|
"loss": 2.2867, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.11765859397980194, |
|
"grad_norm": 6.444486618041992, |
|
"learning_rate": 0.00011975129695173719, |
|
"loss": 0.7242, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.11785469163643494, |
|
"grad_norm": 7.270365238189697, |
|
"learning_rate": 0.00011952328638038261, |
|
"loss": 2.2346, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.11805078929306795, |
|
"grad_norm": 5.961056709289551, |
|
"learning_rate": 0.00011929517023872298, |
|
"loss": 2.4926, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.11824688694970095, |
|
"grad_norm": 5.347055435180664, |
|
"learning_rate": 0.00011906694976027455, |
|
"loss": 1.3998, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.11844298460633396, |
|
"grad_norm": 11.233309745788574, |
|
"learning_rate": 0.0001188386261791177, |
|
"loss": 2.0185, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.11863908226296696, |
|
"grad_norm": 4.813647747039795, |
|
"learning_rate": 0.0001186102007298904, |
|
"loss": 1.3932, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.11883517991959996, |
|
"grad_norm": 5.281280040740967, |
|
"learning_rate": 0.00011838167464778142, |
|
"loss": 1.7445, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.11903127757623297, |
|
"grad_norm": 8.755961418151855, |
|
"learning_rate": 0.00011815304916852372, |
|
"loss": 1.656, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.11922737523286597, |
|
"grad_norm": 15.501289367675781, |
|
"learning_rate": 0.00011792432552838772, |
|
"loss": 2.0894, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.11942347288949898, |
|
"grad_norm": 6.1117753982543945, |
|
"learning_rate": 0.00011769550496417466, |
|
"loss": 1.0473, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.11961957054613197, |
|
"grad_norm": 9.669829368591309, |
|
"learning_rate": 0.00011746658871320983, |
|
"loss": 0.7436, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.11981566820276497, |
|
"grad_norm": 10.253652572631836, |
|
"learning_rate": 0.00011723757801333601, |
|
"loss": 2.7502, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.12001176585939798, |
|
"grad_norm": 7.084132671356201, |
|
"learning_rate": 0.00011700847410290667, |
|
"loss": 2.5537, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.12020786351603098, |
|
"grad_norm": 5.936439037322998, |
|
"learning_rate": 0.0001167792782207793, |
|
"loss": 1.1551, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.12040396117266398, |
|
"grad_norm": 7.053248405456543, |
|
"learning_rate": 0.0001165499916063087, |
|
"loss": 1.2488, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.12060005882929699, |
|
"grad_norm": 7.141607284545898, |
|
"learning_rate": 0.00011632061549934036, |
|
"loss": 1.4092, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.12079615648592999, |
|
"grad_norm": 7.723854064941406, |
|
"learning_rate": 0.00011609115114020363, |
|
"loss": 1.7669, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.120992254142563, |
|
"grad_norm": 7.095085144042969, |
|
"learning_rate": 0.00011586159976970514, |
|
"loss": 1.5018, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.121188351799196, |
|
"grad_norm": 6.469241142272949, |
|
"learning_rate": 0.00011563196262912193, |
|
"loss": 2.3377, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.121384449455829, |
|
"grad_norm": 6.16892671585083, |
|
"learning_rate": 0.00011540224096019494, |
|
"loss": 2.0011, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.12158054711246201, |
|
"grad_norm": 4.764594554901123, |
|
"learning_rate": 0.00011517243600512209, |
|
"loss": 0.8995, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.12177664476909501, |
|
"grad_norm": 8.256946563720703, |
|
"learning_rate": 0.00011494254900655175, |
|
"loss": 1.5867, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.12197274242572802, |
|
"grad_norm": 9.220061302185059, |
|
"learning_rate": 0.00011471258120757586, |
|
"loss": 1.0146, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.12216884008236102, |
|
"grad_norm": 6.284097194671631, |
|
"learning_rate": 0.00011448253385172335, |
|
"loss": 0.8305, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.12236493773899403, |
|
"grad_norm": 6.183596134185791, |
|
"learning_rate": 0.00011425240818295326, |
|
"loss": 0.7749, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.12256103539562702, |
|
"grad_norm": 7.550481796264648, |
|
"learning_rate": 0.00011402220544564819, |
|
"loss": 1.3157, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.12275713305226002, |
|
"grad_norm": 4.810642719268799, |
|
"learning_rate": 0.0001137919268846074, |
|
"loss": 1.6442, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.12295323070889302, |
|
"grad_norm": 10.603952407836914, |
|
"learning_rate": 0.00011356157374504027, |
|
"loss": 1.7275, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.12314932836552603, |
|
"grad_norm": 13.009543418884277, |
|
"learning_rate": 0.00011333114727255929, |
|
"loss": 2.3718, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.12334542602215903, |
|
"grad_norm": 8.453059196472168, |
|
"learning_rate": 0.00011310064871317366, |
|
"loss": 1.6558, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.12354152367879204, |
|
"grad_norm": 7.005093574523926, |
|
"learning_rate": 0.00011287007931328226, |
|
"loss": 1.9709, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.12373762133542504, |
|
"grad_norm": 5.710804462432861, |
|
"learning_rate": 0.00011263944031966714, |
|
"loss": 1.8595, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.12393371899205805, |
|
"grad_norm": 3.608022689819336, |
|
"learning_rate": 0.00011240873297948653, |
|
"loss": 1.7165, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.12412981664869105, |
|
"grad_norm": 8.973870277404785, |
|
"learning_rate": 0.0001121779585402684, |
|
"loss": 1.4924, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.12432591430532405, |
|
"grad_norm": 4.735976219177246, |
|
"learning_rate": 0.00011194711824990344, |
|
"loss": 1.2721, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.12452201196195706, |
|
"grad_norm": 4.169306755065918, |
|
"learning_rate": 0.00011171621335663844, |
|
"loss": 0.718, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.12471810961859006, |
|
"grad_norm": 14.129206657409668, |
|
"learning_rate": 0.00011148524510906956, |
|
"loss": 1.1904, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.12491420727522307, |
|
"grad_norm": 12.76352310180664, |
|
"learning_rate": 0.00011125421475613554, |
|
"loss": 2.4872, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.12511030493185607, |
|
"grad_norm": 8.636000633239746, |
|
"learning_rate": 0.00011102312354711091, |
|
"loss": 0.7629, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.12530640258848907, |
|
"grad_norm": 6.22203254699707, |
|
"learning_rate": 0.00011079197273159925, |
|
"loss": 1.7372, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.12550250024512208, |
|
"grad_norm": 4.235958576202393, |
|
"learning_rate": 0.0001105607635595266, |
|
"loss": 1.8661, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.12569859790175508, |
|
"grad_norm": 5.598705768585205, |
|
"learning_rate": 0.00011032949728113437, |
|
"loss": 1.7918, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.1258946955583881, |
|
"grad_norm": 9.218809127807617, |
|
"learning_rate": 0.00011009817514697291, |
|
"loss": 1.2968, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.1260907932150211, |
|
"grad_norm": 7.068932056427002, |
|
"learning_rate": 0.00010986679840789451, |
|
"loss": 1.8485, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.1262868908716541, |
|
"grad_norm": 3.891483783721924, |
|
"learning_rate": 0.00010963536831504684, |
|
"loss": 0.3918, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.1264829885282871, |
|
"grad_norm": 5.959576606750488, |
|
"learning_rate": 0.00010940388611986592, |
|
"loss": 0.7057, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.1266790861849201, |
|
"grad_norm": 11.663713455200195, |
|
"learning_rate": 0.00010917235307406966, |
|
"loss": 1.5916, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.12687518384155308, |
|
"grad_norm": 15.05775260925293, |
|
"learning_rate": 0.00010894077042965083, |
|
"loss": 2.979, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.12707128149818608, |
|
"grad_norm": 7.2112650871276855, |
|
"learning_rate": 0.00010870913943887049, |
|
"loss": 1.6293, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.1272673791548191, |
|
"grad_norm": 13.646367073059082, |
|
"learning_rate": 0.00010847746135425102, |
|
"loss": 2.5277, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.1274634768114521, |
|
"grad_norm": 6.29921293258667, |
|
"learning_rate": 0.0001082457374285696, |
|
"loss": 2.4015, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.1276595744680851, |
|
"grad_norm": 6.337920188903809, |
|
"learning_rate": 0.00010801396891485114, |
|
"loss": 0.9928, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.1278556721247181, |
|
"grad_norm": 7.32103157043457, |
|
"learning_rate": 0.00010778215706636177, |
|
"loss": 2.4466, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.1280517697813511, |
|
"grad_norm": 10.154195785522461, |
|
"learning_rate": 0.00010755030313660188, |
|
"loss": 2.4858, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.1282478674379841, |
|
"grad_norm": 7.129855632781982, |
|
"learning_rate": 0.00010731840837929946, |
|
"loss": 1.2546, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.1284439650946171, |
|
"grad_norm": 5.403641223907471, |
|
"learning_rate": 0.00010708647404840319, |
|
"loss": 1.1017, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.12864006275125012, |
|
"grad_norm": 5.554705619812012, |
|
"learning_rate": 0.00010685450139807584, |
|
"loss": 2.0789, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.12883616040788312, |
|
"grad_norm": 7.935024738311768, |
|
"learning_rate": 0.00010662249168268736, |
|
"loss": 1.9944, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.12903225806451613, |
|
"grad_norm": 4.453607559204102, |
|
"learning_rate": 0.00010639044615680809, |
|
"loss": 0.8442, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.12922835572114913, |
|
"grad_norm": 13.252726554870605, |
|
"learning_rate": 0.00010615836607520202, |
|
"loss": 1.1374, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.12942445337778213, |
|
"grad_norm": 4.426390171051025, |
|
"learning_rate": 0.00010592625269282, |
|
"loss": 1.0554, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.12962055103441514, |
|
"grad_norm": 6.692317008972168, |
|
"learning_rate": 0.000105694107264793, |
|
"loss": 0.6669, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.12981664869104814, |
|
"grad_norm": 6.067706108093262, |
|
"learning_rate": 0.00010546193104642519, |
|
"loss": 1.488, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.13001274634768115, |
|
"grad_norm": 7.749240875244141, |
|
"learning_rate": 0.00010522972529318733, |
|
"loss": 1.8632, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.13020884400431415, |
|
"grad_norm": 4.964468955993652, |
|
"learning_rate": 0.00010499749126070979, |
|
"loss": 0.5845, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.13040494166094715, |
|
"grad_norm": 6.432995319366455, |
|
"learning_rate": 0.00010476523020477593, |
|
"loss": 1.1617, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.13060103931758016, |
|
"grad_norm": 6.4099249839782715, |
|
"learning_rate": 0.00010453294338131519, |
|
"loss": 1.2821, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.13079713697421316, |
|
"grad_norm": 5.935438632965088, |
|
"learning_rate": 0.0001043006320463964, |
|
"loss": 1.4453, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.13099323463084617, |
|
"grad_norm": 10.963349342346191, |
|
"learning_rate": 0.00010406829745622085, |
|
"loss": 1.6318, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.13118933228747917, |
|
"grad_norm": 9.157288551330566, |
|
"learning_rate": 0.00010383594086711567, |
|
"loss": 1.6981, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.13138542994411218, |
|
"grad_norm": 17.15442657470703, |
|
"learning_rate": 0.00010360356353552687, |
|
"loss": 1.498, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.13158152760074518, |
|
"grad_norm": 3.763289451599121, |
|
"learning_rate": 0.00010337116671801272, |
|
"loss": 2.425, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.13177762525737818, |
|
"grad_norm": 8.91812801361084, |
|
"learning_rate": 0.00010313875167123672, |
|
"loss": 2.3129, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.1319737229140112, |
|
"grad_norm": 23.093202590942383, |
|
"learning_rate": 0.00010290631965196109, |
|
"loss": 2.3172, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.1321698205706442, |
|
"grad_norm": 6.487079620361328, |
|
"learning_rate": 0.00010267387191703972, |
|
"loss": 1.6108, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.1323659182272772, |
|
"grad_norm": 3.9093501567840576, |
|
"learning_rate": 0.00010244140972341155, |
|
"loss": 0.8954, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.1325620158839102, |
|
"grad_norm": 11.078289985656738, |
|
"learning_rate": 0.00010220893432809365, |
|
"loss": 2.0604, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.13275811354054318, |
|
"grad_norm": 7.805664539337158, |
|
"learning_rate": 0.00010197644698817446, |
|
"loss": 1.834, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.13295421119717618, |
|
"grad_norm": 6.838125705718994, |
|
"learning_rate": 0.00010174394896080713, |
|
"loss": 1.4876, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.13315030885380919, |
|
"grad_norm": 3.9814906120300293, |
|
"learning_rate": 0.00010151144150320247, |
|
"loss": 1.3479, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 0.1333464065104422, |
|
"grad_norm": 6.396786212921143, |
|
"learning_rate": 0.00010127892587262233, |
|
"loss": 1.7642, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.1335425041670752, |
|
"grad_norm": 10.133893013000488, |
|
"learning_rate": 0.00010104640332637278, |
|
"loss": 0.8957, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 0.1337386018237082, |
|
"grad_norm": 6.3815412521362305, |
|
"learning_rate": 0.00010081387512179729, |
|
"loss": 1.4104, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.1337386018237082, |
|
"eval_loss": 0.41039586067199707, |
|
"eval_runtime": 77.8442, |
|
"eval_samples_per_second": 27.594, |
|
"eval_steps_per_second": 13.797, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.1339346994803412, |
|
"grad_norm": 11.698119163513184, |
|
"learning_rate": 0.00010058134251626983, |
|
"loss": 2.1025, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 0.1341307971369742, |
|
"grad_norm": 5.185816287994385, |
|
"learning_rate": 0.00010034880676718832, |
|
"loss": 1.0986, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.1343268947936072, |
|
"grad_norm": 6.936627388000488, |
|
"learning_rate": 0.0001001162691319675, |
|
"loss": 1.3077, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.13452299245024021, |
|
"grad_norm": 4.661152362823486, |
|
"learning_rate": 9.988373086803251e-05, |
|
"loss": 1.6312, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.13471909010687322, |
|
"grad_norm": 4.227341651916504, |
|
"learning_rate": 9.965119323281174e-05, |
|
"loss": 0.6856, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 0.13491518776350622, |
|
"grad_norm": 24.00637435913086, |
|
"learning_rate": 9.941865748373017e-05, |
|
"loss": 2.8267, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.13511128542013923, |
|
"grad_norm": 4.701898574829102, |
|
"learning_rate": 9.918612487820273e-05, |
|
"loss": 1.7314, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 0.13530738307677223, |
|
"grad_norm": 7.700753688812256, |
|
"learning_rate": 9.895359667362724e-05, |
|
"loss": 1.1831, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.13550348073340523, |
|
"grad_norm": 5.045268535614014, |
|
"learning_rate": 9.87210741273777e-05, |
|
"loss": 2.6148, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 0.13569957839003824, |
|
"grad_norm": 8.903031349182129, |
|
"learning_rate": 9.848855849679754e-05, |
|
"loss": 2.3442, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.13589567604667124, |
|
"grad_norm": 6.869392395019531, |
|
"learning_rate": 9.82560510391929e-05, |
|
"loss": 1.7329, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 0.13609177370330425, |
|
"grad_norm": 5.181933403015137, |
|
"learning_rate": 9.802355301182556e-05, |
|
"loss": 2.2152, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.13628787135993725, |
|
"grad_norm": 9.427200317382812, |
|
"learning_rate": 9.779106567190642e-05, |
|
"loss": 0.9606, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.13648396901657026, |
|
"grad_norm": 5.893560409545898, |
|
"learning_rate": 9.755859027658848e-05, |
|
"loss": 0.7759, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.13668006667320326, |
|
"grad_norm": 3.864348888397217, |
|
"learning_rate": 9.73261280829603e-05, |
|
"loss": 0.714, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 0.13687616432983626, |
|
"grad_norm": 7.077169418334961, |
|
"learning_rate": 9.709368034803892e-05, |
|
"loss": 1.9489, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.13707226198646927, |
|
"grad_norm": 10.102705955505371, |
|
"learning_rate": 9.686124832876328e-05, |
|
"loss": 2.2884, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 0.13726835964310227, |
|
"grad_norm": 6.154596328735352, |
|
"learning_rate": 9.662883328198731e-05, |
|
"loss": 1.1614, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.13746445729973528, |
|
"grad_norm": 8.005276679992676, |
|
"learning_rate": 9.639643646447316e-05, |
|
"loss": 2.5097, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 0.13766055495636828, |
|
"grad_norm": 12.45338249206543, |
|
"learning_rate": 9.616405913288438e-05, |
|
"loss": 0.8686, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.13785665261300128, |
|
"grad_norm": 4.405421257019043, |
|
"learning_rate": 9.593170254377916e-05, |
|
"loss": 0.8499, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 0.1380527502696343, |
|
"grad_norm": 9.178966522216797, |
|
"learning_rate": 9.569936795360363e-05, |
|
"loss": 1.7378, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.1382488479262673, |
|
"grad_norm": 9.432720184326172, |
|
"learning_rate": 9.546705661868484e-05, |
|
"loss": 1.5712, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.1384449455829003, |
|
"grad_norm": 5.602529048919678, |
|
"learning_rate": 9.523476979522411e-05, |
|
"loss": 1.8486, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.1386410432395333, |
|
"grad_norm": 7.334858417510986, |
|
"learning_rate": 9.50025087392902e-05, |
|
"loss": 1.3093, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 0.13883714089616628, |
|
"grad_norm": 16.70349884033203, |
|
"learning_rate": 9.477027470681269e-05, |
|
"loss": 1.8117, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.13903323855279928, |
|
"grad_norm": 4.915411472320557, |
|
"learning_rate": 9.453806895357482e-05, |
|
"loss": 1.296, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 0.1392293362094323, |
|
"grad_norm": 9.790595054626465, |
|
"learning_rate": 9.430589273520703e-05, |
|
"loss": 1.7763, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.1394254338660653, |
|
"grad_norm": 5.754830837249756, |
|
"learning_rate": 9.407374730718e-05, |
|
"loss": 1.2865, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 0.1396215315226983, |
|
"grad_norm": 6.5165534019470215, |
|
"learning_rate": 9.3841633924798e-05, |
|
"loss": 1.0498, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.1398176291793313, |
|
"grad_norm": 4.821907043457031, |
|
"learning_rate": 9.360955384319195e-05, |
|
"loss": 1.1823, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 0.1400137268359643, |
|
"grad_norm": 7.226667881011963, |
|
"learning_rate": 9.337750831731266e-05, |
|
"loss": 1.0176, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.1402098244925973, |
|
"grad_norm": 5.560497760772705, |
|
"learning_rate": 9.314549860192417e-05, |
|
"loss": 1.1645, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.1404059221492303, |
|
"grad_norm": 4.834329128265381, |
|
"learning_rate": 9.291352595159682e-05, |
|
"loss": 1.682, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.14060201980586332, |
|
"grad_norm": 6.791970729827881, |
|
"learning_rate": 9.268159162070058e-05, |
|
"loss": 1.5408, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 0.14079811746249632, |
|
"grad_norm": 5.209107875823975, |
|
"learning_rate": 9.244969686339813e-05, |
|
"loss": 0.6371, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.14099421511912932, |
|
"grad_norm": 7.674073696136475, |
|
"learning_rate": 9.221784293363824e-05, |
|
"loss": 1.9635, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 0.14119031277576233, |
|
"grad_norm": 13.054336547851562, |
|
"learning_rate": 9.198603108514888e-05, |
|
"loss": 1.2977, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.14138641043239533, |
|
"grad_norm": 7.0277299880981445, |
|
"learning_rate": 9.175426257143045e-05, |
|
"loss": 1.3063, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 0.14158250808902834, |
|
"grad_norm": 10.085116386413574, |
|
"learning_rate": 9.152253864574898e-05, |
|
"loss": 1.5895, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.14177860574566134, |
|
"grad_norm": 11.588119506835938, |
|
"learning_rate": 9.129086056112955e-05, |
|
"loss": 1.6528, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 0.14197470340229434, |
|
"grad_norm": 6.269704818725586, |
|
"learning_rate": 9.10592295703492e-05, |
|
"loss": 1.1952, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.14217080105892735, |
|
"grad_norm": 5.680713176727295, |
|
"learning_rate": 9.082764692593039e-05, |
|
"loss": 2.322, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.14236689871556035, |
|
"grad_norm": 6.14839506149292, |
|
"learning_rate": 9.059611388013408e-05, |
|
"loss": 2.6696, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.14256299637219336, |
|
"grad_norm": 13.12667179107666, |
|
"learning_rate": 9.03646316849532e-05, |
|
"loss": 2.0335, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 0.14275909402882636, |
|
"grad_norm": 7.358575820922852, |
|
"learning_rate": 9.01332015921055e-05, |
|
"loss": 0.8538, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.14295519168545937, |
|
"grad_norm": 11.932861328125, |
|
"learning_rate": 8.990182485302713e-05, |
|
"loss": 1.4472, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 0.14315128934209237, |
|
"grad_norm": 8.689105987548828, |
|
"learning_rate": 8.967050271886564e-05, |
|
"loss": 1.8314, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.14334738699872537, |
|
"grad_norm": 5.939968585968018, |
|
"learning_rate": 8.943923644047342e-05, |
|
"loss": 0.8076, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.14354348465535838, |
|
"grad_norm": 8.181587219238281, |
|
"learning_rate": 8.920802726840076e-05, |
|
"loss": 1.4664, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.14373958231199138, |
|
"grad_norm": 7.449463844299316, |
|
"learning_rate": 8.897687645288914e-05, |
|
"loss": 1.0997, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 0.14393567996862439, |
|
"grad_norm": 5.9800848960876465, |
|
"learning_rate": 8.874578524386447e-05, |
|
"loss": 0.9288, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.1441317776252574, |
|
"grad_norm": 8.202332496643066, |
|
"learning_rate": 8.851475489093045e-05, |
|
"loss": 1.5658, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.1443278752818904, |
|
"grad_norm": 4.058600425720215, |
|
"learning_rate": 8.828378664336158e-05, |
|
"loss": 1.1746, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.1445239729385234, |
|
"grad_norm": 5.467014789581299, |
|
"learning_rate": 8.805288175009659e-05, |
|
"loss": 0.9683, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 0.14472007059515637, |
|
"grad_norm": 8.572188377380371, |
|
"learning_rate": 8.782204145973162e-05, |
|
"loss": 1.3502, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.14491616825178938, |
|
"grad_norm": 6.00791072845459, |
|
"learning_rate": 8.759126702051348e-05, |
|
"loss": 1.5011, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 0.14511226590842238, |
|
"grad_norm": 7.9531989097595215, |
|
"learning_rate": 8.736055968033289e-05, |
|
"loss": 2.3835, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.1453083635650554, |
|
"grad_norm": 7.390120029449463, |
|
"learning_rate": 8.712992068671774e-05, |
|
"loss": 1.1469, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 0.1455044612216884, |
|
"grad_norm": 6.503965377807617, |
|
"learning_rate": 8.689935128682635e-05, |
|
"loss": 1.9587, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.1457005588783214, |
|
"grad_norm": 7.921380996704102, |
|
"learning_rate": 8.666885272744073e-05, |
|
"loss": 1.9997, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 0.1458966565349544, |
|
"grad_norm": 7.979074001312256, |
|
"learning_rate": 8.643842625495978e-05, |
|
"loss": 1.4087, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.1460927541915874, |
|
"grad_norm": 5.653082847595215, |
|
"learning_rate": 8.620807311539259e-05, |
|
"loss": 1.3974, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.1462888518482204, |
|
"grad_norm": 7.764763355255127, |
|
"learning_rate": 8.597779455435183e-05, |
|
"loss": 1.4269, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.1464849495048534, |
|
"grad_norm": 11.947178840637207, |
|
"learning_rate": 8.574759181704676e-05, |
|
"loss": 2.4265, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 0.14668104716148642, |
|
"grad_norm": 9.1488037109375, |
|
"learning_rate": 8.551746614827669e-05, |
|
"loss": 2.2206, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.14687714481811942, |
|
"grad_norm": 5.008571147918701, |
|
"learning_rate": 8.528741879242415e-05, |
|
"loss": 2.1277, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 0.14707324247475242, |
|
"grad_norm": 4.040747165679932, |
|
"learning_rate": 8.505745099344827e-05, |
|
"loss": 0.7625, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.14726934013138543, |
|
"grad_norm": 8.826830863952637, |
|
"learning_rate": 8.482756399487793e-05, |
|
"loss": 1.1674, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 0.14746543778801843, |
|
"grad_norm": 4.931051731109619, |
|
"learning_rate": 8.45977590398051e-05, |
|
"loss": 1.1831, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.14766153544465144, |
|
"grad_norm": 5.232216835021973, |
|
"learning_rate": 8.436803737087806e-05, |
|
"loss": 1.0998, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 0.14785763310128444, |
|
"grad_norm": 9.30781364440918, |
|
"learning_rate": 8.413840023029488e-05, |
|
"loss": 2.8714, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 0.14805373075791745, |
|
"grad_norm": 6.524617671966553, |
|
"learning_rate": 8.390884885979639e-05, |
|
"loss": 1.3297, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.14824982841455045, |
|
"grad_norm": 6.3461432456970215, |
|
"learning_rate": 8.367938450065967e-05, |
|
"loss": 1.6513, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.14844592607118345, |
|
"grad_norm": 9.379528999328613, |
|
"learning_rate": 8.345000839369132e-05, |
|
"loss": 1.3968, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 0.14864202372781646, |
|
"grad_norm": 9.503636360168457, |
|
"learning_rate": 8.322072177922073e-05, |
|
"loss": 1.0089, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 0.14883812138444946, |
|
"grad_norm": 4.907662868499756, |
|
"learning_rate": 8.299152589709336e-05, |
|
"loss": 2.4323, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 0.14903421904108247, |
|
"grad_norm": 7.7067084312438965, |
|
"learning_rate": 8.2762421986664e-05, |
|
"loss": 1.4224, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.14923031669771547, |
|
"grad_norm": 5.226773738861084, |
|
"learning_rate": 8.253341128679018e-05, |
|
"loss": 2.0016, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 0.14942641435434847, |
|
"grad_norm": 8.012574195861816, |
|
"learning_rate": 8.230449503582538e-05, |
|
"loss": 1.8039, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 0.14962251201098148, |
|
"grad_norm": 6.705967426300049, |
|
"learning_rate": 8.207567447161231e-05, |
|
"loss": 2.5853, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 0.14981860966761448, |
|
"grad_norm": 9.187888145446777, |
|
"learning_rate": 8.184695083147627e-05, |
|
"loss": 1.0529, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.1500147073242475, |
|
"grad_norm": 4.542299270629883, |
|
"learning_rate": 8.16183253522186e-05, |
|
"loss": 1.2803, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.1502108049808805, |
|
"grad_norm": 5.094648361206055, |
|
"learning_rate": 8.138979927010964e-05, |
|
"loss": 0.9061, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 0.1504069026375135, |
|
"grad_norm": 13.718326568603516, |
|
"learning_rate": 8.116137382088233e-05, |
|
"loss": 2.0593, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 0.15060300029414647, |
|
"grad_norm": 6.143137454986572, |
|
"learning_rate": 8.093305023972546e-05, |
|
"loss": 0.8494, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.15079909795077948, |
|
"grad_norm": 6.3516154289245605, |
|
"learning_rate": 8.070482976127703e-05, |
|
"loss": 2.214, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 0.15099519560741248, |
|
"grad_norm": 9.909046173095703, |
|
"learning_rate": 8.047671361961741e-05, |
|
"loss": 0.928, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.15119129326404548, |
|
"grad_norm": 6.7013373374938965, |
|
"learning_rate": 8.024870304826286e-05, |
|
"loss": 1.2937, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 0.1513873909206785, |
|
"grad_norm": 7.552654266357422, |
|
"learning_rate": 8.002079928015867e-05, |
|
"loss": 2.3988, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.1515834885773115, |
|
"grad_norm": 4.806221961975098, |
|
"learning_rate": 7.97930035476728e-05, |
|
"loss": 1.9656, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 0.1517795862339445, |
|
"grad_norm": 13.320466041564941, |
|
"learning_rate": 7.956531708258887e-05, |
|
"loss": 2.1333, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.1519756838905775, |
|
"grad_norm": 7.802790641784668, |
|
"learning_rate": 7.933774111609964e-05, |
|
"loss": 0.766, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.1521717815472105, |
|
"grad_norm": 9.146486282348633, |
|
"learning_rate": 7.911027687880037e-05, |
|
"loss": 1.0215, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.1523678792038435, |
|
"grad_norm": 5.029911518096924, |
|
"learning_rate": 7.888292560068226e-05, |
|
"loss": 0.6491, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 0.1525639768604765, |
|
"grad_norm": 16.61541175842285, |
|
"learning_rate": 7.865568851112559e-05, |
|
"loss": 2.3993, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 0.15276007451710952, |
|
"grad_norm": 8.573732376098633, |
|
"learning_rate": 7.842856683889321e-05, |
|
"loss": 1.5773, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 0.15295617217374252, |
|
"grad_norm": 7.390541076660156, |
|
"learning_rate": 7.820156181212379e-05, |
|
"loss": 0.6461, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.15315226983037553, |
|
"grad_norm": 4.455470085144043, |
|
"learning_rate": 7.79746746583254e-05, |
|
"loss": 1.9557, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 0.15334836748700853, |
|
"grad_norm": 9.000947952270508, |
|
"learning_rate": 7.774790660436858e-05, |
|
"loss": 2.5368, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 0.15354446514364153, |
|
"grad_norm": 7.608001232147217, |
|
"learning_rate": 7.752125887647992e-05, |
|
"loss": 1.607, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 0.15374056280027454, |
|
"grad_norm": 4.594658851623535, |
|
"learning_rate": 7.729473270023537e-05, |
|
"loss": 0.9085, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.15393666045690754, |
|
"grad_norm": 4.750558853149414, |
|
"learning_rate": 7.706832930055357e-05, |
|
"loss": 1.8871, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.15413275811354055, |
|
"grad_norm": 6.5569987297058105, |
|
"learning_rate": 7.684204990168925e-05, |
|
"loss": 1.4386, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 0.15432885577017355, |
|
"grad_norm": 4.655035495758057, |
|
"learning_rate": 7.66158957272266e-05, |
|
"loss": 1.9621, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 0.15452495342680655, |
|
"grad_norm": 9.556004524230957, |
|
"learning_rate": 7.638986800007277e-05, |
|
"loss": 0.9825, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.15472105108343956, |
|
"grad_norm": 4.108066082000732, |
|
"learning_rate": 7.616396794245107e-05, |
|
"loss": 2.0709, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 0.15491714874007256, |
|
"grad_norm": 7.94503116607666, |
|
"learning_rate": 7.593819677589445e-05, |
|
"loss": 1.1699, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.15511324639670557, |
|
"grad_norm": 6.220666408538818, |
|
"learning_rate": 7.571255572123889e-05, |
|
"loss": 1.1849, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 0.15530934405333857, |
|
"grad_norm": 6.629292964935303, |
|
"learning_rate": 7.54870459986169e-05, |
|
"loss": 1.3573, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.15550544170997158, |
|
"grad_norm": 4.624551296234131, |
|
"learning_rate": 7.52616688274507e-05, |
|
"loss": 1.1043, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 0.15570153936660458, |
|
"grad_norm": 8.356186866760254, |
|
"learning_rate": 7.503642542644581e-05, |
|
"loss": 1.3448, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 0.15589763702323758, |
|
"grad_norm": 5.954531192779541, |
|
"learning_rate": 7.481131701358434e-05, |
|
"loss": 1.2289, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.1560937346798706, |
|
"grad_norm": 8.886476516723633, |
|
"learning_rate": 7.458634480611864e-05, |
|
"loss": 2.0398, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.1562898323365036, |
|
"grad_norm": 5.7918524742126465, |
|
"learning_rate": 7.436151002056433e-05, |
|
"loss": 1.4364, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 0.15648592999313657, |
|
"grad_norm": 9.434075355529785, |
|
"learning_rate": 7.413681387269406e-05, |
|
"loss": 1.8601, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 0.15668202764976957, |
|
"grad_norm": 5.067231178283691, |
|
"learning_rate": 7.391225757753068e-05, |
|
"loss": 1.414, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 0.15687812530640258, |
|
"grad_norm": 6.89833927154541, |
|
"learning_rate": 7.368784234934104e-05, |
|
"loss": 1.6815, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.15707422296303558, |
|
"grad_norm": 15.612439155578613, |
|
"learning_rate": 7.346356940162895e-05, |
|
"loss": 3.0339, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 0.15727032061966859, |
|
"grad_norm": 5.933378219604492, |
|
"learning_rate": 7.323943994712895e-05, |
|
"loss": 1.6531, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 0.1574664182763016, |
|
"grad_norm": 4.7962727546691895, |
|
"learning_rate": 7.301545519779964e-05, |
|
"loss": 1.8519, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 0.1576625159329346, |
|
"grad_norm": 7.490151882171631, |
|
"learning_rate": 7.279161636481715e-05, |
|
"loss": 1.429, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 0.1578586135895676, |
|
"grad_norm": 10.799139976501465, |
|
"learning_rate": 7.256792465856858e-05, |
|
"loss": 1.391, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.1580547112462006, |
|
"grad_norm": 4.731945514678955, |
|
"learning_rate": 7.23443812886454e-05, |
|
"loss": 1.3097, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 0.1582508089028336, |
|
"grad_norm": 6.669147491455078, |
|
"learning_rate": 7.21209874638371e-05, |
|
"loss": 2.5443, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 0.1584469065594666, |
|
"grad_norm": 7.046877861022949, |
|
"learning_rate": 7.189774439212442e-05, |
|
"loss": 1.5809, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 0.15864300421609961, |
|
"grad_norm": 5.848972320556641, |
|
"learning_rate": 7.167465328067289e-05, |
|
"loss": 1.6708, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 0.15883910187273262, |
|
"grad_norm": 6.136227607727051, |
|
"learning_rate": 7.145171533582636e-05, |
|
"loss": 1.2122, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.15903519952936562, |
|
"grad_norm": 4.7808709144592285, |
|
"learning_rate": 7.122893176310052e-05, |
|
"loss": 1.8965, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 0.15923129718599863, |
|
"grad_norm": 4.156777381896973, |
|
"learning_rate": 7.100630376717621e-05, |
|
"loss": 1.2742, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.15942739484263163, |
|
"grad_norm": 5.69835090637207, |
|
"learning_rate": 7.078383255189304e-05, |
|
"loss": 0.7664, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 0.15962349249926464, |
|
"grad_norm": 9.707277297973633, |
|
"learning_rate": 7.056151932024282e-05, |
|
"loss": 1.3492, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 0.15981959015589764, |
|
"grad_norm": 10.071001052856445, |
|
"learning_rate": 7.033936527436318e-05, |
|
"loss": 1.2432, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.16001568781253064, |
|
"grad_norm": 6.86775541305542, |
|
"learning_rate": 7.011737161553081e-05, |
|
"loss": 1.7583, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.16021178546916365, |
|
"grad_norm": 5.526086330413818, |
|
"learning_rate": 6.989553954415524e-05, |
|
"loss": 1.5891, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 0.16040788312579665, |
|
"grad_norm": 26.293426513671875, |
|
"learning_rate": 6.967387025977215e-05, |
|
"loss": 1.7835, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 0.16060398078242966, |
|
"grad_norm": 5.713183879852295, |
|
"learning_rate": 6.945236496103707e-05, |
|
"loss": 1.7831, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 0.16080007843906266, |
|
"grad_norm": 7.669525146484375, |
|
"learning_rate": 6.92310248457187e-05, |
|
"loss": 1.8961, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.16099617609569566, |
|
"grad_norm": 7.351653099060059, |
|
"learning_rate": 6.900985111069256e-05, |
|
"loss": 1.8533, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 0.16119227375232867, |
|
"grad_norm": 6.009294509887695, |
|
"learning_rate": 6.878884495193448e-05, |
|
"loss": 1.7683, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 0.16138837140896167, |
|
"grad_norm": 5.90309476852417, |
|
"learning_rate": 6.856800756451414e-05, |
|
"loss": 1.7174, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 0.16158446906559468, |
|
"grad_norm": 6.903116703033447, |
|
"learning_rate": 6.83473401425886e-05, |
|
"loss": 1.6063, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 0.16178056672222768, |
|
"grad_norm": 5.747986793518066, |
|
"learning_rate": 6.812684387939585e-05, |
|
"loss": 1.7352, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.16197666437886069, |
|
"grad_norm": 4.83687686920166, |
|
"learning_rate": 6.79065199672484e-05, |
|
"loss": 1.0662, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 0.1621727620354937, |
|
"grad_norm": 4.761837482452393, |
|
"learning_rate": 6.768636959752669e-05, |
|
"loss": 1.4324, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 0.16236885969212667, |
|
"grad_norm": 4.991310119628906, |
|
"learning_rate": 6.746639396067283e-05, |
|
"loss": 0.6861, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 0.16256495734875967, |
|
"grad_norm": 9.824933052062988, |
|
"learning_rate": 6.724659424618401e-05, |
|
"loss": 1.2527, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 0.16276105500539267, |
|
"grad_norm": 9.043631553649902, |
|
"learning_rate": 6.702697164260626e-05, |
|
"loss": 1.2587, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.16295715266202568, |
|
"grad_norm": 6.001374244689941, |
|
"learning_rate": 6.680752733752777e-05, |
|
"loss": 1.0727, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 0.16315325031865868, |
|
"grad_norm": 7.015263557434082, |
|
"learning_rate": 6.658826251757264e-05, |
|
"loss": 0.963, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.1633493479752917, |
|
"grad_norm": 16.830978393554688, |
|
"learning_rate": 6.63691783683944e-05, |
|
"loss": 1.3392, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 0.1635454456319247, |
|
"grad_norm": 12.247193336486816, |
|
"learning_rate": 6.61502760746697e-05, |
|
"loss": 2.2754, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 0.1637415432885577, |
|
"grad_norm": 3.836216449737549, |
|
"learning_rate": 6.593155682009176e-05, |
|
"loss": 2.4028, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.1639376409451907, |
|
"grad_norm": 5.886529445648193, |
|
"learning_rate": 6.571302178736404e-05, |
|
"loss": 1.0402, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 0.1641337386018237, |
|
"grad_norm": 5.377973556518555, |
|
"learning_rate": 6.549467215819378e-05, |
|
"loss": 1.4784, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 0.1643298362584567, |
|
"grad_norm": 4.867081165313721, |
|
"learning_rate": 6.527650911328585e-05, |
|
"loss": 1.9024, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 0.1645259339150897, |
|
"grad_norm": 8.56846809387207, |
|
"learning_rate": 6.505853383233596e-05, |
|
"loss": 1.2514, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 0.16472203157172272, |
|
"grad_norm": 14.424854278564453, |
|
"learning_rate": 6.484074749402467e-05, |
|
"loss": 1.2373, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.16491812922835572, |
|
"grad_norm": 5.4463067054748535, |
|
"learning_rate": 6.462315127601071e-05, |
|
"loss": 1.5188, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 0.16511422688498872, |
|
"grad_norm": 5.752686500549316, |
|
"learning_rate": 6.440574635492493e-05, |
|
"loss": 1.0012, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 0.16531032454162173, |
|
"grad_norm": 5.5835442543029785, |
|
"learning_rate": 6.418853390636364e-05, |
|
"loss": 1.8797, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 0.16550642219825473, |
|
"grad_norm": 8.692926406860352, |
|
"learning_rate": 6.39715151048823e-05, |
|
"loss": 1.2265, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 0.16570251985488774, |
|
"grad_norm": 5.524386882781982, |
|
"learning_rate": 6.375469112398946e-05, |
|
"loss": 0.7208, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.16589861751152074, |
|
"grad_norm": 5.7656145095825195, |
|
"learning_rate": 6.353806313614001e-05, |
|
"loss": 1.2716, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 0.16609471516815374, |
|
"grad_norm": 12.103293418884277, |
|
"learning_rate": 6.332163231272911e-05, |
|
"loss": 3.2642, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 0.16629081282478675, |
|
"grad_norm": 22.349411010742188, |
|
"learning_rate": 6.310539982408568e-05, |
|
"loss": 2.3537, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 0.16648691048141975, |
|
"grad_norm": 7.782928466796875, |
|
"learning_rate": 6.288936683946633e-05, |
|
"loss": 1.1905, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 0.16668300813805276, |
|
"grad_norm": 6.026412487030029, |
|
"learning_rate": 6.267353452704876e-05, |
|
"loss": 1.1583, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.16687910579468576, |
|
"grad_norm": 10.18653392791748, |
|
"learning_rate": 6.245790405392553e-05, |
|
"loss": 2.5888, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 0.16707520345131877, |
|
"grad_norm": 6.149682998657227, |
|
"learning_rate": 6.224247658609781e-05, |
|
"loss": 1.0514, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 0.16727130110795177, |
|
"grad_norm": 5.528243541717529, |
|
"learning_rate": 6.202725328846909e-05, |
|
"loss": 0.8576, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 0.16746739876458477, |
|
"grad_norm": 7.682480812072754, |
|
"learning_rate": 6.181223532483875e-05, |
|
"loss": 1.5959, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 0.16766349642121778, |
|
"grad_norm": 4.653623580932617, |
|
"learning_rate": 6.159742385789588e-05, |
|
"loss": 0.7513, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.16785959407785078, |
|
"grad_norm": 18.469745635986328, |
|
"learning_rate": 6.138282004921295e-05, |
|
"loss": 3.1673, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 0.1680556917344838, |
|
"grad_norm": 4.403334140777588, |
|
"learning_rate": 6.116842505923955e-05, |
|
"loss": 0.9741, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 0.16825178939111676, |
|
"grad_norm": 5.073941230773926, |
|
"learning_rate": 6.095424004729611e-05, |
|
"loss": 0.8638, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 0.16844788704774977, |
|
"grad_norm": 6.09031867980957, |
|
"learning_rate": 6.0740266171567606e-05, |
|
"loss": 0.7445, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 0.16864398470438277, |
|
"grad_norm": 5.235655784606934, |
|
"learning_rate": 6.0526504589097254e-05, |
|
"loss": 1.79, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.16884008236101578, |
|
"grad_norm": 5.974957466125488, |
|
"learning_rate": 6.031295645578049e-05, |
|
"loss": 2.638, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 0.16903618001764878, |
|
"grad_norm": 4.034956932067871, |
|
"learning_rate": 6.009962292635838e-05, |
|
"loss": 1.2214, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 0.16923227767428178, |
|
"grad_norm": 5.100616931915283, |
|
"learning_rate": 5.988650515441156e-05, |
|
"loss": 1.7694, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 0.1694283753309148, |
|
"grad_norm": 6.878417491912842, |
|
"learning_rate": 5.967360429235407e-05, |
|
"loss": 1.2678, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.1696244729875478, |
|
"grad_norm": 6.435969352722168, |
|
"learning_rate": 5.946092149142692e-05, |
|
"loss": 1.449, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.1698205706441808, |
|
"grad_norm": 5.022701740264893, |
|
"learning_rate": 5.924845790169209e-05, |
|
"loss": 0.8444, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 0.1700166683008138, |
|
"grad_norm": 4.173498153686523, |
|
"learning_rate": 5.903621467202608e-05, |
|
"loss": 1.089, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 0.1702127659574468, |
|
"grad_norm": 6.396774768829346, |
|
"learning_rate": 5.88241929501139e-05, |
|
"loss": 1.1751, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 0.1704088636140798, |
|
"grad_norm": 4.628440856933594, |
|
"learning_rate": 5.861239388244273e-05, |
|
"loss": 1.4271, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 0.1706049612707128, |
|
"grad_norm": 13.596205711364746, |
|
"learning_rate": 5.8400818614295806e-05, |
|
"loss": 0.7325, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.17080105892734582, |
|
"grad_norm": 8.975894927978516, |
|
"learning_rate": 5.818946828974607e-05, |
|
"loss": 1.8689, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 0.17099715658397882, |
|
"grad_norm": 5.900057315826416, |
|
"learning_rate": 5.797834405165035e-05, |
|
"loss": 0.8738, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 0.17119325424061183, |
|
"grad_norm": 6.28643798828125, |
|
"learning_rate": 5.776744704164272e-05, |
|
"loss": 2.4116, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 0.17138935189724483, |
|
"grad_norm": 6.7851033210754395, |
|
"learning_rate": 5.7556778400128583e-05, |
|
"loss": 1.5964, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 0.17158544955387783, |
|
"grad_norm": 12.063843727111816, |
|
"learning_rate": 5.7346339266278505e-05, |
|
"loss": 1.9227, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.17178154721051084, |
|
"grad_norm": 6.9710469245910645, |
|
"learning_rate": 5.713613077802199e-05, |
|
"loss": 1.3143, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 0.17197764486714384, |
|
"grad_norm": 10.355106353759766, |
|
"learning_rate": 5.6926154072041295e-05, |
|
"loss": 2.005, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 0.17217374252377685, |
|
"grad_norm": 8.473671913146973, |
|
"learning_rate": 5.671641028376546e-05, |
|
"loss": 1.4054, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 0.17236984018040985, |
|
"grad_norm": 6.026031494140625, |
|
"learning_rate": 5.650690054736396e-05, |
|
"loss": 0.9981, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 0.17256593783704285, |
|
"grad_norm": 7.832981109619141, |
|
"learning_rate": 5.629762599574064e-05, |
|
"loss": 1.318, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.17276203549367586, |
|
"grad_norm": 8.796941757202148, |
|
"learning_rate": 5.6088587760527656e-05, |
|
"loss": 1.7696, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 0.17295813315030886, |
|
"grad_norm": 5.9997124671936035, |
|
"learning_rate": 5.587978697207927e-05, |
|
"loss": 1.5515, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 0.17315423080694187, |
|
"grad_norm": 10.373340606689453, |
|
"learning_rate": 5.5671224759465775e-05, |
|
"loss": 1.735, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 0.17335032846357487, |
|
"grad_norm": 6.619256973266602, |
|
"learning_rate": 5.546290225046744e-05, |
|
"loss": 1.3402, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 0.17354642612020788, |
|
"grad_norm": 7.469152927398682, |
|
"learning_rate": 5.5254820571568325e-05, |
|
"loss": 1.0792, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.17374252377684088, |
|
"grad_norm": 4.927844524383545, |
|
"learning_rate": 5.504698084795014e-05, |
|
"loss": 1.4485, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 0.17393862143347388, |
|
"grad_norm": 4.025864601135254, |
|
"learning_rate": 5.483938420348642e-05, |
|
"loss": 0.9515, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 0.17413471909010686, |
|
"grad_norm": 14.059304237365723, |
|
"learning_rate": 5.4632031760736146e-05, |
|
"loss": 1.9276, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 0.17433081674673986, |
|
"grad_norm": 6.336188793182373, |
|
"learning_rate": 5.4424924640937824e-05, |
|
"loss": 1.7752, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 0.17452691440337287, |
|
"grad_norm": 7.820261478424072, |
|
"learning_rate": 5.421806396400339e-05, |
|
"loss": 1.4182, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.17472301206000587, |
|
"grad_norm": 3.8585610389709473, |
|
"learning_rate": 5.4011450848512225e-05, |
|
"loss": 1.1074, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 0.17491910971663888, |
|
"grad_norm": 5.7243123054504395, |
|
"learning_rate": 5.3805086411704985e-05, |
|
"loss": 2.2836, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 0.17511520737327188, |
|
"grad_norm": 3.6346094608306885, |
|
"learning_rate": 5.3598971769477655e-05, |
|
"loss": 1.2185, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 0.17531130502990488, |
|
"grad_norm": 5.283199310302734, |
|
"learning_rate": 5.339310803637538e-05, |
|
"loss": 1.0693, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 0.1755074026865379, |
|
"grad_norm": 5.19635009765625, |
|
"learning_rate": 5.318749632558674e-05, |
|
"loss": 1.3408, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.1757035003431709, |
|
"grad_norm": 7.379054546356201, |
|
"learning_rate": 5.298213774893735e-05, |
|
"loss": 1.6441, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.1758995979998039, |
|
"grad_norm": 10.650435447692871, |
|
"learning_rate": 5.277703341688407e-05, |
|
"loss": 1.1186, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 0.1760956956564369, |
|
"grad_norm": 4.219560623168945, |
|
"learning_rate": 5.257218443850895e-05, |
|
"loss": 1.1465, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 0.1762917933130699, |
|
"grad_norm": 9.091025352478027, |
|
"learning_rate": 5.236759192151336e-05, |
|
"loss": 2.0643, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 0.1764878909697029, |
|
"grad_norm": 6.650880336761475, |
|
"learning_rate": 5.2163256972211714e-05, |
|
"loss": 1.6255, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.1766839886263359, |
|
"grad_norm": 5.008049011230469, |
|
"learning_rate": 5.195918069552572e-05, |
|
"loss": 1.0065, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 0.17688008628296892, |
|
"grad_norm": 6.407417297363281, |
|
"learning_rate": 5.175536419497831e-05, |
|
"loss": 0.7189, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 0.17707618393960192, |
|
"grad_norm": 4.843297004699707, |
|
"learning_rate": 5.155180857268779e-05, |
|
"loss": 1.5639, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 0.17727228159623493, |
|
"grad_norm": 3.7099854946136475, |
|
"learning_rate": 5.1348514929361714e-05, |
|
"loss": 1.0337, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 0.17746837925286793, |
|
"grad_norm": 7.940112590789795, |
|
"learning_rate": 5.114548436429098e-05, |
|
"loss": 1.9378, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.17766447690950093, |
|
"grad_norm": 6.462250232696533, |
|
"learning_rate": 5.0942717975344035e-05, |
|
"loss": 1.2462, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 0.17786057456613394, |
|
"grad_norm": 3.7596821784973145, |
|
"learning_rate": 5.074021685896072e-05, |
|
"loss": 0.5025, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 0.17805667222276694, |
|
"grad_norm": 5.532608509063721, |
|
"learning_rate": 5.0537982110146464e-05, |
|
"loss": 1.2263, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 0.17825276987939995, |
|
"grad_norm": 6.286501884460449, |
|
"learning_rate": 5.033601482246628e-05, |
|
"loss": 0.9899, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 0.17844886753603295, |
|
"grad_norm": 8.714296340942383, |
|
"learning_rate": 5.013431608803909e-05, |
|
"loss": 1.0626, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.17864496519266596, |
|
"grad_norm": 6.827104568481445, |
|
"learning_rate": 4.993288699753146e-05, |
|
"loss": 1.1741, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 0.17884106284929896, |
|
"grad_norm": 7.155247688293457, |
|
"learning_rate": 4.973172864015193e-05, |
|
"loss": 1.9644, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 0.17903716050593196, |
|
"grad_norm": 4.7466139793396, |
|
"learning_rate": 4.953084210364508e-05, |
|
"loss": 1.2649, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 0.17923325816256497, |
|
"grad_norm": 6.292973041534424, |
|
"learning_rate": 4.933022847428571e-05, |
|
"loss": 2.3925, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 0.17942935581919797, |
|
"grad_norm": 6.9081950187683105, |
|
"learning_rate": 4.912988883687283e-05, |
|
"loss": 2.7031, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.17962545347583098, |
|
"grad_norm": 10.206450462341309, |
|
"learning_rate": 4.892982427472387e-05, |
|
"loss": 1.2479, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 0.17982155113246398, |
|
"grad_norm": 9.753491401672363, |
|
"learning_rate": 4.873003586966881e-05, |
|
"loss": 1.1683, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 0.18001764878909696, |
|
"grad_norm": 5.422173023223877, |
|
"learning_rate": 4.853052470204438e-05, |
|
"loss": 1.0117, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 0.18021374644572996, |
|
"grad_norm": 4.505789756774902, |
|
"learning_rate": 4.8331291850688096e-05, |
|
"loss": 0.7031, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 0.18040984410236297, |
|
"grad_norm": 6.756499290466309, |
|
"learning_rate": 4.813233839293265e-05, |
|
"loss": 1.5295, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.18060594175899597, |
|
"grad_norm": 7.535793781280518, |
|
"learning_rate": 4.7933665404599804e-05, |
|
"loss": 1.2856, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 0.18080203941562897, |
|
"grad_norm": 8.04592227935791, |
|
"learning_rate": 4.773527395999474e-05, |
|
"loss": 0.9891, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 0.18099813707226198, |
|
"grad_norm": 5.000694274902344, |
|
"learning_rate": 4.753716513190024e-05, |
|
"loss": 1.3128, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 0.18119423472889498, |
|
"grad_norm": 8.065174102783203, |
|
"learning_rate": 4.7339339991570876e-05, |
|
"loss": 1.9393, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 0.18139033238552799, |
|
"grad_norm": 6.1632256507873535, |
|
"learning_rate": 4.714179960872712e-05, |
|
"loss": 1.7183, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.181586430042161, |
|
"grad_norm": 6.445769786834717, |
|
"learning_rate": 4.69445450515498e-05, |
|
"loss": 2.6642, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 0.181782527698794, |
|
"grad_norm": 8.51395320892334, |
|
"learning_rate": 4.674757738667405e-05, |
|
"loss": 1.2191, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 0.181978625355427, |
|
"grad_norm": 9.159540176391602, |
|
"learning_rate": 4.6550897679183625e-05, |
|
"loss": 2.1541, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 0.18217472301206, |
|
"grad_norm": 7.939080715179443, |
|
"learning_rate": 4.635450699260535e-05, |
|
"loss": 1.4875, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 0.182370820668693, |
|
"grad_norm": 11.180910110473633, |
|
"learning_rate": 4.615840638890305e-05, |
|
"loss": 1.9817, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.182566918325326, |
|
"grad_norm": 7.014312744140625, |
|
"learning_rate": 4.596259692847198e-05, |
|
"loss": 1.2057, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 0.18276301598195901, |
|
"grad_norm": 3.89388108253479, |
|
"learning_rate": 4.5767079670133064e-05, |
|
"loss": 1.2204, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 0.18295911363859202, |
|
"grad_norm": 7.551772594451904, |
|
"learning_rate": 4.557185567112725e-05, |
|
"loss": 1.271, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 0.18315521129522502, |
|
"grad_norm": 5.167237758636475, |
|
"learning_rate": 4.537692598710962e-05, |
|
"loss": 2.0885, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 0.18335130895185803, |
|
"grad_norm": 7.082968235015869, |
|
"learning_rate": 4.5182291672143796e-05, |
|
"loss": 2.0684, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.18354740660849103, |
|
"grad_norm": 5.6844801902771, |
|
"learning_rate": 4.498795377869622e-05, |
|
"loss": 1.2413, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 0.18374350426512404, |
|
"grad_norm": 8.226604461669922, |
|
"learning_rate": 4.479391335763054e-05, |
|
"loss": 1.7761, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 0.18393960192175704, |
|
"grad_norm": 6.878251075744629, |
|
"learning_rate": 4.4600171458201735e-05, |
|
"loss": 1.4162, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 0.18413569957839004, |
|
"grad_norm": 6.791055202484131, |
|
"learning_rate": 4.440672912805063e-05, |
|
"loss": 1.3623, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 0.18433179723502305, |
|
"grad_norm": 4.67439079284668, |
|
"learning_rate": 4.4213587413198085e-05, |
|
"loss": 0.9896, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.18452789489165605, |
|
"grad_norm": 6.860576629638672, |
|
"learning_rate": 4.402074735803955e-05, |
|
"loss": 1.7911, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 0.18472399254828906, |
|
"grad_norm": 5.80997896194458, |
|
"learning_rate": 4.382821000533913e-05, |
|
"loss": 2.8923, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 0.18492009020492206, |
|
"grad_norm": 5.034128189086914, |
|
"learning_rate": 4.3635976396224186e-05, |
|
"loss": 1.7938, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 0.18511618786155506, |
|
"grad_norm": 4.669545650482178, |
|
"learning_rate": 4.3444047570179525e-05, |
|
"loss": 0.8484, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 0.18531228551818807, |
|
"grad_norm": 6.147574424743652, |
|
"learning_rate": 4.3252424565042017e-05, |
|
"loss": 1.1467, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.18550838317482107, |
|
"grad_norm": 21.214567184448242, |
|
"learning_rate": 4.306110841699471e-05, |
|
"loss": 3.0427, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 0.18570448083145408, |
|
"grad_norm": 5.828713893890381, |
|
"learning_rate": 4.2870100160561344e-05, |
|
"loss": 0.9022, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 0.18590057848808705, |
|
"grad_norm": 8.350537300109863, |
|
"learning_rate": 4.267940082860088e-05, |
|
"loss": 1.5416, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 0.18609667614472006, |
|
"grad_norm": 3.421091318130493, |
|
"learning_rate": 4.2489011452301705e-05, |
|
"loss": 0.7294, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 0.18629277380135306, |
|
"grad_norm": 5.414788246154785, |
|
"learning_rate": 4.229893306117616e-05, |
|
"loss": 0.9339, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.18648887145798607, |
|
"grad_norm": 4.0585761070251465, |
|
"learning_rate": 4.210916668305496e-05, |
|
"loss": 0.9102, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 0.18668496911461907, |
|
"grad_norm": 13.428049087524414, |
|
"learning_rate": 4.1919713344081704e-05, |
|
"loss": 2.287, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 0.18688106677125207, |
|
"grad_norm": 7.367785453796387, |
|
"learning_rate": 4.17305740687072e-05, |
|
"loss": 1.5482, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 0.18707716442788508, |
|
"grad_norm": 4.9446635246276855, |
|
"learning_rate": 4.154174987968398e-05, |
|
"loss": 1.5513, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 0.18727326208451808, |
|
"grad_norm": 6.660252094268799, |
|
"learning_rate": 4.135324179806078e-05, |
|
"loss": 1.6624, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.1874693597411511, |
|
"grad_norm": 8.531050682067871, |
|
"learning_rate": 4.11650508431771e-05, |
|
"loss": 1.58, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 0.1876654573977841, |
|
"grad_norm": 5.770985126495361, |
|
"learning_rate": 4.09771780326575e-05, |
|
"loss": 1.2329, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 0.1878615550544171, |
|
"grad_norm": 6.347046375274658, |
|
"learning_rate": 4.0789624382406264e-05, |
|
"loss": 1.3816, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 0.1880576527110501, |
|
"grad_norm": 10.09997272491455, |
|
"learning_rate": 4.060239090660181e-05, |
|
"loss": 1.6532, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 0.1882537503676831, |
|
"grad_norm": 5.894500255584717, |
|
"learning_rate": 4.041547861769126e-05, |
|
"loss": 2.0952, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.1884498480243161, |
|
"grad_norm": 3.906419038772583, |
|
"learning_rate": 4.0228888526384936e-05, |
|
"loss": 1.5555, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 0.1886459456809491, |
|
"grad_norm": 6.6089630126953125, |
|
"learning_rate": 4.004262164165098e-05, |
|
"loss": 2.0901, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 0.18884204333758212, |
|
"grad_norm": 5.344242095947266, |
|
"learning_rate": 3.985667897070974e-05, |
|
"loss": 2.2121, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 0.18903814099421512, |
|
"grad_norm": 3.924384117126465, |
|
"learning_rate": 3.967106151902842e-05, |
|
"loss": 2.3173, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 0.18923423865084812, |
|
"grad_norm": 5.392980098724365, |
|
"learning_rate": 3.948577029031566e-05, |
|
"loss": 1.3514, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.18943033630748113, |
|
"grad_norm": 5.208516597747803, |
|
"learning_rate": 3.930080628651607e-05, |
|
"loss": 1.1333, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 0.18962643396411413, |
|
"grad_norm": 8.4103364944458, |
|
"learning_rate": 3.9116170507804784e-05, |
|
"loss": 3.0624, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 0.18982253162074714, |
|
"grad_norm": 4.458950996398926, |
|
"learning_rate": 3.89318639525822e-05, |
|
"loss": 1.974, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 0.19001862927738014, |
|
"grad_norm": 4.55155086517334, |
|
"learning_rate": 3.874788761746836e-05, |
|
"loss": 1.2587, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 0.19021472693401315, |
|
"grad_norm": 3.517223834991455, |
|
"learning_rate": 3.8564242497297686e-05, |
|
"loss": 0.9581, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.19041082459064615, |
|
"grad_norm": 6.368021011352539, |
|
"learning_rate": 3.8380929585113666e-05, |
|
"loss": 1.6365, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 0.19060692224727915, |
|
"grad_norm": 5.153208255767822, |
|
"learning_rate": 3.819794987216333e-05, |
|
"loss": 0.9107, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 0.19080301990391216, |
|
"grad_norm": 9.487665176391602, |
|
"learning_rate": 3.801530434789199e-05, |
|
"loss": 2.9295, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 0.19099911756054516, |
|
"grad_norm": 10.716341018676758, |
|
"learning_rate": 3.783299399993781e-05, |
|
"loss": 1.9429, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 0.19119521521717817, |
|
"grad_norm": 4.834690093994141, |
|
"learning_rate": 3.7651019814126654e-05, |
|
"loss": 1.7383, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.19139131287381117, |
|
"grad_norm": 4.203547954559326, |
|
"learning_rate": 3.74693827744665e-05, |
|
"loss": 0.7084, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 0.19158741053044417, |
|
"grad_norm": 8.38505744934082, |
|
"learning_rate": 3.7288083863142274e-05, |
|
"loss": 2.7231, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 0.19178350818707715, |
|
"grad_norm": 6.748301029205322, |
|
"learning_rate": 3.710712406051049e-05, |
|
"loss": 1.3249, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 0.19197960584371015, |
|
"grad_norm": 5.358412265777588, |
|
"learning_rate": 3.692650434509404e-05, |
|
"loss": 0.7995, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 0.19217570350034316, |
|
"grad_norm": 3.909745693206787, |
|
"learning_rate": 3.674622569357673e-05, |
|
"loss": 2.3175, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.19237180115697616, |
|
"grad_norm": 11.555536270141602, |
|
"learning_rate": 3.6566289080798153e-05, |
|
"loss": 1.7112, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 0.19256789881360917, |
|
"grad_norm": 10.718825340270996, |
|
"learning_rate": 3.638669547974831e-05, |
|
"loss": 1.1124, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 0.19276399647024217, |
|
"grad_norm": 4.483185291290283, |
|
"learning_rate": 3.6207445861562494e-05, |
|
"loss": 1.85, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 0.19296009412687518, |
|
"grad_norm": 4.881377696990967, |
|
"learning_rate": 3.602854119551585e-05, |
|
"loss": 0.8783, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 0.19315619178350818, |
|
"grad_norm": 5.172372817993164, |
|
"learning_rate": 3.584998244901826e-05, |
|
"loss": 1.1851, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.19335228944014118, |
|
"grad_norm": 8.540918350219727, |
|
"learning_rate": 3.567177058760905e-05, |
|
"loss": 0.8138, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 0.1935483870967742, |
|
"grad_norm": 10.905487060546875, |
|
"learning_rate": 3.549390657495188e-05, |
|
"loss": 2.8036, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 0.1937444847534072, |
|
"grad_norm": 6.448824405670166, |
|
"learning_rate": 3.531639137282938e-05, |
|
"loss": 2.4083, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 0.1939405824100402, |
|
"grad_norm": 4.409211158752441, |
|
"learning_rate": 3.5139225941138e-05, |
|
"loss": 1.2686, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 0.1941366800666732, |
|
"grad_norm": 4.925105094909668, |
|
"learning_rate": 3.496241123788294e-05, |
|
"loss": 3.3955, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.1943327777233062, |
|
"grad_norm": 6.541469097137451, |
|
"learning_rate": 3.478594821917279e-05, |
|
"loss": 1.4906, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 0.1945288753799392, |
|
"grad_norm": 7.749048233032227, |
|
"learning_rate": 3.460983783921443e-05, |
|
"loss": 1.7907, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 0.1947249730365722, |
|
"grad_norm": 4.833098888397217, |
|
"learning_rate": 3.44340810503079e-05, |
|
"loss": 1.2777, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 0.19492107069320522, |
|
"grad_norm": 5.138740062713623, |
|
"learning_rate": 3.425867880284129e-05, |
|
"loss": 1.7267, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 0.19511716834983822, |
|
"grad_norm": 8.098197937011719, |
|
"learning_rate": 3.4083632045285466e-05, |
|
"loss": 1.7961, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.19531326600647123, |
|
"grad_norm": 4.534963130950928, |
|
"learning_rate": 3.390894172418903e-05, |
|
"loss": 1.773, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 0.19550936366310423, |
|
"grad_norm": 9.311367988586426, |
|
"learning_rate": 3.373460878417315e-05, |
|
"loss": 1.2173, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 0.19570546131973723, |
|
"grad_norm": 4.8790388107299805, |
|
"learning_rate": 3.356063416792662e-05, |
|
"loss": 1.3003, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 0.19590155897637024, |
|
"grad_norm": 4.137528419494629, |
|
"learning_rate": 3.338701881620052e-05, |
|
"loss": 1.8794, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 0.19609765663300324, |
|
"grad_norm": 8.082904815673828, |
|
"learning_rate": 3.321376366780326e-05, |
|
"loss": 1.3442, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.19629375428963625, |
|
"grad_norm": 7.494518280029297, |
|
"learning_rate": 3.3040869659595485e-05, |
|
"loss": 2.3671, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 0.19648985194626925, |
|
"grad_norm": 4.482415676116943, |
|
"learning_rate": 3.286833772648504e-05, |
|
"loss": 0.7332, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 0.19668594960290225, |
|
"grad_norm": 6.464245796203613, |
|
"learning_rate": 3.269616880142182e-05, |
|
"loss": 1.6453, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 0.19688204725953526, |
|
"grad_norm": 6.342423915863037, |
|
"learning_rate": 3.252436381539291e-05, |
|
"loss": 1.5742, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 0.19707814491616826, |
|
"grad_norm": 2.4557156562805176, |
|
"learning_rate": 3.235292369741729e-05, |
|
"loss": 0.3941, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.19727424257280127, |
|
"grad_norm": 6.024993419647217, |
|
"learning_rate": 3.218184937454103e-05, |
|
"loss": 1.7341, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 0.19747034022943427, |
|
"grad_norm": 4.793758392333984, |
|
"learning_rate": 3.2011141771832184e-05, |
|
"loss": 0.8558, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 0.19766643788606725, |
|
"grad_norm": 8.792879104614258, |
|
"learning_rate": 3.184080181237575e-05, |
|
"loss": 0.6646, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 0.19786253554270025, |
|
"grad_norm": 8.334936141967773, |
|
"learning_rate": 3.167083041726886e-05, |
|
"loss": 1.1602, |
|
"step": 1009 |
|
}, |
|
{ |
|
"epoch": 0.19805863319933326, |
|
"grad_norm": 5.780764579772949, |
|
"learning_rate": 3.1501228505615534e-05, |
|
"loss": 2.342, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.19825473085596626, |
|
"grad_norm": 9.708879470825195, |
|
"learning_rate": 3.1331996994521915e-05, |
|
"loss": 0.981, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 0.19845082851259926, |
|
"grad_norm": 5.935221195220947, |
|
"learning_rate": 3.1163136799091186e-05, |
|
"loss": 0.8119, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 0.19864692616923227, |
|
"grad_norm": 9.303512573242188, |
|
"learning_rate": 3.0994648832418784e-05, |
|
"loss": 1.5852, |
|
"step": 1013 |
|
}, |
|
{ |
|
"epoch": 0.19884302382586527, |
|
"grad_norm": 7.7904052734375, |
|
"learning_rate": 3.082653400558724e-05, |
|
"loss": 1.4118, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 0.19903912148249828, |
|
"grad_norm": 4.866517066955566, |
|
"learning_rate": 3.065879322766142e-05, |
|
"loss": 0.7066, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.19923521913913128, |
|
"grad_norm": 4.0074896812438965, |
|
"learning_rate": 3.0491427405683514e-05, |
|
"loss": 1.0785, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 0.19943131679576429, |
|
"grad_norm": 5.893292427062988, |
|
"learning_rate": 3.0324437444668274e-05, |
|
"loss": 2.6953, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 0.1996274144523973, |
|
"grad_norm": 10.129006385803223, |
|
"learning_rate": 3.0157824247597922e-05, |
|
"loss": 1.4669, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 0.1998235121090303, |
|
"grad_norm": 6.336552143096924, |
|
"learning_rate": 2.9991588715417383e-05, |
|
"loss": 1.9093, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 0.2000196097656633, |
|
"grad_norm": 7.2662482261657715, |
|
"learning_rate": 2.9825731747029394e-05, |
|
"loss": 1.3408, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.2002157074222963, |
|
"grad_norm": 4.900631427764893, |
|
"learning_rate": 2.966025423928972e-05, |
|
"loss": 1.2219, |
|
"step": 1021 |
|
}, |
|
{ |
|
"epoch": 0.2004118050789293, |
|
"grad_norm": 10.661945343017578, |
|
"learning_rate": 2.9495157087002123e-05, |
|
"loss": 2.2735, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 0.2006079027355623, |
|
"grad_norm": 6.423792362213135, |
|
"learning_rate": 2.9330441182913694e-05, |
|
"loss": 1.362, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 0.2006079027355623, |
|
"eval_loss": 0.3885071277618408, |
|
"eval_runtime": 77.8128, |
|
"eval_samples_per_second": 27.605, |
|
"eval_steps_per_second": 13.802, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 0.20080400039219531, |
|
"grad_norm": 7.308141231536865, |
|
"learning_rate": 2.9166107417709888e-05, |
|
"loss": 1.0916, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 0.20100009804882832, |
|
"grad_norm": 3.7046291828155518, |
|
"learning_rate": 2.9002156680009906e-05, |
|
"loss": 1.7084, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.20119619570546132, |
|
"grad_norm": 3.8992371559143066, |
|
"learning_rate": 2.883858985636165e-05, |
|
"loss": 1.0185, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 0.20139229336209433, |
|
"grad_norm": 4.895712852478027, |
|
"learning_rate": 2.867540783123711e-05, |
|
"loss": 1.1252, |
|
"step": 1027 |
|
}, |
|
{ |
|
"epoch": 0.20158839101872733, |
|
"grad_norm": 10.118478775024414, |
|
"learning_rate": 2.851261148702744e-05, |
|
"loss": 1.8931, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 0.20178448867536034, |
|
"grad_norm": 9.701197624206543, |
|
"learning_rate": 2.835020170403837e-05, |
|
"loss": 2.1721, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 0.20198058633199334, |
|
"grad_norm": 5.3619184494018555, |
|
"learning_rate": 2.8188179360485267e-05, |
|
"loss": 1.3209, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.20217668398862634, |
|
"grad_norm": 7.866724967956543, |
|
"learning_rate": 2.8026545332488418e-05, |
|
"loss": 1.4948, |
|
"step": 1031 |
|
}, |
|
{ |
|
"epoch": 0.20237278164525935, |
|
"grad_norm": 5.446761608123779, |
|
"learning_rate": 2.7865300494068435e-05, |
|
"loss": 1.041, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 0.20256887930189235, |
|
"grad_norm": 8.317418098449707, |
|
"learning_rate": 2.7704445717141368e-05, |
|
"loss": 1.0779, |
|
"step": 1033 |
|
}, |
|
{ |
|
"epoch": 0.20276497695852536, |
|
"grad_norm": 4.434285640716553, |
|
"learning_rate": 2.7543981871514023e-05, |
|
"loss": 1.9139, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 0.20296107461515836, |
|
"grad_norm": 14.01183795928955, |
|
"learning_rate": 2.7383909824879294e-05, |
|
"loss": 2.4565, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.20315717227179136, |
|
"grad_norm": 4.523947715759277, |
|
"learning_rate": 2.7224230442811526e-05, |
|
"loss": 1.7748, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 0.20335326992842437, |
|
"grad_norm": 10.790266990661621, |
|
"learning_rate": 2.7064944588761688e-05, |
|
"loss": 1.9152, |
|
"step": 1037 |
|
}, |
|
{ |
|
"epoch": 0.20354936758505734, |
|
"grad_norm": 7.028364181518555, |
|
"learning_rate": 2.6906053124052798e-05, |
|
"loss": 1.6433, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 0.20374546524169035, |
|
"grad_norm": 8.6144437789917, |
|
"learning_rate": 2.674755690787526e-05, |
|
"loss": 2.3653, |
|
"step": 1039 |
|
}, |
|
{ |
|
"epoch": 0.20394156289832335, |
|
"grad_norm": 4.6696343421936035, |
|
"learning_rate": 2.6589456797282253e-05, |
|
"loss": 1.0972, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.20413766055495636, |
|
"grad_norm": 7.880300998687744, |
|
"learning_rate": 2.6431753647184988e-05, |
|
"loss": 1.47, |
|
"step": 1041 |
|
}, |
|
{ |
|
"epoch": 0.20433375821158936, |
|
"grad_norm": 11.923666954040527, |
|
"learning_rate": 2.6274448310348178e-05, |
|
"loss": 2.1949, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 0.20452985586822237, |
|
"grad_norm": 11.636192321777344, |
|
"learning_rate": 2.6117541637385402e-05, |
|
"loss": 1.3088, |
|
"step": 1043 |
|
}, |
|
{ |
|
"epoch": 0.20472595352485537, |
|
"grad_norm": 4.3036885261535645, |
|
"learning_rate": 2.5961034476754487e-05, |
|
"loss": 1.5687, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 0.20492205118148837, |
|
"grad_norm": 4.218216419219971, |
|
"learning_rate": 2.5804927674752922e-05, |
|
"loss": 0.672, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.20511814883812138, |
|
"grad_norm": 4.2901291847229, |
|
"learning_rate": 2.564922207551337e-05, |
|
"loss": 1.048, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 0.20531424649475438, |
|
"grad_norm": 6.806373119354248, |
|
"learning_rate": 2.549391852099896e-05, |
|
"loss": 2.2606, |
|
"step": 1047 |
|
}, |
|
{ |
|
"epoch": 0.2055103441513874, |
|
"grad_norm": 5.39298152923584, |
|
"learning_rate": 2.5339017850998802e-05, |
|
"loss": 1.5175, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 0.2057064418080204, |
|
"grad_norm": 7.037325859069824, |
|
"learning_rate": 2.5184520903123476e-05, |
|
"loss": 1.3757, |
|
"step": 1049 |
|
}, |
|
{ |
|
"epoch": 0.2059025394646534, |
|
"grad_norm": 6.3087239265441895, |
|
"learning_rate": 2.503042851280043e-05, |
|
"loss": 1.0206, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.2060986371212864, |
|
"grad_norm": 6.11033821105957, |
|
"learning_rate": 2.4876741513269597e-05, |
|
"loss": 1.99, |
|
"step": 1051 |
|
}, |
|
{ |
|
"epoch": 0.2062947347779194, |
|
"grad_norm": 5.525476932525635, |
|
"learning_rate": 2.4723460735578697e-05, |
|
"loss": 0.7599, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 0.2064908324345524, |
|
"grad_norm": 6.608320236206055, |
|
"learning_rate": 2.4570587008578895e-05, |
|
"loss": 1.2481, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 0.2066869300911854, |
|
"grad_norm": 3.596764087677002, |
|
"learning_rate": 2.4418121158920227e-05, |
|
"loss": 0.713, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 0.20688302774781842, |
|
"grad_norm": 7.637975692749023, |
|
"learning_rate": 2.4266064011047264e-05, |
|
"loss": 1.3772, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.20707912540445142, |
|
"grad_norm": 4.366866111755371, |
|
"learning_rate": 2.411441638719445e-05, |
|
"loss": 0.5525, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 0.20727522306108442, |
|
"grad_norm": 5.603202819824219, |
|
"learning_rate": 2.396317910738184e-05, |
|
"loss": 0.9035, |
|
"step": 1057 |
|
}, |
|
{ |
|
"epoch": 0.20747132071771743, |
|
"grad_norm": 7.776876449584961, |
|
"learning_rate": 2.3812352989410537e-05, |
|
"loss": 1.6217, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 0.20766741837435043, |
|
"grad_norm": 9.07315444946289, |
|
"learning_rate": 2.366193884885841e-05, |
|
"loss": 2.2412, |
|
"step": 1059 |
|
}, |
|
{ |
|
"epoch": 0.20786351603098344, |
|
"grad_norm": 7.473457336425781, |
|
"learning_rate": 2.3511937499075508e-05, |
|
"loss": 1.6256, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.20805961368761644, |
|
"grad_norm": 10.729793548583984, |
|
"learning_rate": 2.3362349751179825e-05, |
|
"loss": 1.247, |
|
"step": 1061 |
|
}, |
|
{ |
|
"epoch": 0.20825571134424944, |
|
"grad_norm": 6.086622714996338, |
|
"learning_rate": 2.3213176414052738e-05, |
|
"loss": 1.613, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 0.20845180900088245, |
|
"grad_norm": 6.6213836669921875, |
|
"learning_rate": 2.30644182943349e-05, |
|
"loss": 0.9007, |
|
"step": 1063 |
|
}, |
|
{ |
|
"epoch": 0.20864790665751545, |
|
"grad_norm": 4.5187602043151855, |
|
"learning_rate": 2.2916076196421587e-05, |
|
"loss": 1.3616, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 0.20884400431414846, |
|
"grad_norm": 8.176258087158203, |
|
"learning_rate": 2.276815092245851e-05, |
|
"loss": 1.1822, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.20904010197078146, |
|
"grad_norm": 6.9451494216918945, |
|
"learning_rate": 2.2620643272337426e-05, |
|
"loss": 1.2958, |
|
"step": 1066 |
|
}, |
|
{ |
|
"epoch": 0.20923619962741447, |
|
"grad_norm": 4.266102313995361, |
|
"learning_rate": 2.2473554043691913e-05, |
|
"loss": 1.5274, |
|
"step": 1067 |
|
}, |
|
{ |
|
"epoch": 0.20943229728404747, |
|
"grad_norm": 7.066012382507324, |
|
"learning_rate": 2.2326884031892882e-05, |
|
"loss": 1.0549, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 0.20962839494068045, |
|
"grad_norm": 11.074556350708008, |
|
"learning_rate": 2.21806340300444e-05, |
|
"loss": 1.3012, |
|
"step": 1069 |
|
}, |
|
{ |
|
"epoch": 0.20982449259731345, |
|
"grad_norm": 5.717066287994385, |
|
"learning_rate": 2.203480482897935e-05, |
|
"loss": 1.4355, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.21002059025394645, |
|
"grad_norm": 5.230037689208984, |
|
"learning_rate": 2.188939721725526e-05, |
|
"loss": 1.7493, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 0.21021668791057946, |
|
"grad_norm": 4.878988265991211, |
|
"learning_rate": 2.174441198114985e-05, |
|
"loss": 0.7765, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 0.21041278556721246, |
|
"grad_norm": 4.619425296783447, |
|
"learning_rate": 2.159984990465691e-05, |
|
"loss": 1.8523, |
|
"step": 1073 |
|
}, |
|
{ |
|
"epoch": 0.21060888322384547, |
|
"grad_norm": 4.031160354614258, |
|
"learning_rate": 2.1455711769482113e-05, |
|
"loss": 0.9848, |
|
"step": 1074 |
|
}, |
|
{ |
|
"epoch": 0.21080498088047847, |
|
"grad_norm": 7.096545219421387, |
|
"learning_rate": 2.131199835503861e-05, |
|
"loss": 1.4097, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.21100107853711147, |
|
"grad_norm": 5.833085060119629, |
|
"learning_rate": 2.1168710438442953e-05, |
|
"loss": 0.9551, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 0.21119717619374448, |
|
"grad_norm": 5.613104343414307, |
|
"learning_rate": 2.1025848794510815e-05, |
|
"loss": 1.9112, |
|
"step": 1077 |
|
}, |
|
{ |
|
"epoch": 0.21139327385037748, |
|
"grad_norm": 7.113245010375977, |
|
"learning_rate": 2.0883414195752972e-05, |
|
"loss": 1.2791, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 0.2115893715070105, |
|
"grad_norm": 5.34241247177124, |
|
"learning_rate": 2.0741407412370838e-05, |
|
"loss": 1.4305, |
|
"step": 1079 |
|
}, |
|
{ |
|
"epoch": 0.2117854691636435, |
|
"grad_norm": 4.97748327255249, |
|
"learning_rate": 2.0599829212252553e-05, |
|
"loss": 1.0597, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.2119815668202765, |
|
"grad_norm": 6.11815881729126, |
|
"learning_rate": 2.045868036096864e-05, |
|
"loss": 1.3793, |
|
"step": 1081 |
|
}, |
|
{ |
|
"epoch": 0.2121776644769095, |
|
"grad_norm": 7.822902202606201, |
|
"learning_rate": 2.031796162176811e-05, |
|
"loss": 1.7602, |
|
"step": 1082 |
|
}, |
|
{ |
|
"epoch": 0.2123737621335425, |
|
"grad_norm": 5.7074875831604, |
|
"learning_rate": 2.0177673755574044e-05, |
|
"loss": 1.9033, |
|
"step": 1083 |
|
}, |
|
{ |
|
"epoch": 0.2125698597901755, |
|
"grad_norm": 5.141830921173096, |
|
"learning_rate": 2.0037817520979686e-05, |
|
"loss": 0.6569, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 0.2127659574468085, |
|
"grad_norm": 8.139151573181152, |
|
"learning_rate": 1.9898393674244243e-05, |
|
"loss": 1.6038, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.21296205510344152, |
|
"grad_norm": 6.489431858062744, |
|
"learning_rate": 1.975940296928882e-05, |
|
"loss": 1.7581, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 0.21315815276007452, |
|
"grad_norm": 9.372897148132324, |
|
"learning_rate": 1.9620846157692418e-05, |
|
"loss": 2.5077, |
|
"step": 1087 |
|
}, |
|
{ |
|
"epoch": 0.21335425041670752, |
|
"grad_norm": 11.757204055786133, |
|
"learning_rate": 1.9482723988687734e-05, |
|
"loss": 2.0571, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 0.21355034807334053, |
|
"grad_norm": 4.995403289794922, |
|
"learning_rate": 1.934503720915719e-05, |
|
"loss": 1.3159, |
|
"step": 1089 |
|
}, |
|
{ |
|
"epoch": 0.21374644572997353, |
|
"grad_norm": 4.217586994171143, |
|
"learning_rate": 1.9207786563628894e-05, |
|
"loss": 0.7797, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.21394254338660654, |
|
"grad_norm": 5.487307548522949, |
|
"learning_rate": 1.907097279427258e-05, |
|
"loss": 1.2424, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 0.21413864104323954, |
|
"grad_norm": 5.5197014808654785, |
|
"learning_rate": 1.8934596640895607e-05, |
|
"loss": 1.6582, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 0.21433473869987255, |
|
"grad_norm": 9.223129272460938, |
|
"learning_rate": 1.8798658840939055e-05, |
|
"loss": 1.5407, |
|
"step": 1093 |
|
}, |
|
{ |
|
"epoch": 0.21453083635650555, |
|
"grad_norm": 6.948054790496826, |
|
"learning_rate": 1.8663160129473535e-05, |
|
"loss": 0.7758, |
|
"step": 1094 |
|
}, |
|
{ |
|
"epoch": 0.21472693401313855, |
|
"grad_norm": 5.582278251647949, |
|
"learning_rate": 1.8528101239195395e-05, |
|
"loss": 2.0049, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.21492303166977156, |
|
"grad_norm": 4.116588115692139, |
|
"learning_rate": 1.8393482900422644e-05, |
|
"loss": 1.8004, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 0.21511912932640456, |
|
"grad_norm": 6.368351936340332, |
|
"learning_rate": 1.8259305841091123e-05, |
|
"loss": 1.4293, |
|
"step": 1097 |
|
}, |
|
{ |
|
"epoch": 0.21531522698303757, |
|
"grad_norm": 5.690855503082275, |
|
"learning_rate": 1.8125570786750455e-05, |
|
"loss": 1.1567, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 0.21551132463967054, |
|
"grad_norm": 7.080834865570068, |
|
"learning_rate": 1.7992278460560152e-05, |
|
"loss": 1.1252, |
|
"step": 1099 |
|
}, |
|
{ |
|
"epoch": 0.21570742229630355, |
|
"grad_norm": 3.924699544906616, |
|
"learning_rate": 1.785942958328569e-05, |
|
"loss": 1.9084, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.21590351995293655, |
|
"grad_norm": 4.245781421661377, |
|
"learning_rate": 1.772702487329474e-05, |
|
"loss": 1.2304, |
|
"step": 1101 |
|
}, |
|
{ |
|
"epoch": 0.21609961760956956, |
|
"grad_norm": 11.68691349029541, |
|
"learning_rate": 1.7595065046553085e-05, |
|
"loss": 1.0978, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 0.21629571526620256, |
|
"grad_norm": 5.583014011383057, |
|
"learning_rate": 1.7463550816620876e-05, |
|
"loss": 2.6622, |
|
"step": 1103 |
|
}, |
|
{ |
|
"epoch": 0.21649181292283556, |
|
"grad_norm": 6.454019546508789, |
|
"learning_rate": 1.7332482894648726e-05, |
|
"loss": 1.7374, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 0.21668791057946857, |
|
"grad_norm": 9.824748992919922, |
|
"learning_rate": 1.7201861989373926e-05, |
|
"loss": 2.4338, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.21688400823610157, |
|
"grad_norm": 4.780369758605957, |
|
"learning_rate": 1.7071688807116525e-05, |
|
"loss": 0.9636, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 0.21708010589273458, |
|
"grad_norm": 6.861015796661377, |
|
"learning_rate": 1.694196405177556e-05, |
|
"loss": 1.1233, |
|
"step": 1107 |
|
}, |
|
{ |
|
"epoch": 0.21727620354936758, |
|
"grad_norm": 4.883321762084961, |
|
"learning_rate": 1.681268842482523e-05, |
|
"loss": 0.8476, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 0.21747230120600058, |
|
"grad_norm": 4.856523513793945, |
|
"learning_rate": 1.6683862625311164e-05, |
|
"loss": 1.4524, |
|
"step": 1109 |
|
}, |
|
{ |
|
"epoch": 0.2176683988626336, |
|
"grad_norm": 5.7625017166137695, |
|
"learning_rate": 1.6555487349846544e-05, |
|
"loss": 1.5877, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.2178644965192666, |
|
"grad_norm": 6.9636359214782715, |
|
"learning_rate": 1.6427563292608416e-05, |
|
"loss": 1.3804, |
|
"step": 1111 |
|
}, |
|
{ |
|
"epoch": 0.2180605941758996, |
|
"grad_norm": 5.53889799118042, |
|
"learning_rate": 1.630009114533384e-05, |
|
"loss": 1.4286, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 0.2182566918325326, |
|
"grad_norm": 8.081591606140137, |
|
"learning_rate": 1.6173071597316335e-05, |
|
"loss": 2.5756, |
|
"step": 1113 |
|
}, |
|
{ |
|
"epoch": 0.2184527894891656, |
|
"grad_norm": 9.321168899536133, |
|
"learning_rate": 1.604650533540194e-05, |
|
"loss": 2.107, |
|
"step": 1114 |
|
}, |
|
{ |
|
"epoch": 0.2186488871457986, |
|
"grad_norm": 7.095883846282959, |
|
"learning_rate": 1.5920393043985593e-05, |
|
"loss": 0.9074, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.2188449848024316, |
|
"grad_norm": 10.197619438171387, |
|
"learning_rate": 1.579473540500751e-05, |
|
"loss": 1.7506, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 0.21904108245906462, |
|
"grad_norm": 8.685538291931152, |
|
"learning_rate": 1.5669533097949328e-05, |
|
"loss": 1.6941, |
|
"step": 1117 |
|
}, |
|
{ |
|
"epoch": 0.21923718011569762, |
|
"grad_norm": 6.081611156463623, |
|
"learning_rate": 1.5544786799830568e-05, |
|
"loss": 2.3827, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 0.21943327777233063, |
|
"grad_norm": 4.3908162117004395, |
|
"learning_rate": 1.5420497185204873e-05, |
|
"loss": 1.1959, |
|
"step": 1119 |
|
}, |
|
{ |
|
"epoch": 0.21962937542896363, |
|
"grad_norm": 13.473033905029297, |
|
"learning_rate": 1.5296664926156525e-05, |
|
"loss": 2.7042, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.21982547308559663, |
|
"grad_norm": 7.351769924163818, |
|
"learning_rate": 1.5173290692296582e-05, |
|
"loss": 2.4473, |
|
"step": 1121 |
|
}, |
|
{ |
|
"epoch": 0.22002157074222964, |
|
"grad_norm": 4.283702373504639, |
|
"learning_rate": 1.5050375150759433e-05, |
|
"loss": 1.2878, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 0.22021766839886264, |
|
"grad_norm": 6.013007164001465, |
|
"learning_rate": 1.4927918966199094e-05, |
|
"loss": 1.7944, |
|
"step": 1123 |
|
}, |
|
{ |
|
"epoch": 0.22041376605549565, |
|
"grad_norm": 6.42008638381958, |
|
"learning_rate": 1.4805922800785733e-05, |
|
"loss": 1.727, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 0.22060986371212865, |
|
"grad_norm": 8.836663246154785, |
|
"learning_rate": 1.4684387314201919e-05, |
|
"loss": 2.6804, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.22080596136876166, |
|
"grad_norm": 6.903241157531738, |
|
"learning_rate": 1.4563313163639192e-05, |
|
"loss": 1.1425, |
|
"step": 1126 |
|
}, |
|
{ |
|
"epoch": 0.22100205902539466, |
|
"grad_norm": 25.018720626831055, |
|
"learning_rate": 1.4442701003794434e-05, |
|
"loss": 2.5687, |
|
"step": 1127 |
|
}, |
|
{ |
|
"epoch": 0.22119815668202766, |
|
"grad_norm": 4.393866062164307, |
|
"learning_rate": 1.4322551486866364e-05, |
|
"loss": 1.2885, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 0.22139425433866064, |
|
"grad_norm": 10.157917976379395, |
|
"learning_rate": 1.4202865262552045e-05, |
|
"loss": 2.6514, |
|
"step": 1129 |
|
}, |
|
{ |
|
"epoch": 0.22159035199529364, |
|
"grad_norm": 8.52134895324707, |
|
"learning_rate": 1.4083642978043277e-05, |
|
"loss": 1.5537, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.22178644965192665, |
|
"grad_norm": 7.985105991363525, |
|
"learning_rate": 1.3964885278023176e-05, |
|
"loss": 1.631, |
|
"step": 1131 |
|
}, |
|
{ |
|
"epoch": 0.22198254730855965, |
|
"grad_norm": 6.039412021636963, |
|
"learning_rate": 1.3846592804662638e-05, |
|
"loss": 0.9408, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 0.22217864496519266, |
|
"grad_norm": 6.0496697425842285, |
|
"learning_rate": 1.3728766197616905e-05, |
|
"loss": 2.2771, |
|
"step": 1133 |
|
}, |
|
{ |
|
"epoch": 0.22237474262182566, |
|
"grad_norm": 6.791964530944824, |
|
"learning_rate": 1.3611406094022095e-05, |
|
"loss": 1.5164, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 0.22257084027845866, |
|
"grad_norm": 5.74752950668335, |
|
"learning_rate": 1.349451312849177e-05, |
|
"loss": 1.7676, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.22276693793509167, |
|
"grad_norm": 5.087756633758545, |
|
"learning_rate": 1.337808793311346e-05, |
|
"loss": 0.8442, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 0.22296303559172467, |
|
"grad_norm": 6.2235426902771, |
|
"learning_rate": 1.3262131137445266e-05, |
|
"loss": 1.7659, |
|
"step": 1137 |
|
}, |
|
{ |
|
"epoch": 0.22315913324835768, |
|
"grad_norm": 5.494975566864014, |
|
"learning_rate": 1.314664336851248e-05, |
|
"loss": 2.0008, |
|
"step": 1138 |
|
}, |
|
{ |
|
"epoch": 0.22335523090499068, |
|
"grad_norm": 7.123453617095947, |
|
"learning_rate": 1.3031625250804191e-05, |
|
"loss": 1.3864, |
|
"step": 1139 |
|
}, |
|
{ |
|
"epoch": 0.22355132856162369, |
|
"grad_norm": 4.093453407287598, |
|
"learning_rate": 1.2917077406269872e-05, |
|
"loss": 0.8456, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.2237474262182567, |
|
"grad_norm": 9.605305671691895, |
|
"learning_rate": 1.280300045431605e-05, |
|
"loss": 2.1936, |
|
"step": 1141 |
|
}, |
|
{ |
|
"epoch": 0.2239435238748897, |
|
"grad_norm": 5.773606777191162, |
|
"learning_rate": 1.2689395011802896e-05, |
|
"loss": 1.3258, |
|
"step": 1142 |
|
}, |
|
{ |
|
"epoch": 0.2241396215315227, |
|
"grad_norm": 5.294035911560059, |
|
"learning_rate": 1.2576261693041036e-05, |
|
"loss": 1.6611, |
|
"step": 1143 |
|
}, |
|
{ |
|
"epoch": 0.2243357191881557, |
|
"grad_norm": 8.730361938476562, |
|
"learning_rate": 1.2463601109788058e-05, |
|
"loss": 2.3499, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 0.2245318168447887, |
|
"grad_norm": 7.3704304695129395, |
|
"learning_rate": 1.235141387124531e-05, |
|
"loss": 1.6554, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.2247279145014217, |
|
"grad_norm": 6.475876331329346, |
|
"learning_rate": 1.2239700584054514e-05, |
|
"loss": 1.5567, |
|
"step": 1146 |
|
}, |
|
{ |
|
"epoch": 0.22492401215805471, |
|
"grad_norm": 5.318692207336426, |
|
"learning_rate": 1.2128461852294647e-05, |
|
"loss": 1.6012, |
|
"step": 1147 |
|
}, |
|
{ |
|
"epoch": 0.22512010981468772, |
|
"grad_norm": 8.062721252441406, |
|
"learning_rate": 1.2017698277478506e-05, |
|
"loss": 1.0389, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 0.22531620747132072, |
|
"grad_norm": 8.7105131149292, |
|
"learning_rate": 1.1907410458549517e-05, |
|
"loss": 3.1607, |
|
"step": 1149 |
|
}, |
|
{ |
|
"epoch": 0.22551230512795373, |
|
"grad_norm": 8.629892349243164, |
|
"learning_rate": 1.1797598991878512e-05, |
|
"loss": 1.8994, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.22570840278458673, |
|
"grad_norm": 8.824653625488281, |
|
"learning_rate": 1.1688264471260546e-05, |
|
"loss": 0.8091, |
|
"step": 1151 |
|
}, |
|
{ |
|
"epoch": 0.22590450044121974, |
|
"grad_norm": 4.759090900421143, |
|
"learning_rate": 1.1579407487911564e-05, |
|
"loss": 1.3273, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 0.22610059809785274, |
|
"grad_norm": 4.78541374206543, |
|
"learning_rate": 1.1471028630465285e-05, |
|
"loss": 0.8232, |
|
"step": 1153 |
|
}, |
|
{ |
|
"epoch": 0.22629669575448574, |
|
"grad_norm": 6.233590602874756, |
|
"learning_rate": 1.136312848497002e-05, |
|
"loss": 2.525, |
|
"step": 1154 |
|
}, |
|
{ |
|
"epoch": 0.22649279341111875, |
|
"grad_norm": 7.085466384887695, |
|
"learning_rate": 1.1255707634885526e-05, |
|
"loss": 1.6632, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.22668889106775175, |
|
"grad_norm": 4.938376426696777, |
|
"learning_rate": 1.1148766661079768e-05, |
|
"loss": 0.6869, |
|
"step": 1156 |
|
}, |
|
{ |
|
"epoch": 0.22688498872438476, |
|
"grad_norm": 9.032883644104004, |
|
"learning_rate": 1.1042306141825808e-05, |
|
"loss": 1.7692, |
|
"step": 1157 |
|
}, |
|
{ |
|
"epoch": 0.22708108638101776, |
|
"grad_norm": 7.324281215667725, |
|
"learning_rate": 1.0936326652798779e-05, |
|
"loss": 1.5287, |
|
"step": 1158 |
|
}, |
|
{ |
|
"epoch": 0.22727718403765074, |
|
"grad_norm": 9.270583152770996, |
|
"learning_rate": 1.083082876707262e-05, |
|
"loss": 2.5909, |
|
"step": 1159 |
|
}, |
|
{ |
|
"epoch": 0.22747328169428374, |
|
"grad_norm": 8.368681907653809, |
|
"learning_rate": 1.0725813055117051e-05, |
|
"loss": 1.3548, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.22766937935091675, |
|
"grad_norm": 5.176713943481445, |
|
"learning_rate": 1.0621280084794483e-05, |
|
"loss": 0.6788, |
|
"step": 1161 |
|
}, |
|
{ |
|
"epoch": 0.22786547700754975, |
|
"grad_norm": 16.383853912353516, |
|
"learning_rate": 1.0517230421357016e-05, |
|
"loss": 1.4931, |
|
"step": 1162 |
|
}, |
|
{ |
|
"epoch": 0.22806157466418275, |
|
"grad_norm": 6.631777286529541, |
|
"learning_rate": 1.041366462744322e-05, |
|
"loss": 1.8152, |
|
"step": 1163 |
|
}, |
|
{ |
|
"epoch": 0.22825767232081576, |
|
"grad_norm": 6.580327033996582, |
|
"learning_rate": 1.0310583263075236e-05, |
|
"loss": 1.2307, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 0.22845376997744876, |
|
"grad_norm": 3.8760287761688232, |
|
"learning_rate": 1.0207986885655662e-05, |
|
"loss": 1.398, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.22864986763408177, |
|
"grad_norm": 4.494516849517822, |
|
"learning_rate": 1.0105876049964658e-05, |
|
"loss": 1.9966, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 0.22884596529071477, |
|
"grad_norm": 4.6880879402160645, |
|
"learning_rate": 1.0004251308156776e-05, |
|
"loss": 1.7868, |
|
"step": 1167 |
|
}, |
|
{ |
|
"epoch": 0.22904206294734777, |
|
"grad_norm": 4.97517204284668, |
|
"learning_rate": 9.903113209758096e-06, |
|
"loss": 0.9129, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 0.22923816060398078, |
|
"grad_norm": 6.1455183029174805, |
|
"learning_rate": 9.802462301663218e-06, |
|
"loss": 1.0401, |
|
"step": 1169 |
|
}, |
|
{ |
|
"epoch": 0.22943425826061378, |
|
"grad_norm": 5.264155387878418, |
|
"learning_rate": 9.702299128132286e-06, |
|
"loss": 0.6803, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.2296303559172468, |
|
"grad_norm": 7.0842108726501465, |
|
"learning_rate": 9.602624230788127e-06, |
|
"loss": 1.2855, |
|
"step": 1171 |
|
}, |
|
{ |
|
"epoch": 0.2298264535738798, |
|
"grad_norm": 23.354158401489258, |
|
"learning_rate": 9.503438148613208e-06, |
|
"loss": 2.12, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 0.2300225512305128, |
|
"grad_norm": 4.803404331207275, |
|
"learning_rate": 9.404741417946783e-06, |
|
"loss": 1.6464, |
|
"step": 1173 |
|
}, |
|
{ |
|
"epoch": 0.2302186488871458, |
|
"grad_norm": 5.195870876312256, |
|
"learning_rate": 9.306534572481996e-06, |
|
"loss": 2.3011, |
|
"step": 1174 |
|
}, |
|
{ |
|
"epoch": 0.2304147465437788, |
|
"grad_norm": 4.52567720413208, |
|
"learning_rate": 9.208818143262965e-06, |
|
"loss": 0.6482, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.2306108442004118, |
|
"grad_norm": 6.062211990356445, |
|
"learning_rate": 9.111592658681933e-06, |
|
"loss": 1.5717, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 0.2308069418570448, |
|
"grad_norm": 6.672834396362305, |
|
"learning_rate": 9.014858644476444e-06, |
|
"loss": 1.468, |
|
"step": 1177 |
|
}, |
|
{ |
|
"epoch": 0.23100303951367782, |
|
"grad_norm": 8.963277816772461, |
|
"learning_rate": 8.918616623726428e-06, |
|
"loss": 1.5148, |
|
"step": 1178 |
|
}, |
|
{ |
|
"epoch": 0.23119913717031082, |
|
"grad_norm": 4.698469638824463, |
|
"learning_rate": 8.822867116851396e-06, |
|
"loss": 2.3368, |
|
"step": 1179 |
|
}, |
|
{ |
|
"epoch": 0.23139523482694382, |
|
"grad_norm": 7.142313003540039, |
|
"learning_rate": 8.727610641607642e-06, |
|
"loss": 2.7635, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.23159133248357683, |
|
"grad_norm": 5.125174045562744, |
|
"learning_rate": 8.632847713085502e-06, |
|
"loss": 1.3292, |
|
"step": 1181 |
|
}, |
|
{ |
|
"epoch": 0.23178743014020983, |
|
"grad_norm": 5.394103527069092, |
|
"learning_rate": 8.538578843706423e-06, |
|
"loss": 1.7812, |
|
"step": 1182 |
|
}, |
|
{ |
|
"epoch": 0.23198352779684284, |
|
"grad_norm": 6.396340370178223, |
|
"learning_rate": 8.444804543220308e-06, |
|
"loss": 1.4261, |
|
"step": 1183 |
|
}, |
|
{ |
|
"epoch": 0.23217962545347584, |
|
"grad_norm": 10.947870254516602, |
|
"learning_rate": 8.351525318702702e-06, |
|
"loss": 0.9685, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 0.23237572311010884, |
|
"grad_norm": 8.984673500061035, |
|
"learning_rate": 8.25874167455214e-06, |
|
"loss": 1.0452, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.23257182076674185, |
|
"grad_norm": 9.12095832824707, |
|
"learning_rate": 8.166454112487288e-06, |
|
"loss": 1.5966, |
|
"step": 1186 |
|
}, |
|
{ |
|
"epoch": 0.23276791842337485, |
|
"grad_norm": 17.694997787475586, |
|
"learning_rate": 8.074663131544313e-06, |
|
"loss": 2.1847, |
|
"step": 1187 |
|
}, |
|
{ |
|
"epoch": 0.23296401608000786, |
|
"grad_norm": 7.42760705947876, |
|
"learning_rate": 7.983369228074155e-06, |
|
"loss": 2.1763, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 0.23316011373664083, |
|
"grad_norm": 6.491481304168701, |
|
"learning_rate": 7.892572895739913e-06, |
|
"loss": 1.6988, |
|
"step": 1189 |
|
}, |
|
{ |
|
"epoch": 0.23335621139327384, |
|
"grad_norm": 8.320481300354004, |
|
"learning_rate": 7.80227462551405e-06, |
|
"loss": 1.4067, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.23355230904990684, |
|
"grad_norm": 6.234914302825928, |
|
"learning_rate": 7.712474905675837e-06, |
|
"loss": 1.675, |
|
"step": 1191 |
|
}, |
|
{ |
|
"epoch": 0.23374840670653985, |
|
"grad_norm": 6.310087203979492, |
|
"learning_rate": 7.62317422180866e-06, |
|
"loss": 2.4978, |
|
"step": 1192 |
|
}, |
|
{ |
|
"epoch": 0.23394450436317285, |
|
"grad_norm": 7.758471488952637, |
|
"learning_rate": 7.53437305679745e-06, |
|
"loss": 1.8757, |
|
"step": 1193 |
|
}, |
|
{ |
|
"epoch": 0.23414060201980585, |
|
"grad_norm": 4.8770318031311035, |
|
"learning_rate": 7.446071890826023e-06, |
|
"loss": 1.5988, |
|
"step": 1194 |
|
}, |
|
{ |
|
"epoch": 0.23433669967643886, |
|
"grad_norm": 3.4752848148345947, |
|
"learning_rate": 7.358271201374478e-06, |
|
"loss": 1.526, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.23453279733307186, |
|
"grad_norm": 4.924532890319824, |
|
"learning_rate": 7.270971463216658e-06, |
|
"loss": 0.7961, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 0.23472889498970487, |
|
"grad_norm": 5.1697306632995605, |
|
"learning_rate": 7.184173148417561e-06, |
|
"loss": 1.114, |
|
"step": 1197 |
|
}, |
|
{ |
|
"epoch": 0.23492499264633787, |
|
"grad_norm": 6.321658134460449, |
|
"learning_rate": 7.0978767263307764e-06, |
|
"loss": 1.0523, |
|
"step": 1198 |
|
}, |
|
{ |
|
"epoch": 0.23512109030297088, |
|
"grad_norm": 4.862210273742676, |
|
"learning_rate": 7.012082663595931e-06, |
|
"loss": 2.3474, |
|
"step": 1199 |
|
}, |
|
{ |
|
"epoch": 0.23531718795960388, |
|
"grad_norm": 6.735289096832275, |
|
"learning_rate": 6.9267914241362585e-06, |
|
"loss": 2.2363, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.23551328561623688, |
|
"grad_norm": 6.494600772857666, |
|
"learning_rate": 6.842003469155955e-06, |
|
"loss": 1.4925, |
|
"step": 1201 |
|
}, |
|
{ |
|
"epoch": 0.2357093832728699, |
|
"grad_norm": 6.873384952545166, |
|
"learning_rate": 6.757719257137807e-06, |
|
"loss": 1.7298, |
|
"step": 1202 |
|
}, |
|
{ |
|
"epoch": 0.2359054809295029, |
|
"grad_norm": 11.259360313415527, |
|
"learning_rate": 6.673939243840599e-06, |
|
"loss": 1.0081, |
|
"step": 1203 |
|
}, |
|
{ |
|
"epoch": 0.2361015785861359, |
|
"grad_norm": 4.887358665466309, |
|
"learning_rate": 6.590663882296788e-06, |
|
"loss": 2.1434, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 0.2362976762427689, |
|
"grad_norm": 4.775937557220459, |
|
"learning_rate": 6.507893622809913e-06, |
|
"loss": 0.6404, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.2364937738994019, |
|
"grad_norm": 4.322073459625244, |
|
"learning_rate": 6.425628912952242e-06, |
|
"loss": 0.9223, |
|
"step": 1206 |
|
}, |
|
{ |
|
"epoch": 0.2366898715560349, |
|
"grad_norm": 5.388957500457764, |
|
"learning_rate": 6.343870197562307e-06, |
|
"loss": 1.7039, |
|
"step": 1207 |
|
}, |
|
{ |
|
"epoch": 0.2368859692126679, |
|
"grad_norm": 7.30275297164917, |
|
"learning_rate": 6.262617918742586e-06, |
|
"loss": 1.7324, |
|
"step": 1208 |
|
}, |
|
{ |
|
"epoch": 0.23708206686930092, |
|
"grad_norm": 6.703151702880859, |
|
"learning_rate": 6.181872515857001e-06, |
|
"loss": 2.2659, |
|
"step": 1209 |
|
}, |
|
{ |
|
"epoch": 0.23727816452593392, |
|
"grad_norm": 7.984116554260254, |
|
"learning_rate": 6.10163442552858e-06, |
|
"loss": 1.5091, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.23747426218256693, |
|
"grad_norm": 6.996639728546143, |
|
"learning_rate": 6.021904081637142e-06, |
|
"loss": 1.9216, |
|
"step": 1211 |
|
}, |
|
{ |
|
"epoch": 0.23767035983919993, |
|
"grad_norm": 9.186728477478027, |
|
"learning_rate": 5.942681915316894e-06, |
|
"loss": 2.6348, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 0.23786645749583293, |
|
"grad_norm": 12.796127319335938, |
|
"learning_rate": 5.863968354954141e-06, |
|
"loss": 1.6889, |
|
"step": 1213 |
|
}, |
|
{ |
|
"epoch": 0.23806255515246594, |
|
"grad_norm": 10.490285873413086, |
|
"learning_rate": 5.7857638261849314e-06, |
|
"loss": 2.4966, |
|
"step": 1214 |
|
}, |
|
{ |
|
"epoch": 0.23825865280909894, |
|
"grad_norm": 4.777184009552002, |
|
"learning_rate": 5.70806875189277e-06, |
|
"loss": 1.7654, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.23845475046573195, |
|
"grad_norm": 5.094268321990967, |
|
"learning_rate": 5.630883552206367e-06, |
|
"loss": 0.7742, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 0.23865084812236495, |
|
"grad_norm": 3.6413767337799072, |
|
"learning_rate": 5.554208644497283e-06, |
|
"loss": 0.7798, |
|
"step": 1217 |
|
}, |
|
{ |
|
"epoch": 0.23884694577899795, |
|
"grad_norm": 11.46200942993164, |
|
"learning_rate": 5.478044443377761e-06, |
|
"loss": 1.899, |
|
"step": 1218 |
|
}, |
|
{ |
|
"epoch": 0.23904304343563093, |
|
"grad_norm": 6.592775821685791, |
|
"learning_rate": 5.402391360698456e-06, |
|
"loss": 0.7878, |
|
"step": 1219 |
|
}, |
|
{ |
|
"epoch": 0.23923914109226393, |
|
"grad_norm": 4.292765140533447, |
|
"learning_rate": 5.327249805546175e-06, |
|
"loss": 1.1989, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.23943523874889694, |
|
"grad_norm": 5.002085208892822, |
|
"learning_rate": 5.2526201842416965e-06, |
|
"loss": 1.0088, |
|
"step": 1221 |
|
}, |
|
{ |
|
"epoch": 0.23963133640552994, |
|
"grad_norm": 8.515301704406738, |
|
"learning_rate": 5.178502900337534e-06, |
|
"loss": 2.1646, |
|
"step": 1222 |
|
}, |
|
{ |
|
"epoch": 0.23982743406216295, |
|
"grad_norm": 5.089056491851807, |
|
"learning_rate": 5.104898354615861e-06, |
|
"loss": 1.0671, |
|
"step": 1223 |
|
}, |
|
{ |
|
"epoch": 0.24002353171879595, |
|
"grad_norm": 4.508873462677002, |
|
"learning_rate": 5.031806945086204e-06, |
|
"loss": 2.017, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 0.24021962937542896, |
|
"grad_norm": 5.015408992767334, |
|
"learning_rate": 4.959229066983373e-06, |
|
"loss": 2.8555, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.24041572703206196, |
|
"grad_norm": 5.895279407501221, |
|
"learning_rate": 4.887165112765291e-06, |
|
"loss": 0.8382, |
|
"step": 1226 |
|
}, |
|
{ |
|
"epoch": 0.24061182468869496, |
|
"grad_norm": 4.138760566711426, |
|
"learning_rate": 4.8156154721109345e-06, |
|
"loss": 0.9274, |
|
"step": 1227 |
|
}, |
|
{ |
|
"epoch": 0.24080792234532797, |
|
"grad_norm": 6.234400749206543, |
|
"learning_rate": 4.74458053191813e-06, |
|
"loss": 1.9066, |
|
"step": 1228 |
|
}, |
|
{ |
|
"epoch": 0.24100402000196097, |
|
"grad_norm": 6.2309441566467285, |
|
"learning_rate": 4.674060676301528e-06, |
|
"loss": 1.2137, |
|
"step": 1229 |
|
}, |
|
{ |
|
"epoch": 0.24120011765859398, |
|
"grad_norm": 6.614995956420898, |
|
"learning_rate": 4.604056286590497e-06, |
|
"loss": 2.1118, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.24139621531522698, |
|
"grad_norm": 7.630300045013428, |
|
"learning_rate": 4.534567741327112e-06, |
|
"loss": 1.028, |
|
"step": 1231 |
|
}, |
|
{ |
|
"epoch": 0.24159231297185998, |
|
"grad_norm": 19.701765060424805, |
|
"learning_rate": 4.465595416264012e-06, |
|
"loss": 2.3872, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 0.241788410628493, |
|
"grad_norm": 9.740580558776855, |
|
"learning_rate": 4.397139684362462e-06, |
|
"loss": 1.8809, |
|
"step": 1233 |
|
}, |
|
{ |
|
"epoch": 0.241984508285126, |
|
"grad_norm": 5.803571701049805, |
|
"learning_rate": 4.329200915790288e-06, |
|
"loss": 2.1341, |
|
"step": 1234 |
|
}, |
|
{ |
|
"epoch": 0.242180605941759, |
|
"grad_norm": 5.827723503112793, |
|
"learning_rate": 4.261779477919892e-06, |
|
"loss": 0.8315, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 0.242376703598392, |
|
"grad_norm": 11.323637008666992, |
|
"learning_rate": 4.194875735326253e-06, |
|
"loss": 3.126, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 0.242572801255025, |
|
"grad_norm": 6.157869815826416, |
|
"learning_rate": 4.12849004978495e-06, |
|
"loss": 1.5295, |
|
"step": 1237 |
|
}, |
|
{ |
|
"epoch": 0.242768898911658, |
|
"grad_norm": 3.730557680130005, |
|
"learning_rate": 4.062622780270253e-06, |
|
"loss": 0.8116, |
|
"step": 1238 |
|
}, |
|
{ |
|
"epoch": 0.24296499656829101, |
|
"grad_norm": 7.499046325683594, |
|
"learning_rate": 3.9972742829531225e-06, |
|
"loss": 1.5176, |
|
"step": 1239 |
|
}, |
|
{ |
|
"epoch": 0.24316109422492402, |
|
"grad_norm": 8.390069007873535, |
|
"learning_rate": 3.932444911199318e-06, |
|
"loss": 0.7114, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.24335719188155702, |
|
"grad_norm": 3.6422150135040283, |
|
"learning_rate": 3.8681350155674315e-06, |
|
"loss": 0.8022, |
|
"step": 1241 |
|
}, |
|
{ |
|
"epoch": 0.24355328953819003, |
|
"grad_norm": 11.20263957977295, |
|
"learning_rate": 3.8043449438071256e-06, |
|
"loss": 1.6755, |
|
"step": 1242 |
|
}, |
|
{ |
|
"epoch": 0.24374938719482303, |
|
"grad_norm": 8.747391700744629, |
|
"learning_rate": 3.7410750408571005e-06, |
|
"loss": 2.4434, |
|
"step": 1243 |
|
}, |
|
{ |
|
"epoch": 0.24394548485145603, |
|
"grad_norm": 4.528903961181641, |
|
"learning_rate": 3.678325648843306e-06, |
|
"loss": 1.1245, |
|
"step": 1244 |
|
}, |
|
{ |
|
"epoch": 0.24414158250808904, |
|
"grad_norm": 3.3688809871673584, |
|
"learning_rate": 3.6160971070770654e-06, |
|
"loss": 1.2526, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 0.24433768016472204, |
|
"grad_norm": 7.626687049865723, |
|
"learning_rate": 3.5543897520533e-06, |
|
"loss": 2.2074, |
|
"step": 1246 |
|
}, |
|
{ |
|
"epoch": 0.24453377782135505, |
|
"grad_norm": 6.176375389099121, |
|
"learning_rate": 3.4932039174486174e-06, |
|
"loss": 1.9458, |
|
"step": 1247 |
|
}, |
|
{ |
|
"epoch": 0.24472987547798805, |
|
"grad_norm": 3.6054394245147705, |
|
"learning_rate": 3.432539934119572e-06, |
|
"loss": 1.6104, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 0.24492597313462103, |
|
"grad_norm": 4.255305767059326, |
|
"learning_rate": 3.372398130100851e-06, |
|
"loss": 1.4093, |
|
"step": 1249 |
|
}, |
|
{ |
|
"epoch": 0.24512207079125403, |
|
"grad_norm": 7.934302806854248, |
|
"learning_rate": 3.3127788306035134e-06, |
|
"loss": 1.5826, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.24531816844788704, |
|
"grad_norm": 3.8543338775634766, |
|
"learning_rate": 3.253682358013244e-06, |
|
"loss": 0.5287, |
|
"step": 1251 |
|
}, |
|
{ |
|
"epoch": 0.24551426610452004, |
|
"grad_norm": 5.766990661621094, |
|
"learning_rate": 3.1951090318885434e-06, |
|
"loss": 2.0189, |
|
"step": 1252 |
|
}, |
|
{ |
|
"epoch": 0.24571036376115304, |
|
"grad_norm": 5.633528232574463, |
|
"learning_rate": 3.1370591689590777e-06, |
|
"loss": 1.1827, |
|
"step": 1253 |
|
}, |
|
{ |
|
"epoch": 0.24590646141778605, |
|
"grad_norm": 4.339439868927002, |
|
"learning_rate": 3.0795330831239313e-06, |
|
"loss": 1.5764, |
|
"step": 1254 |
|
}, |
|
{ |
|
"epoch": 0.24610255907441905, |
|
"grad_norm": 7.025827407836914, |
|
"learning_rate": 3.022531085449931e-06, |
|
"loss": 1.94, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 0.24629865673105206, |
|
"grad_norm": 7.727778434753418, |
|
"learning_rate": 2.966053484169917e-06, |
|
"loss": 1.6985, |
|
"step": 1256 |
|
}, |
|
{ |
|
"epoch": 0.24649475438768506, |
|
"grad_norm": 4.980061054229736, |
|
"learning_rate": 2.9101005846811304e-06, |
|
"loss": 0.6095, |
|
"step": 1257 |
|
}, |
|
{ |
|
"epoch": 0.24669085204431807, |
|
"grad_norm": 9.262031555175781, |
|
"learning_rate": 2.854672689543514e-06, |
|
"loss": 1.9517, |
|
"step": 1258 |
|
}, |
|
{ |
|
"epoch": 0.24688694970095107, |
|
"grad_norm": 10.269953727722168, |
|
"learning_rate": 2.7997700984781272e-06, |
|
"loss": 1.8526, |
|
"step": 1259 |
|
}, |
|
{ |
|
"epoch": 0.24708304735758407, |
|
"grad_norm": 5.118125915527344, |
|
"learning_rate": 2.745393108365457e-06, |
|
"loss": 1.2967, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.24727914501421708, |
|
"grad_norm": 7.1361403465271, |
|
"learning_rate": 2.6915420132439085e-06, |
|
"loss": 1.9573, |
|
"step": 1261 |
|
}, |
|
{ |
|
"epoch": 0.24747524267085008, |
|
"grad_norm": 5.416757583618164, |
|
"learning_rate": 2.638217104308127e-06, |
|
"loss": 1.2684, |
|
"step": 1262 |
|
}, |
|
{ |
|
"epoch": 0.24767134032748309, |
|
"grad_norm": 5.381096363067627, |
|
"learning_rate": 2.585418669907458e-06, |
|
"loss": 0.922, |
|
"step": 1263 |
|
}, |
|
{ |
|
"epoch": 0.2478674379841161, |
|
"grad_norm": 4.663347244262695, |
|
"learning_rate": 2.5331469955443778e-06, |
|
"loss": 0.8759, |
|
"step": 1264 |
|
}, |
|
{ |
|
"epoch": 0.2480635356407491, |
|
"grad_norm": 12.182421684265137, |
|
"learning_rate": 2.4814023638729757e-06, |
|
"loss": 0.8895, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 0.2482596332973821, |
|
"grad_norm": 6.225351333618164, |
|
"learning_rate": 2.430185054697409e-06, |
|
"loss": 1.8041, |
|
"step": 1266 |
|
}, |
|
{ |
|
"epoch": 0.2484557309540151, |
|
"grad_norm": 9.036142349243164, |
|
"learning_rate": 2.3794953449703837e-06, |
|
"loss": 2.0539, |
|
"step": 1267 |
|
}, |
|
{ |
|
"epoch": 0.2486518286106481, |
|
"grad_norm": 6.467651844024658, |
|
"learning_rate": 2.3293335087916314e-06, |
|
"loss": 1.5472, |
|
"step": 1268 |
|
}, |
|
{ |
|
"epoch": 0.2488479262672811, |
|
"grad_norm": 7.765258312225342, |
|
"learning_rate": 2.279699817406533e-06, |
|
"loss": 1.8689, |
|
"step": 1269 |
|
}, |
|
{ |
|
"epoch": 0.24904402392391412, |
|
"grad_norm": 9.24152946472168, |
|
"learning_rate": 2.230594539204489e-06, |
|
"loss": 1.9637, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.24924012158054712, |
|
"grad_norm": 4.302461624145508, |
|
"learning_rate": 2.1820179397176287e-06, |
|
"loss": 0.8332, |
|
"step": 1271 |
|
}, |
|
{ |
|
"epoch": 0.24943621923718012, |
|
"grad_norm": 5.98826789855957, |
|
"learning_rate": 2.133970281619246e-06, |
|
"loss": 3.038, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 0.24963231689381313, |
|
"grad_norm": 5.860217094421387, |
|
"learning_rate": 2.0864518247224797e-06, |
|
"loss": 1.7476, |
|
"step": 1273 |
|
}, |
|
{ |
|
"epoch": 0.24982841455044613, |
|
"grad_norm": 7.621738910675049, |
|
"learning_rate": 2.039462825978822e-06, |
|
"loss": 1.7148, |
|
"step": 1274 |
|
}, |
|
{ |
|
"epoch": 0.2500245122070791, |
|
"grad_norm": 6.9399614334106445, |
|
"learning_rate": 1.9930035394768233e-06, |
|
"loss": 1.3601, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.25022060986371214, |
|
"grad_norm": 17.67597007751465, |
|
"learning_rate": 1.947074216440592e-06, |
|
"loss": 3.0125, |
|
"step": 1276 |
|
}, |
|
{ |
|
"epoch": 0.2504167075203451, |
|
"grad_norm": 4.356191158294678, |
|
"learning_rate": 1.9016751052285953e-06, |
|
"loss": 1.0786, |
|
"step": 1277 |
|
}, |
|
{ |
|
"epoch": 0.25061280517697815, |
|
"grad_norm": 5.882324695587158, |
|
"learning_rate": 1.8568064513321715e-06, |
|
"loss": 1.2133, |
|
"step": 1278 |
|
}, |
|
{ |
|
"epoch": 0.2508089028336111, |
|
"grad_norm": 4.599434852600098, |
|
"learning_rate": 1.8124684973742534e-06, |
|
"loss": 1.3988, |
|
"step": 1279 |
|
}, |
|
{ |
|
"epoch": 0.25100500049024416, |
|
"grad_norm": 10.744729995727539, |
|
"learning_rate": 1.768661483108136e-06, |
|
"loss": 2.4072, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.25120109814687713, |
|
"grad_norm": 4.716323375701904, |
|
"learning_rate": 1.7253856454160333e-06, |
|
"loss": 2.1812, |
|
"step": 1281 |
|
}, |
|
{ |
|
"epoch": 0.25139719580351017, |
|
"grad_norm": 3.4088003635406494, |
|
"learning_rate": 1.682641218307901e-06, |
|
"loss": 0.8226, |
|
"step": 1282 |
|
}, |
|
{ |
|
"epoch": 0.25159329346014314, |
|
"grad_norm": 5.354026794433594, |
|
"learning_rate": 1.640428432920138e-06, |
|
"loss": 1.3017, |
|
"step": 1283 |
|
}, |
|
{ |
|
"epoch": 0.2517893911167762, |
|
"grad_norm": 4.884855270385742, |
|
"learning_rate": 1.5987475175143651e-06, |
|
"loss": 0.6766, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 0.25198548877340915, |
|
"grad_norm": 8.052160263061523, |
|
"learning_rate": 1.557598697476148e-06, |
|
"loss": 2.2202, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 0.2521815864300422, |
|
"grad_norm": 5.809813976287842, |
|
"learning_rate": 1.5169821953137875e-06, |
|
"loss": 1.1507, |
|
"step": 1286 |
|
}, |
|
{ |
|
"epoch": 0.25237768408667516, |
|
"grad_norm": 5.726134300231934, |
|
"learning_rate": 1.47689823065712e-06, |
|
"loss": 1.3755, |
|
"step": 1287 |
|
}, |
|
{ |
|
"epoch": 0.2525737817433082, |
|
"grad_norm": 6.9269022941589355, |
|
"learning_rate": 1.4373470202563855e-06, |
|
"loss": 1.9541, |
|
"step": 1288 |
|
}, |
|
{ |
|
"epoch": 0.25276987939994117, |
|
"grad_norm": 6.82175874710083, |
|
"learning_rate": 1.398328777980973e-06, |
|
"loss": 0.9483, |
|
"step": 1289 |
|
}, |
|
{ |
|
"epoch": 0.2529659770565742, |
|
"grad_norm": 5.6992058753967285, |
|
"learning_rate": 1.3598437148182652e-06, |
|
"loss": 1.8013, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.2531620747132072, |
|
"grad_norm": 6.446459770202637, |
|
"learning_rate": 1.3218920388725853e-06, |
|
"loss": 1.059, |
|
"step": 1291 |
|
}, |
|
{ |
|
"epoch": 0.2533581723698402, |
|
"grad_norm": 5.6021223068237305, |
|
"learning_rate": 1.2844739553640073e-06, |
|
"loss": 2.3983, |
|
"step": 1292 |
|
}, |
|
{ |
|
"epoch": 0.2535542700264732, |
|
"grad_norm": 5.945677757263184, |
|
"learning_rate": 1.2475896666272136e-06, |
|
"loss": 1.3391, |
|
"step": 1293 |
|
}, |
|
{ |
|
"epoch": 0.25375036768310616, |
|
"grad_norm": 8.367269515991211, |
|
"learning_rate": 1.2112393721104843e-06, |
|
"loss": 0.9355, |
|
"step": 1294 |
|
}, |
|
{ |
|
"epoch": 0.2539464653397392, |
|
"grad_norm": 4.452106952667236, |
|
"learning_rate": 1.1754232683745537e-06, |
|
"loss": 1.3174, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 0.25414256299637217, |
|
"grad_norm": 5.392490863800049, |
|
"learning_rate": 1.1401415490916e-06, |
|
"loss": 0.7378, |
|
"step": 1296 |
|
}, |
|
{ |
|
"epoch": 0.2543386606530052, |
|
"grad_norm": 7.21303129196167, |
|
"learning_rate": 1.1053944050441245e-06, |
|
"loss": 2.9174, |
|
"step": 1297 |
|
}, |
|
{ |
|
"epoch": 0.2545347583096382, |
|
"grad_norm": 8.784597396850586, |
|
"learning_rate": 1.0711820241240067e-06, |
|
"loss": 3.5646, |
|
"step": 1298 |
|
}, |
|
{ |
|
"epoch": 0.2547308559662712, |
|
"grad_norm": 6.628826141357422, |
|
"learning_rate": 1.0375045913314063e-06, |
|
"loss": 1.2581, |
|
"step": 1299 |
|
}, |
|
{ |
|
"epoch": 0.2549269536229042, |
|
"grad_norm": 11.671515464782715, |
|
"learning_rate": 1.0043622887738413e-06, |
|
"loss": 2.1951, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.2551230512795372, |
|
"grad_norm": 4.008133888244629, |
|
"learning_rate": 9.717552956651331e-07, |
|
"loss": 2.3149, |
|
"step": 1301 |
|
}, |
|
{ |
|
"epoch": 0.2553191489361702, |
|
"grad_norm": 8.170031547546387, |
|
"learning_rate": 9.396837883244746e-07, |
|
"loss": 1.576, |
|
"step": 1302 |
|
}, |
|
{ |
|
"epoch": 0.2555152465928032, |
|
"grad_norm": 7.534716606140137, |
|
"learning_rate": 9.081479401754966e-07, |
|
"loss": 2.1834, |
|
"step": 1303 |
|
}, |
|
{ |
|
"epoch": 0.2557113442494362, |
|
"grad_norm": 4.619508266448975, |
|
"learning_rate": 8.771479217452694e-07, |
|
"loss": 1.6227, |
|
"step": 1304 |
|
}, |
|
{ |
|
"epoch": 0.25590744190606923, |
|
"grad_norm": 4.464870452880859, |
|
"learning_rate": 8.466839006634364e-07, |
|
"loss": 0.6817, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 0.2561035395627022, |
|
"grad_norm": 7.0456743240356445, |
|
"learning_rate": 8.167560416612596e-07, |
|
"loss": 2.6702, |
|
"step": 1306 |
|
}, |
|
{ |
|
"epoch": 0.25629963721933524, |
|
"grad_norm": 3.647512674331665, |
|
"learning_rate": 7.873645065708091e-07, |
|
"loss": 2.1032, |
|
"step": 1307 |
|
}, |
|
{ |
|
"epoch": 0.2564957348759682, |
|
"grad_norm": 10.428240776062012, |
|
"learning_rate": 7.585094543239857e-07, |
|
"loss": 1.6973, |
|
"step": 1308 |
|
}, |
|
{ |
|
"epoch": 0.25669183253260125, |
|
"grad_norm": 4.8686299324035645, |
|
"learning_rate": 7.301910409517221e-07, |
|
"loss": 1.8198, |
|
"step": 1309 |
|
}, |
|
{ |
|
"epoch": 0.2568879301892342, |
|
"grad_norm": 13.215803146362305, |
|
"learning_rate": 7.024094195831277e-07, |
|
"loss": 1.791, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.25708402784586726, |
|
"grad_norm": 7.418087959289551, |
|
"learning_rate": 6.751647404446781e-07, |
|
"loss": 1.0254, |
|
"step": 1311 |
|
}, |
|
{ |
|
"epoch": 0.25728012550250023, |
|
"grad_norm": 3.0393893718719482, |
|
"learning_rate": 6.484571508593718e-07, |
|
"loss": 0.429, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 0.25747622315913327, |
|
"grad_norm": 8.804031372070312, |
|
"learning_rate": 6.222867952459299e-07, |
|
"loss": 1.6825, |
|
"step": 1313 |
|
}, |
|
{ |
|
"epoch": 0.25767232081576624, |
|
"grad_norm": 13.433954238891602, |
|
"learning_rate": 5.966538151180645e-07, |
|
"loss": 1.9529, |
|
"step": 1314 |
|
}, |
|
{ |
|
"epoch": 0.2578684184723993, |
|
"grad_norm": 5.364668369293213, |
|
"learning_rate": 5.715583490836673e-07, |
|
"loss": 1.2584, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 0.25806451612903225, |
|
"grad_norm": 4.066149711608887, |
|
"learning_rate": 5.470005328440664e-07, |
|
"loss": 1.6191, |
|
"step": 1316 |
|
}, |
|
{ |
|
"epoch": 0.2582606137856653, |
|
"grad_norm": 8.135047912597656, |
|
"learning_rate": 5.22980499193304e-07, |
|
"loss": 2.0681, |
|
"step": 1317 |
|
}, |
|
{ |
|
"epoch": 0.25845671144229826, |
|
"grad_norm": 3.4767017364501953, |
|
"learning_rate": 4.994983780174156e-07, |
|
"loss": 1.0348, |
|
"step": 1318 |
|
}, |
|
{ |
|
"epoch": 0.2586528090989313, |
|
"grad_norm": 6.77761173248291, |
|
"learning_rate": 4.7655429629372973e-07, |
|
"loss": 0.7133, |
|
"step": 1319 |
|
}, |
|
{ |
|
"epoch": 0.25884890675556427, |
|
"grad_norm": 4.188867092132568, |
|
"learning_rate": 4.5414837809018007e-07, |
|
"loss": 1.6745, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.2590450044121973, |
|
"grad_norm": 3.1510398387908936, |
|
"learning_rate": 4.322807445646171e-07, |
|
"loss": 0.7392, |
|
"step": 1321 |
|
}, |
|
{ |
|
"epoch": 0.2592411020688303, |
|
"grad_norm": 14.09803581237793, |
|
"learning_rate": 4.1095151396418617e-07, |
|
"loss": 2.5941, |
|
"step": 1322 |
|
}, |
|
{ |
|
"epoch": 0.2594371997254633, |
|
"grad_norm": 7.8199028968811035, |
|
"learning_rate": 3.9016080162466164e-07, |
|
"loss": 1.2837, |
|
"step": 1323 |
|
}, |
|
{ |
|
"epoch": 0.2596332973820963, |
|
"grad_norm": 4.440105438232422, |
|
"learning_rate": 3.699087199698359e-07, |
|
"loss": 1.7956, |
|
"step": 1324 |
|
}, |
|
{ |
|
"epoch": 0.25982939503872926, |
|
"grad_norm": 7.9481048583984375, |
|
"learning_rate": 3.50195378510898e-07, |
|
"loss": 0.7979, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.2600254926953623, |
|
"grad_norm": 6.204482555389404, |
|
"learning_rate": 3.310208838458562e-07, |
|
"loss": 2.1294, |
|
"step": 1326 |
|
}, |
|
{ |
|
"epoch": 0.26022159035199527, |
|
"grad_norm": 6.299440860748291, |
|
"learning_rate": 3.1238533965897156e-07, |
|
"loss": 1.4474, |
|
"step": 1327 |
|
}, |
|
{ |
|
"epoch": 0.2604176880086283, |
|
"grad_norm": 5.203184604644775, |
|
"learning_rate": 2.9428884672015876e-07, |
|
"loss": 1.7164, |
|
"step": 1328 |
|
}, |
|
{ |
|
"epoch": 0.2606137856652613, |
|
"grad_norm": 5.018675804138184, |
|
"learning_rate": 2.7673150288447526e-07, |
|
"loss": 1.0184, |
|
"step": 1329 |
|
}, |
|
{ |
|
"epoch": 0.2608098833218943, |
|
"grad_norm": 5.618327617645264, |
|
"learning_rate": 2.597134030915771e-07, |
|
"loss": 1.4616, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.2610059809785273, |
|
"grad_norm": 10.888134002685547, |
|
"learning_rate": 2.432346393652196e-07, |
|
"loss": 2.6353, |
|
"step": 1331 |
|
}, |
|
{ |
|
"epoch": 0.2612020786351603, |
|
"grad_norm": 4.126128196716309, |
|
"learning_rate": 2.2729530081273542e-07, |
|
"loss": 1.0003, |
|
"step": 1332 |
|
}, |
|
{ |
|
"epoch": 0.2613981762917933, |
|
"grad_norm": 7.105251789093018, |
|
"learning_rate": 2.118954736245682e-07, |
|
"loss": 1.4611, |
|
"step": 1333 |
|
}, |
|
{ |
|
"epoch": 0.2615942739484263, |
|
"grad_norm": 4.438936233520508, |
|
"learning_rate": 1.9703524107382855e-07, |
|
"loss": 1.0069, |
|
"step": 1334 |
|
}, |
|
{ |
|
"epoch": 0.2617903716050593, |
|
"grad_norm": 8.27482795715332, |
|
"learning_rate": 1.8271468351579446e-07, |
|
"loss": 2.8264, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 0.26198646926169233, |
|
"grad_norm": 7.972108840942383, |
|
"learning_rate": 1.6893387838750053e-07, |
|
"loss": 2.4485, |
|
"step": 1336 |
|
}, |
|
{ |
|
"epoch": 0.2621825669183253, |
|
"grad_norm": 5.920525074005127, |
|
"learning_rate": 1.5569290020736039e-07, |
|
"loss": 1.4498, |
|
"step": 1337 |
|
}, |
|
{ |
|
"epoch": 0.26237866457495834, |
|
"grad_norm": 5.699307918548584, |
|
"learning_rate": 1.429918205746672e-07, |
|
"loss": 2.0144, |
|
"step": 1338 |
|
}, |
|
{ |
|
"epoch": 0.2625747622315913, |
|
"grad_norm": 2.4948651790618896, |
|
"learning_rate": 1.308307081693272e-07, |
|
"loss": 0.7282, |
|
"step": 1339 |
|
}, |
|
{ |
|
"epoch": 0.26277085988822435, |
|
"grad_norm": 9.36095142364502, |
|
"learning_rate": 1.192096287513711e-07, |
|
"loss": 2.026, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.2629669575448573, |
|
"grad_norm": 11.627699851989746, |
|
"learning_rate": 1.0812864516067667e-07, |
|
"loss": 1.6524, |
|
"step": 1341 |
|
}, |
|
{ |
|
"epoch": 0.26316305520149036, |
|
"grad_norm": 4.287498474121094, |
|
"learning_rate": 9.758781731661337e-08, |
|
"loss": 1.1361, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 0.26335915285812334, |
|
"grad_norm": 3.8631551265716553, |
|
"learning_rate": 8.758720221768713e-08, |
|
"loss": 1.0284, |
|
"step": 1343 |
|
}, |
|
{ |
|
"epoch": 0.26355525051475637, |
|
"grad_norm": 7.8086323738098145, |
|
"learning_rate": 7.812685394127384e-08, |
|
"loss": 1.3352, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 0.26375134817138934, |
|
"grad_norm": 7.071966171264648, |
|
"learning_rate": 6.920682364330855e-08, |
|
"loss": 1.783, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 0.2639474458280224, |
|
"grad_norm": 6.961006164550781, |
|
"learning_rate": 6.082715955800789e-08, |
|
"loss": 0.666, |
|
"step": 1346 |
|
}, |
|
{ |
|
"epoch": 0.26414354348465535, |
|
"grad_norm": 10.395101547241211, |
|
"learning_rate": 5.298790699758138e-08, |
|
"loss": 1.6436, |
|
"step": 1347 |
|
}, |
|
{ |
|
"epoch": 0.2643396411412884, |
|
"grad_norm": 6.384310722351074, |
|
"learning_rate": 4.5689108352053867e-08, |
|
"loss": 1.5711, |
|
"step": 1348 |
|
}, |
|
{ |
|
"epoch": 0.26453573879792136, |
|
"grad_norm": 5.545608997344971, |
|
"learning_rate": 3.893080308898789e-08, |
|
"loss": 1.2522, |
|
"step": 1349 |
|
}, |
|
{ |
|
"epoch": 0.2647318364545544, |
|
"grad_norm": 17.074024200439453, |
|
"learning_rate": 3.271302775325058e-08, |
|
"loss": 3.1042, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.26492793411118737, |
|
"grad_norm": 7.239038944244385, |
|
"learning_rate": 2.7035815966891532e-08, |
|
"loss": 1.658, |
|
"step": 1351 |
|
}, |
|
{ |
|
"epoch": 0.2651240317678204, |
|
"grad_norm": 6.944170951843262, |
|
"learning_rate": 2.1899198428876333e-08, |
|
"loss": 1.5184, |
|
"step": 1352 |
|
}, |
|
{ |
|
"epoch": 0.2653201294244534, |
|
"grad_norm": 5.548795700073242, |
|
"learning_rate": 1.730320291498666e-08, |
|
"loss": 1.6825, |
|
"step": 1353 |
|
}, |
|
{ |
|
"epoch": 0.26551622708108635, |
|
"grad_norm": 8.901213645935059, |
|
"learning_rate": 1.3247854277609328e-08, |
|
"loss": 1.1894, |
|
"step": 1354 |
|
}, |
|
{ |
|
"epoch": 0.2657123247377194, |
|
"grad_norm": 9.556551933288574, |
|
"learning_rate": 9.73317444566968e-09, |
|
"loss": 2.2414, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 0.26590842239435236, |
|
"grad_norm": 6.794140338897705, |
|
"learning_rate": 6.759182424453947e-09, |
|
"loss": 1.3999, |
|
"step": 1356 |
|
}, |
|
{ |
|
"epoch": 0.2661045200509854, |
|
"grad_norm": 5.626242637634277, |
|
"learning_rate": 4.325894295553745e-09, |
|
"loss": 1.5497, |
|
"step": 1357 |
|
}, |
|
{ |
|
"epoch": 0.26630061770761837, |
|
"grad_norm": 6.506687164306641, |
|
"learning_rate": 2.433323216721739e-09, |
|
"loss": 2.8993, |
|
"step": 1358 |
|
}, |
|
{ |
|
"epoch": 0.2664967153642514, |
|
"grad_norm": 5.048938274383545, |
|
"learning_rate": 1.081479421871645e-09, |
|
"loss": 1.7226, |
|
"step": 1359 |
|
}, |
|
{ |
|
"epoch": 0.2666928130208844, |
|
"grad_norm": 5.443484306335449, |
|
"learning_rate": 2.7037022096720876e-10, |
|
"loss": 0.9488, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.2668889106775174, |
|
"grad_norm": 17.070817947387695, |
|
"learning_rate": 0.0, |
|
"loss": 2.8164, |
|
"step": 1361 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 1361, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 341, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.914693202093015e+17, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|