|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 200, |
|
"global_step": 13939, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 7.174115790228854e-05, |
|
"grad_norm": 0.201171875, |
|
"learning_rate": 9.999282588420978e-06, |
|
"loss": 0.8366, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0017935289475572136, |
|
"grad_norm": 0.7421875, |
|
"learning_rate": 9.982064710524428e-06, |
|
"loss": 0.9723, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.0035870578951144273, |
|
"grad_norm": 0.166015625, |
|
"learning_rate": 9.964129421048858e-06, |
|
"loss": 0.9534, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0053805868426716405, |
|
"grad_norm": 0.265625, |
|
"learning_rate": 9.946194131573285e-06, |
|
"loss": 0.9673, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.007174115790228855, |
|
"grad_norm": 0.7109375, |
|
"learning_rate": 9.928258842097713e-06, |
|
"loss": 0.9947, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.008967644737786069, |
|
"grad_norm": 0.671875, |
|
"learning_rate": 9.91032355262214e-06, |
|
"loss": 0.87, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.010761173685343281, |
|
"grad_norm": 0.1923828125, |
|
"learning_rate": 9.892388263146568e-06, |
|
"loss": 0.9713, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.012554702632900495, |
|
"grad_norm": 1.4921875, |
|
"learning_rate": 9.874452973670995e-06, |
|
"loss": 0.9759, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.01434823158045771, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 9.856517684195423e-06, |
|
"loss": 1.0042, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.01614176052801492, |
|
"grad_norm": 0.1416015625, |
|
"learning_rate": 9.83858239471985e-06, |
|
"loss": 1.03, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.017935289475572137, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 9.82064710524428e-06, |
|
"loss": 0.9579, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01972881842312935, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 9.802711815768707e-06, |
|
"loss": 0.894, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.021522347370686562, |
|
"grad_norm": 0.46875, |
|
"learning_rate": 9.784776526293135e-06, |
|
"loss": 1.0126, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.023315876318243778, |
|
"grad_norm": 0.1669921875, |
|
"learning_rate": 9.766841236817564e-06, |
|
"loss": 0.9455, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.02510940526580099, |
|
"grad_norm": 0.3671875, |
|
"learning_rate": 9.748905947341992e-06, |
|
"loss": 1.0228, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.026902934213358203, |
|
"grad_norm": 0.890625, |
|
"learning_rate": 9.73097065786642e-06, |
|
"loss": 1.0725, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.02869646316091542, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 9.713035368390847e-06, |
|
"loss": 0.8015, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.03048999210847263, |
|
"grad_norm": 0.1865234375, |
|
"learning_rate": 9.695100078915274e-06, |
|
"loss": 1.0028, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.03228352105602984, |
|
"grad_norm": 0.3984375, |
|
"learning_rate": 9.677164789439702e-06, |
|
"loss": 0.9588, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.03407705000358706, |
|
"grad_norm": 0.1748046875, |
|
"learning_rate": 9.65922949996413e-06, |
|
"loss": 1.076, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.035870578951144275, |
|
"grad_norm": 0.177734375, |
|
"learning_rate": 9.641294210488557e-06, |
|
"loss": 1.0304, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.037664107898701484, |
|
"grad_norm": 0.26171875, |
|
"learning_rate": 9.623358921012986e-06, |
|
"loss": 0.9989, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.0394576368462587, |
|
"grad_norm": 0.1640625, |
|
"learning_rate": 9.605423631537414e-06, |
|
"loss": 0.9504, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.041251165793815915, |
|
"grad_norm": 0.1943359375, |
|
"learning_rate": 9.587488342061841e-06, |
|
"loss": 0.9648, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.043044694741373124, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 9.569553052586269e-06, |
|
"loss": 1.0331, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.04483822368893034, |
|
"grad_norm": 0.1650390625, |
|
"learning_rate": 9.551617763110698e-06, |
|
"loss": 0.9401, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.046631752636487556, |
|
"grad_norm": 0.39453125, |
|
"learning_rate": 9.533682473635126e-06, |
|
"loss": 1.0657, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.048425281584044765, |
|
"grad_norm": 0.3515625, |
|
"learning_rate": 9.515747184159553e-06, |
|
"loss": 0.9503, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.05021881053160198, |
|
"grad_norm": 0.2734375, |
|
"learning_rate": 9.497811894683981e-06, |
|
"loss": 1.0092, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.052012339479159196, |
|
"grad_norm": 0.2890625, |
|
"learning_rate": 9.479876605208408e-06, |
|
"loss": 1.113, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.053805868426716405, |
|
"grad_norm": 0.279296875, |
|
"learning_rate": 9.461941315732836e-06, |
|
"loss": 0.9051, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.05559939737427362, |
|
"grad_norm": 0.671875, |
|
"learning_rate": 9.444006026257264e-06, |
|
"loss": 1.0507, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.05739292632183084, |
|
"grad_norm": 0.1728515625, |
|
"learning_rate": 9.426070736781693e-06, |
|
"loss": 0.9657, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.059186455269388046, |
|
"grad_norm": 0.2353515625, |
|
"learning_rate": 9.40813544730612e-06, |
|
"loss": 0.9106, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.06097998421694526, |
|
"grad_norm": 0.220703125, |
|
"learning_rate": 9.390200157830548e-06, |
|
"loss": 0.9593, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.06277351316450247, |
|
"grad_norm": 0.2236328125, |
|
"learning_rate": 9.372264868354975e-06, |
|
"loss": 1.1362, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.06456704211205969, |
|
"grad_norm": 0.375, |
|
"learning_rate": 9.354329578879405e-06, |
|
"loss": 0.9396, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.0663605710596169, |
|
"grad_norm": 0.6484375, |
|
"learning_rate": 9.336394289403832e-06, |
|
"loss": 1.0783, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.06815410000717412, |
|
"grad_norm": 0.1982421875, |
|
"learning_rate": 9.31845899992826e-06, |
|
"loss": 0.9663, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.06994762895473133, |
|
"grad_norm": 1.65625, |
|
"learning_rate": 9.300523710452687e-06, |
|
"loss": 0.914, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.07174115790228855, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 9.282588420977115e-06, |
|
"loss": 1.0177, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.07353468684984575, |
|
"grad_norm": 0.2138671875, |
|
"learning_rate": 9.264653131501543e-06, |
|
"loss": 1.0057, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.07532821579740297, |
|
"grad_norm": 0.2392578125, |
|
"learning_rate": 9.24671784202597e-06, |
|
"loss": 1.054, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.07712174474496018, |
|
"grad_norm": 0.16015625, |
|
"learning_rate": 9.2287825525504e-06, |
|
"loss": 0.9429, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.0789152736925174, |
|
"grad_norm": 0.609375, |
|
"learning_rate": 9.210847263074827e-06, |
|
"loss": 1.093, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.08070880264007461, |
|
"grad_norm": 0.1669921875, |
|
"learning_rate": 9.192911973599254e-06, |
|
"loss": 0.9702, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.08250233158763183, |
|
"grad_norm": 0.1650390625, |
|
"learning_rate": 9.174976684123682e-06, |
|
"loss": 0.9367, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.08429586053518903, |
|
"grad_norm": 0.224609375, |
|
"learning_rate": 9.157041394648111e-06, |
|
"loss": 0.987, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.08608938948274625, |
|
"grad_norm": 0.189453125, |
|
"learning_rate": 9.139106105172539e-06, |
|
"loss": 1.0192, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.08788291843030346, |
|
"grad_norm": 0.75, |
|
"learning_rate": 9.121170815696966e-06, |
|
"loss": 0.9659, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.08967644737786068, |
|
"grad_norm": 0.142578125, |
|
"learning_rate": 9.103235526221394e-06, |
|
"loss": 1.0774, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.0914699763254179, |
|
"grad_norm": 0.41015625, |
|
"learning_rate": 9.085300236745821e-06, |
|
"loss": 1.0287, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.09326350527297511, |
|
"grad_norm": 0.146484375, |
|
"learning_rate": 9.067364947270249e-06, |
|
"loss": 1.0198, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.09505703422053231, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 9.049429657794677e-06, |
|
"loss": 0.9995, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.09685056316808953, |
|
"grad_norm": 0.474609375, |
|
"learning_rate": 9.031494368319106e-06, |
|
"loss": 1.0788, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.09864409211564674, |
|
"grad_norm": 0.248046875, |
|
"learning_rate": 9.013559078843533e-06, |
|
"loss": 1.022, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.10043762106320396, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 8.995623789367961e-06, |
|
"loss": 1.1129, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.10223115001076118, |
|
"grad_norm": 0.142578125, |
|
"learning_rate": 8.977688499892389e-06, |
|
"loss": 1.1097, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.10402467895831839, |
|
"grad_norm": 0.271484375, |
|
"learning_rate": 8.959753210416818e-06, |
|
"loss": 0.9678, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.1058182079058756, |
|
"grad_norm": 0.181640625, |
|
"learning_rate": 8.941817920941245e-06, |
|
"loss": 0.9554, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.10761173685343281, |
|
"grad_norm": 0.2197265625, |
|
"learning_rate": 8.923882631465673e-06, |
|
"loss": 0.9818, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.10940526580099003, |
|
"grad_norm": 0.54296875, |
|
"learning_rate": 8.9059473419901e-06, |
|
"loss": 1.0429, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.11119879474854724, |
|
"grad_norm": 0.275390625, |
|
"learning_rate": 8.888012052514528e-06, |
|
"loss": 1.0533, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.11299232369610446, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 8.870076763038956e-06, |
|
"loss": 0.9546, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.11478585264366167, |
|
"grad_norm": 0.1796875, |
|
"learning_rate": 8.852141473563383e-06, |
|
"loss": 0.9579, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.11657938159121889, |
|
"grad_norm": 0.1689453125, |
|
"learning_rate": 8.834206184087812e-06, |
|
"loss": 1.1338, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.11837291053877609, |
|
"grad_norm": 0.2734375, |
|
"learning_rate": 8.81627089461224e-06, |
|
"loss": 1.0436, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.1201664394863333, |
|
"grad_norm": 0.458984375, |
|
"learning_rate": 8.798335605136667e-06, |
|
"loss": 1.0058, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 0.12195996843389052, |
|
"grad_norm": 0.28515625, |
|
"learning_rate": 8.780400315661095e-06, |
|
"loss": 1.0284, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.12375349738144774, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 8.762465026185524e-06, |
|
"loss": 0.9829, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 0.12554702632900494, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 8.744529736709952e-06, |
|
"loss": 0.8737, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.12734055527656216, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 8.72659444723438e-06, |
|
"loss": 1.0051, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 0.12913408422411937, |
|
"grad_norm": 0.29296875, |
|
"learning_rate": 8.708659157758807e-06, |
|
"loss": 0.9725, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.1309276131716766, |
|
"grad_norm": 0.1650390625, |
|
"learning_rate": 8.690723868283235e-06, |
|
"loss": 1.0045, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 0.1327211421192338, |
|
"grad_norm": 0.2080078125, |
|
"learning_rate": 8.672788578807662e-06, |
|
"loss": 1.1961, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.13451467106679102, |
|
"grad_norm": 0.13671875, |
|
"learning_rate": 8.65485328933209e-06, |
|
"loss": 0.8977, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.13630820001434824, |
|
"grad_norm": 0.126953125, |
|
"learning_rate": 8.636917999856519e-06, |
|
"loss": 0.83, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.13810172896190545, |
|
"grad_norm": 0.201171875, |
|
"learning_rate": 8.618982710380946e-06, |
|
"loss": 0.9642, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 0.13989525790946267, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 8.601047420905374e-06, |
|
"loss": 0.8958, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.14168878685701988, |
|
"grad_norm": 0.2109375, |
|
"learning_rate": 8.583112131429802e-06, |
|
"loss": 1.002, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 0.1434823158045771, |
|
"grad_norm": 0.2021484375, |
|
"learning_rate": 8.56517684195423e-06, |
|
"loss": 1.1167, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.1452758447521343, |
|
"grad_norm": 0.185546875, |
|
"learning_rate": 8.547241552478658e-06, |
|
"loss": 0.9423, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 0.1470693736996915, |
|
"grad_norm": 0.5703125, |
|
"learning_rate": 8.529306263003086e-06, |
|
"loss": 0.9908, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.14886290264724872, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 8.511370973527513e-06, |
|
"loss": 0.9776, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 0.15065643159480593, |
|
"grad_norm": 0.236328125, |
|
"learning_rate": 8.493435684051941e-06, |
|
"loss": 0.864, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.15244996054236315, |
|
"grad_norm": 3.96875, |
|
"learning_rate": 8.475500394576369e-06, |
|
"loss": 1.0042, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 0.15424348948992037, |
|
"grad_norm": 0.2890625, |
|
"learning_rate": 8.457565105100796e-06, |
|
"loss": 0.9348, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.15603701843747758, |
|
"grad_norm": 0.1865234375, |
|
"learning_rate": 8.439629815625225e-06, |
|
"loss": 0.8448, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 0.1578305473850348, |
|
"grad_norm": 0.26953125, |
|
"learning_rate": 8.421694526149653e-06, |
|
"loss": 1.0096, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.159624076332592, |
|
"grad_norm": 0.5234375, |
|
"learning_rate": 8.40375923667408e-06, |
|
"loss": 0.9438, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 0.16141760528014923, |
|
"grad_norm": 0.498046875, |
|
"learning_rate": 8.385823947198508e-06, |
|
"loss": 1.0227, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.16321113422770644, |
|
"grad_norm": 0.2353515625, |
|
"learning_rate": 8.367888657722937e-06, |
|
"loss": 1.02, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 0.16500466317526366, |
|
"grad_norm": 0.1630859375, |
|
"learning_rate": 8.349953368247365e-06, |
|
"loss": 0.947, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.16679819212282085, |
|
"grad_norm": 0.490234375, |
|
"learning_rate": 8.332018078771792e-06, |
|
"loss": 0.9727, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 0.16859172107037806, |
|
"grad_norm": 0.2490234375, |
|
"learning_rate": 8.31408278929622e-06, |
|
"loss": 0.9206, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.17038525001793528, |
|
"grad_norm": 0.16796875, |
|
"learning_rate": 8.296147499820648e-06, |
|
"loss": 1.0461, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 0.1721787789654925, |
|
"grad_norm": 0.134765625, |
|
"learning_rate": 8.278212210345075e-06, |
|
"loss": 1.0327, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.1739723079130497, |
|
"grad_norm": 0.1923828125, |
|
"learning_rate": 8.260276920869503e-06, |
|
"loss": 1.0652, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 0.17576583686060693, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 8.24234163139393e-06, |
|
"loss": 0.9325, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.17755936580816414, |
|
"grad_norm": 0.1689453125, |
|
"learning_rate": 8.22440634191836e-06, |
|
"loss": 1.0164, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 0.17935289475572136, |
|
"grad_norm": 0.1884765625, |
|
"learning_rate": 8.206471052442787e-06, |
|
"loss": 0.9482, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.18114642370327858, |
|
"grad_norm": 0.7578125, |
|
"learning_rate": 8.188535762967215e-06, |
|
"loss": 0.8935, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 0.1829399526508358, |
|
"grad_norm": 0.443359375, |
|
"learning_rate": 8.170600473491644e-06, |
|
"loss": 0.9094, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.184733481598393, |
|
"grad_norm": 0.5546875, |
|
"learning_rate": 8.152665184016071e-06, |
|
"loss": 0.9651, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 0.18652701054595022, |
|
"grad_norm": 0.220703125, |
|
"learning_rate": 8.134729894540499e-06, |
|
"loss": 0.9974, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.18832053949350744, |
|
"grad_norm": 0.55859375, |
|
"learning_rate": 8.116794605064927e-06, |
|
"loss": 1.0355, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 0.19011406844106463, |
|
"grad_norm": 0.55859375, |
|
"learning_rate": 8.098859315589354e-06, |
|
"loss": 0.9738, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.19190759738862184, |
|
"grad_norm": 0.408203125, |
|
"learning_rate": 8.080924026113782e-06, |
|
"loss": 0.9252, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 0.19370112633617906, |
|
"grad_norm": 0.234375, |
|
"learning_rate": 8.06298873663821e-06, |
|
"loss": 0.9504, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.19549465528373627, |
|
"grad_norm": 0.208984375, |
|
"learning_rate": 8.045053447162637e-06, |
|
"loss": 0.9233, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 0.1972881842312935, |
|
"grad_norm": 0.62890625, |
|
"learning_rate": 8.027118157687066e-06, |
|
"loss": 1.0145, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.1990817131788507, |
|
"grad_norm": 0.287109375, |
|
"learning_rate": 8.009182868211494e-06, |
|
"loss": 0.9941, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 0.20087524212640792, |
|
"grad_norm": 0.216796875, |
|
"learning_rate": 7.991247578735921e-06, |
|
"loss": 0.9517, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.20266877107396514, |
|
"grad_norm": 0.2265625, |
|
"learning_rate": 7.97331228926035e-06, |
|
"loss": 0.8408, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 0.20446230002152235, |
|
"grad_norm": 0.142578125, |
|
"learning_rate": 7.955376999784778e-06, |
|
"loss": 0.9418, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.20625582896907957, |
|
"grad_norm": 0.10107421875, |
|
"learning_rate": 7.937441710309205e-06, |
|
"loss": 0.9479, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 0.20804935791663678, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 7.919506420833633e-06, |
|
"loss": 1.1943, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.209842886864194, |
|
"grad_norm": 0.392578125, |
|
"learning_rate": 7.90157113135806e-06, |
|
"loss": 1.0299, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 0.2116364158117512, |
|
"grad_norm": 0.9453125, |
|
"learning_rate": 7.883635841882488e-06, |
|
"loss": 1.0215, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.2134299447593084, |
|
"grad_norm": 0.46484375, |
|
"learning_rate": 7.865700552406916e-06, |
|
"loss": 1.0463, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 0.21522347370686562, |
|
"grad_norm": 0.1318359375, |
|
"learning_rate": 7.847765262931343e-06, |
|
"loss": 0.9215, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.21701700265442284, |
|
"grad_norm": 0.1435546875, |
|
"learning_rate": 7.829829973455773e-06, |
|
"loss": 0.964, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 0.21881053160198005, |
|
"grad_norm": 0.15625, |
|
"learning_rate": 7.8118946839802e-06, |
|
"loss": 0.9525, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.22060406054953727, |
|
"grad_norm": 0.2294921875, |
|
"learning_rate": 7.793959394504628e-06, |
|
"loss": 1.1366, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 0.22239758949709448, |
|
"grad_norm": 0.2734375, |
|
"learning_rate": 7.776024105029057e-06, |
|
"loss": 1.0143, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.2241911184446517, |
|
"grad_norm": 0.494140625, |
|
"learning_rate": 7.758088815553484e-06, |
|
"loss": 1.0759, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 0.22598464739220891, |
|
"grad_norm": 0.1650390625, |
|
"learning_rate": 7.740153526077912e-06, |
|
"loss": 0.8999, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.22777817633976613, |
|
"grad_norm": 0.125, |
|
"learning_rate": 7.72221823660234e-06, |
|
"loss": 1.0623, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 0.22957170528732335, |
|
"grad_norm": 0.25, |
|
"learning_rate": 7.704282947126767e-06, |
|
"loss": 0.9954, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.23136523423488056, |
|
"grad_norm": 0.369140625, |
|
"learning_rate": 7.686347657651195e-06, |
|
"loss": 1.1111, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 0.23315876318243778, |
|
"grad_norm": 0.296875, |
|
"learning_rate": 7.668412368175622e-06, |
|
"loss": 0.8936, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.23495229212999497, |
|
"grad_norm": 0.279296875, |
|
"learning_rate": 7.65047707870005e-06, |
|
"loss": 0.9579, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 0.23674582107755218, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 7.632541789224479e-06, |
|
"loss": 1.1169, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.2385393500251094, |
|
"grad_norm": 0.29296875, |
|
"learning_rate": 7.614606499748907e-06, |
|
"loss": 1.0179, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 0.2403328789726666, |
|
"grad_norm": 1.0078125, |
|
"learning_rate": 7.596671210273334e-06, |
|
"loss": 0.9545, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.24212640792022383, |
|
"grad_norm": 1.0625, |
|
"learning_rate": 7.578735920797762e-06, |
|
"loss": 1.0669, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 0.24391993686778105, |
|
"grad_norm": 0.197265625, |
|
"learning_rate": 7.56080063132219e-06, |
|
"loss": 0.978, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.24571346581533826, |
|
"grad_norm": 0.34375, |
|
"learning_rate": 7.542865341846618e-06, |
|
"loss": 0.8955, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 0.24750699476289548, |
|
"grad_norm": 0.20703125, |
|
"learning_rate": 7.524930052371045e-06, |
|
"loss": 1.1202, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.2493005237104527, |
|
"grad_norm": 0.1826171875, |
|
"learning_rate": 7.5069947628954745e-06, |
|
"loss": 0.8992, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 0.2510940526580099, |
|
"grad_norm": 0.271484375, |
|
"learning_rate": 7.489059473419902e-06, |
|
"loss": 0.9247, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.2528875816055671, |
|
"grad_norm": 0.1884765625, |
|
"learning_rate": 7.47112418394433e-06, |
|
"loss": 0.9982, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 0.2546811105531243, |
|
"grad_norm": 0.37890625, |
|
"learning_rate": 7.453188894468757e-06, |
|
"loss": 0.9403, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.25647463950068156, |
|
"grad_norm": 0.2001953125, |
|
"learning_rate": 7.4352536049931856e-06, |
|
"loss": 0.9692, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 0.25826816844823874, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 7.417318315517613e-06, |
|
"loss": 0.9188, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.260061697395796, |
|
"grad_norm": 0.26171875, |
|
"learning_rate": 7.399383026042041e-06, |
|
"loss": 0.9708, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 0.2618552263433532, |
|
"grad_norm": 2.9375, |
|
"learning_rate": 7.381447736566468e-06, |
|
"loss": 0.9779, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.2636487552909104, |
|
"grad_norm": 0.224609375, |
|
"learning_rate": 7.363512447090897e-06, |
|
"loss": 1.0341, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.2654422842384676, |
|
"grad_norm": 0.1337890625, |
|
"learning_rate": 7.345577157615324e-06, |
|
"loss": 0.9934, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.2672358131860248, |
|
"grad_norm": 0.1806640625, |
|
"learning_rate": 7.327641868139752e-06, |
|
"loss": 0.9893, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 0.26902934213358204, |
|
"grad_norm": 0.1796875, |
|
"learning_rate": 7.309706578664181e-06, |
|
"loss": 1.0313, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.2708228710811392, |
|
"grad_norm": 0.6484375, |
|
"learning_rate": 7.2917712891886086e-06, |
|
"loss": 1.0338, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 0.27261640002869647, |
|
"grad_norm": 0.212890625, |
|
"learning_rate": 7.273835999713036e-06, |
|
"loss": 0.9932, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.27440992897625366, |
|
"grad_norm": 0.1865234375, |
|
"learning_rate": 7.255900710237464e-06, |
|
"loss": 0.9619, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 0.2762034579238109, |
|
"grad_norm": 0.171875, |
|
"learning_rate": 7.237965420761892e-06, |
|
"loss": 0.9433, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.2779969868713681, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 7.22003013128632e-06, |
|
"loss": 1.0252, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 0.27979051581892533, |
|
"grad_norm": 0.1279296875, |
|
"learning_rate": 7.202094841810747e-06, |
|
"loss": 0.963, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.2815840447664825, |
|
"grad_norm": 0.220703125, |
|
"learning_rate": 7.184159552335175e-06, |
|
"loss": 1.0337, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 0.28337757371403977, |
|
"grad_norm": 0.3828125, |
|
"learning_rate": 7.166224262859603e-06, |
|
"loss": 1.0714, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.28517110266159695, |
|
"grad_norm": 0.189453125, |
|
"learning_rate": 7.148288973384031e-06, |
|
"loss": 0.9653, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 0.2869646316091542, |
|
"grad_norm": 0.1884765625, |
|
"learning_rate": 7.130353683908458e-06, |
|
"loss": 1.0517, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.2887581605567114, |
|
"grad_norm": 0.2255859375, |
|
"learning_rate": 7.112418394432886e-06, |
|
"loss": 1.1255, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 0.2905516895042686, |
|
"grad_norm": 0.1845703125, |
|
"learning_rate": 7.094483104957315e-06, |
|
"loss": 1.0293, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.2923452184518258, |
|
"grad_norm": 0.296875, |
|
"learning_rate": 7.076547815481743e-06, |
|
"loss": 1.1049, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 0.294138747399383, |
|
"grad_norm": 1.75, |
|
"learning_rate": 7.05861252600617e-06, |
|
"loss": 1.1538, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.29593227634694025, |
|
"grad_norm": 0.185546875, |
|
"learning_rate": 7.040677236530599e-06, |
|
"loss": 1.1805, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 0.29772580529449744, |
|
"grad_norm": 0.3828125, |
|
"learning_rate": 7.022741947055026e-06, |
|
"loss": 0.9831, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.2995193342420547, |
|
"grad_norm": 0.462890625, |
|
"learning_rate": 7.004806657579454e-06, |
|
"loss": 0.8992, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 0.30131286318961187, |
|
"grad_norm": 0.83984375, |
|
"learning_rate": 6.986871368103881e-06, |
|
"loss": 0.9759, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.3031063921371691, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 6.96893607862831e-06, |
|
"loss": 0.907, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 0.3048999210847263, |
|
"grad_norm": 0.142578125, |
|
"learning_rate": 6.951000789152737e-06, |
|
"loss": 1.1788, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.30669345003228354, |
|
"grad_norm": 0.1884765625, |
|
"learning_rate": 6.933065499677165e-06, |
|
"loss": 0.9443, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 0.30848697897984073, |
|
"grad_norm": 0.1689453125, |
|
"learning_rate": 6.915130210201592e-06, |
|
"loss": 1.0499, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.310280507927398, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 6.897194920726022e-06, |
|
"loss": 1.0442, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 0.31207403687495516, |
|
"grad_norm": 0.34765625, |
|
"learning_rate": 6.879259631250449e-06, |
|
"loss": 0.9441, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.31386756582251235, |
|
"grad_norm": 0.240234375, |
|
"learning_rate": 6.861324341774877e-06, |
|
"loss": 0.9437, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 0.3156610947700696, |
|
"grad_norm": 0.2314453125, |
|
"learning_rate": 6.843389052299305e-06, |
|
"loss": 1.0314, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.3174546237176268, |
|
"grad_norm": 0.21484375, |
|
"learning_rate": 6.825453762823733e-06, |
|
"loss": 1.0433, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 0.319248152665184, |
|
"grad_norm": 0.625, |
|
"learning_rate": 6.80751847334816e-06, |
|
"loss": 0.876, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.3210416816127412, |
|
"grad_norm": 0.380859375, |
|
"learning_rate": 6.789583183872588e-06, |
|
"loss": 1.0804, |
|
"step": 4475 |
|
}, |
|
{ |
|
"epoch": 0.32283521056029846, |
|
"grad_norm": 0.1416015625, |
|
"learning_rate": 6.771647894397016e-06, |
|
"loss": 1.0427, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.32462873950785565, |
|
"grad_norm": 0.203125, |
|
"learning_rate": 6.753712604921444e-06, |
|
"loss": 1.0126, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 0.3264222684554129, |
|
"grad_norm": 0.1689453125, |
|
"learning_rate": 6.735777315445871e-06, |
|
"loss": 1.0004, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.3282157974029701, |
|
"grad_norm": 0.41015625, |
|
"learning_rate": 6.717842025970299e-06, |
|
"loss": 1.0539, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 0.3300093263505273, |
|
"grad_norm": 0.181640625, |
|
"learning_rate": 6.699906736494728e-06, |
|
"loss": 0.8393, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.3318028552980845, |
|
"grad_norm": 0.1689453125, |
|
"learning_rate": 6.681971447019156e-06, |
|
"loss": 0.9865, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 0.3335963842456417, |
|
"grad_norm": 0.125, |
|
"learning_rate": 6.664036157543583e-06, |
|
"loss": 1.0389, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.33538991319319894, |
|
"grad_norm": 0.2373046875, |
|
"learning_rate": 6.646100868068012e-06, |
|
"loss": 1.1386, |
|
"step": 4675 |
|
}, |
|
{ |
|
"epoch": 0.33718344214075613, |
|
"grad_norm": 0.20703125, |
|
"learning_rate": 6.628165578592439e-06, |
|
"loss": 0.9599, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.3389769710883134, |
|
"grad_norm": 0.1904296875, |
|
"learning_rate": 6.610230289116867e-06, |
|
"loss": 1.023, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 0.34077050003587056, |
|
"grad_norm": 0.26953125, |
|
"learning_rate": 6.592294999641294e-06, |
|
"loss": 1.0246, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.3425640289834278, |
|
"grad_norm": 0.20703125, |
|
"learning_rate": 6.574359710165723e-06, |
|
"loss": 0.8527, |
|
"step": 4775 |
|
}, |
|
{ |
|
"epoch": 0.344357557930985, |
|
"grad_norm": 0.13671875, |
|
"learning_rate": 6.55642442069015e-06, |
|
"loss": 0.9386, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.34615108687854224, |
|
"grad_norm": 0.53515625, |
|
"learning_rate": 6.538489131214578e-06, |
|
"loss": 1.0464, |
|
"step": 4825 |
|
}, |
|
{ |
|
"epoch": 0.3479446158260994, |
|
"grad_norm": 0.23046875, |
|
"learning_rate": 6.520553841739005e-06, |
|
"loss": 0.9225, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.34973814477365667, |
|
"grad_norm": 0.1806640625, |
|
"learning_rate": 6.502618552263435e-06, |
|
"loss": 0.9708, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 0.35153167372121386, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 6.484683262787862e-06, |
|
"loss": 0.9824, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.3533252026687711, |
|
"grad_norm": 0.26171875, |
|
"learning_rate": 6.46674797331229e-06, |
|
"loss": 1.0268, |
|
"step": 4925 |
|
}, |
|
{ |
|
"epoch": 0.3551187316163283, |
|
"grad_norm": 0.15625, |
|
"learning_rate": 6.448812683836717e-06, |
|
"loss": 0.9887, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.3569122605638855, |
|
"grad_norm": 0.52734375, |
|
"learning_rate": 6.430877394361146e-06, |
|
"loss": 1.0141, |
|
"step": 4975 |
|
}, |
|
{ |
|
"epoch": 0.3587057895114427, |
|
"grad_norm": 0.12060546875, |
|
"learning_rate": 6.412942104885573e-06, |
|
"loss": 0.8455, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.3604993184589999, |
|
"grad_norm": 0.12890625, |
|
"learning_rate": 6.395006815410001e-06, |
|
"loss": 0.9621, |
|
"step": 5025 |
|
}, |
|
{ |
|
"epoch": 0.36229284740655715, |
|
"grad_norm": 0.1396484375, |
|
"learning_rate": 6.377071525934429e-06, |
|
"loss": 0.903, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.36408637635411434, |
|
"grad_norm": 0.216796875, |
|
"learning_rate": 6.359136236458857e-06, |
|
"loss": 1.058, |
|
"step": 5075 |
|
}, |
|
{ |
|
"epoch": 0.3658799053016716, |
|
"grad_norm": 0.16015625, |
|
"learning_rate": 6.341200946983284e-06, |
|
"loss": 0.9965, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.36767343424922877, |
|
"grad_norm": 0.76171875, |
|
"learning_rate": 6.323265657507712e-06, |
|
"loss": 0.9547, |
|
"step": 5125 |
|
}, |
|
{ |
|
"epoch": 0.369466963196786, |
|
"grad_norm": 0.88671875, |
|
"learning_rate": 6.305330368032141e-06, |
|
"loss": 1.066, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.3712604921443432, |
|
"grad_norm": 0.17578125, |
|
"learning_rate": 6.287395078556569e-06, |
|
"loss": 0.9571, |
|
"step": 5175 |
|
}, |
|
{ |
|
"epoch": 0.37305402109190045, |
|
"grad_norm": 0.279296875, |
|
"learning_rate": 6.269459789080996e-06, |
|
"loss": 1.1076, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.37484755003945763, |
|
"grad_norm": 0.234375, |
|
"learning_rate": 6.251524499605424e-06, |
|
"loss": 1.0116, |
|
"step": 5225 |
|
}, |
|
{ |
|
"epoch": 0.3766410789870149, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 6.233589210129852e-06, |
|
"loss": 0.9815, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.37843460793457206, |
|
"grad_norm": 0.259765625, |
|
"learning_rate": 6.21565392065428e-06, |
|
"loss": 1.0173, |
|
"step": 5275 |
|
}, |
|
{ |
|
"epoch": 0.38022813688212925, |
|
"grad_norm": 0.2021484375, |
|
"learning_rate": 6.197718631178707e-06, |
|
"loss": 0.9725, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.3820216658296865, |
|
"grad_norm": 0.14453125, |
|
"learning_rate": 6.179783341703136e-06, |
|
"loss": 0.9518, |
|
"step": 5325 |
|
}, |
|
{ |
|
"epoch": 0.3838151947772437, |
|
"grad_norm": 0.56640625, |
|
"learning_rate": 6.161848052227563e-06, |
|
"loss": 1.0591, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 0.38560872372480093, |
|
"grad_norm": 0.443359375, |
|
"learning_rate": 6.143912762751991e-06, |
|
"loss": 0.9761, |
|
"step": 5375 |
|
}, |
|
{ |
|
"epoch": 0.3874022526723581, |
|
"grad_norm": 0.458984375, |
|
"learning_rate": 6.125977473276418e-06, |
|
"loss": 1.0181, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.38919578161991536, |
|
"grad_norm": 0.53125, |
|
"learning_rate": 6.108042183800848e-06, |
|
"loss": 0.9424, |
|
"step": 5425 |
|
}, |
|
{ |
|
"epoch": 0.39098931056747255, |
|
"grad_norm": 0.2216796875, |
|
"learning_rate": 6.090106894325275e-06, |
|
"loss": 1.126, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 0.3927828395150298, |
|
"grad_norm": 0.36328125, |
|
"learning_rate": 6.072171604849703e-06, |
|
"loss": 0.9558, |
|
"step": 5475 |
|
}, |
|
{ |
|
"epoch": 0.394576368462587, |
|
"grad_norm": 0.2119140625, |
|
"learning_rate": 6.05423631537413e-06, |
|
"loss": 0.9462, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.3963698974101442, |
|
"grad_norm": 0.21484375, |
|
"learning_rate": 6.036301025898559e-06, |
|
"loss": 0.9375, |
|
"step": 5525 |
|
}, |
|
{ |
|
"epoch": 0.3981634263577014, |
|
"grad_norm": 1.2890625, |
|
"learning_rate": 6.018365736422986e-06, |
|
"loss": 1.0341, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 0.39995695530525865, |
|
"grad_norm": 0.28515625, |
|
"learning_rate": 6.000430446947414e-06, |
|
"loss": 0.994, |
|
"step": 5575 |
|
}, |
|
{ |
|
"epoch": 0.40175048425281584, |
|
"grad_norm": 0.1591796875, |
|
"learning_rate": 5.982495157471842e-06, |
|
"loss": 1.1157, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.40354401320037303, |
|
"grad_norm": 0.400390625, |
|
"learning_rate": 5.96455986799627e-06, |
|
"loss": 1.0531, |
|
"step": 5625 |
|
}, |
|
{ |
|
"epoch": 0.4053375421479303, |
|
"grad_norm": 0.4375, |
|
"learning_rate": 5.946624578520697e-06, |
|
"loss": 0.9557, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 0.40713107109548746, |
|
"grad_norm": 0.146484375, |
|
"learning_rate": 5.928689289045125e-06, |
|
"loss": 1.055, |
|
"step": 5675 |
|
}, |
|
{ |
|
"epoch": 0.4089246000430447, |
|
"grad_norm": 0.1416015625, |
|
"learning_rate": 5.910753999569554e-06, |
|
"loss": 0.9165, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.4107181289906019, |
|
"grad_norm": 0.2490234375, |
|
"learning_rate": 5.892818710093982e-06, |
|
"loss": 0.9811, |
|
"step": 5725 |
|
}, |
|
{ |
|
"epoch": 0.41251165793815914, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 5.874883420618409e-06, |
|
"loss": 1.0195, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 0.4143051868857163, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 5.856948131142837e-06, |
|
"loss": 1.0109, |
|
"step": 5775 |
|
}, |
|
{ |
|
"epoch": 0.41609871583327357, |
|
"grad_norm": 0.26953125, |
|
"learning_rate": 5.839012841667265e-06, |
|
"loss": 0.9009, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.41789224478083076, |
|
"grad_norm": 0.423828125, |
|
"learning_rate": 5.821077552191693e-06, |
|
"loss": 0.8922, |
|
"step": 5825 |
|
}, |
|
{ |
|
"epoch": 0.419685773728388, |
|
"grad_norm": 0.1875, |
|
"learning_rate": 5.80314226271612e-06, |
|
"loss": 0.9849, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 0.4214793026759452, |
|
"grad_norm": 0.193359375, |
|
"learning_rate": 5.785206973240548e-06, |
|
"loss": 0.935, |
|
"step": 5875 |
|
}, |
|
{ |
|
"epoch": 0.4232728316235024, |
|
"grad_norm": 0.115234375, |
|
"learning_rate": 5.767271683764976e-06, |
|
"loss": 1.0707, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.4250663605710596, |
|
"grad_norm": 0.38671875, |
|
"learning_rate": 5.749336394289404e-06, |
|
"loss": 0.9005, |
|
"step": 5925 |
|
}, |
|
{ |
|
"epoch": 0.4268598895186168, |
|
"grad_norm": 0.1572265625, |
|
"learning_rate": 5.7314011048138314e-06, |
|
"loss": 1.0365, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 0.42865341846617405, |
|
"grad_norm": 1.8671875, |
|
"learning_rate": 5.713465815338261e-06, |
|
"loss": 0.9861, |
|
"step": 5975 |
|
}, |
|
{ |
|
"epoch": 0.43044694741373124, |
|
"grad_norm": 0.13671875, |
|
"learning_rate": 5.695530525862688e-06, |
|
"loss": 1.0156, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.4322404763612885, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 5.677595236387116e-06, |
|
"loss": 1.0903, |
|
"step": 6025 |
|
}, |
|
{ |
|
"epoch": 0.43403400530884567, |
|
"grad_norm": 0.375, |
|
"learning_rate": 5.659659946911543e-06, |
|
"loss": 1.0378, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 0.4358275342564029, |
|
"grad_norm": 0.30078125, |
|
"learning_rate": 5.641724657435972e-06, |
|
"loss": 0.9674, |
|
"step": 6075 |
|
}, |
|
{ |
|
"epoch": 0.4376210632039601, |
|
"grad_norm": 0.48046875, |
|
"learning_rate": 5.623789367960399e-06, |
|
"loss": 0.9478, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.43941459215151735, |
|
"grad_norm": 0.2021484375, |
|
"learning_rate": 5.605854078484827e-06, |
|
"loss": 0.9359, |
|
"step": 6125 |
|
}, |
|
{ |
|
"epoch": 0.44120812109907454, |
|
"grad_norm": 0.2109375, |
|
"learning_rate": 5.5879187890092544e-06, |
|
"loss": 0.9368, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 0.4430016500466318, |
|
"grad_norm": 0.2373046875, |
|
"learning_rate": 5.569983499533683e-06, |
|
"loss": 1.009, |
|
"step": 6175 |
|
}, |
|
{ |
|
"epoch": 0.44479517899418897, |
|
"grad_norm": 0.353515625, |
|
"learning_rate": 5.55204821005811e-06, |
|
"loss": 0.8657, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.44658870794174615, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 5.534112920582538e-06, |
|
"loss": 1.0022, |
|
"step": 6225 |
|
}, |
|
{ |
|
"epoch": 0.4483822368893034, |
|
"grad_norm": 0.1865234375, |
|
"learning_rate": 5.516177631106967e-06, |
|
"loss": 0.9488, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 0.4501757658368606, |
|
"grad_norm": 0.384765625, |
|
"learning_rate": 5.498242341631395e-06, |
|
"loss": 0.8726, |
|
"step": 6275 |
|
}, |
|
{ |
|
"epoch": 0.45196929478441783, |
|
"grad_norm": 0.15625, |
|
"learning_rate": 5.480307052155822e-06, |
|
"loss": 0.9772, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.453762823731975, |
|
"grad_norm": 0.14453125, |
|
"learning_rate": 5.46237176268025e-06, |
|
"loss": 1.0123, |
|
"step": 6325 |
|
}, |
|
{ |
|
"epoch": 0.45555635267953226, |
|
"grad_norm": 0.181640625, |
|
"learning_rate": 5.444436473204678e-06, |
|
"loss": 1.2663, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 0.45734988162708945, |
|
"grad_norm": 0.12353515625, |
|
"learning_rate": 5.426501183729106e-06, |
|
"loss": 0.9921, |
|
"step": 6375 |
|
}, |
|
{ |
|
"epoch": 0.4591434105746467, |
|
"grad_norm": 0.353515625, |
|
"learning_rate": 5.408565894253533e-06, |
|
"loss": 1.019, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.4609369395222039, |
|
"grad_norm": 0.640625, |
|
"learning_rate": 5.390630604777961e-06, |
|
"loss": 1.016, |
|
"step": 6425 |
|
}, |
|
{ |
|
"epoch": 0.4627304684697611, |
|
"grad_norm": 0.51171875, |
|
"learning_rate": 5.372695315302389e-06, |
|
"loss": 1.0478, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 0.4645239974173183, |
|
"grad_norm": 0.578125, |
|
"learning_rate": 5.354760025826817e-06, |
|
"loss": 0.9038, |
|
"step": 6475 |
|
}, |
|
{ |
|
"epoch": 0.46631752636487556, |
|
"grad_norm": 0.2216796875, |
|
"learning_rate": 5.3368247363512445e-06, |
|
"loss": 0.9667, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.46811105531243274, |
|
"grad_norm": 0.2099609375, |
|
"learning_rate": 5.318889446875674e-06, |
|
"loss": 0.945, |
|
"step": 6525 |
|
}, |
|
{ |
|
"epoch": 0.46990458425998993, |
|
"grad_norm": 0.2080078125, |
|
"learning_rate": 5.300954157400101e-06, |
|
"loss": 0.9285, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 0.4716981132075472, |
|
"grad_norm": 0.205078125, |
|
"learning_rate": 5.283018867924529e-06, |
|
"loss": 0.9441, |
|
"step": 6575 |
|
}, |
|
{ |
|
"epoch": 0.47349164215510436, |
|
"grad_norm": 0.275390625, |
|
"learning_rate": 5.265083578448956e-06, |
|
"loss": 1.0327, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.4752851711026616, |
|
"grad_norm": 0.177734375, |
|
"learning_rate": 5.247148288973385e-06, |
|
"loss": 0.9288, |
|
"step": 6625 |
|
}, |
|
{ |
|
"epoch": 0.4770787000502188, |
|
"grad_norm": 0.6875, |
|
"learning_rate": 5.229212999497812e-06, |
|
"loss": 0.9748, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 0.47887222899777604, |
|
"grad_norm": 0.28125, |
|
"learning_rate": 5.21127771002224e-06, |
|
"loss": 1.2034, |
|
"step": 6675 |
|
}, |
|
{ |
|
"epoch": 0.4806657579453332, |
|
"grad_norm": 0.494140625, |
|
"learning_rate": 5.1933424205466675e-06, |
|
"loss": 0.9929, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.48245928689289047, |
|
"grad_norm": 0.126953125, |
|
"learning_rate": 5.175407131071096e-06, |
|
"loss": 1.0314, |
|
"step": 6725 |
|
}, |
|
{ |
|
"epoch": 0.48425281584044766, |
|
"grad_norm": 0.412109375, |
|
"learning_rate": 5.1574718415955234e-06, |
|
"loss": 1.1025, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 0.4860463447880049, |
|
"grad_norm": 0.201171875, |
|
"learning_rate": 5.139536552119951e-06, |
|
"loss": 0.9664, |
|
"step": 6775 |
|
}, |
|
{ |
|
"epoch": 0.4878398737355621, |
|
"grad_norm": 0.38671875, |
|
"learning_rate": 5.1216012626443786e-06, |
|
"loss": 1.0528, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.4896334026831193, |
|
"grad_norm": 0.294921875, |
|
"learning_rate": 5.103665973168808e-06, |
|
"loss": 1.012, |
|
"step": 6825 |
|
}, |
|
{ |
|
"epoch": 0.4914269316306765, |
|
"grad_norm": 5.53125, |
|
"learning_rate": 5.085730683693235e-06, |
|
"loss": 1.0442, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 0.4932204605782337, |
|
"grad_norm": 0.1513671875, |
|
"learning_rate": 5.067795394217663e-06, |
|
"loss": 0.9695, |
|
"step": 6875 |
|
}, |
|
{ |
|
"epoch": 0.49501398952579095, |
|
"grad_norm": 0.365234375, |
|
"learning_rate": 5.049860104742091e-06, |
|
"loss": 0.9442, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.49680751847334814, |
|
"grad_norm": 0.263671875, |
|
"learning_rate": 5.031924815266519e-06, |
|
"loss": 0.9851, |
|
"step": 6925 |
|
}, |
|
{ |
|
"epoch": 0.4986010474209054, |
|
"grad_norm": 0.58984375, |
|
"learning_rate": 5.0139895257909464e-06, |
|
"loss": 1.0582, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 0.5003945763684626, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 4.996054236315375e-06, |
|
"loss": 1.0165, |
|
"step": 6975 |
|
}, |
|
{ |
|
"epoch": 0.5021881053160198, |
|
"grad_norm": 0.189453125, |
|
"learning_rate": 4.978118946839802e-06, |
|
"loss": 1.0012, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.503981634263577, |
|
"grad_norm": 0.1396484375, |
|
"learning_rate": 4.96018365736423e-06, |
|
"loss": 0.9372, |
|
"step": 7025 |
|
}, |
|
{ |
|
"epoch": 0.5057751632111342, |
|
"grad_norm": 0.169921875, |
|
"learning_rate": 4.9422483678886575e-06, |
|
"loss": 0.9966, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 0.5075686921586915, |
|
"grad_norm": 0.2353515625, |
|
"learning_rate": 4.924313078413086e-06, |
|
"loss": 0.9735, |
|
"step": 7075 |
|
}, |
|
{ |
|
"epoch": 0.5093622211062486, |
|
"grad_norm": 0.1494140625, |
|
"learning_rate": 4.9063777889375135e-06, |
|
"loss": 0.9408, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.5111557500538059, |
|
"grad_norm": 0.47265625, |
|
"learning_rate": 4.888442499461942e-06, |
|
"loss": 0.9853, |
|
"step": 7125 |
|
}, |
|
{ |
|
"epoch": 0.5129492790013631, |
|
"grad_norm": 0.171875, |
|
"learning_rate": 4.8705072099863694e-06, |
|
"loss": 0.8846, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 0.5147428079489202, |
|
"grad_norm": 0.1455078125, |
|
"learning_rate": 4.852571920510797e-06, |
|
"loss": 1.0347, |
|
"step": 7175 |
|
}, |
|
{ |
|
"epoch": 0.5165363368964775, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 4.834636631035225e-06, |
|
"loss": 0.9861, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.5183298658440347, |
|
"grad_norm": 0.296875, |
|
"learning_rate": 4.816701341559653e-06, |
|
"loss": 1.0312, |
|
"step": 7225 |
|
}, |
|
{ |
|
"epoch": 0.520123394791592, |
|
"grad_norm": 0.12353515625, |
|
"learning_rate": 4.798766052084081e-06, |
|
"loss": 0.9744, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 0.5219169237391491, |
|
"grad_norm": 3.671875, |
|
"learning_rate": 4.780830762608509e-06, |
|
"loss": 1.0849, |
|
"step": 7275 |
|
}, |
|
{ |
|
"epoch": 0.5237104526867064, |
|
"grad_norm": 0.36328125, |
|
"learning_rate": 4.7628954731329365e-06, |
|
"loss": 0.9942, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.5255039816342636, |
|
"grad_norm": 1.1875, |
|
"learning_rate": 4.744960183657364e-06, |
|
"loss": 0.9007, |
|
"step": 7325 |
|
}, |
|
{ |
|
"epoch": 0.5272975105818208, |
|
"grad_norm": 0.119140625, |
|
"learning_rate": 4.7270248941817924e-06, |
|
"loss": 0.9755, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 0.529091039529378, |
|
"grad_norm": 0.1806640625, |
|
"learning_rate": 4.70908960470622e-06, |
|
"loss": 0.981, |
|
"step": 7375 |
|
}, |
|
{ |
|
"epoch": 0.5308845684769352, |
|
"grad_norm": 0.5859375, |
|
"learning_rate": 4.691154315230648e-06, |
|
"loss": 1.1553, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.5326780974244925, |
|
"grad_norm": 0.2470703125, |
|
"learning_rate": 4.673219025755076e-06, |
|
"loss": 1.2496, |
|
"step": 7425 |
|
}, |
|
{ |
|
"epoch": 0.5344716263720496, |
|
"grad_norm": 0.302734375, |
|
"learning_rate": 4.6552837362795035e-06, |
|
"loss": 0.9646, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 0.5362651553196068, |
|
"grad_norm": 0.1298828125, |
|
"learning_rate": 4.637348446803932e-06, |
|
"loss": 0.9441, |
|
"step": 7475 |
|
}, |
|
{ |
|
"epoch": 0.5380586842671641, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 4.6194131573283595e-06, |
|
"loss": 0.9471, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.5398522132147213, |
|
"grad_norm": 1.4765625, |
|
"learning_rate": 4.601477867852788e-06, |
|
"loss": 0.8943, |
|
"step": 7525 |
|
}, |
|
{ |
|
"epoch": 0.5416457421622785, |
|
"grad_norm": 0.48828125, |
|
"learning_rate": 4.5835425783772154e-06, |
|
"loss": 1.056, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 0.5434392711098357, |
|
"grad_norm": 0.162109375, |
|
"learning_rate": 4.565607288901643e-06, |
|
"loss": 0.9676, |
|
"step": 7575 |
|
}, |
|
{ |
|
"epoch": 0.5452328000573929, |
|
"grad_norm": 0.1650390625, |
|
"learning_rate": 4.5476719994260706e-06, |
|
"loss": 1.0258, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.5470263290049502, |
|
"grad_norm": 2.640625, |
|
"learning_rate": 4.529736709950499e-06, |
|
"loss": 1.0032, |
|
"step": 7625 |
|
}, |
|
{ |
|
"epoch": 0.5488198579525073, |
|
"grad_norm": 0.3984375, |
|
"learning_rate": 4.5118014204749265e-06, |
|
"loss": 1.0199, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 0.5506133869000646, |
|
"grad_norm": 0.2431640625, |
|
"learning_rate": 4.493866130999355e-06, |
|
"loss": 1.0425, |
|
"step": 7675 |
|
}, |
|
{ |
|
"epoch": 0.5524069158476218, |
|
"grad_norm": 0.16015625, |
|
"learning_rate": 4.4759308415237825e-06, |
|
"loss": 1.0024, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.554200444795179, |
|
"grad_norm": 0.67578125, |
|
"learning_rate": 4.45799555204821e-06, |
|
"loss": 0.9574, |
|
"step": 7725 |
|
}, |
|
{ |
|
"epoch": 0.5559939737427362, |
|
"grad_norm": 0.470703125, |
|
"learning_rate": 4.4400602625726384e-06, |
|
"loss": 0.9229, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 0.5577875026902934, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 4.422124973097066e-06, |
|
"loss": 1.0553, |
|
"step": 7775 |
|
}, |
|
{ |
|
"epoch": 0.5595810316378507, |
|
"grad_norm": 0.203125, |
|
"learning_rate": 4.404189683621494e-06, |
|
"loss": 0.9049, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.5613745605854078, |
|
"grad_norm": 0.34375, |
|
"learning_rate": 4.386254394145922e-06, |
|
"loss": 0.9489, |
|
"step": 7825 |
|
}, |
|
{ |
|
"epoch": 0.563168089532965, |
|
"grad_norm": 0.181640625, |
|
"learning_rate": 4.3683191046703495e-06, |
|
"loss": 1.0916, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 0.5649616184805223, |
|
"grad_norm": 0.19921875, |
|
"learning_rate": 4.350383815194777e-06, |
|
"loss": 1.0332, |
|
"step": 7875 |
|
}, |
|
{ |
|
"epoch": 0.5667551474280795, |
|
"grad_norm": 0.208984375, |
|
"learning_rate": 4.3324485257192055e-06, |
|
"loss": 0.9569, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.5685486763756367, |
|
"grad_norm": 0.177734375, |
|
"learning_rate": 4.314513236243633e-06, |
|
"loss": 0.9475, |
|
"step": 7925 |
|
}, |
|
{ |
|
"epoch": 0.5703422053231939, |
|
"grad_norm": 0.39453125, |
|
"learning_rate": 4.2965779467680614e-06, |
|
"loss": 0.9851, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 0.5721357342707512, |
|
"grad_norm": 0.275390625, |
|
"learning_rate": 4.278642657292489e-06, |
|
"loss": 1.003, |
|
"step": 7975 |
|
}, |
|
{ |
|
"epoch": 0.5739292632183084, |
|
"grad_norm": 0.171875, |
|
"learning_rate": 4.2607073678169166e-06, |
|
"loss": 1.1598, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.5757227921658655, |
|
"grad_norm": 0.474609375, |
|
"learning_rate": 4.242772078341344e-06, |
|
"loss": 1.072, |
|
"step": 8025 |
|
}, |
|
{ |
|
"epoch": 0.5775163211134228, |
|
"grad_norm": 0.11279296875, |
|
"learning_rate": 4.2248367888657725e-06, |
|
"loss": 0.8758, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 0.57930985006098, |
|
"grad_norm": 0.259765625, |
|
"learning_rate": 4.206901499390201e-06, |
|
"loss": 1.1417, |
|
"step": 8075 |
|
}, |
|
{ |
|
"epoch": 0.5811033790085371, |
|
"grad_norm": 0.158203125, |
|
"learning_rate": 4.1889662099146285e-06, |
|
"loss": 0.9566, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.5828969079560944, |
|
"grad_norm": 0.18359375, |
|
"learning_rate": 4.171030920439056e-06, |
|
"loss": 1.0096, |
|
"step": 8125 |
|
}, |
|
{ |
|
"epoch": 0.5846904369036516, |
|
"grad_norm": 0.1298828125, |
|
"learning_rate": 4.153095630963484e-06, |
|
"loss": 1.0485, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 0.5864839658512089, |
|
"grad_norm": 0.12451171875, |
|
"learning_rate": 4.135160341487912e-06, |
|
"loss": 0.9747, |
|
"step": 8175 |
|
}, |
|
{ |
|
"epoch": 0.588277494798766, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 4.1172250520123395e-06, |
|
"loss": 0.9826, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.5900710237463233, |
|
"grad_norm": 0.2080078125, |
|
"learning_rate": 4.099289762536768e-06, |
|
"loss": 1.0612, |
|
"step": 8225 |
|
}, |
|
{ |
|
"epoch": 0.5918645526938805, |
|
"grad_norm": 2.6875, |
|
"learning_rate": 4.0813544730611955e-06, |
|
"loss": 1.1022, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 0.5936580816414377, |
|
"grad_norm": 0.1923828125, |
|
"learning_rate": 4.063419183585623e-06, |
|
"loss": 0.9072, |
|
"step": 8275 |
|
}, |
|
{ |
|
"epoch": 0.5954516105889949, |
|
"grad_norm": 0.283203125, |
|
"learning_rate": 4.045483894110051e-06, |
|
"loss": 1.0234, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.5972451395365521, |
|
"grad_norm": 0.296875, |
|
"learning_rate": 4.027548604634479e-06, |
|
"loss": 1.0396, |
|
"step": 8325 |
|
}, |
|
{ |
|
"epoch": 0.5990386684841094, |
|
"grad_norm": 0.2890625, |
|
"learning_rate": 4.009613315158907e-06, |
|
"loss": 0.877, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 0.6008321974316665, |
|
"grad_norm": 0.271484375, |
|
"learning_rate": 3.991678025683335e-06, |
|
"loss": 0.8808, |
|
"step": 8375 |
|
}, |
|
{ |
|
"epoch": 0.6026257263792237, |
|
"grad_norm": 0.1875, |
|
"learning_rate": 3.9737427362077625e-06, |
|
"loss": 1.0299, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.604419255326781, |
|
"grad_norm": 1.09375, |
|
"learning_rate": 3.95580744673219e-06, |
|
"loss": 1.0278, |
|
"step": 8425 |
|
}, |
|
{ |
|
"epoch": 0.6062127842743382, |
|
"grad_norm": 0.19140625, |
|
"learning_rate": 3.9378721572566185e-06, |
|
"loss": 0.9991, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 0.6080063132218954, |
|
"grad_norm": 0.2109375, |
|
"learning_rate": 3.919936867781046e-06, |
|
"loss": 0.9768, |
|
"step": 8475 |
|
}, |
|
{ |
|
"epoch": 0.6097998421694526, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 3.9020015783054745e-06, |
|
"loss": 1.0068, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.6115933711170098, |
|
"grad_norm": 0.296875, |
|
"learning_rate": 3.884066288829902e-06, |
|
"loss": 1.0219, |
|
"step": 8525 |
|
}, |
|
{ |
|
"epoch": 0.6133869000645671, |
|
"grad_norm": 0.51953125, |
|
"learning_rate": 3.86613099935433e-06, |
|
"loss": 1.0836, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 0.6151804290121242, |
|
"grad_norm": 0.1689453125, |
|
"learning_rate": 3.848195709878758e-06, |
|
"loss": 0.9227, |
|
"step": 8575 |
|
}, |
|
{ |
|
"epoch": 0.6169739579596815, |
|
"grad_norm": 0.5625, |
|
"learning_rate": 3.8302604204031855e-06, |
|
"loss": 1.0083, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.6187674869072387, |
|
"grad_norm": 0.1943359375, |
|
"learning_rate": 3.812325130927613e-06, |
|
"loss": 1.0881, |
|
"step": 8625 |
|
}, |
|
{ |
|
"epoch": 0.620561015854796, |
|
"grad_norm": 0.875, |
|
"learning_rate": 3.7943898414520415e-06, |
|
"loss": 0.9992, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 0.6223545448023531, |
|
"grad_norm": 0.515625, |
|
"learning_rate": 3.7764545519764695e-06, |
|
"loss": 1.0048, |
|
"step": 8675 |
|
}, |
|
{ |
|
"epoch": 0.6241480737499103, |
|
"grad_norm": 5.84375, |
|
"learning_rate": 3.758519262500897e-06, |
|
"loss": 1.023, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.6259416026974676, |
|
"grad_norm": 0.228515625, |
|
"learning_rate": 3.740583973025325e-06, |
|
"loss": 0.892, |
|
"step": 8725 |
|
}, |
|
{ |
|
"epoch": 0.6277351316450247, |
|
"grad_norm": 3.59375, |
|
"learning_rate": 3.7226486835497526e-06, |
|
"loss": 0.9862, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 0.629528660592582, |
|
"grad_norm": 0.193359375, |
|
"learning_rate": 3.704713394074181e-06, |
|
"loss": 1.0362, |
|
"step": 8775 |
|
}, |
|
{ |
|
"epoch": 0.6313221895401392, |
|
"grad_norm": 0.7890625, |
|
"learning_rate": 3.6867781045986085e-06, |
|
"loss": 1.0916, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.6331157184876964, |
|
"grad_norm": 0.298828125, |
|
"learning_rate": 3.6688428151230365e-06, |
|
"loss": 0.9269, |
|
"step": 8825 |
|
}, |
|
{ |
|
"epoch": 0.6349092474352536, |
|
"grad_norm": 0.72265625, |
|
"learning_rate": 3.650907525647464e-06, |
|
"loss": 1.0274, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 0.6367027763828108, |
|
"grad_norm": 0.2451171875, |
|
"learning_rate": 3.632972236171892e-06, |
|
"loss": 0.9751, |
|
"step": 8875 |
|
}, |
|
{ |
|
"epoch": 0.638496305330368, |
|
"grad_norm": 1.265625, |
|
"learning_rate": 3.6150369466963196e-06, |
|
"loss": 1.0403, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.6402898342779253, |
|
"grad_norm": 0.2578125, |
|
"learning_rate": 3.597101657220748e-06, |
|
"loss": 0.9077, |
|
"step": 8925 |
|
}, |
|
{ |
|
"epoch": 0.6420833632254824, |
|
"grad_norm": 0.1630859375, |
|
"learning_rate": 3.5791663677451756e-06, |
|
"loss": 0.9537, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 0.6438768921730397, |
|
"grad_norm": 0.1806640625, |
|
"learning_rate": 3.5612310782696036e-06, |
|
"loss": 1.0603, |
|
"step": 8975 |
|
}, |
|
{ |
|
"epoch": 0.6456704211205969, |
|
"grad_norm": 0.40625, |
|
"learning_rate": 3.5432957887940315e-06, |
|
"loss": 1.0238, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.647463950068154, |
|
"grad_norm": 0.208984375, |
|
"learning_rate": 3.525360499318459e-06, |
|
"loss": 1.0003, |
|
"step": 9025 |
|
}, |
|
{ |
|
"epoch": 0.6492574790157113, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 3.5074252098428875e-06, |
|
"loss": 0.9724, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 0.6510510079632685, |
|
"grad_norm": 0.232421875, |
|
"learning_rate": 3.489489920367315e-06, |
|
"loss": 1.0083, |
|
"step": 9075 |
|
}, |
|
{ |
|
"epoch": 0.6528445369108258, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 3.471554630891743e-06, |
|
"loss": 1.1098, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.6546380658583829, |
|
"grad_norm": 0.62109375, |
|
"learning_rate": 3.4536193414161706e-06, |
|
"loss": 1.0617, |
|
"step": 9125 |
|
}, |
|
{ |
|
"epoch": 0.6564315948059402, |
|
"grad_norm": 0.283203125, |
|
"learning_rate": 3.4356840519405986e-06, |
|
"loss": 0.9711, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 0.6582251237534974, |
|
"grad_norm": 0.2255859375, |
|
"learning_rate": 3.417748762465026e-06, |
|
"loss": 0.97, |
|
"step": 9175 |
|
}, |
|
{ |
|
"epoch": 0.6600186527010546, |
|
"grad_norm": 0.1904296875, |
|
"learning_rate": 3.3998134729894545e-06, |
|
"loss": 0.9635, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.6618121816486118, |
|
"grad_norm": 2.84375, |
|
"learning_rate": 3.381878183513882e-06, |
|
"loss": 1.0514, |
|
"step": 9225 |
|
}, |
|
{ |
|
"epoch": 0.663605710596169, |
|
"grad_norm": 0.189453125, |
|
"learning_rate": 3.36394289403831e-06, |
|
"loss": 0.9953, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 0.6653992395437263, |
|
"grad_norm": 0.2099609375, |
|
"learning_rate": 3.3460076045627376e-06, |
|
"loss": 1.0291, |
|
"step": 9275 |
|
}, |
|
{ |
|
"epoch": 0.6671927684912834, |
|
"grad_norm": 0.54296875, |
|
"learning_rate": 3.3280723150871656e-06, |
|
"loss": 1.0054, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.6689862974388406, |
|
"grad_norm": 0.1982421875, |
|
"learning_rate": 3.310137025611594e-06, |
|
"loss": 0.9536, |
|
"step": 9325 |
|
}, |
|
{ |
|
"epoch": 0.6707798263863979, |
|
"grad_norm": 0.76953125, |
|
"learning_rate": 3.2922017361360216e-06, |
|
"loss": 1.0935, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 0.6725733553339551, |
|
"grad_norm": 0.130859375, |
|
"learning_rate": 3.2742664466604496e-06, |
|
"loss": 1.0158, |
|
"step": 9375 |
|
}, |
|
{ |
|
"epoch": 0.6743668842815123, |
|
"grad_norm": 0.404296875, |
|
"learning_rate": 3.256331157184877e-06, |
|
"loss": 1.0243, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.6761604132290695, |
|
"grad_norm": 0.236328125, |
|
"learning_rate": 3.238395867709305e-06, |
|
"loss": 0.9841, |
|
"step": 9425 |
|
}, |
|
{ |
|
"epoch": 0.6779539421766267, |
|
"grad_norm": 0.197265625, |
|
"learning_rate": 3.2204605782337327e-06, |
|
"loss": 0.9621, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 0.679747471124184, |
|
"grad_norm": 0.4296875, |
|
"learning_rate": 3.202525288758161e-06, |
|
"loss": 1.2427, |
|
"step": 9475 |
|
}, |
|
{ |
|
"epoch": 0.6815410000717411, |
|
"grad_norm": 0.2470703125, |
|
"learning_rate": 3.1845899992825886e-06, |
|
"loss": 0.9871, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.6833345290192984, |
|
"grad_norm": 0.37890625, |
|
"learning_rate": 3.1666547098070166e-06, |
|
"loss": 0.9559, |
|
"step": 9525 |
|
}, |
|
{ |
|
"epoch": 0.6851280579668556, |
|
"grad_norm": 4.25, |
|
"learning_rate": 3.148719420331444e-06, |
|
"loss": 1.1722, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 0.6869215869144129, |
|
"grad_norm": 0.1875, |
|
"learning_rate": 3.130784130855872e-06, |
|
"loss": 1.0006, |
|
"step": 9575 |
|
}, |
|
{ |
|
"epoch": 0.68871511586197, |
|
"grad_norm": 0.1865234375, |
|
"learning_rate": 3.1128488413802997e-06, |
|
"loss": 0.9999, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.6905086448095272, |
|
"grad_norm": 0.97265625, |
|
"learning_rate": 3.094913551904728e-06, |
|
"loss": 0.9543, |
|
"step": 9625 |
|
}, |
|
{ |
|
"epoch": 0.6923021737570845, |
|
"grad_norm": 0.4140625, |
|
"learning_rate": 3.076978262429156e-06, |
|
"loss": 0.9658, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 0.6940957027046416, |
|
"grad_norm": 0.1748046875, |
|
"learning_rate": 3.0590429729535836e-06, |
|
"loss": 1.1451, |
|
"step": 9675 |
|
}, |
|
{ |
|
"epoch": 0.6958892316521988, |
|
"grad_norm": 0.166015625, |
|
"learning_rate": 3.0411076834780116e-06, |
|
"loss": 0.9441, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.6976827605997561, |
|
"grad_norm": 0.5625, |
|
"learning_rate": 3.023172394002439e-06, |
|
"loss": 1.0021, |
|
"step": 9725 |
|
}, |
|
{ |
|
"epoch": 0.6994762895473133, |
|
"grad_norm": 0.2177734375, |
|
"learning_rate": 3.0052371045268676e-06, |
|
"loss": 1.0179, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 0.7012698184948705, |
|
"grad_norm": 0.130859375, |
|
"learning_rate": 2.987301815051295e-06, |
|
"loss": 1.0105, |
|
"step": 9775 |
|
}, |
|
{ |
|
"epoch": 0.7030633474424277, |
|
"grad_norm": 0.2392578125, |
|
"learning_rate": 2.969366525575723e-06, |
|
"loss": 0.8946, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.704856876389985, |
|
"grad_norm": 0.79296875, |
|
"learning_rate": 2.9514312361001507e-06, |
|
"loss": 0.9994, |
|
"step": 9825 |
|
}, |
|
{ |
|
"epoch": 0.7066504053375422, |
|
"grad_norm": 0.1611328125, |
|
"learning_rate": 2.9334959466245787e-06, |
|
"loss": 0.9527, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 0.7084439342850993, |
|
"grad_norm": 0.171875, |
|
"learning_rate": 2.9155606571490062e-06, |
|
"loss": 1.0214, |
|
"step": 9875 |
|
}, |
|
{ |
|
"epoch": 0.7102374632326566, |
|
"grad_norm": 0.63671875, |
|
"learning_rate": 2.8976253676734346e-06, |
|
"loss": 1.1297, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.7120309921802138, |
|
"grad_norm": 0.1552734375, |
|
"learning_rate": 2.8796900781978626e-06, |
|
"loss": 0.9318, |
|
"step": 9925 |
|
}, |
|
{ |
|
"epoch": 0.713824521127771, |
|
"grad_norm": 0.435546875, |
|
"learning_rate": 2.86175478872229e-06, |
|
"loss": 0.9355, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 0.7156180500753282, |
|
"grad_norm": 0.447265625, |
|
"learning_rate": 2.843819499246718e-06, |
|
"loss": 0.9861, |
|
"step": 9975 |
|
}, |
|
{ |
|
"epoch": 0.7174115790228854, |
|
"grad_norm": 0.1728515625, |
|
"learning_rate": 2.8258842097711457e-06, |
|
"loss": 1.0028, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.7192051079704427, |
|
"grad_norm": 1.1875, |
|
"learning_rate": 2.807948920295574e-06, |
|
"loss": 0.9811, |
|
"step": 10025 |
|
}, |
|
{ |
|
"epoch": 0.7209986369179998, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 2.7900136308200017e-06, |
|
"loss": 1.025, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 0.7227921658655571, |
|
"grad_norm": 0.1875, |
|
"learning_rate": 2.7720783413444296e-06, |
|
"loss": 0.9284, |
|
"step": 10075 |
|
}, |
|
{ |
|
"epoch": 0.7245856948131143, |
|
"grad_norm": 0.380859375, |
|
"learning_rate": 2.754143051868857e-06, |
|
"loss": 1.038, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.7263792237606715, |
|
"grad_norm": 0.58984375, |
|
"learning_rate": 2.736207762393285e-06, |
|
"loss": 1.0082, |
|
"step": 10125 |
|
}, |
|
{ |
|
"epoch": 0.7281727527082287, |
|
"grad_norm": 0.1875, |
|
"learning_rate": 2.7182724729177127e-06, |
|
"loss": 1.0237, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 0.7299662816557859, |
|
"grad_norm": 0.42578125, |
|
"learning_rate": 2.700337183442141e-06, |
|
"loss": 0.8815, |
|
"step": 10175 |
|
}, |
|
{ |
|
"epoch": 0.7317598106033432, |
|
"grad_norm": 0.49609375, |
|
"learning_rate": 2.6824018939665687e-06, |
|
"loss": 0.9161, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.7335533395509003, |
|
"grad_norm": 0.1494140625, |
|
"learning_rate": 2.6644666044909967e-06, |
|
"loss": 1.0186, |
|
"step": 10225 |
|
}, |
|
{ |
|
"epoch": 0.7353468684984575, |
|
"grad_norm": 0.19140625, |
|
"learning_rate": 2.6465313150154247e-06, |
|
"loss": 0.8941, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 0.7371403974460148, |
|
"grad_norm": 0.1845703125, |
|
"learning_rate": 2.6285960255398522e-06, |
|
"loss": 0.9991, |
|
"step": 10275 |
|
}, |
|
{ |
|
"epoch": 0.738933926393572, |
|
"grad_norm": 0.1669921875, |
|
"learning_rate": 2.6106607360642806e-06, |
|
"loss": 1.1567, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.7407274553411292, |
|
"grad_norm": 0.6796875, |
|
"learning_rate": 2.592725446588708e-06, |
|
"loss": 0.9901, |
|
"step": 10325 |
|
}, |
|
{ |
|
"epoch": 0.7425209842886864, |
|
"grad_norm": 0.1640625, |
|
"learning_rate": 2.574790157113136e-06, |
|
"loss": 1.0483, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 0.7443145132362436, |
|
"grad_norm": 0.34375, |
|
"learning_rate": 2.5568548676375637e-06, |
|
"loss": 0.9463, |
|
"step": 10375 |
|
}, |
|
{ |
|
"epoch": 0.7461080421838009, |
|
"grad_norm": 0.146484375, |
|
"learning_rate": 2.5389195781619917e-06, |
|
"loss": 0.904, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.747901571131358, |
|
"grad_norm": 0.44921875, |
|
"learning_rate": 2.5209842886864193e-06, |
|
"loss": 0.9677, |
|
"step": 10425 |
|
}, |
|
{ |
|
"epoch": 0.7496951000789153, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 2.5030489992108477e-06, |
|
"loss": 1.1016, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 0.7514886290264725, |
|
"grad_norm": 0.15234375, |
|
"learning_rate": 2.4851137097352752e-06, |
|
"loss": 0.9498, |
|
"step": 10475 |
|
}, |
|
{ |
|
"epoch": 0.7532821579740298, |
|
"grad_norm": 0.26953125, |
|
"learning_rate": 2.467178420259703e-06, |
|
"loss": 0.9623, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.7550756869215869, |
|
"grad_norm": 0.251953125, |
|
"learning_rate": 2.449243130784131e-06, |
|
"loss": 0.9152, |
|
"step": 10525 |
|
}, |
|
{ |
|
"epoch": 0.7568692158691441, |
|
"grad_norm": 0.1435546875, |
|
"learning_rate": 2.4313078413085587e-06, |
|
"loss": 0.9425, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 0.7586627448167014, |
|
"grad_norm": 0.458984375, |
|
"learning_rate": 2.4133725518329867e-06, |
|
"loss": 1.0248, |
|
"step": 10575 |
|
}, |
|
{ |
|
"epoch": 0.7604562737642585, |
|
"grad_norm": 0.283203125, |
|
"learning_rate": 2.3954372623574147e-06, |
|
"loss": 0.8791, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.7622498027118157, |
|
"grad_norm": 0.177734375, |
|
"learning_rate": 2.3775019728818423e-06, |
|
"loss": 0.9201, |
|
"step": 10625 |
|
}, |
|
{ |
|
"epoch": 0.764043331659373, |
|
"grad_norm": 0.64453125, |
|
"learning_rate": 2.3595666834062702e-06, |
|
"loss": 1.0206, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 0.7658368606069302, |
|
"grad_norm": 0.46484375, |
|
"learning_rate": 2.341631393930698e-06, |
|
"loss": 1.0521, |
|
"step": 10675 |
|
}, |
|
{ |
|
"epoch": 0.7676303895544874, |
|
"grad_norm": 0.1591796875, |
|
"learning_rate": 2.323696104455126e-06, |
|
"loss": 1.0034, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.7694239185020446, |
|
"grad_norm": 0.44921875, |
|
"learning_rate": 2.305760814979554e-06, |
|
"loss": 0.957, |
|
"step": 10725 |
|
}, |
|
{ |
|
"epoch": 0.7712174474496019, |
|
"grad_norm": 0.2490234375, |
|
"learning_rate": 2.2878255255039817e-06, |
|
"loss": 1.1092, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 0.7730109763971591, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 2.2698902360284097e-06, |
|
"loss": 0.9977, |
|
"step": 10775 |
|
}, |
|
{ |
|
"epoch": 0.7748045053447162, |
|
"grad_norm": 0.232421875, |
|
"learning_rate": 2.2519549465528377e-06, |
|
"loss": 1.0989, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.7765980342922735, |
|
"grad_norm": 0.11767578125, |
|
"learning_rate": 2.2340196570772652e-06, |
|
"loss": 1.0842, |
|
"step": 10825 |
|
}, |
|
{ |
|
"epoch": 0.7783915632398307, |
|
"grad_norm": 0.4921875, |
|
"learning_rate": 2.2160843676016932e-06, |
|
"loss": 1.0147, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 0.7801850921873879, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 2.198149078126121e-06, |
|
"loss": 1.0062, |
|
"step": 10875 |
|
}, |
|
{ |
|
"epoch": 0.7819786211349451, |
|
"grad_norm": 0.177734375, |
|
"learning_rate": 2.1802137886505488e-06, |
|
"loss": 1.0242, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.7837721500825023, |
|
"grad_norm": 0.275390625, |
|
"learning_rate": 2.1622784991749767e-06, |
|
"loss": 0.9424, |
|
"step": 10925 |
|
}, |
|
{ |
|
"epoch": 0.7855656790300596, |
|
"grad_norm": 0.2275390625, |
|
"learning_rate": 2.1443432096994047e-06, |
|
"loss": 0.931, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 0.7873592079776167, |
|
"grad_norm": 0.353515625, |
|
"learning_rate": 2.1264079202238323e-06, |
|
"loss": 0.9701, |
|
"step": 10975 |
|
}, |
|
{ |
|
"epoch": 0.789152736925174, |
|
"grad_norm": 0.13671875, |
|
"learning_rate": 2.1084726307482607e-06, |
|
"loss": 1.0076, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.7909462658727312, |
|
"grad_norm": 0.51953125, |
|
"learning_rate": 2.0905373412726882e-06, |
|
"loss": 0.9799, |
|
"step": 11025 |
|
}, |
|
{ |
|
"epoch": 0.7927397948202884, |
|
"grad_norm": 0.6875, |
|
"learning_rate": 2.0726020517971162e-06, |
|
"loss": 1.0588, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 0.7945333237678456, |
|
"grad_norm": 0.51953125, |
|
"learning_rate": 2.054666762321544e-06, |
|
"loss": 0.932, |
|
"step": 11075 |
|
}, |
|
{ |
|
"epoch": 0.7963268527154028, |
|
"grad_norm": 0.1513671875, |
|
"learning_rate": 2.0367314728459718e-06, |
|
"loss": 1.053, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.7981203816629601, |
|
"grad_norm": 0.353515625, |
|
"learning_rate": 2.0187961833703997e-06, |
|
"loss": 0.9572, |
|
"step": 11125 |
|
}, |
|
{ |
|
"epoch": 0.7999139106105173, |
|
"grad_norm": 0.296875, |
|
"learning_rate": 2.0008608938948277e-06, |
|
"loss": 1.0769, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 0.8017074395580744, |
|
"grad_norm": 0.302734375, |
|
"learning_rate": 1.9829256044192553e-06, |
|
"loss": 0.9988, |
|
"step": 11175 |
|
}, |
|
{ |
|
"epoch": 0.8035009685056317, |
|
"grad_norm": 0.3984375, |
|
"learning_rate": 1.9649903149436833e-06, |
|
"loss": 1.0431, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.8052944974531889, |
|
"grad_norm": 0.13671875, |
|
"learning_rate": 1.9470550254681112e-06, |
|
"loss": 1.0488, |
|
"step": 11225 |
|
}, |
|
{ |
|
"epoch": 0.8070880264007461, |
|
"grad_norm": 0.287109375, |
|
"learning_rate": 1.929119735992539e-06, |
|
"loss": 0.9088, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 0.8088815553483033, |
|
"grad_norm": 0.384765625, |
|
"learning_rate": 1.9111844465169668e-06, |
|
"loss": 1.0106, |
|
"step": 11275 |
|
}, |
|
{ |
|
"epoch": 0.8106750842958605, |
|
"grad_norm": 0.203125, |
|
"learning_rate": 1.8932491570413948e-06, |
|
"loss": 0.8832, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.8124686132434178, |
|
"grad_norm": 0.421875, |
|
"learning_rate": 1.8753138675658227e-06, |
|
"loss": 0.9656, |
|
"step": 11325 |
|
}, |
|
{ |
|
"epoch": 0.8142621421909749, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 1.8573785780902507e-06, |
|
"loss": 0.9692, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 0.8160556711385322, |
|
"grad_norm": 0.1669921875, |
|
"learning_rate": 1.8394432886146785e-06, |
|
"loss": 0.9788, |
|
"step": 11375 |
|
}, |
|
{ |
|
"epoch": 0.8178492000860894, |
|
"grad_norm": 0.1259765625, |
|
"learning_rate": 1.8215079991391063e-06, |
|
"loss": 1.0019, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.8196427290336467, |
|
"grad_norm": 0.80078125, |
|
"learning_rate": 1.8035727096635342e-06, |
|
"loss": 0.9674, |
|
"step": 11425 |
|
}, |
|
{ |
|
"epoch": 0.8214362579812038, |
|
"grad_norm": 0.1572265625, |
|
"learning_rate": 1.785637420187962e-06, |
|
"loss": 0.9064, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 0.823229786928761, |
|
"grad_norm": 3.34375, |
|
"learning_rate": 1.7677021307123898e-06, |
|
"loss": 1.1879, |
|
"step": 11475 |
|
}, |
|
{ |
|
"epoch": 0.8250233158763183, |
|
"grad_norm": 0.515625, |
|
"learning_rate": 1.7497668412368178e-06, |
|
"loss": 1.148, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.8268168448238754, |
|
"grad_norm": 1.125, |
|
"learning_rate": 1.7318315517612455e-06, |
|
"loss": 0.9733, |
|
"step": 11525 |
|
}, |
|
{ |
|
"epoch": 0.8286103737714327, |
|
"grad_norm": 0.265625, |
|
"learning_rate": 1.7138962622856733e-06, |
|
"loss": 1.0721, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 0.8304039027189899, |
|
"grad_norm": 0.1474609375, |
|
"learning_rate": 1.6959609728101013e-06, |
|
"loss": 1.0644, |
|
"step": 11575 |
|
}, |
|
{ |
|
"epoch": 0.8321974316665471, |
|
"grad_norm": 0.1728515625, |
|
"learning_rate": 1.678025683334529e-06, |
|
"loss": 0.9961, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.8339909606141043, |
|
"grad_norm": 0.2060546875, |
|
"learning_rate": 1.6600903938589572e-06, |
|
"loss": 0.9952, |
|
"step": 11625 |
|
}, |
|
{ |
|
"epoch": 0.8357844895616615, |
|
"grad_norm": 0.146484375, |
|
"learning_rate": 1.642155104383385e-06, |
|
"loss": 0.9518, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 0.8375780185092188, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 1.6242198149078128e-06, |
|
"loss": 0.9067, |
|
"step": 11675 |
|
}, |
|
{ |
|
"epoch": 0.839371547456776, |
|
"grad_norm": 0.34375, |
|
"learning_rate": 1.6062845254322408e-06, |
|
"loss": 0.9853, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.8411650764043331, |
|
"grad_norm": 0.1279296875, |
|
"learning_rate": 1.5883492359566685e-06, |
|
"loss": 0.9909, |
|
"step": 11725 |
|
}, |
|
{ |
|
"epoch": 0.8429586053518904, |
|
"grad_norm": 0.26953125, |
|
"learning_rate": 1.5704139464810963e-06, |
|
"loss": 1.0348, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 0.8447521342994476, |
|
"grad_norm": 0.298828125, |
|
"learning_rate": 1.5524786570055243e-06, |
|
"loss": 0.9372, |
|
"step": 11775 |
|
}, |
|
{ |
|
"epoch": 0.8465456632470048, |
|
"grad_norm": 0.375, |
|
"learning_rate": 1.534543367529952e-06, |
|
"loss": 1.0784, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.848339192194562, |
|
"grad_norm": 0.1279296875, |
|
"learning_rate": 1.5166080780543798e-06, |
|
"loss": 0.9116, |
|
"step": 11825 |
|
}, |
|
{ |
|
"epoch": 0.8501327211421192, |
|
"grad_norm": 0.1171875, |
|
"learning_rate": 1.4986727885788078e-06, |
|
"loss": 1.0438, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 0.8519262500896765, |
|
"grad_norm": 0.1279296875, |
|
"learning_rate": 1.4807374991032356e-06, |
|
"loss": 0.9669, |
|
"step": 11875 |
|
}, |
|
{ |
|
"epoch": 0.8537197790372336, |
|
"grad_norm": 0.201171875, |
|
"learning_rate": 1.4628022096276633e-06, |
|
"loss": 0.9231, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.8555133079847909, |
|
"grad_norm": 0.3984375, |
|
"learning_rate": 1.4448669201520913e-06, |
|
"loss": 1.0184, |
|
"step": 11925 |
|
}, |
|
{ |
|
"epoch": 0.8573068369323481, |
|
"grad_norm": 0.6953125, |
|
"learning_rate": 1.4269316306765193e-06, |
|
"loss": 1.1583, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 0.8591003658799053, |
|
"grad_norm": 0.30078125, |
|
"learning_rate": 1.4089963412009473e-06, |
|
"loss": 1.0219, |
|
"step": 11975 |
|
}, |
|
{ |
|
"epoch": 0.8608938948274625, |
|
"grad_norm": 0.16015625, |
|
"learning_rate": 1.391061051725375e-06, |
|
"loss": 0.9889, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.8626874237750197, |
|
"grad_norm": 0.70703125, |
|
"learning_rate": 1.3731257622498028e-06, |
|
"loss": 0.9859, |
|
"step": 12025 |
|
}, |
|
{ |
|
"epoch": 0.864480952722577, |
|
"grad_norm": 0.37890625, |
|
"learning_rate": 1.3551904727742308e-06, |
|
"loss": 1.044, |
|
"step": 12050 |
|
}, |
|
{ |
|
"epoch": 0.8662744816701342, |
|
"grad_norm": 0.1513671875, |
|
"learning_rate": 1.3372551832986586e-06, |
|
"loss": 1.1042, |
|
"step": 12075 |
|
}, |
|
{ |
|
"epoch": 0.8680680106176913, |
|
"grad_norm": 0.11328125, |
|
"learning_rate": 1.3193198938230863e-06, |
|
"loss": 0.8639, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.8698615395652486, |
|
"grad_norm": 0.2109375, |
|
"learning_rate": 1.3013846043475143e-06, |
|
"loss": 1.0858, |
|
"step": 12125 |
|
}, |
|
{ |
|
"epoch": 0.8716550685128058, |
|
"grad_norm": 0.12109375, |
|
"learning_rate": 1.283449314871942e-06, |
|
"loss": 0.86, |
|
"step": 12150 |
|
}, |
|
{ |
|
"epoch": 0.873448597460363, |
|
"grad_norm": 0.365234375, |
|
"learning_rate": 1.2655140253963699e-06, |
|
"loss": 1.0895, |
|
"step": 12175 |
|
}, |
|
{ |
|
"epoch": 0.8752421264079202, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 1.2475787359207978e-06, |
|
"loss": 1.2286, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.8770356553554775, |
|
"grad_norm": 0.515625, |
|
"learning_rate": 1.2296434464452258e-06, |
|
"loss": 0.9593, |
|
"step": 12225 |
|
}, |
|
{ |
|
"epoch": 0.8788291843030347, |
|
"grad_norm": 0.1904296875, |
|
"learning_rate": 1.2117081569696536e-06, |
|
"loss": 0.8951, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 0.8806227132505918, |
|
"grad_norm": 0.150390625, |
|
"learning_rate": 1.1937728674940814e-06, |
|
"loss": 1.1512, |
|
"step": 12275 |
|
}, |
|
{ |
|
"epoch": 0.8824162421981491, |
|
"grad_norm": 5.53125, |
|
"learning_rate": 1.1758375780185093e-06, |
|
"loss": 0.9898, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.8842097711457063, |
|
"grad_norm": 0.212890625, |
|
"learning_rate": 1.1579022885429373e-06, |
|
"loss": 1.0355, |
|
"step": 12325 |
|
}, |
|
{ |
|
"epoch": 0.8860033000932636, |
|
"grad_norm": 0.287109375, |
|
"learning_rate": 1.139966999067365e-06, |
|
"loss": 0.9758, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 0.8877968290408207, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 1.1220317095917929e-06, |
|
"loss": 0.9874, |
|
"step": 12375 |
|
}, |
|
{ |
|
"epoch": 0.8895903579883779, |
|
"grad_norm": 0.6640625, |
|
"learning_rate": 1.1040964201162208e-06, |
|
"loss": 1.0189, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.8913838869359352, |
|
"grad_norm": 0.369140625, |
|
"learning_rate": 1.0861611306406486e-06, |
|
"loss": 1.0027, |
|
"step": 12425 |
|
}, |
|
{ |
|
"epoch": 0.8931774158834923, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 1.0682258411650764e-06, |
|
"loss": 0.9254, |
|
"step": 12450 |
|
}, |
|
{ |
|
"epoch": 0.8949709448310496, |
|
"grad_norm": 0.1767578125, |
|
"learning_rate": 1.0502905516895044e-06, |
|
"loss": 0.9798, |
|
"step": 12475 |
|
}, |
|
{ |
|
"epoch": 0.8967644737786068, |
|
"grad_norm": 0.2216796875, |
|
"learning_rate": 1.0323552622139323e-06, |
|
"loss": 1.0012, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.898558002726164, |
|
"grad_norm": 0.65234375, |
|
"learning_rate": 1.0144199727383601e-06, |
|
"loss": 1.1634, |
|
"step": 12525 |
|
}, |
|
{ |
|
"epoch": 0.9003515316737212, |
|
"grad_norm": 0.236328125, |
|
"learning_rate": 9.964846832627879e-07, |
|
"loss": 1.03, |
|
"step": 12550 |
|
}, |
|
{ |
|
"epoch": 0.9021450606212784, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 9.785493937872159e-07, |
|
"loss": 0.9248, |
|
"step": 12575 |
|
}, |
|
{ |
|
"epoch": 0.9039385895688357, |
|
"grad_norm": 0.1669921875, |
|
"learning_rate": 9.606141043116436e-07, |
|
"loss": 0.8934, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.9057321185163929, |
|
"grad_norm": 0.1865234375, |
|
"learning_rate": 9.426788148360716e-07, |
|
"loss": 1.1064, |
|
"step": 12625 |
|
}, |
|
{ |
|
"epoch": 0.90752564746395, |
|
"grad_norm": 0.1591796875, |
|
"learning_rate": 9.247435253604995e-07, |
|
"loss": 0.9369, |
|
"step": 12650 |
|
}, |
|
{ |
|
"epoch": 0.9093191764115073, |
|
"grad_norm": 0.2412109375, |
|
"learning_rate": 9.068082358849273e-07, |
|
"loss": 1.0222, |
|
"step": 12675 |
|
}, |
|
{ |
|
"epoch": 0.9111127053590645, |
|
"grad_norm": 0.34765625, |
|
"learning_rate": 8.888729464093551e-07, |
|
"loss": 0.9838, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.9129062343066217, |
|
"grad_norm": 0.19921875, |
|
"learning_rate": 8.70937656933783e-07, |
|
"loss": 0.8956, |
|
"step": 12725 |
|
}, |
|
{ |
|
"epoch": 0.9146997632541789, |
|
"grad_norm": 0.17578125, |
|
"learning_rate": 8.530023674582108e-07, |
|
"loss": 0.9418, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 0.9164932922017361, |
|
"grad_norm": 0.5234375, |
|
"learning_rate": 8.350670779826386e-07, |
|
"loss": 1.0145, |
|
"step": 12775 |
|
}, |
|
{ |
|
"epoch": 0.9182868211492934, |
|
"grad_norm": 0.2138671875, |
|
"learning_rate": 8.171317885070666e-07, |
|
"loss": 0.9934, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.9200803500968505, |
|
"grad_norm": 0.375, |
|
"learning_rate": 7.991964990314945e-07, |
|
"loss": 1.1503, |
|
"step": 12825 |
|
}, |
|
{ |
|
"epoch": 0.9218738790444078, |
|
"grad_norm": 0.302734375, |
|
"learning_rate": 7.812612095559223e-07, |
|
"loss": 1.2094, |
|
"step": 12850 |
|
}, |
|
{ |
|
"epoch": 0.923667407991965, |
|
"grad_norm": 0.1552734375, |
|
"learning_rate": 7.633259200803501e-07, |
|
"loss": 1.0571, |
|
"step": 12875 |
|
}, |
|
{ |
|
"epoch": 0.9254609369395222, |
|
"grad_norm": 0.2734375, |
|
"learning_rate": 7.45390630604778e-07, |
|
"loss": 1.0455, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.9272544658870794, |
|
"grad_norm": 0.431640625, |
|
"learning_rate": 7.274553411292058e-07, |
|
"loss": 1.0746, |
|
"step": 12925 |
|
}, |
|
{ |
|
"epoch": 0.9290479948346366, |
|
"grad_norm": 0.12890625, |
|
"learning_rate": 7.095200516536338e-07, |
|
"loss": 1.0629, |
|
"step": 12950 |
|
}, |
|
{ |
|
"epoch": 0.9308415237821939, |
|
"grad_norm": 0.177734375, |
|
"learning_rate": 6.915847621780616e-07, |
|
"loss": 0.9921, |
|
"step": 12975 |
|
}, |
|
{ |
|
"epoch": 0.9326350527297511, |
|
"grad_norm": 0.302734375, |
|
"learning_rate": 6.736494727024895e-07, |
|
"loss": 0.9427, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.9344285816773082, |
|
"grad_norm": 0.1650390625, |
|
"learning_rate": 6.557141832269173e-07, |
|
"loss": 0.9967, |
|
"step": 13025 |
|
}, |
|
{ |
|
"epoch": 0.9362221106248655, |
|
"grad_norm": 0.19140625, |
|
"learning_rate": 6.377788937513452e-07, |
|
"loss": 1.1217, |
|
"step": 13050 |
|
}, |
|
{ |
|
"epoch": 0.9380156395724227, |
|
"grad_norm": 0.1318359375, |
|
"learning_rate": 6.19843604275773e-07, |
|
"loss": 1.0301, |
|
"step": 13075 |
|
}, |
|
{ |
|
"epoch": 0.9398091685199799, |
|
"grad_norm": 1.6171875, |
|
"learning_rate": 6.019083148002009e-07, |
|
"loss": 0.9226, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.9416026974675371, |
|
"grad_norm": 0.953125, |
|
"learning_rate": 5.839730253246288e-07, |
|
"loss": 1.1147, |
|
"step": 13125 |
|
}, |
|
{ |
|
"epoch": 0.9433962264150944, |
|
"grad_norm": 0.2294921875, |
|
"learning_rate": 5.660377358490567e-07, |
|
"loss": 1.1343, |
|
"step": 13150 |
|
}, |
|
{ |
|
"epoch": 0.9451897553626516, |
|
"grad_norm": 0.1923828125, |
|
"learning_rate": 5.481024463734845e-07, |
|
"loss": 1.1753, |
|
"step": 13175 |
|
}, |
|
{ |
|
"epoch": 0.9469832843102087, |
|
"grad_norm": 0.1767578125, |
|
"learning_rate": 5.301671568979123e-07, |
|
"loss": 1.0303, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.948776813257766, |
|
"grad_norm": 1.6484375, |
|
"learning_rate": 5.122318674223403e-07, |
|
"loss": 1.0228, |
|
"step": 13225 |
|
}, |
|
{ |
|
"epoch": 0.9505703422053232, |
|
"grad_norm": 0.169921875, |
|
"learning_rate": 4.942965779467681e-07, |
|
"loss": 0.9644, |
|
"step": 13250 |
|
}, |
|
{ |
|
"epoch": 0.9523638711528805, |
|
"grad_norm": 0.177734375, |
|
"learning_rate": 4.7636128847119593e-07, |
|
"loss": 0.9838, |
|
"step": 13275 |
|
}, |
|
{ |
|
"epoch": 0.9541574001004376, |
|
"grad_norm": 0.162109375, |
|
"learning_rate": 4.5842599899562386e-07, |
|
"loss": 0.8988, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.9559509290479948, |
|
"grad_norm": 0.1357421875, |
|
"learning_rate": 4.404907095200517e-07, |
|
"loss": 1.2, |
|
"step": 13325 |
|
}, |
|
{ |
|
"epoch": 0.9577444579955521, |
|
"grad_norm": 0.2470703125, |
|
"learning_rate": 4.225554200444795e-07, |
|
"loss": 0.9431, |
|
"step": 13350 |
|
}, |
|
{ |
|
"epoch": 0.9595379869431092, |
|
"grad_norm": 0.359375, |
|
"learning_rate": 4.0462013056890743e-07, |
|
"loss": 0.9729, |
|
"step": 13375 |
|
}, |
|
{ |
|
"epoch": 0.9613315158906665, |
|
"grad_norm": 0.212890625, |
|
"learning_rate": 3.8668484109333525e-07, |
|
"loss": 1.1659, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.9631250448382237, |
|
"grad_norm": 0.546875, |
|
"learning_rate": 3.6874955161776313e-07, |
|
"loss": 0.9316, |
|
"step": 13425 |
|
}, |
|
{ |
|
"epoch": 0.9649185737857809, |
|
"grad_norm": 0.1728515625, |
|
"learning_rate": 3.50814262142191e-07, |
|
"loss": 0.9412, |
|
"step": 13450 |
|
}, |
|
{ |
|
"epoch": 0.9667121027333381, |
|
"grad_norm": 5.65625, |
|
"learning_rate": 3.328789726666189e-07, |
|
"loss": 0.9748, |
|
"step": 13475 |
|
}, |
|
{ |
|
"epoch": 0.9685056316808953, |
|
"grad_norm": 0.1796875, |
|
"learning_rate": 3.149436831910467e-07, |
|
"loss": 0.9574, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.9702991606284526, |
|
"grad_norm": 0.2490234375, |
|
"learning_rate": 2.970083937154746e-07, |
|
"loss": 0.9165, |
|
"step": 13525 |
|
}, |
|
{ |
|
"epoch": 0.9720926895760098, |
|
"grad_norm": 0.4375, |
|
"learning_rate": 2.7907310423990245e-07, |
|
"loss": 0.9801, |
|
"step": 13550 |
|
}, |
|
{ |
|
"epoch": 0.9738862185235669, |
|
"grad_norm": 0.24609375, |
|
"learning_rate": 2.611378147643303e-07, |
|
"loss": 0.9946, |
|
"step": 13575 |
|
}, |
|
{ |
|
"epoch": 0.9756797474711242, |
|
"grad_norm": 1.03125, |
|
"learning_rate": 2.4320252528875815e-07, |
|
"loss": 0.9575, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.9774732764186814, |
|
"grad_norm": 0.267578125, |
|
"learning_rate": 2.2526723581318605e-07, |
|
"loss": 0.9646, |
|
"step": 13625 |
|
}, |
|
{ |
|
"epoch": 0.9792668053662386, |
|
"grad_norm": 0.1708984375, |
|
"learning_rate": 2.0733194633761392e-07, |
|
"loss": 1.0246, |
|
"step": 13650 |
|
}, |
|
{ |
|
"epoch": 0.9810603343137958, |
|
"grad_norm": 0.453125, |
|
"learning_rate": 1.8939665686204177e-07, |
|
"loss": 0.9829, |
|
"step": 13675 |
|
}, |
|
{ |
|
"epoch": 0.982853863261353, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 1.7146136738646965e-07, |
|
"loss": 1.0098, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 0.9846473922089103, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 1.535260779108975e-07, |
|
"loss": 0.9828, |
|
"step": 13725 |
|
}, |
|
{ |
|
"epoch": 0.9864409211564674, |
|
"grad_norm": 0.1611328125, |
|
"learning_rate": 1.3559078843532534e-07, |
|
"loss": 0.9689, |
|
"step": 13750 |
|
}, |
|
{ |
|
"epoch": 0.9882344501040247, |
|
"grad_norm": 0.1904296875, |
|
"learning_rate": 1.1765549895975323e-07, |
|
"loss": 1.0347, |
|
"step": 13775 |
|
}, |
|
{ |
|
"epoch": 0.9900279790515819, |
|
"grad_norm": 0.1669921875, |
|
"learning_rate": 9.972020948418109e-08, |
|
"loss": 1.0367, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.9918215079991392, |
|
"grad_norm": 0.28515625, |
|
"learning_rate": 8.178492000860894e-08, |
|
"loss": 0.904, |
|
"step": 13825 |
|
}, |
|
{ |
|
"epoch": 0.9936150369466963, |
|
"grad_norm": 0.16796875, |
|
"learning_rate": 6.384963053303682e-08, |
|
"loss": 0.9934, |
|
"step": 13850 |
|
}, |
|
{ |
|
"epoch": 0.9954085658942535, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 4.591434105746467e-08, |
|
"loss": 1.009, |
|
"step": 13875 |
|
}, |
|
{ |
|
"epoch": 0.9972020948418108, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 2.7979051581892535e-08, |
|
"loss": 0.9746, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 0.998995623789368, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 1.0043762106320397e-08, |
|
"loss": 1.0373, |
|
"step": 13925 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 13939, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 8.128724037953004e+18, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|