{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.031583079199251185, "eval_steps": 393, "global_step": 1569, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2.0129432249363408e-05, "grad_norm": 1.2282008356123697e-05, "learning_rate": 2e-05, "loss": 46.0, "step": 1 }, { "epoch": 2.0129432249363408e-05, "eval_loss": 11.5, "eval_runtime": 130.2655, "eval_samples_per_second": 160.58, "eval_steps_per_second": 80.29, "step": 1 }, { "epoch": 4.0258864498726816e-05, "grad_norm": 2.1924333850620314e-05, "learning_rate": 4e-05, "loss": 46.0, "step": 2 }, { "epoch": 6.038829674809022e-05, "grad_norm": 1.5113248082343489e-05, "learning_rate": 6e-05, "loss": 46.0, "step": 3 }, { "epoch": 8.051772899745363e-05, "grad_norm": 1.604706449143123e-05, "learning_rate": 8e-05, "loss": 46.0, "step": 4 }, { "epoch": 0.00010064716124681703, "grad_norm": 2.237814805994276e-05, "learning_rate": 0.0001, "loss": 46.0, "step": 5 }, { "epoch": 0.00012077659349618043, "grad_norm": 2.265493640152272e-05, "learning_rate": 0.00012, "loss": 46.0, "step": 6 }, { "epoch": 0.00014090602574554385, "grad_norm": 1.7647287677391432e-05, "learning_rate": 0.00014, "loss": 46.0, "step": 7 }, { "epoch": 0.00016103545799490726, "grad_norm": 2.9863820600439794e-05, "learning_rate": 0.00016, "loss": 46.0, "step": 8 }, { "epoch": 0.00018116489024427065, "grad_norm": 1.1106193596788216e-05, "learning_rate": 0.00018, "loss": 46.0, "step": 9 }, { "epoch": 0.00020129432249363407, "grad_norm": 1.795052594388835e-05, "learning_rate": 0.0002, "loss": 46.0, "step": 10 }, { "epoch": 0.00022142375474299748, "grad_norm": 1.9582959794206545e-05, "learning_rate": 0.0001999997969619787, "loss": 46.0, "step": 11 }, { "epoch": 0.00024155318699236087, "grad_norm": 3.401090725674294e-05, "learning_rate": 0.00019999918784873927, "loss": 46.0, "step": 12 }, { "epoch": 0.0002616826192417243, "grad_norm": 1.5809200704097748e-05, "learning_rate": 0.00019999817266275517, "loss": 46.0, "step": 13 }, { "epoch": 0.0002818120514910877, "grad_norm": 1.1195018487342168e-05, "learning_rate": 0.00019999675140814887, "loss": 46.0, "step": 14 }, { "epoch": 0.0003019414837404511, "grad_norm": 4.638621976482682e-05, "learning_rate": 0.00019999492409069166, "loss": 46.0, "step": 15 }, { "epoch": 0.00032207091598981453, "grad_norm": 2.1349093003664166e-05, "learning_rate": 0.00019999269071780394, "loss": 46.0, "step": 16 }, { "epoch": 0.0003422003482391779, "grad_norm": 1.8559087038738653e-05, "learning_rate": 0.0001999900512985548, "loss": 46.0, "step": 17 }, { "epoch": 0.0003623297804885413, "grad_norm": 9.40681820793543e-06, "learning_rate": 0.00019998700584366238, "loss": 46.0, "step": 18 }, { "epoch": 0.00038245921273790474, "grad_norm": 1.6563231838517822e-05, "learning_rate": 0.0001999835543654935, "loss": 46.0, "step": 19 }, { "epoch": 0.00040258864498726813, "grad_norm": 1.7772108549252152e-05, "learning_rate": 0.0001999796968780638, "loss": 46.0, "step": 20 }, { "epoch": 0.0004227180772366315, "grad_norm": 3.5355504223844036e-05, "learning_rate": 0.00019997543339703757, "loss": 46.0, "step": 21 }, { "epoch": 0.00044284750948599496, "grad_norm": 2.7386544388718903e-05, "learning_rate": 0.00019997076393972783, "loss": 46.0, "step": 22 }, { "epoch": 0.00046297694173535835, "grad_norm": 2.447705810482148e-05, "learning_rate": 0.0001999656885250961, "loss": 46.0, "step": 23 }, { "epoch": 0.00048310637398472174, "grad_norm": 2.7531290470506065e-05, "learning_rate": 0.00019996020717375247, "loss": 46.0, "step": 24 }, { "epoch": 0.0005032358062340851, "grad_norm": 1.6084130038507283e-05, "learning_rate": 0.00019995431990795531, "loss": 46.0, "step": 25 }, { "epoch": 0.0005233652384834486, "grad_norm": 1.0257011126668658e-05, "learning_rate": 0.00019994802675161148, "loss": 46.0, "step": 26 }, { "epoch": 0.000543494670732812, "grad_norm": 1.4123150322120637e-05, "learning_rate": 0.00019994132773027597, "loss": 46.0, "step": 27 }, { "epoch": 0.0005636241029821754, "grad_norm": 2.4509085051249713e-05, "learning_rate": 0.00019993422287115185, "loss": 46.0, "step": 28 }, { "epoch": 0.0005837535352315388, "grad_norm": 1.926114782691002e-05, "learning_rate": 0.0001999267122030903, "loss": 46.0, "step": 29 }, { "epoch": 0.0006038829674809022, "grad_norm": 2.307241447851993e-05, "learning_rate": 0.0001999187957565903, "loss": 46.0, "step": 30 }, { "epoch": 0.0006240123997302656, "grad_norm": 1.905815770442132e-05, "learning_rate": 0.00019991047356379866, "loss": 46.0, "step": 31 }, { "epoch": 0.0006441418319796291, "grad_norm": 3.1321374990511686e-05, "learning_rate": 0.00019990174565850984, "loss": 46.0, "step": 32 }, { "epoch": 0.0006642712642289924, "grad_norm": 1.4927420124877244e-05, "learning_rate": 0.00019989261207616573, "loss": 46.0, "step": 33 }, { "epoch": 0.0006844006964783558, "grad_norm": 6.0364993260009214e-05, "learning_rate": 0.00019988307285385566, "loss": 46.0, "step": 34 }, { "epoch": 0.0007045301287277192, "grad_norm": 3.057021604035981e-05, "learning_rate": 0.00019987312803031607, "loss": 46.0, "step": 35 }, { "epoch": 0.0007246595609770826, "grad_norm": 2.576452425273601e-05, "learning_rate": 0.00019986277764593057, "loss": 46.0, "step": 36 }, { "epoch": 0.0007447889932264461, "grad_norm": 1.8830834960681386e-05, "learning_rate": 0.00019985202174272956, "loss": 46.0, "step": 37 }, { "epoch": 0.0007649184254758095, "grad_norm": 2.4923951059463434e-05, "learning_rate": 0.0001998408603643902, "loss": 46.0, "step": 38 }, { "epoch": 0.0007850478577251729, "grad_norm": 2.467411286488641e-05, "learning_rate": 0.00019982929355623615, "loss": 46.0, "step": 39 }, { "epoch": 0.0008051772899745363, "grad_norm": 4.072063529747538e-05, "learning_rate": 0.00019981732136523746, "loss": 46.0, "step": 40 }, { "epoch": 0.0008253067222238997, "grad_norm": 2.181486888730433e-05, "learning_rate": 0.0001998049438400103, "loss": 46.0, "step": 41 }, { "epoch": 0.000845436154473263, "grad_norm": 1.713978599582333e-05, "learning_rate": 0.0001997921610308169, "loss": 46.0, "step": 42 }, { "epoch": 0.0008655655867226265, "grad_norm": 1.962662281584926e-05, "learning_rate": 0.00019977897298956515, "loss": 46.0, "step": 43 }, { "epoch": 0.0008856950189719899, "grad_norm": 2.0111776393605396e-05, "learning_rate": 0.0001997653797698085, "loss": 46.0, "step": 44 }, { "epoch": 0.0009058244512213533, "grad_norm": 3.424846727284603e-05, "learning_rate": 0.0001997513814267458, "loss": 46.0, "step": 45 }, { "epoch": 0.0009259538834707167, "grad_norm": 2.3754591893521138e-05, "learning_rate": 0.00019973697801722095, "loss": 46.0, "step": 46 }, { "epoch": 0.0009460833157200801, "grad_norm": 3.0731138394912705e-05, "learning_rate": 0.00019972216959972274, "loss": 46.0, "step": 47 }, { "epoch": 0.0009662127479694435, "grad_norm": 2.0918800146318972e-05, "learning_rate": 0.0001997069562343846, "loss": 46.0, "step": 48 }, { "epoch": 0.000986342180218807, "grad_norm": 3.504705455270596e-05, "learning_rate": 0.0001996913379829844, "loss": 46.0, "step": 49 }, { "epoch": 0.0010064716124681702, "grad_norm": 2.918963582487777e-05, "learning_rate": 0.0001996753149089441, "loss": 46.0, "step": 50 }, { "epoch": 0.0010266010447175337, "grad_norm": 3.13876080326736e-05, "learning_rate": 0.00019965888707732953, "loss": 46.0, "step": 51 }, { "epoch": 0.0010467304769668972, "grad_norm": 1.2979964594705962e-05, "learning_rate": 0.0001996420545548502, "loss": 46.0, "step": 52 }, { "epoch": 0.0010668599092162605, "grad_norm": 2.1166308215470053e-05, "learning_rate": 0.00019962481740985895, "loss": 46.0, "step": 53 }, { "epoch": 0.001086989341465624, "grad_norm": 2.2731392164132558e-05, "learning_rate": 0.00019960717571235173, "loss": 46.0, "step": 54 }, { "epoch": 0.0011071187737149873, "grad_norm": 2.4999253582791425e-05, "learning_rate": 0.00019958912953396723, "loss": 46.0, "step": 55 }, { "epoch": 0.0011272482059643508, "grad_norm": 6.292035686783493e-05, "learning_rate": 0.00019957067894798663, "loss": 46.0, "step": 56 }, { "epoch": 0.0011473776382137143, "grad_norm": 1.5206553143798374e-05, "learning_rate": 0.00019955182402933334, "loss": 46.0, "step": 57 }, { "epoch": 0.0011675070704630776, "grad_norm": 3.7179495848249644e-05, "learning_rate": 0.0001995325648545727, "loss": 46.0, "step": 58 }, { "epoch": 0.001187636502712441, "grad_norm": 2.6544912543613464e-05, "learning_rate": 0.00019951290150191158, "loss": 46.0, "step": 59 }, { "epoch": 0.0012077659349618043, "grad_norm": 1.6469433830934577e-05, "learning_rate": 0.00019949283405119815, "loss": 46.0, "step": 60 }, { "epoch": 0.0012278953672111678, "grad_norm": 2.489179496478755e-05, "learning_rate": 0.00019947236258392154, "loss": 46.0, "step": 61 }, { "epoch": 0.0012480247994605311, "grad_norm": 1.7665654013399035e-05, "learning_rate": 0.00019945148718321143, "loss": 46.0, "step": 62 }, { "epoch": 0.0012681542317098946, "grad_norm": 2.1060941435280256e-05, "learning_rate": 0.00019943020793383785, "loss": 46.0, "step": 63 }, { "epoch": 0.0012882836639592581, "grad_norm": 1.269436688744463e-05, "learning_rate": 0.00019940852492221075, "loss": 46.0, "step": 64 }, { "epoch": 0.0013084130962086214, "grad_norm": 2.0220479200361297e-05, "learning_rate": 0.0001993864382363796, "loss": 46.0, "step": 65 }, { "epoch": 0.0013285425284579849, "grad_norm": 2.53070866165217e-05, "learning_rate": 0.00019936394796603318, "loss": 46.0, "step": 66 }, { "epoch": 0.0013486719607073482, "grad_norm": 3.738961459021084e-05, "learning_rate": 0.00019934105420249908, "loss": 46.0, "step": 67 }, { "epoch": 0.0013688013929567117, "grad_norm": 2.57118354056729e-05, "learning_rate": 0.0001993177570387434, "loss": 46.0, "step": 68 }, { "epoch": 0.0013889308252060752, "grad_norm": 1.631179293326568e-05, "learning_rate": 0.00019929405656937032, "loss": 46.0, "step": 69 }, { "epoch": 0.0014090602574554384, "grad_norm": 2.4105151169351302e-05, "learning_rate": 0.00019926995289062176, "loss": 46.0, "step": 70 }, { "epoch": 0.001429189689704802, "grad_norm": 6.657966878265142e-05, "learning_rate": 0.00019924544610037698, "loss": 46.0, "step": 71 }, { "epoch": 0.0014493191219541652, "grad_norm": 2.2929831175133586e-05, "learning_rate": 0.00019922053629815224, "loss": 46.0, "step": 72 }, { "epoch": 0.0014694485542035287, "grad_norm": 2.9039501896477304e-05, "learning_rate": 0.00019919522358510024, "loss": 46.0, "step": 73 }, { "epoch": 0.0014895779864528922, "grad_norm": 5.170597432879731e-05, "learning_rate": 0.00019916950806400983, "loss": 46.0, "step": 74 }, { "epoch": 0.0015097074187022555, "grad_norm": 2.5796563932090066e-05, "learning_rate": 0.00019914338983930557, "loss": 46.0, "step": 75 }, { "epoch": 0.001529836850951619, "grad_norm": 2.303555083926767e-05, "learning_rate": 0.0001991168690170474, "loss": 46.0, "step": 76 }, { "epoch": 0.0015499662832009823, "grad_norm": 3.512139664962888e-05, "learning_rate": 0.00019908994570492993, "loss": 46.0, "step": 77 }, { "epoch": 0.0015700957154503458, "grad_norm": 3.3836728107417e-05, "learning_rate": 0.00019906262001228228, "loss": 46.0, "step": 78 }, { "epoch": 0.001590225147699709, "grad_norm": 2.430832500976976e-05, "learning_rate": 0.00019903489205006764, "loss": 46.0, "step": 79 }, { "epoch": 0.0016103545799490725, "grad_norm": 3.29008289554622e-05, "learning_rate": 0.0001990067619308825, "loss": 46.0, "step": 80 }, { "epoch": 0.001630484012198436, "grad_norm": 2.6406103643239476e-05, "learning_rate": 0.00019897822976895665, "loss": 46.0, "step": 81 }, { "epoch": 0.0016506134444477993, "grad_norm": 1.8888900740421377e-05, "learning_rate": 0.00019894929568015226, "loss": 46.0, "step": 82 }, { "epoch": 0.0016707428766971628, "grad_norm": 3.3854525099741295e-05, "learning_rate": 0.0001989199597819638, "loss": 46.0, "step": 83 }, { "epoch": 0.001690872308946526, "grad_norm": 2.4574126655352302e-05, "learning_rate": 0.0001988902221935173, "loss": 46.0, "step": 84 }, { "epoch": 0.0017110017411958896, "grad_norm": 1.5161581359279808e-05, "learning_rate": 0.00019886008303557, "loss": 46.0, "step": 85 }, { "epoch": 0.001731131173445253, "grad_norm": 2.6385316232335754e-05, "learning_rate": 0.00019882954243050972, "loss": 46.0, "step": 86 }, { "epoch": 0.0017512606056946164, "grad_norm": 1.925287324411329e-05, "learning_rate": 0.00019879860050235469, "loss": 46.0, "step": 87 }, { "epoch": 0.0017713900379439798, "grad_norm": 1.4624111827288289e-05, "learning_rate": 0.00019876725737675254, "loss": 46.0, "step": 88 }, { "epoch": 0.0017915194701933431, "grad_norm": 2.154901812900789e-05, "learning_rate": 0.00019873551318098026, "loss": 46.0, "step": 89 }, { "epoch": 0.0018116489024427066, "grad_norm": 1.7668218788458034e-05, "learning_rate": 0.00019870336804394338, "loss": 46.0, "step": 90 }, { "epoch": 0.0018317783346920701, "grad_norm": 2.4439837943646125e-05, "learning_rate": 0.00019867082209617563, "loss": 46.0, "step": 91 }, { "epoch": 0.0018519077669414334, "grad_norm": 2.07311622943962e-05, "learning_rate": 0.00019863787546983832, "loss": 46.0, "step": 92 }, { "epoch": 0.001872037199190797, "grad_norm": 2.8900734832859598e-05, "learning_rate": 0.00019860452829871975, "loss": 46.0, "step": 93 }, { "epoch": 0.0018921666314401602, "grad_norm": 2.661554935912136e-05, "learning_rate": 0.00019857078071823484, "loss": 46.0, "step": 94 }, { "epoch": 0.0019122960636895237, "grad_norm": 3.1965726520866156e-05, "learning_rate": 0.00019853663286542442, "loss": 46.0, "step": 95 }, { "epoch": 0.001932425495938887, "grad_norm": 2.5141018340946175e-05, "learning_rate": 0.00019850208487895475, "loss": 46.0, "step": 96 }, { "epoch": 0.0019525549281882504, "grad_norm": 2.246517760795541e-05, "learning_rate": 0.0001984671368991169, "loss": 46.0, "step": 97 }, { "epoch": 0.001972684360437614, "grad_norm": 1.959139626706019e-05, "learning_rate": 0.00019843178906782624, "loss": 46.0, "step": 98 }, { "epoch": 0.0019928137926869772, "grad_norm": 1.0403034139017109e-05, "learning_rate": 0.0001983960415286219, "loss": 46.0, "step": 99 }, { "epoch": 0.0020129432249363405, "grad_norm": 2.894277531595435e-05, "learning_rate": 0.000198359894426666, "loss": 46.0, "step": 100 }, { "epoch": 0.002033072657185704, "grad_norm": 2.3059441446093842e-05, "learning_rate": 0.00019832334790874332, "loss": 46.0, "step": 101 }, { "epoch": 0.0020532020894350675, "grad_norm": 3.624501550802961e-05, "learning_rate": 0.00019828640212326046, "loss": 46.0, "step": 102 }, { "epoch": 0.0020733315216844308, "grad_norm": 2.7872463761013933e-05, "learning_rate": 0.00019824905722024542, "loss": 46.0, "step": 103 }, { "epoch": 0.0020934609539337945, "grad_norm": 4.196954978397116e-05, "learning_rate": 0.00019821131335134696, "loss": 46.0, "step": 104 }, { "epoch": 0.0021135903861831578, "grad_norm": 4.869971962762065e-05, "learning_rate": 0.00019817317066983382, "loss": 46.0, "step": 105 }, { "epoch": 0.002133719818432521, "grad_norm": 2.689683788048569e-05, "learning_rate": 0.00019813462933059435, "loss": 46.0, "step": 106 }, { "epoch": 0.0021538492506818843, "grad_norm": 2.0828028937103227e-05, "learning_rate": 0.0001980956894901356, "loss": 46.0, "step": 107 }, { "epoch": 0.002173978682931248, "grad_norm": 1.8326103599974886e-05, "learning_rate": 0.00019805635130658306, "loss": 46.0, "step": 108 }, { "epoch": 0.0021941081151806113, "grad_norm": 2.5773550078156404e-05, "learning_rate": 0.00019801661493967955, "loss": 46.0, "step": 109 }, { "epoch": 0.0022142375474299746, "grad_norm": 1.7421609300072305e-05, "learning_rate": 0.00019797648055078503, "loss": 46.0, "step": 110 }, { "epoch": 0.0022343669796793383, "grad_norm": 2.473703534633387e-05, "learning_rate": 0.0001979359483028756, "loss": 46.0, "step": 111 }, { "epoch": 0.0022544964119287016, "grad_norm": 4.040896601509303e-05, "learning_rate": 0.00019789501836054297, "loss": 46.0, "step": 112 }, { "epoch": 0.002274625844178065, "grad_norm": 3.786892557400279e-05, "learning_rate": 0.00019785369088999387, "loss": 46.0, "step": 113 }, { "epoch": 0.0022947552764274286, "grad_norm": 2.3734346541459672e-05, "learning_rate": 0.0001978119660590493, "loss": 46.0, "step": 114 }, { "epoch": 0.002314884708676792, "grad_norm": 4.281644578441046e-05, "learning_rate": 0.0001977698440371437, "loss": 46.0, "step": 115 }, { "epoch": 0.002335014140926155, "grad_norm": 6.363167631207034e-05, "learning_rate": 0.0001977273249953246, "loss": 46.0, "step": 116 }, { "epoch": 0.0023551435731755184, "grad_norm": 2.2506428649649024e-05, "learning_rate": 0.00019768440910625162, "loss": 46.0, "step": 117 }, { "epoch": 0.002375273005424882, "grad_norm": 2.2376898414222524e-05, "learning_rate": 0.00019764109654419584, "loss": 46.0, "step": 118 }, { "epoch": 0.0023954024376742454, "grad_norm": 4.303127570892684e-05, "learning_rate": 0.0001975973874850393, "loss": 46.0, "step": 119 }, { "epoch": 0.0024155318699236087, "grad_norm": 2.1347035726648755e-05, "learning_rate": 0.00019755328210627394, "loss": 46.0, "step": 120 }, { "epoch": 0.0024356613021729724, "grad_norm": 3.3754025935195386e-05, "learning_rate": 0.00019750878058700117, "loss": 46.0, "step": 121 }, { "epoch": 0.0024557907344223357, "grad_norm": 1.353104380541481e-05, "learning_rate": 0.000197463883107931, "loss": 46.0, "step": 122 }, { "epoch": 0.002475920166671699, "grad_norm": 4.797324072569609e-05, "learning_rate": 0.00019741858985138132, "loss": 46.0, "step": 123 }, { "epoch": 0.0024960495989210622, "grad_norm": 2.413903348497115e-05, "learning_rate": 0.00019737290100127722, "loss": 46.0, "step": 124 }, { "epoch": 0.002516179031170426, "grad_norm": 1.5329667803598568e-05, "learning_rate": 0.00019732681674315014, "loss": 46.0, "step": 125 }, { "epoch": 0.0025363084634197892, "grad_norm": 1.7863807443063706e-05, "learning_rate": 0.00019728033726413723, "loss": 46.0, "step": 126 }, { "epoch": 0.0025564378956691525, "grad_norm": 3.0265011446317658e-05, "learning_rate": 0.00019723346275298052, "loss": 46.0, "step": 127 }, { "epoch": 0.0025765673279185162, "grad_norm": 2.1787187506561168e-05, "learning_rate": 0.00019718619340002618, "loss": 46.0, "step": 128 }, { "epoch": 0.0025966967601678795, "grad_norm": 2.615424818941392e-05, "learning_rate": 0.0001971385293972237, "loss": 46.0, "step": 129 }, { "epoch": 0.0026168261924172428, "grad_norm": 2.6950148821924813e-05, "learning_rate": 0.0001970904709381252, "loss": 46.0, "step": 130 }, { "epoch": 0.0026369556246666065, "grad_norm": 2.9200287826824933e-05, "learning_rate": 0.00019704201821788456, "loss": 46.0, "step": 131 }, { "epoch": 0.0026570850569159698, "grad_norm": 4.081759470864199e-05, "learning_rate": 0.00019699317143325666, "loss": 46.0, "step": 132 }, { "epoch": 0.002677214489165333, "grad_norm": 3.662336166598834e-05, "learning_rate": 0.0001969439307825966, "loss": 46.0, "step": 133 }, { "epoch": 0.0026973439214146963, "grad_norm": 3.1114377634366974e-05, "learning_rate": 0.0001968942964658589, "loss": 46.0, "step": 134 }, { "epoch": 0.00271747335366406, "grad_norm": 2.8435193598852493e-05, "learning_rate": 0.00019684426868459655, "loss": 46.0, "step": 135 }, { "epoch": 0.0027376027859134233, "grad_norm": 2.6921919925371185e-05, "learning_rate": 0.00019679384764196046, "loss": 46.0, "step": 136 }, { "epoch": 0.0027577322181627866, "grad_norm": 2.3536973458249122e-05, "learning_rate": 0.00019674303354269833, "loss": 46.0, "step": 137 }, { "epoch": 0.0027778616504121503, "grad_norm": 1.310581592406379e-05, "learning_rate": 0.00019669182659315412, "loss": 46.0, "step": 138 }, { "epoch": 0.0027979910826615136, "grad_norm": 3.121258123428561e-05, "learning_rate": 0.00019664022700126695, "loss": 46.0, "step": 139 }, { "epoch": 0.002818120514910877, "grad_norm": 3.1170515285339206e-05, "learning_rate": 0.00019658823497657038, "loss": 46.0, "step": 140 }, { "epoch": 0.00283824994716024, "grad_norm": 2.6195963073405437e-05, "learning_rate": 0.0001965358507301916, "loss": 46.0, "step": 141 }, { "epoch": 0.002858379379409604, "grad_norm": 2.5877065127133392e-05, "learning_rate": 0.00019648307447485048, "loss": 46.0, "step": 142 }, { "epoch": 0.002878508811658967, "grad_norm": 2.742213837336749e-05, "learning_rate": 0.00019642990642485875, "loss": 46.0, "step": 143 }, { "epoch": 0.0028986382439083304, "grad_norm": 3.452887904131785e-05, "learning_rate": 0.0001963763467961191, "loss": 46.0, "step": 144 }, { "epoch": 0.002918767676157694, "grad_norm": 3.4890996175818145e-05, "learning_rate": 0.00019632239580612436, "loss": 46.0, "step": 145 }, { "epoch": 0.0029388971084070574, "grad_norm": 2.4831771952449344e-05, "learning_rate": 0.0001962680536739566, "loss": 46.0, "step": 146 }, { "epoch": 0.0029590265406564207, "grad_norm": 2.926559500338044e-05, "learning_rate": 0.00019621332062028617, "loss": 46.0, "step": 147 }, { "epoch": 0.0029791559729057844, "grad_norm": 3.801756975008175e-05, "learning_rate": 0.00019615819686737092, "loss": 46.0, "step": 148 }, { "epoch": 0.0029992854051551477, "grad_norm": 4.2626343201845884e-05, "learning_rate": 0.00019610268263905515, "loss": 46.0, "step": 149 }, { "epoch": 0.003019414837404511, "grad_norm": 5.3078922064742073e-05, "learning_rate": 0.00019604677816076888, "loss": 46.0, "step": 150 }, { "epoch": 0.0030395442696538742, "grad_norm": 3.865420876536518e-05, "learning_rate": 0.00019599048365952682, "loss": 46.0, "step": 151 }, { "epoch": 0.003059673701903238, "grad_norm": 2.114846029144246e-05, "learning_rate": 0.00019593379936392742, "loss": 46.0, "step": 152 }, { "epoch": 0.0030798031341526012, "grad_norm": 2.966085958178155e-05, "learning_rate": 0.00019587672550415203, "loss": 46.0, "step": 153 }, { "epoch": 0.0030999325664019645, "grad_norm": 2.12357390410034e-05, "learning_rate": 0.00019581926231196391, "loss": 46.0, "step": 154 }, { "epoch": 0.0031200619986513282, "grad_norm": 2.5047153030755e-05, "learning_rate": 0.00019576141002070738, "loss": 46.0, "step": 155 }, { "epoch": 0.0031401914309006915, "grad_norm": 2.9844281016266905e-05, "learning_rate": 0.00019570316886530665, "loss": 46.0, "step": 156 }, { "epoch": 0.003160320863150055, "grad_norm": 2.7546439014258794e-05, "learning_rate": 0.00019564453908226515, "loss": 46.0, "step": 157 }, { "epoch": 0.003180450295399418, "grad_norm": 5.0760227168211713e-05, "learning_rate": 0.00019558552090966435, "loss": 46.0, "step": 158 }, { "epoch": 0.0032005797276487818, "grad_norm": 4.231133061693981e-05, "learning_rate": 0.00019552611458716296, "loss": 46.0, "step": 159 }, { "epoch": 0.003220709159898145, "grad_norm": 5.593464447883889e-05, "learning_rate": 0.0001954663203559958, "loss": 46.0, "step": 160 }, { "epoch": 0.0032408385921475083, "grad_norm": 3.7881276512052864e-05, "learning_rate": 0.00019540613845897288, "loss": 46.0, "step": 161 }, { "epoch": 0.003260968024396872, "grad_norm": 3.1589570426149294e-05, "learning_rate": 0.00019534556914047851, "loss": 46.0, "step": 162 }, { "epoch": 0.0032810974566462353, "grad_norm": 7.080697105266154e-05, "learning_rate": 0.00019528461264647014, "loss": 46.0, "step": 163 }, { "epoch": 0.0033012268888955986, "grad_norm": 6.128560198703781e-05, "learning_rate": 0.00019522326922447755, "loss": 46.0, "step": 164 }, { "epoch": 0.0033213563211449623, "grad_norm": 6.614374433411285e-05, "learning_rate": 0.00019516153912360165, "loss": 46.0, "step": 165 }, { "epoch": 0.0033414857533943256, "grad_norm": 4.147323124925606e-05, "learning_rate": 0.00019509942259451357, "loss": 46.0, "step": 166 }, { "epoch": 0.003361615185643689, "grad_norm": 3.474095865385607e-05, "learning_rate": 0.00019503691988945367, "loss": 46.0, "step": 167 }, { "epoch": 0.003381744617893052, "grad_norm": 2.644029700604733e-05, "learning_rate": 0.00019497403126223048, "loss": 46.0, "step": 168 }, { "epoch": 0.003401874050142416, "grad_norm": 2.13767089007888e-05, "learning_rate": 0.00019491075696821962, "loss": 46.0, "step": 169 }, { "epoch": 0.003422003482391779, "grad_norm": 2.432280598441139e-05, "learning_rate": 0.00019484709726436282, "loss": 46.0, "step": 170 }, { "epoch": 0.0034421329146411424, "grad_norm": 2.13755301956553e-05, "learning_rate": 0.00019478305240916698, "loss": 46.0, "step": 171 }, { "epoch": 0.003462262346890506, "grad_norm": 2.9935839847894385e-05, "learning_rate": 0.0001947186226627028, "loss": 46.0, "step": 172 }, { "epoch": 0.0034823917791398694, "grad_norm": 6.0657053836621344e-05, "learning_rate": 0.0001946538082866041, "loss": 46.0, "step": 173 }, { "epoch": 0.0035025212113892327, "grad_norm": 3.512473631417379e-05, "learning_rate": 0.00019458860954406655, "loss": 46.0, "step": 174 }, { "epoch": 0.003522650643638596, "grad_norm": 3.1507472158409655e-05, "learning_rate": 0.00019452302669984662, "loss": 46.0, "step": 175 }, { "epoch": 0.0035427800758879597, "grad_norm": 9.249807771993801e-05, "learning_rate": 0.00019445706002026048, "loss": 46.0, "step": 176 }, { "epoch": 0.003562909508137323, "grad_norm": 2.893183773267083e-05, "learning_rate": 0.000194390709773183, "loss": 46.0, "step": 177 }, { "epoch": 0.0035830389403866863, "grad_norm": 6.318593659671023e-05, "learning_rate": 0.00019432397622804674, "loss": 46.0, "step": 178 }, { "epoch": 0.00360316837263605, "grad_norm": 5.4799009376438335e-05, "learning_rate": 0.00019425685965584056, "loss": 46.0, "step": 179 }, { "epoch": 0.0036232978048854132, "grad_norm": 5.187106944504194e-05, "learning_rate": 0.0001941893603291088, "loss": 46.0, "step": 180 }, { "epoch": 0.0036434272371347765, "grad_norm": 3.32783383782953e-05, "learning_rate": 0.00019412147852195007, "loss": 46.0, "step": 181 }, { "epoch": 0.0036635566693841402, "grad_norm": 2.430532913422212e-05, "learning_rate": 0.00019405321451001605, "loss": 46.0, "step": 182 }, { "epoch": 0.0036836861016335035, "grad_norm": 5.952031642664224e-05, "learning_rate": 0.00019398456857051065, "loss": 46.0, "step": 183 }, { "epoch": 0.003703815533882867, "grad_norm": 3.728271258296445e-05, "learning_rate": 0.00019391554098218853, "loss": 46.0, "step": 184 }, { "epoch": 0.00372394496613223, "grad_norm": 4.849689139518887e-05, "learning_rate": 0.00019384613202535418, "loss": 46.0, "step": 185 }, { "epoch": 0.003744074398381594, "grad_norm": 6.212339212652296e-05, "learning_rate": 0.00019377634198186077, "loss": 46.0, "step": 186 }, { "epoch": 0.003764203830630957, "grad_norm": 0.00010154359188163653, "learning_rate": 0.0001937061711351089, "loss": 46.0, "step": 187 }, { "epoch": 0.0037843332628803203, "grad_norm": 4.9923335609491915e-05, "learning_rate": 0.00019363561977004564, "loss": 46.0, "step": 188 }, { "epoch": 0.003804462695129684, "grad_norm": 0.00011626673949649557, "learning_rate": 0.00019356468817316311, "loss": 46.0, "step": 189 }, { "epoch": 0.0038245921273790473, "grad_norm": 9.168522228719667e-05, "learning_rate": 0.0001934933766324976, "loss": 46.0, "step": 190 }, { "epoch": 0.0038447215596284106, "grad_norm": 3.176367317792028e-05, "learning_rate": 0.00019342168543762814, "loss": 46.0, "step": 191 }, { "epoch": 0.003864850991877774, "grad_norm": 5.836347190779634e-05, "learning_rate": 0.0001933496148796755, "loss": 46.0, "step": 192 }, { "epoch": 0.0038849804241271376, "grad_norm": 2.9238137358333915e-05, "learning_rate": 0.00019327716525130098, "loss": 46.0, "step": 193 }, { "epoch": 0.003905109856376501, "grad_norm": 3.159691550536081e-05, "learning_rate": 0.00019320433684670514, "loss": 46.0, "step": 194 }, { "epoch": 0.003925239288625864, "grad_norm": 3.837713666143827e-05, "learning_rate": 0.00019313112996162667, "loss": 46.0, "step": 195 }, { "epoch": 0.003945368720875228, "grad_norm": 3.1190254958346486e-05, "learning_rate": 0.00019305754489334125, "loss": 46.0, "step": 196 }, { "epoch": 0.003965498153124591, "grad_norm": 9.271525777876377e-05, "learning_rate": 0.00019298358194066016, "loss": 46.0, "step": 197 }, { "epoch": 0.0039856275853739544, "grad_norm": 3.302880941191688e-05, "learning_rate": 0.00019290924140392921, "loss": 46.0, "step": 198 }, { "epoch": 0.004005757017623318, "grad_norm": 3.4837332350434735e-05, "learning_rate": 0.0001928345235850276, "loss": 46.0, "step": 199 }, { "epoch": 0.004025886449872681, "grad_norm": 2.956252865260467e-05, "learning_rate": 0.0001927594287873664, "loss": 46.0, "step": 200 }, { "epoch": 0.004046015882122045, "grad_norm": 4.961419472238049e-05, "learning_rate": 0.00019268395731588764, "loss": 46.0, "step": 201 }, { "epoch": 0.004066145314371408, "grad_norm": 7.009352702880278e-05, "learning_rate": 0.00019260810947706287, "loss": 46.0, "step": 202 }, { "epoch": 0.004086274746620771, "grad_norm": 4.309253199608065e-05, "learning_rate": 0.000192531885578892, "loss": 46.0, "step": 203 }, { "epoch": 0.004106404178870135, "grad_norm": 0.00012773476191796362, "learning_rate": 0.00019245528593090204, "loss": 46.0, "step": 204 }, { "epoch": 0.004126533611119499, "grad_norm": 2.8066795493941754e-05, "learning_rate": 0.00019237831084414577, "loss": 46.0, "step": 205 }, { "epoch": 0.0041466630433688615, "grad_norm": 5.905181751586497e-05, "learning_rate": 0.0001923009606312006, "loss": 46.0, "step": 206 }, { "epoch": 0.004166792475618225, "grad_norm": 4.193755376036279e-05, "learning_rate": 0.0001922232356061672, "loss": 46.0, "step": 207 }, { "epoch": 0.004186921907867589, "grad_norm": 4.6146677050273865e-05, "learning_rate": 0.00019214513608466826, "loss": 46.0, "step": 208 }, { "epoch": 0.004207051340116952, "grad_norm": 2.824837065418251e-05, "learning_rate": 0.00019206666238384728, "loss": 46.0, "step": 209 }, { "epoch": 0.0042271807723663155, "grad_norm": 3.296473005320877e-05, "learning_rate": 0.0001919878148223671, "loss": 46.0, "step": 210 }, { "epoch": 0.004247310204615679, "grad_norm": 3.6901357816532254e-05, "learning_rate": 0.00019190859372040882, "loss": 46.0, "step": 211 }, { "epoch": 0.004267439636865042, "grad_norm": 4.644416912924498e-05, "learning_rate": 0.00019182899939967034, "loss": 46.0, "step": 212 }, { "epoch": 0.004287569069114406, "grad_norm": 5.2605690143536776e-05, "learning_rate": 0.00019174903218336511, "loss": 46.0, "step": 213 }, { "epoch": 0.004307698501363769, "grad_norm": 2.7000423870049417e-05, "learning_rate": 0.00019166869239622085, "loss": 46.0, "step": 214 }, { "epoch": 0.004327827933613132, "grad_norm": 4.584537600749172e-05, "learning_rate": 0.00019158798036447822, "loss": 46.0, "step": 215 }, { "epoch": 0.004347957365862496, "grad_norm": 2.657137156347744e-05, "learning_rate": 0.0001915068964158894, "loss": 46.0, "step": 216 }, { "epoch": 0.004368086798111859, "grad_norm": 4.8142963350983337e-05, "learning_rate": 0.00019142544087971693, "loss": 46.0, "step": 217 }, { "epoch": 0.004388216230361223, "grad_norm": 3.945273419958539e-05, "learning_rate": 0.00019134361408673216, "loss": 46.0, "step": 218 }, { "epoch": 0.004408345662610586, "grad_norm": 6.008298441884108e-05, "learning_rate": 0.00019126141636921414, "loss": 46.0, "step": 219 }, { "epoch": 0.004428475094859949, "grad_norm": 5.5544471251778305e-05, "learning_rate": 0.0001911788480609481, "loss": 46.0, "step": 220 }, { "epoch": 0.004448604527109313, "grad_norm": 5.3515970648732036e-05, "learning_rate": 0.00019109590949722413, "loss": 46.0, "step": 221 }, { "epoch": 0.004468733959358677, "grad_norm": 4.5226741349324584e-05, "learning_rate": 0.00019101260101483592, "loss": 46.0, "step": 222 }, { "epoch": 0.0044888633916080395, "grad_norm": 5.253091512713581e-05, "learning_rate": 0.0001909289229520792, "loss": 46.0, "step": 223 }, { "epoch": 0.004508992823857403, "grad_norm": 7.821829058229923e-05, "learning_rate": 0.0001908448756487506, "loss": 46.0, "step": 224 }, { "epoch": 0.004529122256106767, "grad_norm": 2.5970837668864988e-05, "learning_rate": 0.00019076045944614603, "loss": 46.0, "step": 225 }, { "epoch": 0.00454925168835613, "grad_norm": 8.79172730492428e-05, "learning_rate": 0.0001906756746870595, "loss": 46.0, "step": 226 }, { "epoch": 0.0045693811206054934, "grad_norm": 5.294303991831839e-05, "learning_rate": 0.00019059052171578155, "loss": 46.0, "step": 227 }, { "epoch": 0.004589510552854857, "grad_norm": 3.699533408507705e-05, "learning_rate": 0.00019050500087809807, "loss": 46.0, "step": 228 }, { "epoch": 0.00460963998510422, "grad_norm": 5.711496851290576e-05, "learning_rate": 0.00019041911252128864, "loss": 46.0, "step": 229 }, { "epoch": 0.004629769417353584, "grad_norm": 5.045397483627312e-05, "learning_rate": 0.00019033285699412533, "loss": 46.0, "step": 230 }, { "epoch": 0.0046498988496029466, "grad_norm": 7.707828626735136e-05, "learning_rate": 0.00019024623464687114, "loss": 46.0, "step": 231 }, { "epoch": 0.00467002828185231, "grad_norm": 4.182373595540412e-05, "learning_rate": 0.00019015924583127872, "loss": 46.0, "step": 232 }, { "epoch": 0.004690157714101674, "grad_norm": 4.5557317207567394e-05, "learning_rate": 0.00019007189090058878, "loss": 46.0, "step": 233 }, { "epoch": 0.004710287146351037, "grad_norm": 4.820396861759946e-05, "learning_rate": 0.0001899841702095287, "loss": 46.0, "step": 234 }, { "epoch": 0.0047304165786004005, "grad_norm": 5.630170926451683e-05, "learning_rate": 0.00018989608411431135, "loss": 46.0, "step": 235 }, { "epoch": 0.004750546010849764, "grad_norm": 3.503153857309371e-05, "learning_rate": 0.0001898076329726331, "loss": 46.0, "step": 236 }, { "epoch": 0.004770675443099127, "grad_norm": 8.119984704535455e-05, "learning_rate": 0.00018971881714367295, "loss": 46.0, "step": 237 }, { "epoch": 0.004790804875348491, "grad_norm": 0.0001170546529465355, "learning_rate": 0.00018962963698809063, "loss": 46.0, "step": 238 }, { "epoch": 0.0048109343075978545, "grad_norm": 3.3851210901048034e-05, "learning_rate": 0.00018954009286802545, "loss": 46.0, "step": 239 }, { "epoch": 0.004831063739847217, "grad_norm": 4.396312215249054e-05, "learning_rate": 0.0001894501851470946, "loss": 46.0, "step": 240 }, { "epoch": 0.004851193172096581, "grad_norm": 6.271849269978702e-05, "learning_rate": 0.00018935991419039176, "loss": 46.0, "step": 241 }, { "epoch": 0.004871322604345945, "grad_norm": 4.544670082395896e-05, "learning_rate": 0.00018926928036448572, "loss": 46.0, "step": 242 }, { "epoch": 0.004891452036595308, "grad_norm": 3.364813528605737e-05, "learning_rate": 0.0001891782840374187, "loss": 46.0, "step": 243 }, { "epoch": 0.004911581468844671, "grad_norm": 3.067620491492562e-05, "learning_rate": 0.000189086925578705, "loss": 46.0, "step": 244 }, { "epoch": 0.004931710901094035, "grad_norm": 3.76120260625612e-05, "learning_rate": 0.00018899520535932938, "loss": 46.0, "step": 245 }, { "epoch": 0.004951840333343398, "grad_norm": 5.46066730748862e-05, "learning_rate": 0.00018890312375174578, "loss": 46.0, "step": 246 }, { "epoch": 0.004971969765592762, "grad_norm": 4.003910726169124e-05, "learning_rate": 0.0001888106811298755, "loss": 46.0, "step": 247 }, { "epoch": 0.0049920991978421245, "grad_norm": 6.587472307728603e-05, "learning_rate": 0.00018871787786910583, "loss": 46.0, "step": 248 }, { "epoch": 0.005012228630091488, "grad_norm": 3.237745113437995e-05, "learning_rate": 0.0001886247143462886, "loss": 46.0, "step": 249 }, { "epoch": 0.005032358062340852, "grad_norm": 4.901425199932419e-05, "learning_rate": 0.00018853119093973863, "loss": 46.0, "step": 250 }, { "epoch": 0.005052487494590215, "grad_norm": 0.00016870767285581678, "learning_rate": 0.00018843730802923202, "loss": 46.0, "step": 251 }, { "epoch": 0.0050726169268395785, "grad_norm": 5.0396236474625766e-05, "learning_rate": 0.00018834306599600472, "loss": 46.0, "step": 252 }, { "epoch": 0.005092746359088942, "grad_norm": 4.4291067752055824e-05, "learning_rate": 0.00018824846522275113, "loss": 46.0, "step": 253 }, { "epoch": 0.005112875791338305, "grad_norm": 4.502312367549166e-05, "learning_rate": 0.0001881535060936223, "loss": 46.0, "step": 254 }, { "epoch": 0.005133005223587669, "grad_norm": 9.577722084941342e-05, "learning_rate": 0.00018805818899422447, "loss": 46.0, "step": 255 }, { "epoch": 0.0051531346558370324, "grad_norm": 3.933937841793522e-05, "learning_rate": 0.0001879625143116176, "loss": 46.0, "step": 256 }, { "epoch": 0.005173264088086395, "grad_norm": 5.2159059123368934e-05, "learning_rate": 0.00018786648243431363, "loss": 46.0, "step": 257 }, { "epoch": 0.005193393520335759, "grad_norm": 6.076386125641875e-05, "learning_rate": 0.000187770093752275, "loss": 46.0, "step": 258 }, { "epoch": 0.005213522952585123, "grad_norm": 7.954567990964279e-05, "learning_rate": 0.0001876733486569131, "loss": 46.0, "step": 259 }, { "epoch": 0.0052336523848344856, "grad_norm": 9.186089300783351e-05, "learning_rate": 0.0001875762475410865, "loss": 46.0, "step": 260 }, { "epoch": 0.005253781817083849, "grad_norm": 6.62936654407531e-05, "learning_rate": 0.00018747879079909963, "loss": 46.0, "step": 261 }, { "epoch": 0.005273911249333213, "grad_norm": 8.344445814145729e-05, "learning_rate": 0.00018738097882670097, "loss": 46.0, "step": 262 }, { "epoch": 0.005294040681582576, "grad_norm": 4.7971618187148124e-05, "learning_rate": 0.0001872828120210815, "loss": 46.0, "step": 263 }, { "epoch": 0.0053141701138319395, "grad_norm": 6.027764902682975e-05, "learning_rate": 0.00018718429078087306, "loss": 46.0, "step": 264 }, { "epoch": 0.005334299546081302, "grad_norm": 3.437638588366099e-05, "learning_rate": 0.00018708541550614688, "loss": 46.0, "step": 265 }, { "epoch": 0.005354428978330666, "grad_norm": 0.00015669086133129895, "learning_rate": 0.00018698618659841168, "loss": 46.0, "step": 266 }, { "epoch": 0.00537455841058003, "grad_norm": 5.7055072829825804e-05, "learning_rate": 0.00018688660446061235, "loss": 46.0, "step": 267 }, { "epoch": 0.005394687842829393, "grad_norm": 9.243898966815323e-05, "learning_rate": 0.00018678666949712805, "loss": 46.0, "step": 268 }, { "epoch": 0.005414817275078756, "grad_norm": 6.301647226791829e-05, "learning_rate": 0.00018668638211377075, "loss": 46.0, "step": 269 }, { "epoch": 0.00543494670732812, "grad_norm": 5.150353172211908e-05, "learning_rate": 0.00018658574271778345, "loss": 46.0, "step": 270 }, { "epoch": 0.005455076139577483, "grad_norm": 7.764642214169726e-05, "learning_rate": 0.0001864847517178387, "loss": 46.0, "step": 271 }, { "epoch": 0.005475205571826847, "grad_norm": 6.848713383078575e-05, "learning_rate": 0.0001863834095240367, "loss": 46.0, "step": 272 }, { "epoch": 0.00549533500407621, "grad_norm": 6.603610381716862e-05, "learning_rate": 0.00018628171654790383, "loss": 46.0, "step": 273 }, { "epoch": 0.005515464436325573, "grad_norm": 4.008671749033965e-05, "learning_rate": 0.00018617967320239088, "loss": 46.0, "step": 274 }, { "epoch": 0.005535593868574937, "grad_norm": 0.00014578885748051107, "learning_rate": 0.00018607727990187147, "loss": 46.0, "step": 275 }, { "epoch": 0.005555723300824301, "grad_norm": 0.0001186303561553359, "learning_rate": 0.00018597453706214025, "loss": 46.0, "step": 276 }, { "epoch": 0.0055758527330736635, "grad_norm": 6.097505320212804e-05, "learning_rate": 0.00018587144510041128, "loss": 46.0, "step": 277 }, { "epoch": 0.005595982165323027, "grad_norm": 3.146566086797975e-05, "learning_rate": 0.0001857680044353163, "loss": 46.0, "step": 278 }, { "epoch": 0.005616111597572391, "grad_norm": 3.7260102544678375e-05, "learning_rate": 0.0001856642154869031, "loss": 46.0, "step": 279 }, { "epoch": 0.005636241029821754, "grad_norm": 9.276531636714935e-05, "learning_rate": 0.0001855600786766337, "loss": 46.0, "step": 280 }, { "epoch": 0.0056563704620711175, "grad_norm": 4.5973942178534344e-05, "learning_rate": 0.00018545559442738273, "loss": 46.0, "step": 281 }, { "epoch": 0.00567649989432048, "grad_norm": 6.683785613859072e-05, "learning_rate": 0.00018535076316343575, "loss": 46.0, "step": 282 }, { "epoch": 0.005696629326569844, "grad_norm": 8.007440192159265e-05, "learning_rate": 0.00018524558531048737, "loss": 46.0, "step": 283 }, { "epoch": 0.005716758758819208, "grad_norm": 4.978142897016369e-05, "learning_rate": 0.00018514006129563966, "loss": 46.0, "step": 284 }, { "epoch": 0.005736888191068571, "grad_norm": 0.00010632863268256187, "learning_rate": 0.00018503419154740035, "loss": 46.0, "step": 285 }, { "epoch": 0.005757017623317934, "grad_norm": 2.972048423544038e-05, "learning_rate": 0.00018492797649568115, "loss": 46.0, "step": 286 }, { "epoch": 0.005777147055567298, "grad_norm": 4.370976603240706e-05, "learning_rate": 0.00018482141657179594, "loss": 46.0, "step": 287 }, { "epoch": 0.005797276487816661, "grad_norm": 7.786518835928291e-05, "learning_rate": 0.00018471451220845902, "loss": 46.0, "step": 288 }, { "epoch": 0.0058174059200660246, "grad_norm": 0.00010520989599172026, "learning_rate": 0.00018460726383978337, "loss": 46.0, "step": 289 }, { "epoch": 0.005837535352315388, "grad_norm": 7.304285827558488e-05, "learning_rate": 0.000184499671901279, "loss": 46.0, "step": 290 }, { "epoch": 0.005857664784564751, "grad_norm": 3.885753540089354e-05, "learning_rate": 0.00018439173682985094, "loss": 46.0, "step": 291 }, { "epoch": 0.005877794216814115, "grad_norm": 9.42759434110485e-05, "learning_rate": 0.00018428345906379767, "loss": 46.0, "step": 292 }, { "epoch": 0.0058979236490634785, "grad_norm": 7.605970313306898e-05, "learning_rate": 0.00018417483904280925, "loss": 46.0, "step": 293 }, { "epoch": 0.005918053081312841, "grad_norm": 7.958237983984873e-05, "learning_rate": 0.00018406587720796555, "loss": 46.0, "step": 294 }, { "epoch": 0.005938182513562205, "grad_norm": 0.00015608093235641718, "learning_rate": 0.00018395657400173453, "loss": 46.0, "step": 295 }, { "epoch": 0.005958311945811569, "grad_norm": 6.675553595414385e-05, "learning_rate": 0.00018384692986797026, "loss": 46.0, "step": 296 }, { "epoch": 0.005978441378060932, "grad_norm": 4.7439232730539516e-05, "learning_rate": 0.00018373694525191138, "loss": 46.0, "step": 297 }, { "epoch": 0.005998570810310295, "grad_norm": 7.789856317685917e-05, "learning_rate": 0.00018362662060017896, "loss": 46.0, "step": 298 }, { "epoch": 0.006018700242559658, "grad_norm": 5.332715591066517e-05, "learning_rate": 0.00018351595636077509, "loss": 46.0, "step": 299 }, { "epoch": 0.006038829674809022, "grad_norm": 9.82348938123323e-05, "learning_rate": 0.00018340495298308063, "loss": 46.0, "step": 300 }, { "epoch": 0.006058959107058386, "grad_norm": 0.00010532526357565075, "learning_rate": 0.0001832936109178538, "loss": 46.0, "step": 301 }, { "epoch": 0.0060790885393077485, "grad_norm": 5.648566730087623e-05, "learning_rate": 0.00018318193061722795, "loss": 46.0, "step": 302 }, { "epoch": 0.006099217971557112, "grad_norm": 0.00012587102537509054, "learning_rate": 0.00018306991253471013, "loss": 46.0, "step": 303 }, { "epoch": 0.006119347403806476, "grad_norm": 0.00011527648894116282, "learning_rate": 0.00018295755712517887, "loss": 46.0, "step": 304 }, { "epoch": 0.006139476836055839, "grad_norm": 6.82896061334759e-05, "learning_rate": 0.00018284486484488257, "loss": 46.0, "step": 305 }, { "epoch": 0.0061596062683052025, "grad_norm": 6.256354390643537e-05, "learning_rate": 0.00018273183615143764, "loss": 46.0, "step": 306 }, { "epoch": 0.006179735700554566, "grad_norm": 6.483913603005931e-05, "learning_rate": 0.00018261847150382644, "loss": 46.0, "step": 307 }, { "epoch": 0.006199865132803929, "grad_norm": 9.90207918221131e-05, "learning_rate": 0.00018250477136239572, "loss": 46.0, "step": 308 }, { "epoch": 0.006219994565053293, "grad_norm": 8.644862100481987e-05, "learning_rate": 0.00018239073618885447, "loss": 46.0, "step": 309 }, { "epoch": 0.0062401239973026565, "grad_norm": 5.34062746737618e-05, "learning_rate": 0.00018227636644627224, "loss": 46.0, "step": 310 }, { "epoch": 0.006260253429552019, "grad_norm": 5.9164103731745854e-05, "learning_rate": 0.00018216166259907713, "loss": 46.0, "step": 311 }, { "epoch": 0.006280382861801383, "grad_norm": 0.00011165209434693679, "learning_rate": 0.000182046625113054, "loss": 46.0, "step": 312 }, { "epoch": 0.006300512294050747, "grad_norm": 9.209036215906963e-05, "learning_rate": 0.00018193125445534252, "loss": 46.0, "step": 313 }, { "epoch": 0.00632064172630011, "grad_norm": 9.060541924554855e-05, "learning_rate": 0.00018181555109443527, "loss": 46.0, "step": 314 }, { "epoch": 0.006340771158549473, "grad_norm": 0.00010777592979138717, "learning_rate": 0.0001816995155001759, "loss": 46.0, "step": 315 }, { "epoch": 0.006360900590798836, "grad_norm": 6.211627623997629e-05, "learning_rate": 0.00018158314814375716, "loss": 46.0, "step": 316 }, { "epoch": 0.0063810300230482, "grad_norm": 0.00011468301818240434, "learning_rate": 0.000181466449497719, "loss": 46.0, "step": 317 }, { "epoch": 0.0064011594552975636, "grad_norm": 0.00011545659799594432, "learning_rate": 0.00018134942003594665, "loss": 46.0, "step": 318 }, { "epoch": 0.006421288887546926, "grad_norm": 7.793011172907427e-05, "learning_rate": 0.00018123206023366875, "loss": 46.0, "step": 319 }, { "epoch": 0.00644141831979629, "grad_norm": 6.144792860141024e-05, "learning_rate": 0.00018111437056745532, "loss": 46.0, "step": 320 }, { "epoch": 0.006461547752045654, "grad_norm": 7.264247687999159e-05, "learning_rate": 0.00018099635151521586, "loss": 46.0, "step": 321 }, { "epoch": 0.006481677184295017, "grad_norm": 0.0001523289829492569, "learning_rate": 0.00018087800355619753, "loss": 46.0, "step": 322 }, { "epoch": 0.00650180661654438, "grad_norm": 0.00013660687545780092, "learning_rate": 0.000180759327170983, "loss": 46.0, "step": 323 }, { "epoch": 0.006521936048793744, "grad_norm": 0.0001787421788321808, "learning_rate": 0.00018064032284148868, "loss": 46.0, "step": 324 }, { "epoch": 0.006542065481043107, "grad_norm": 0.00017624157771933824, "learning_rate": 0.0001805209910509626, "loss": 46.0, "step": 325 }, { "epoch": 0.006562194913292471, "grad_norm": 7.056714821374044e-05, "learning_rate": 0.0001804013322839826, "loss": 46.0, "step": 326 }, { "epoch": 0.006582324345541834, "grad_norm": 0.00016265243175439537, "learning_rate": 0.00018028134702645425, "loss": 46.0, "step": 327 }, { "epoch": 0.006602453777791197, "grad_norm": 0.00015621079364791512, "learning_rate": 0.00018016103576560895, "loss": 46.0, "step": 328 }, { "epoch": 0.006622583210040561, "grad_norm": 0.00015462673036381602, "learning_rate": 0.00018004039899000186, "loss": 46.0, "step": 329 }, { "epoch": 0.006642712642289925, "grad_norm": 9.932387911248952e-05, "learning_rate": 0.0001799194371895101, "loss": 46.0, "step": 330 }, { "epoch": 0.0066628420745392875, "grad_norm": 0.0001572413748363033, "learning_rate": 0.00017979815085533048, "loss": 46.0, "step": 331 }, { "epoch": 0.006682971506788651, "grad_norm": 0.00013717268302571028, "learning_rate": 0.00017967654047997784, "loss": 46.0, "step": 332 }, { "epoch": 0.006703100939038014, "grad_norm": 0.00017757757450453937, "learning_rate": 0.0001795546065572827, "loss": 46.0, "step": 333 }, { "epoch": 0.006723230371287378, "grad_norm": 9.130597027251497e-05, "learning_rate": 0.00017943234958238952, "loss": 46.0, "step": 334 }, { "epoch": 0.0067433598035367415, "grad_norm": 6.078862497815862e-05, "learning_rate": 0.00017930977005175465, "loss": 46.0, "step": 335 }, { "epoch": 0.006763489235786104, "grad_norm": 0.00018025643657892942, "learning_rate": 0.0001791868684631441, "loss": 46.0, "step": 336 }, { "epoch": 0.006783618668035468, "grad_norm": 0.0001726304617477581, "learning_rate": 0.00017906364531563185, "loss": 46.0, "step": 337 }, { "epoch": 0.006803748100284832, "grad_norm": 8.799460192676634e-05, "learning_rate": 0.00017894010110959755, "loss": 46.0, "step": 338 }, { "epoch": 0.006823877532534195, "grad_norm": 0.00012871818034909666, "learning_rate": 0.00017881623634672465, "loss": 46.0, "step": 339 }, { "epoch": 0.006844006964783558, "grad_norm": 0.00010133467003470287, "learning_rate": 0.00017869205152999822, "loss": 46.0, "step": 340 }, { "epoch": 0.006864136397032922, "grad_norm": 0.00010885829397011548, "learning_rate": 0.00017856754716370313, "loss": 46.0, "step": 341 }, { "epoch": 0.006884265829282285, "grad_norm": 9.338463132735342e-05, "learning_rate": 0.0001784427237534217, "loss": 46.0, "step": 342 }, { "epoch": 0.006904395261531649, "grad_norm": 9.378411778016016e-05, "learning_rate": 0.00017831758180603195, "loss": 46.0, "step": 343 }, { "epoch": 0.006924524693781012, "grad_norm": 0.00012989221431780607, "learning_rate": 0.00017819212182970535, "loss": 46.0, "step": 344 }, { "epoch": 0.006944654126030375, "grad_norm": 0.00017901930550578982, "learning_rate": 0.00017806634433390476, "loss": 46.0, "step": 345 }, { "epoch": 0.006964783558279739, "grad_norm": 9.102857438847423e-05, "learning_rate": 0.00017794024982938252, "loss": 46.0, "step": 346 }, { "epoch": 0.0069849129905291026, "grad_norm": 7.371963874902576e-05, "learning_rate": 0.00017781383882817811, "loss": 46.0, "step": 347 }, { "epoch": 0.007005042422778465, "grad_norm": 7.886077219154686e-05, "learning_rate": 0.00017768711184361645, "loss": 46.0, "step": 348 }, { "epoch": 0.007025171855027829, "grad_norm": 0.0001304990437347442, "learning_rate": 0.00017756006939030535, "loss": 46.0, "step": 349 }, { "epoch": 0.007045301287277192, "grad_norm": 0.00014258353621698916, "learning_rate": 0.00017743271198413386, "loss": 46.0, "step": 350 }, { "epoch": 0.007065430719526556, "grad_norm": 0.00015988641825970262, "learning_rate": 0.00017730504014226982, "loss": 46.0, "step": 351 }, { "epoch": 0.007085560151775919, "grad_norm": 8.829456783132628e-05, "learning_rate": 0.00017717705438315804, "loss": 46.0, "step": 352 }, { "epoch": 0.007105689584025282, "grad_norm": 0.00018408888718113303, "learning_rate": 0.00017704875522651806, "loss": 46.0, "step": 353 }, { "epoch": 0.007125819016274646, "grad_norm": 0.0001536206982564181, "learning_rate": 0.0001769201431933419, "loss": 46.0, "step": 354 }, { "epoch": 0.00714594844852401, "grad_norm": 0.00015358305245172232, "learning_rate": 0.00017679121880589236, "loss": 46.0, "step": 355 }, { "epoch": 0.0071660778807733725, "grad_norm": 0.00028956442838534713, "learning_rate": 0.00017666198258770038, "loss": 46.0, "step": 356 }, { "epoch": 0.007186207313022736, "grad_norm": 0.0001954118488356471, "learning_rate": 0.00017653243506356332, "loss": 46.0, "step": 357 }, { "epoch": 0.0072063367452721, "grad_norm": 0.0001885920064523816, "learning_rate": 0.00017640257675954264, "loss": 46.0, "step": 358 }, { "epoch": 0.007226466177521463, "grad_norm": 7.952122541610152e-05, "learning_rate": 0.00017627240820296177, "loss": 46.0, "step": 359 }, { "epoch": 0.0072465956097708265, "grad_norm": 6.53123643132858e-05, "learning_rate": 0.00017614192992240413, "loss": 46.0, "step": 360 }, { "epoch": 0.00726672504202019, "grad_norm": 0.0001405112270731479, "learning_rate": 0.00017601114244771067, "loss": 46.0, "step": 361 }, { "epoch": 0.007286854474269553, "grad_norm": 0.00019535009050741792, "learning_rate": 0.000175880046309978, "loss": 46.0, "step": 362 }, { "epoch": 0.007306983906518917, "grad_norm": 0.0003268709115218371, "learning_rate": 0.00017574864204155614, "loss": 46.0, "step": 363 }, { "epoch": 0.0073271133387682805, "grad_norm": 0.00010955316975014284, "learning_rate": 0.00017561693017604637, "loss": 46.0, "step": 364 }, { "epoch": 0.007347242771017643, "grad_norm": 0.00027552066603675485, "learning_rate": 0.000175484911248299, "loss": 46.0, "step": 365 }, { "epoch": 0.007367372203267007, "grad_norm": 0.00019117463671136647, "learning_rate": 0.0001753525857944112, "loss": 46.0, "step": 366 }, { "epoch": 0.00738750163551637, "grad_norm": 9.778481035027653e-05, "learning_rate": 0.00017521995435172504, "loss": 46.0, "step": 367 }, { "epoch": 0.007407631067765734, "grad_norm": 3.404769449844025e-05, "learning_rate": 0.000175087017458825, "loss": 46.0, "step": 368 }, { "epoch": 0.007427760500015097, "grad_norm": 0.00010666289017535746, "learning_rate": 0.00017495377565553594, "loss": 46.0, "step": 369 }, { "epoch": 0.00744788993226446, "grad_norm": 0.0001144933485193178, "learning_rate": 0.0001748202294829209, "loss": 46.0, "step": 370 }, { "epoch": 0.007468019364513824, "grad_norm": 0.00013504338858183473, "learning_rate": 0.00017468637948327894, "loss": 46.0, "step": 371 }, { "epoch": 0.007488148796763188, "grad_norm": 0.00010847981320694089, "learning_rate": 0.00017455222620014276, "loss": 46.0, "step": 372 }, { "epoch": 0.00750827822901255, "grad_norm": 0.00020060865790583193, "learning_rate": 0.00017441777017827677, "loss": 46.0, "step": 373 }, { "epoch": 0.007528407661261914, "grad_norm": 0.00019206189608667046, "learning_rate": 0.00017428301196367464, "loss": 46.0, "step": 374 }, { "epoch": 0.007548537093511278, "grad_norm": 0.00011548047768883407, "learning_rate": 0.0001741479521035572, "loss": 46.0, "step": 375 }, { "epoch": 0.007568666525760641, "grad_norm": 0.0001220878621097654, "learning_rate": 0.00017401259114637014, "loss": 46.0, "step": 376 }, { "epoch": 0.007588795958010004, "grad_norm": 0.00027704107924364507, "learning_rate": 0.00017387692964178198, "loss": 46.0, "step": 377 }, { "epoch": 0.007608925390259368, "grad_norm": 8.742011414142326e-05, "learning_rate": 0.00017374096814068145, "loss": 46.0, "step": 378 }, { "epoch": 0.007629054822508731, "grad_norm": 0.0001393141719745472, "learning_rate": 0.00017360470719517577, "loss": 46.0, "step": 379 }, { "epoch": 0.007649184254758095, "grad_norm": 0.00011780137720052153, "learning_rate": 0.00017346814735858792, "loss": 46.0, "step": 380 }, { "epoch": 0.007669313687007458, "grad_norm": 0.00013582094106823206, "learning_rate": 0.0001733312891854547, "loss": 46.0, "step": 381 }, { "epoch": 0.007689443119256821, "grad_norm": 7.030325650703162e-05, "learning_rate": 0.00017319413323152436, "loss": 46.0, "step": 382 }, { "epoch": 0.007709572551506185, "grad_norm": 0.00016730479546822608, "learning_rate": 0.00017305668005375435, "loss": 46.0, "step": 383 }, { "epoch": 0.007729701983755548, "grad_norm": 0.00027646831586025655, "learning_rate": 0.00017291893021030913, "loss": 46.0, "step": 384 }, { "epoch": 0.0077498314160049115, "grad_norm": 0.0001369678502669558, "learning_rate": 0.0001727808842605578, "loss": 46.0, "step": 385 }, { "epoch": 0.007769960848254275, "grad_norm": 0.00011632432142505422, "learning_rate": 0.00017264254276507188, "loss": 46.0, "step": 386 }, { "epoch": 0.007790090280503638, "grad_norm": 0.00013326382031664252, "learning_rate": 0.00017250390628562303, "loss": 46.0, "step": 387 }, { "epoch": 0.007810219712753002, "grad_norm": 0.0002664460625965148, "learning_rate": 0.00017236497538518082, "loss": 46.0, "step": 388 }, { "epoch": 0.007830349145002365, "grad_norm": 0.00013524248788598925, "learning_rate": 0.00017222575062791033, "loss": 46.0, "step": 389 }, { "epoch": 0.007850478577251728, "grad_norm": 0.00023887053248472512, "learning_rate": 0.00017208623257916993, "loss": 46.0, "step": 390 }, { "epoch": 0.007870608009501092, "grad_norm": 0.0002952871145680547, "learning_rate": 0.000171946421805509, "loss": 46.0, "step": 391 }, { "epoch": 0.007890737441750456, "grad_norm": 0.00014817963528912514, "learning_rate": 0.00017180631887466562, "loss": 46.0, "step": 392 }, { "epoch": 0.00791086687399982, "grad_norm": 5.316930764820427e-05, "learning_rate": 0.0001716659243555642, "loss": 46.0, "step": 393 }, { "epoch": 0.00791086687399982, "eval_loss": 11.5, "eval_runtime": 130.3433, "eval_samples_per_second": 160.484, "eval_steps_per_second": 80.242, "step": 393 }, { "epoch": 0.007930996306249181, "grad_norm": 7.42626580176875e-05, "learning_rate": 0.00017152523881831325, "loss": 46.0, "step": 394 }, { "epoch": 0.007951125738498545, "grad_norm": 0.00012892778613604605, "learning_rate": 0.00017138426283420304, "loss": 46.0, "step": 395 }, { "epoch": 0.007971255170747909, "grad_norm": 0.00026775835431180894, "learning_rate": 0.00017124299697570327, "loss": 46.0, "step": 396 }, { "epoch": 0.007991384602997273, "grad_norm": 0.00010640334949130192, "learning_rate": 0.00017110144181646072, "loss": 46.0, "step": 397 }, { "epoch": 0.008011514035246636, "grad_norm": 0.00044169218745082617, "learning_rate": 0.00017095959793129705, "loss": 46.0, "step": 398 }, { "epoch": 0.008031643467496, "grad_norm": 0.0001671955396886915, "learning_rate": 0.0001708174658962062, "loss": 46.0, "step": 399 }, { "epoch": 0.008051772899745362, "grad_norm": 0.0001959178625838831, "learning_rate": 0.00017067504628835237, "loss": 46.0, "step": 400 }, { "epoch": 0.008071902331994726, "grad_norm": 0.0003456271078903228, "learning_rate": 0.00017053233968606745, "loss": 46.0, "step": 401 }, { "epoch": 0.00809203176424409, "grad_norm": 0.00020114498329348862, "learning_rate": 0.00017038934666884878, "loss": 46.0, "step": 402 }, { "epoch": 0.008112161196493453, "grad_norm": 0.00013646352454088628, "learning_rate": 0.00017024606781735675, "loss": 46.0, "step": 403 }, { "epoch": 0.008132290628742817, "grad_norm": 0.0001477425394114107, "learning_rate": 0.00017010250371341244, "loss": 46.0, "step": 404 }, { "epoch": 0.00815242006099218, "grad_norm": 0.00025215549976564944, "learning_rate": 0.00016995865493999528, "loss": 46.0, "step": 405 }, { "epoch": 0.008172549493241543, "grad_norm": 0.0003362063434906304, "learning_rate": 0.00016981452208124064, "loss": 46.0, "step": 406 }, { "epoch": 0.008192678925490906, "grad_norm": 0.00011808531417045742, "learning_rate": 0.00016967010572243758, "loss": 46.0, "step": 407 }, { "epoch": 0.00821280835774027, "grad_norm": 0.00017271251999773085, "learning_rate": 0.00016952540645002632, "loss": 46.0, "step": 408 }, { "epoch": 0.008232937789989634, "grad_norm": 0.00015558266022708267, "learning_rate": 0.00016938042485159594, "loss": 46.0, "step": 409 }, { "epoch": 0.008253067222238997, "grad_norm": 0.00015800447727087885, "learning_rate": 0.000169235161515882, "loss": 46.0, "step": 410 }, { "epoch": 0.00827319665448836, "grad_norm": 0.00020300566393416375, "learning_rate": 0.00016908961703276406, "loss": 46.0, "step": 411 }, { "epoch": 0.008293326086737723, "grad_norm": 0.00015362584963440895, "learning_rate": 0.0001689437919932634, "loss": 46.0, "step": 412 }, { "epoch": 0.008313455518987087, "grad_norm": 0.0001552566682221368, "learning_rate": 0.0001687976869895406, "loss": 46.0, "step": 413 }, { "epoch": 0.00833358495123645, "grad_norm": 0.0002456993970554322, "learning_rate": 0.00016865130261489305, "loss": 46.0, "step": 414 }, { "epoch": 0.008353714383485814, "grad_norm": 0.00025272692437283695, "learning_rate": 0.00016850463946375266, "loss": 46.0, "step": 415 }, { "epoch": 0.008373843815735178, "grad_norm": 0.0001329753577010706, "learning_rate": 0.00016835769813168332, "loss": 46.0, "step": 416 }, { "epoch": 0.00839397324798454, "grad_norm": 0.00011102524149464443, "learning_rate": 0.00016821047921537858, "loss": 46.0, "step": 417 }, { "epoch": 0.008414102680233904, "grad_norm": 0.0005011210450902581, "learning_rate": 0.0001680629833126592, "loss": 46.0, "step": 418 }, { "epoch": 0.008434232112483267, "grad_norm": 0.00018243804515805095, "learning_rate": 0.0001679152110224707, "loss": 46.0, "step": 419 }, { "epoch": 0.008454361544732631, "grad_norm": 0.00032836064929142594, "learning_rate": 0.00016776716294488099, "loss": 46.0, "step": 420 }, { "epoch": 0.008474490976981995, "grad_norm": 0.00017685361672192812, "learning_rate": 0.00016761883968107775, "loss": 46.0, "step": 421 }, { "epoch": 0.008494620409231358, "grad_norm": 0.0002569703501649201, "learning_rate": 0.0001674702418333663, "loss": 46.0, "step": 422 }, { "epoch": 0.00851474984148072, "grad_norm": 8.305059600388631e-05, "learning_rate": 0.00016732137000516684, "loss": 46.0, "step": 423 }, { "epoch": 0.008534879273730084, "grad_norm": 0.0002819601504597813, "learning_rate": 0.00016717222480101221, "loss": 46.0, "step": 424 }, { "epoch": 0.008555008705979448, "grad_norm": 0.00020890127052552998, "learning_rate": 0.00016702280682654542, "loss": 46.0, "step": 425 }, { "epoch": 0.008575138138228812, "grad_norm": 0.00015081673336680979, "learning_rate": 0.00016687311668851703, "loss": 46.0, "step": 426 }, { "epoch": 0.008595267570478175, "grad_norm": 0.00014410317817237228, "learning_rate": 0.0001667231549947828, "loss": 46.0, "step": 427 }, { "epoch": 0.008615397002727537, "grad_norm": 0.0002492456405889243, "learning_rate": 0.00016657292235430126, "loss": 46.0, "step": 428 }, { "epoch": 0.008635526434976901, "grad_norm": 0.0001906536053866148, "learning_rate": 0.0001664224193771312, "loss": 46.0, "step": 429 }, { "epoch": 0.008655655867226265, "grad_norm": 0.00024850506451912224, "learning_rate": 0.0001662716466744291, "loss": 46.0, "step": 430 }, { "epoch": 0.008675785299475628, "grad_norm": 0.00013132646563462913, "learning_rate": 0.0001661206048584468, "loss": 46.0, "step": 431 }, { "epoch": 0.008695914731724992, "grad_norm": 0.00026154195074923337, "learning_rate": 0.00016596929454252895, "loss": 46.0, "step": 432 }, { "epoch": 0.008716044163974356, "grad_norm": 0.00015794049249961972, "learning_rate": 0.0001658177163411105, "loss": 46.0, "step": 433 }, { "epoch": 0.008736173596223718, "grad_norm": 0.00023368936672341079, "learning_rate": 0.00016566587086971416, "loss": 46.0, "step": 434 }, { "epoch": 0.008756303028473082, "grad_norm": 0.0002248157252324745, "learning_rate": 0.00016551375874494805, "loss": 46.0, "step": 435 }, { "epoch": 0.008776432460722445, "grad_norm": 0.0003579051699489355, "learning_rate": 0.00016536138058450309, "loss": 46.0, "step": 436 }, { "epoch": 0.008796561892971809, "grad_norm": 0.00031197903444990516, "learning_rate": 0.00016520873700715045, "loss": 46.0, "step": 437 }, { "epoch": 0.008816691325221173, "grad_norm": 0.0001378994493279606, "learning_rate": 0.0001650558286327391, "loss": 46.0, "step": 438 }, { "epoch": 0.008836820757470536, "grad_norm": 0.0001752666721586138, "learning_rate": 0.0001649026560821934, "loss": 46.0, "step": 439 }, { "epoch": 0.008856950189719898, "grad_norm": 0.00017502061382401735, "learning_rate": 0.0001647492199775103, "loss": 46.0, "step": 440 }, { "epoch": 0.008877079621969262, "grad_norm": 0.00026857954799197614, "learning_rate": 0.0001645955209417571, "loss": 46.0, "step": 441 }, { "epoch": 0.008897209054218626, "grad_norm": 0.0002821139642037451, "learning_rate": 0.00016444155959906875, "loss": 46.0, "step": 442 }, { "epoch": 0.00891733848646799, "grad_norm": 0.00018881195865105838, "learning_rate": 0.0001642873365746454, "loss": 46.0, "step": 443 }, { "epoch": 0.008937467918717353, "grad_norm": 0.00030518523999489844, "learning_rate": 0.00016413285249474975, "loss": 46.0, "step": 444 }, { "epoch": 0.008957597350966715, "grad_norm": 0.00015108012303244323, "learning_rate": 0.0001639781079867047, "loss": 46.0, "step": 445 }, { "epoch": 0.008977726783216079, "grad_norm": 0.00034121968201361597, "learning_rate": 0.0001638231036788906, "loss": 46.0, "step": 446 }, { "epoch": 0.008997856215465443, "grad_norm": 0.00013062897778581828, "learning_rate": 0.00016366784020074282, "loss": 46.0, "step": 447 }, { "epoch": 0.009017985647714806, "grad_norm": 0.00035385601222515106, "learning_rate": 0.0001635123181827491, "loss": 46.0, "step": 448 }, { "epoch": 0.00903811507996417, "grad_norm": 0.00014690958778373897, "learning_rate": 0.00016335653825644717, "loss": 46.0, "step": 449 }, { "epoch": 0.009058244512213534, "grad_norm": 0.0001020775962388143, "learning_rate": 0.00016320050105442192, "loss": 46.0, "step": 450 }, { "epoch": 0.009078373944462896, "grad_norm": 0.00028834721888415515, "learning_rate": 0.00016304420721030308, "loss": 46.0, "step": 451 }, { "epoch": 0.00909850337671226, "grad_norm": 0.00015054053801577538, "learning_rate": 0.00016288765735876254, "loss": 46.0, "step": 452 }, { "epoch": 0.009118632808961623, "grad_norm": 0.0002609645889606327, "learning_rate": 0.00016273085213551166, "loss": 46.0, "step": 453 }, { "epoch": 0.009138762241210987, "grad_norm": 0.00016032745770644397, "learning_rate": 0.00016257379217729897, "loss": 46.0, "step": 454 }, { "epoch": 0.00915889167346035, "grad_norm": 0.00017535104416310787, "learning_rate": 0.00016241647812190724, "loss": 46.0, "step": 455 }, { "epoch": 0.009179021105709714, "grad_norm": 0.00018267772975377738, "learning_rate": 0.00016225891060815128, "loss": 46.0, "step": 456 }, { "epoch": 0.009199150537959076, "grad_norm": 0.00029334655846469104, "learning_rate": 0.00016210109027587494, "loss": 46.0, "step": 457 }, { "epoch": 0.00921927997020844, "grad_norm": 0.000231573183555156, "learning_rate": 0.00016194301776594876, "loss": 46.0, "step": 458 }, { "epoch": 0.009239409402457804, "grad_norm": 0.0002635002601891756, "learning_rate": 0.0001617846937202674, "loss": 46.0, "step": 459 }, { "epoch": 0.009259538834707167, "grad_norm": 0.00032035779440775514, "learning_rate": 0.00016162611878174678, "loss": 46.0, "step": 460 }, { "epoch": 0.009279668266956531, "grad_norm": 0.00026974373031407595, "learning_rate": 0.00016146729359432183, "loss": 46.0, "step": 461 }, { "epoch": 0.009299797699205893, "grad_norm": 0.0001251360954483971, "learning_rate": 0.00016130821880294354, "loss": 46.0, "step": 462 }, { "epoch": 0.009319927131455257, "grad_norm": 0.00034513213904574513, "learning_rate": 0.00016114889505357654, "loss": 46.0, "step": 463 }, { "epoch": 0.00934005656370462, "grad_norm": 0.00021668773842975497, "learning_rate": 0.00016098932299319642, "loss": 46.0, "step": 464 }, { "epoch": 0.009360185995953984, "grad_norm": 0.00024054896493908018, "learning_rate": 0.00016082950326978707, "loss": 46.0, "step": 465 }, { "epoch": 0.009380315428203348, "grad_norm": 0.00023018120555207133, "learning_rate": 0.00016066943653233808, "loss": 46.0, "step": 466 }, { "epoch": 0.009400444860452712, "grad_norm": 0.000292058102786541, "learning_rate": 0.00016050912343084216, "loss": 46.0, "step": 467 }, { "epoch": 0.009420574292702074, "grad_norm": 0.00015946109488140792, "learning_rate": 0.0001603485646162924, "loss": 46.0, "step": 468 }, { "epoch": 0.009440703724951437, "grad_norm": 0.0003883461467921734, "learning_rate": 0.00016018776074067965, "loss": 46.0, "step": 469 }, { "epoch": 0.009460833157200801, "grad_norm": 0.00025555226602591574, "learning_rate": 0.00016002671245698999, "loss": 46.0, "step": 470 }, { "epoch": 0.009480962589450165, "grad_norm": 0.0003913817636203021, "learning_rate": 0.00015986542041920184, "loss": 46.0, "step": 471 }, { "epoch": 0.009501092021699529, "grad_norm": 0.00036681946949101985, "learning_rate": 0.00015970388528228354, "loss": 46.0, "step": 472 }, { "epoch": 0.009521221453948892, "grad_norm": 0.00019019895989913493, "learning_rate": 0.00015954210770219063, "loss": 46.0, "step": 473 }, { "epoch": 0.009541350886198254, "grad_norm": 0.00026755756698548794, "learning_rate": 0.00015938008833586307, "loss": 46.0, "step": 474 }, { "epoch": 0.009561480318447618, "grad_norm": 0.0002743021759670228, "learning_rate": 0.00015921782784122273, "loss": 46.0, "step": 475 }, { "epoch": 0.009581609750696982, "grad_norm": 0.0004780637682415545, "learning_rate": 0.00015905532687717053, "loss": 46.0, "step": 476 }, { "epoch": 0.009601739182946345, "grad_norm": 0.00016790846711955965, "learning_rate": 0.00015889258610358398, "loss": 46.0, "step": 477 }, { "epoch": 0.009621868615195709, "grad_norm": 0.0003423172456678003, "learning_rate": 0.00015872960618131443, "loss": 46.0, "step": 478 }, { "epoch": 0.009641998047445071, "grad_norm": 0.0004558518703561276, "learning_rate": 0.00015856638777218422, "loss": 46.0, "step": 479 }, { "epoch": 0.009662127479694435, "grad_norm": 0.0003485089691821486, "learning_rate": 0.00015840293153898428, "loss": 46.0, "step": 480 }, { "epoch": 0.009682256911943798, "grad_norm": 0.0005973344668745995, "learning_rate": 0.00015823923814547116, "loss": 46.0, "step": 481 }, { "epoch": 0.009702386344193162, "grad_norm": 0.0003374523075763136, "learning_rate": 0.0001580753082563645, "loss": 46.0, "step": 482 }, { "epoch": 0.009722515776442526, "grad_norm": 0.0004212489875499159, "learning_rate": 0.00015791114253734437, "loss": 46.0, "step": 483 }, { "epoch": 0.00974264520869189, "grad_norm": 0.0002458385133650154, "learning_rate": 0.0001577467416550484, "loss": 46.0, "step": 484 }, { "epoch": 0.009762774640941252, "grad_norm": 0.0006288140430115163, "learning_rate": 0.00015758210627706917, "loss": 46.0, "step": 485 }, { "epoch": 0.009782904073190615, "grad_norm": 0.0004315089900046587, "learning_rate": 0.0001574172370719515, "loss": 46.0, "step": 486 }, { "epoch": 0.009803033505439979, "grad_norm": 0.00017863186076283455, "learning_rate": 0.00015725213470918977, "loss": 46.0, "step": 487 }, { "epoch": 0.009823162937689343, "grad_norm": 0.00017475250933784992, "learning_rate": 0.0001570867998592251, "loss": 46.0, "step": 488 }, { "epoch": 0.009843292369938706, "grad_norm": 0.0007568314322270453, "learning_rate": 0.00015692123319344272, "loss": 46.0, "step": 489 }, { "epoch": 0.00986342180218807, "grad_norm": 0.00021712924353778362, "learning_rate": 0.00015675543538416916, "loss": 46.0, "step": 490 }, { "epoch": 0.009883551234437432, "grad_norm": 0.0002802063536364585, "learning_rate": 0.00015658940710466964, "loss": 46.0, "step": 491 }, { "epoch": 0.009903680666686796, "grad_norm": 0.0004394160059746355, "learning_rate": 0.0001564231490291452, "loss": 46.0, "step": 492 }, { "epoch": 0.00992381009893616, "grad_norm": 0.0002763732336461544, "learning_rate": 0.0001562566618327301, "loss": 46.0, "step": 493 }, { "epoch": 0.009943939531185523, "grad_norm": 0.0002444623096380383, "learning_rate": 0.00015608994619148886, "loss": 46.0, "step": 494 }, { "epoch": 0.009964068963434887, "grad_norm": 0.0003327823942527175, "learning_rate": 0.00015592300278241384, "loss": 46.0, "step": 495 }, { "epoch": 0.009984198395684249, "grad_norm": 0.00026393041480332613, "learning_rate": 0.00015575583228342226, "loss": 46.0, "step": 496 }, { "epoch": 0.010004327827933613, "grad_norm": 0.00029584183357656, "learning_rate": 0.00015558843537335338, "loss": 46.0, "step": 497 }, { "epoch": 0.010024457260182976, "grad_norm": 0.0006626403192058206, "learning_rate": 0.00015542081273196598, "loss": 46.0, "step": 498 }, { "epoch": 0.01004458669243234, "grad_norm": 0.0007279766141436994, "learning_rate": 0.00015525296503993548, "loss": 46.0, "step": 499 }, { "epoch": 0.010064716124681704, "grad_norm": 0.0004254883388057351, "learning_rate": 0.0001550848929788511, "loss": 46.0, "step": 500 }, { "epoch": 0.010084845556931068, "grad_norm": 0.0004729441716335714, "learning_rate": 0.00015491659723121325, "loss": 46.0, "step": 501 }, { "epoch": 0.01010497498918043, "grad_norm": 0.00022078775509726256, "learning_rate": 0.0001547480784804306, "loss": 46.0, "step": 502 }, { "epoch": 0.010125104421429793, "grad_norm": 0.0003951303951907903, "learning_rate": 0.00015457933741081745, "loss": 46.0, "step": 503 }, { "epoch": 0.010145233853679157, "grad_norm": 0.00033989755320362747, "learning_rate": 0.0001544103747075909, "loss": 46.0, "step": 504 }, { "epoch": 0.01016536328592852, "grad_norm": 0.0005033800262026489, "learning_rate": 0.00015424119105686792, "loss": 46.0, "step": 505 }, { "epoch": 0.010185492718177884, "grad_norm": 0.0004154644557274878, "learning_rate": 0.00015407178714566287, "loss": 46.0, "step": 506 }, { "epoch": 0.010205622150427248, "grad_norm": 0.00023219191643875092, "learning_rate": 0.0001539021636618844, "loss": 46.0, "step": 507 }, { "epoch": 0.01022575158267661, "grad_norm": 0.0003007667255587876, "learning_rate": 0.0001537323212943328, "loss": 46.0, "step": 508 }, { "epoch": 0.010245881014925974, "grad_norm": 0.00034918496385216713, "learning_rate": 0.00015356226073269736, "loss": 46.0, "step": 509 }, { "epoch": 0.010266010447175337, "grad_norm": 0.00021761894458904862, "learning_rate": 0.00015339198266755316, "loss": 46.0, "step": 510 }, { "epoch": 0.010286139879424701, "grad_norm": 0.0007356986752711236, "learning_rate": 0.00015322148779035869, "loss": 46.0, "step": 511 }, { "epoch": 0.010306269311674065, "grad_norm": 0.000666945765260607, "learning_rate": 0.00015305077679345276, "loss": 46.0, "step": 512 }, { "epoch": 0.010326398743923427, "grad_norm": 0.0003056855348404497, "learning_rate": 0.00015287985037005182, "loss": 46.0, "step": 513 }, { "epoch": 0.01034652817617279, "grad_norm": 0.0006946488283574581, "learning_rate": 0.00015270870921424721, "loss": 46.0, "step": 514 }, { "epoch": 0.010366657608422154, "grad_norm": 0.0005418279324658215, "learning_rate": 0.0001525373540210021, "loss": 46.0, "step": 515 }, { "epoch": 0.010386787040671518, "grad_norm": 0.00027551245875656605, "learning_rate": 0.00015236578548614887, "loss": 46.0, "step": 516 }, { "epoch": 0.010406916472920882, "grad_norm": 0.0004473023291211575, "learning_rate": 0.0001521940043063863, "loss": 46.0, "step": 517 }, { "epoch": 0.010427045905170245, "grad_norm": 0.0005578941782005131, "learning_rate": 0.00015202201117927656, "loss": 46.0, "step": 518 }, { "epoch": 0.010447175337419607, "grad_norm": 0.00012505419726949185, "learning_rate": 0.00015184980680324248, "loss": 46.0, "step": 519 }, { "epoch": 0.010467304769668971, "grad_norm": 0.00039335055043920875, "learning_rate": 0.00015167739187756487, "loss": 46.0, "step": 520 }, { "epoch": 0.010487434201918335, "grad_norm": 0.00027171571855433285, "learning_rate": 0.0001515047671023794, "loss": 46.0, "step": 521 }, { "epoch": 0.010507563634167699, "grad_norm": 0.0005522433784790337, "learning_rate": 0.00015133193317867392, "loss": 46.0, "step": 522 }, { "epoch": 0.010527693066417062, "grad_norm": 0.00027844231226481497, "learning_rate": 0.00015115889080828557, "loss": 46.0, "step": 523 }, { "epoch": 0.010547822498666426, "grad_norm": 0.0007594394846819341, "learning_rate": 0.000150985640693898, "loss": 46.0, "step": 524 }, { "epoch": 0.010567951930915788, "grad_norm": 0.00045375648187473416, "learning_rate": 0.00015081218353903838, "loss": 46.0, "step": 525 }, { "epoch": 0.010588081363165152, "grad_norm": 0.0003794842632487416, "learning_rate": 0.0001506385200480747, "loss": 46.0, "step": 526 }, { "epoch": 0.010608210795414515, "grad_norm": 0.0006085368804633617, "learning_rate": 0.00015046465092621278, "loss": 46.0, "step": 527 }, { "epoch": 0.010628340227663879, "grad_norm": 0.00021481052681338042, "learning_rate": 0.00015029057687949347, "loss": 46.0, "step": 528 }, { "epoch": 0.010648469659913243, "grad_norm": 0.0003222424420528114, "learning_rate": 0.0001501162986147897, "loss": 46.0, "step": 529 }, { "epoch": 0.010668599092162605, "grad_norm": 0.0006585840019397438, "learning_rate": 0.00014994181683980387, "loss": 46.0, "step": 530 }, { "epoch": 0.010688728524411968, "grad_norm": 0.0006019362481310964, "learning_rate": 0.00014976713226306457, "loss": 46.0, "step": 531 }, { "epoch": 0.010708857956661332, "grad_norm": 0.0005435794009827077, "learning_rate": 0.00014959224559392406, "loss": 46.0, "step": 532 }, { "epoch": 0.010728987388910696, "grad_norm": 0.0007174118072725832, "learning_rate": 0.00014941715754255522, "loss": 46.0, "step": 533 }, { "epoch": 0.01074911682116006, "grad_norm": 0.001002269797027111, "learning_rate": 0.00014924186881994867, "loss": 46.0, "step": 534 }, { "epoch": 0.010769246253409423, "grad_norm": 0.0002345768007216975, "learning_rate": 0.0001490663801379099, "loss": 46.0, "step": 535 }, { "epoch": 0.010789375685658785, "grad_norm": 0.0002722369390539825, "learning_rate": 0.00014889069220905637, "loss": 46.0, "step": 536 }, { "epoch": 0.010809505117908149, "grad_norm": 0.0003452429664321244, "learning_rate": 0.00014871480574681477, "loss": 46.0, "step": 537 }, { "epoch": 0.010829634550157513, "grad_norm": 0.000284095061942935, "learning_rate": 0.0001485387214654178, "loss": 46.0, "step": 538 }, { "epoch": 0.010849763982406876, "grad_norm": 0.0008951055933721364, "learning_rate": 0.00014836244007990156, "loss": 46.0, "step": 539 }, { "epoch": 0.01086989341465624, "grad_norm": 0.0003389440244063735, "learning_rate": 0.00014818596230610254, "loss": 46.0, "step": 540 }, { "epoch": 0.010890022846905604, "grad_norm": 0.0007038054754957557, "learning_rate": 0.0001480092888606547, "loss": 46.0, "step": 541 }, { "epoch": 0.010910152279154966, "grad_norm": 0.0007687349570915103, "learning_rate": 0.00014783242046098653, "loss": 46.0, "step": 542 }, { "epoch": 0.01093028171140433, "grad_norm": 0.00043066966463811696, "learning_rate": 0.00014765535782531832, "loss": 46.0, "step": 543 }, { "epoch": 0.010950411143653693, "grad_norm": 0.0010278800036758184, "learning_rate": 0.00014747810167265894, "loss": 46.0, "step": 544 }, { "epoch": 0.010970540575903057, "grad_norm": 0.0003619072958827019, "learning_rate": 0.0001473006527228032, "loss": 46.0, "step": 545 }, { "epoch": 0.01099067000815242, "grad_norm": 0.0005286497180350125, "learning_rate": 0.0001471230116963287, "loss": 46.0, "step": 546 }, { "epoch": 0.011010799440401783, "grad_norm": 0.00021179212490096688, "learning_rate": 0.00014694517931459317, "loss": 46.0, "step": 547 }, { "epoch": 0.011030928872651146, "grad_norm": 0.0006336824735626578, "learning_rate": 0.0001467671562997313, "loss": 46.0, "step": 548 }, { "epoch": 0.01105105830490051, "grad_norm": 0.00037215143674984574, "learning_rate": 0.00014658894337465187, "loss": 46.0, "step": 549 }, { "epoch": 0.011071187737149874, "grad_norm": 0.0005753418663516641, "learning_rate": 0.0001464105412630349, "loss": 46.0, "step": 550 }, { "epoch": 0.011091317169399238, "grad_norm": 0.0005321545177139342, "learning_rate": 0.0001462319506893286, "loss": 46.0, "step": 551 }, { "epoch": 0.011111446601648601, "grad_norm": 0.0005020481185056269, "learning_rate": 0.00014605317237874655, "loss": 46.0, "step": 552 }, { "epoch": 0.011131576033897963, "grad_norm": 0.0007133895414881408, "learning_rate": 0.00014587420705726458, "loss": 46.0, "step": 553 }, { "epoch": 0.011151705466147327, "grad_norm": 0.0005411367164924741, "learning_rate": 0.000145695055451618, "loss": 46.0, "step": 554 }, { "epoch": 0.01117183489839669, "grad_norm": 0.00042524756281636655, "learning_rate": 0.0001455157182892986, "loss": 46.0, "step": 555 }, { "epoch": 0.011191964330646054, "grad_norm": 0.0009429487981833518, "learning_rate": 0.00014533619629855158, "loss": 46.0, "step": 556 }, { "epoch": 0.011212093762895418, "grad_norm": 0.0002087266038870439, "learning_rate": 0.00014515649020837277, "loss": 46.0, "step": 557 }, { "epoch": 0.011232223195144782, "grad_norm": 0.0004085498512722552, "learning_rate": 0.00014497660074850552, "loss": 46.0, "step": 558 }, { "epoch": 0.011252352627394144, "grad_norm": 0.0005626246565952897, "learning_rate": 0.00014479652864943788, "loss": 46.0, "step": 559 }, { "epoch": 0.011272482059643507, "grad_norm": 0.00035937741631641984, "learning_rate": 0.00014461627464239948, "loss": 46.0, "step": 560 }, { "epoch": 0.011292611491892871, "grad_norm": 0.0006745000719092786, "learning_rate": 0.0001444358394593586, "loss": 46.0, "step": 561 }, { "epoch": 0.011312740924142235, "grad_norm": 0.0004202695854473859, "learning_rate": 0.0001442552238330194, "loss": 46.0, "step": 562 }, { "epoch": 0.011332870356391599, "grad_norm": 0.0005303717334754765, "learning_rate": 0.0001440744284968186, "loss": 46.0, "step": 563 }, { "epoch": 0.01135299978864096, "grad_norm": 0.001384332892484963, "learning_rate": 0.00014389345418492272, "loss": 46.0, "step": 564 }, { "epoch": 0.011373129220890324, "grad_norm": 0.0014389336574822664, "learning_rate": 0.00014371230163222516, "loss": 46.0, "step": 565 }, { "epoch": 0.011393258653139688, "grad_norm": 0.0007280406425707042, "learning_rate": 0.00014353097157434298, "loss": 46.0, "step": 566 }, { "epoch": 0.011413388085389052, "grad_norm": 0.0007825639913789928, "learning_rate": 0.00014334946474761412, "loss": 46.0, "step": 567 }, { "epoch": 0.011433517517638415, "grad_norm": 0.0011532383505254984, "learning_rate": 0.0001431677818890943, "loss": 46.0, "step": 568 }, { "epoch": 0.01145364694988778, "grad_norm": 0.00037170344148762524, "learning_rate": 0.00014298592373655414, "loss": 46.0, "step": 569 }, { "epoch": 0.011473776382137141, "grad_norm": 0.0004953424213454127, "learning_rate": 0.00014280389102847596, "loss": 46.0, "step": 570 }, { "epoch": 0.011493905814386505, "grad_norm": 0.00048051271005533636, "learning_rate": 0.000142621684504051, "loss": 46.0, "step": 571 }, { "epoch": 0.011514035246635869, "grad_norm": 0.0007658221293240786, "learning_rate": 0.0001424393049031763, "loss": 46.0, "step": 572 }, { "epoch": 0.011534164678885232, "grad_norm": 0.00040762132266536355, "learning_rate": 0.00014225675296645178, "loss": 46.0, "step": 573 }, { "epoch": 0.011554294111134596, "grad_norm": 0.00042765153921209276, "learning_rate": 0.00014207402943517707, "loss": 46.0, "step": 574 }, { "epoch": 0.01157442354338396, "grad_norm": 0.0006899941363371909, "learning_rate": 0.00014189113505134866, "loss": 46.0, "step": 575 }, { "epoch": 0.011594552975633322, "grad_norm": 0.0007566651329398155, "learning_rate": 0.00014170807055765682, "loss": 46.0, "step": 576 }, { "epoch": 0.011614682407882685, "grad_norm": 0.0007781738531775773, "learning_rate": 0.0001415248366974826, "loss": 46.0, "step": 577 }, { "epoch": 0.011634811840132049, "grad_norm": 0.0006567966192960739, "learning_rate": 0.00014134143421489482, "loss": 46.0, "step": 578 }, { "epoch": 0.011654941272381413, "grad_norm": 0.0004726462357211858, "learning_rate": 0.00014115786385464704, "loss": 46.0, "step": 579 }, { "epoch": 0.011675070704630777, "grad_norm": 0.0003221970109734684, "learning_rate": 0.00014097412636217448, "loss": 46.0, "step": 580 }, { "epoch": 0.011695200136880139, "grad_norm": 0.0010498060146346688, "learning_rate": 0.00014079022248359113, "loss": 46.0, "step": 581 }, { "epoch": 0.011715329569129502, "grad_norm": 0.0005236300639808178, "learning_rate": 0.0001406061529656865, "loss": 46.0, "step": 582 }, { "epoch": 0.011735459001378866, "grad_norm": 0.0005190221127122641, "learning_rate": 0.00014042191855592284, "loss": 46.0, "step": 583 }, { "epoch": 0.01175558843362823, "grad_norm": 0.0009433837258256972, "learning_rate": 0.000140237520002432, "loss": 46.0, "step": 584 }, { "epoch": 0.011775717865877593, "grad_norm": 0.0005737603642046452, "learning_rate": 0.00014005295805401226, "loss": 46.0, "step": 585 }, { "epoch": 0.011795847298126957, "grad_norm": 0.0004122421960346401, "learning_rate": 0.00013986823346012552, "loss": 46.0, "step": 586 }, { "epoch": 0.011815976730376319, "grad_norm": 0.000715672445949167, "learning_rate": 0.00013968334697089406, "loss": 46.0, "step": 587 }, { "epoch": 0.011836106162625683, "grad_norm": 0.0009091185638681054, "learning_rate": 0.00013949829933709767, "loss": 46.0, "step": 588 }, { "epoch": 0.011856235594875046, "grad_norm": 0.0005809186259284616, "learning_rate": 0.00013931309131017046, "loss": 46.0, "step": 589 }, { "epoch": 0.01187636502712441, "grad_norm": 0.0011282520135864615, "learning_rate": 0.0001391277236421978, "loss": 46.0, "step": 590 }, { "epoch": 0.011896494459373774, "grad_norm": 0.0011749881086871028, "learning_rate": 0.0001389421970859134, "loss": 46.0, "step": 591 }, { "epoch": 0.011916623891623138, "grad_norm": 0.0011483165435492992, "learning_rate": 0.0001387565123946962, "loss": 46.0, "step": 592 }, { "epoch": 0.0119367533238725, "grad_norm": 0.0005833703908137977, "learning_rate": 0.0001385706703225672, "loss": 46.0, "step": 593 }, { "epoch": 0.011956882756121863, "grad_norm": 0.0004976568161509931, "learning_rate": 0.00013838467162418652, "loss": 46.0, "step": 594 }, { "epoch": 0.011977012188371227, "grad_norm": 0.0004910955904051661, "learning_rate": 0.00013819851705485035, "loss": 46.0, "step": 595 }, { "epoch": 0.01199714162062059, "grad_norm": 0.0005480287945829332, "learning_rate": 0.00013801220737048777, "loss": 46.0, "step": 596 }, { "epoch": 0.012017271052869954, "grad_norm": 0.0005976366810500622, "learning_rate": 0.0001378257433276578, "loss": 46.0, "step": 597 }, { "epoch": 0.012037400485119316, "grad_norm": 0.0009460100554861128, "learning_rate": 0.00013763912568354625, "loss": 46.0, "step": 598 }, { "epoch": 0.01205752991736868, "grad_norm": 0.00048293505096808076, "learning_rate": 0.00013745235519596263, "loss": 46.0, "step": 599 }, { "epoch": 0.012077659349618044, "grad_norm": 0.0014719015453010798, "learning_rate": 0.00013726543262333721, "loss": 46.0, "step": 600 }, { "epoch": 0.012097788781867408, "grad_norm": 0.000909750466234982, "learning_rate": 0.00013707835872471771, "loss": 46.0, "step": 601 }, { "epoch": 0.012117918214116771, "grad_norm": 0.0008785554673522711, "learning_rate": 0.0001368911342597664, "loss": 46.0, "step": 602 }, { "epoch": 0.012138047646366135, "grad_norm": 0.0009704609983600676, "learning_rate": 0.00013670375998875708, "loss": 46.0, "step": 603 }, { "epoch": 0.012158177078615497, "grad_norm": 0.0004874320875387639, "learning_rate": 0.00013651623667257164, "loss": 46.0, "step": 604 }, { "epoch": 0.01217830651086486, "grad_norm": 0.0003640170325525105, "learning_rate": 0.00013632856507269744, "loss": 46.0, "step": 605 }, { "epoch": 0.012198435943114224, "grad_norm": 0.00045160859008319676, "learning_rate": 0.00013614074595122387, "loss": 46.0, "step": 606 }, { "epoch": 0.012218565375363588, "grad_norm": 0.0012795224320143461, "learning_rate": 0.00013595278007083933, "loss": 46.0, "step": 607 }, { "epoch": 0.012238694807612952, "grad_norm": 0.0006611685384996235, "learning_rate": 0.00013576466819482832, "loss": 46.0, "step": 608 }, { "epoch": 0.012258824239862316, "grad_norm": 0.001468715607188642, "learning_rate": 0.000135576411087068, "loss": 46.0, "step": 609 }, { "epoch": 0.012278953672111678, "grad_norm": 0.0008079329272732139, "learning_rate": 0.00013538800951202546, "loss": 46.0, "step": 610 }, { "epoch": 0.012299083104361041, "grad_norm": 0.0014699992025271058, "learning_rate": 0.0001351994642347543, "loss": 46.0, "step": 611 }, { "epoch": 0.012319212536610405, "grad_norm": 0.000965460145380348, "learning_rate": 0.0001350107760208918, "loss": 46.0, "step": 612 }, { "epoch": 0.012339341968859769, "grad_norm": 0.0008890178869478405, "learning_rate": 0.00013482194563665554, "loss": 46.0, "step": 613 }, { "epoch": 0.012359471401109132, "grad_norm": 0.0003420621796976775, "learning_rate": 0.00013463297384884047, "loss": 46.0, "step": 614 }, { "epoch": 0.012379600833358494, "grad_norm": 0.0015883035957813263, "learning_rate": 0.00013444386142481574, "loss": 46.0, "step": 615 }, { "epoch": 0.012399730265607858, "grad_norm": 0.0009616951574571431, "learning_rate": 0.00013425460913252165, "loss": 46.0, "step": 616 }, { "epoch": 0.012419859697857222, "grad_norm": 0.0015981622273102403, "learning_rate": 0.00013406521774046636, "loss": 46.0, "step": 617 }, { "epoch": 0.012439989130106585, "grad_norm": 0.0006598389591090381, "learning_rate": 0.000133875688017723, "loss": 46.0, "step": 618 }, { "epoch": 0.01246011856235595, "grad_norm": 0.0010206311708316207, "learning_rate": 0.00013368602073392626, "loss": 46.0, "step": 619 }, { "epoch": 0.012480247994605313, "grad_norm": 0.0010332156671211123, "learning_rate": 0.00013349621665926966, "loss": 46.0, "step": 620 }, { "epoch": 0.012500377426854675, "grad_norm": 0.0005513799260370433, "learning_rate": 0.00013330627656450199, "loss": 46.0, "step": 621 }, { "epoch": 0.012520506859104039, "grad_norm": 0.0005332003347575665, "learning_rate": 0.00013311620122092454, "loss": 46.0, "step": 622 }, { "epoch": 0.012540636291353402, "grad_norm": 0.0011620650766417384, "learning_rate": 0.0001329259914003877, "loss": 46.0, "step": 623 }, { "epoch": 0.012560765723602766, "grad_norm": 0.0013277794932946563, "learning_rate": 0.00013273564787528796, "loss": 46.0, "step": 624 }, { "epoch": 0.01258089515585213, "grad_norm": 0.0003230631700716913, "learning_rate": 0.00013254517141856483, "loss": 46.0, "step": 625 }, { "epoch": 0.012601024588101493, "grad_norm": 0.0017639078432694077, "learning_rate": 0.00013235456280369753, "loss": 46.0, "step": 626 }, { "epoch": 0.012621154020350855, "grad_norm": 0.0006892398814670742, "learning_rate": 0.000132163822804702, "loss": 46.0, "step": 627 }, { "epoch": 0.01264128345260022, "grad_norm": 0.0012958202278241515, "learning_rate": 0.00013197295219612767, "loss": 46.0, "step": 628 }, { "epoch": 0.012661412884849583, "grad_norm": 0.00060327781829983, "learning_rate": 0.00013178195175305438, "loss": 46.0, "step": 629 }, { "epoch": 0.012681542317098947, "grad_norm": 0.002238104585558176, "learning_rate": 0.0001315908222510891, "loss": 46.0, "step": 630 }, { "epoch": 0.01270167174934831, "grad_norm": 0.0008538436959497631, "learning_rate": 0.00013139956446636304, "loss": 46.0, "step": 631 }, { "epoch": 0.012721801181597672, "grad_norm": 0.0005125590832903981, "learning_rate": 0.00013120817917552816, "loss": 46.0, "step": 632 }, { "epoch": 0.012741930613847036, "grad_norm": 0.0015798620879650116, "learning_rate": 0.00013101666715575435, "loss": 46.0, "step": 633 }, { "epoch": 0.0127620600460964, "grad_norm": 0.0008807751582935452, "learning_rate": 0.000130825029184726, "loss": 46.0, "step": 634 }, { "epoch": 0.012782189478345763, "grad_norm": 0.0018555463757365942, "learning_rate": 0.00013063326604063896, "loss": 46.0, "step": 635 }, { "epoch": 0.012802318910595127, "grad_norm": 0.0017406801925972104, "learning_rate": 0.0001304413785021975, "loss": 46.0, "step": 636 }, { "epoch": 0.01282244834284449, "grad_norm": 0.0007575178751721978, "learning_rate": 0.00013024936734861087, "loss": 46.0, "step": 637 }, { "epoch": 0.012842577775093853, "grad_norm": 0.000731868261937052, "learning_rate": 0.0001300572333595904, "loss": 46.0, "step": 638 }, { "epoch": 0.012862707207343217, "grad_norm": 0.0005787058616988361, "learning_rate": 0.00012986497731534618, "loss": 46.0, "step": 639 }, { "epoch": 0.01288283663959258, "grad_norm": 0.0014929514145478606, "learning_rate": 0.00012967259999658402, "loss": 46.0, "step": 640 }, { "epoch": 0.012902966071841944, "grad_norm": 0.0007031034911051393, "learning_rate": 0.00012948010218450198, "loss": 46.0, "step": 641 }, { "epoch": 0.012923095504091308, "grad_norm": 0.0007022300269454718, "learning_rate": 0.00012928748466078767, "loss": 46.0, "step": 642 }, { "epoch": 0.012943224936340671, "grad_norm": 0.0007273477385751903, "learning_rate": 0.00012909474820761463, "loss": 46.0, "step": 643 }, { "epoch": 0.012963354368590033, "grad_norm": 0.0007245743181556463, "learning_rate": 0.0001289018936076395, "loss": 46.0, "step": 644 }, { "epoch": 0.012983483800839397, "grad_norm": 0.0007635979563929141, "learning_rate": 0.00012870892164399856, "loss": 46.0, "step": 645 }, { "epoch": 0.01300361323308876, "grad_norm": 0.0019391605164855719, "learning_rate": 0.00012851583310030467, "loss": 46.0, "step": 646 }, { "epoch": 0.013023742665338124, "grad_norm": 0.0011577141704037786, "learning_rate": 0.00012832262876064427, "loss": 46.0, "step": 647 }, { "epoch": 0.013043872097587488, "grad_norm": 0.0006742352270521224, "learning_rate": 0.00012812930940957386, "loss": 46.0, "step": 648 }, { "epoch": 0.01306400152983685, "grad_norm": 0.001967653399333358, "learning_rate": 0.00012793587583211693, "loss": 46.0, "step": 649 }, { "epoch": 0.013084130962086214, "grad_norm": 0.001183089567348361, "learning_rate": 0.000127742328813761, "loss": 46.0, "step": 650 }, { "epoch": 0.013104260394335578, "grad_norm": 0.000578385079279542, "learning_rate": 0.00012754866914045402, "loss": 46.0, "step": 651 }, { "epoch": 0.013124389826584941, "grad_norm": 0.0018523032777011395, "learning_rate": 0.00012735489759860166, "loss": 46.0, "step": 652 }, { "epoch": 0.013144519258834305, "grad_norm": 0.0011416682973504066, "learning_rate": 0.00012716101497506365, "loss": 46.0, "step": 653 }, { "epoch": 0.013164648691083669, "grad_norm": 0.0004267425974830985, "learning_rate": 0.00012696702205715088, "loss": 46.0, "step": 654 }, { "epoch": 0.01318477812333303, "grad_norm": 0.0008205072954297066, "learning_rate": 0.00012677291963262218, "loss": 46.0, "step": 655 }, { "epoch": 0.013204907555582394, "grad_norm": 0.0016005141660571098, "learning_rate": 0.00012657870848968092, "loss": 46.0, "step": 656 }, { "epoch": 0.013225036987831758, "grad_norm": 0.0009871599031612277, "learning_rate": 0.00012638438941697206, "loss": 46.0, "step": 657 }, { "epoch": 0.013245166420081122, "grad_norm": 0.0008003399707376957, "learning_rate": 0.00012618996320357877, "loss": 46.0, "step": 658 }, { "epoch": 0.013265295852330486, "grad_norm": 0.0006417233380489051, "learning_rate": 0.00012599543063901935, "loss": 46.0, "step": 659 }, { "epoch": 0.01328542528457985, "grad_norm": 0.001283377525396645, "learning_rate": 0.00012580079251324394, "loss": 46.0, "step": 660 }, { "epoch": 0.013305554716829211, "grad_norm": 0.0003393135848455131, "learning_rate": 0.00012560604961663128, "loss": 46.0, "step": 661 }, { "epoch": 0.013325684149078575, "grad_norm": 0.0011401561787351966, "learning_rate": 0.0001254112027399857, "loss": 46.0, "step": 662 }, { "epoch": 0.013345813581327939, "grad_norm": 0.000948408676777035, "learning_rate": 0.0001252162526745337, "loss": 46.0, "step": 663 }, { "epoch": 0.013365943013577302, "grad_norm": 0.0010545322438701987, "learning_rate": 0.0001250212002119207, "loss": 46.0, "step": 664 }, { "epoch": 0.013386072445826666, "grad_norm": 0.0008792767766863108, "learning_rate": 0.00012482604614420806, "loss": 46.0, "step": 665 }, { "epoch": 0.013406201878076028, "grad_norm": 0.0009220750071108341, "learning_rate": 0.0001246307912638697, "loss": 46.0, "step": 666 }, { "epoch": 0.013426331310325392, "grad_norm": 0.002156344009563327, "learning_rate": 0.0001244354363637889, "loss": 46.0, "step": 667 }, { "epoch": 0.013446460742574756, "grad_norm": 0.0012686087284237146, "learning_rate": 0.00012423998223725513, "loss": 46.0, "step": 668 }, { "epoch": 0.01346659017482412, "grad_norm": 0.0011429619044065475, "learning_rate": 0.00012404442967796077, "loss": 46.0, "step": 669 }, { "epoch": 0.013486719607073483, "grad_norm": 0.0009963412303477526, "learning_rate": 0.00012384877947999793, "loss": 46.0, "step": 670 }, { "epoch": 0.013506849039322847, "grad_norm": 0.0010601101676002145, "learning_rate": 0.00012365303243785513, "loss": 46.0, "step": 671 }, { "epoch": 0.013526978471572209, "grad_norm": 0.0007964776596054435, "learning_rate": 0.00012345718934641425, "loss": 46.0, "step": 672 }, { "epoch": 0.013547107903821572, "grad_norm": 0.0011618619319051504, "learning_rate": 0.00012326125100094716, "loss": 46.0, "step": 673 }, { "epoch": 0.013567237336070936, "grad_norm": 0.002301463857293129, "learning_rate": 0.0001230652181971126, "loss": 46.0, "step": 674 }, { "epoch": 0.0135873667683203, "grad_norm": 0.0009032952948473394, "learning_rate": 0.0001228690917309527, "loss": 46.0, "step": 675 }, { "epoch": 0.013607496200569663, "grad_norm": 0.001307567348703742, "learning_rate": 0.00012267287239889013, "loss": 46.0, "step": 676 }, { "epoch": 0.013627625632819027, "grad_norm": 0.0010554592590779066, "learning_rate": 0.0001224765609977246, "loss": 46.0, "step": 677 }, { "epoch": 0.01364775506506839, "grad_norm": 0.0008427874417975545, "learning_rate": 0.0001222801583246296, "loss": 46.0, "step": 678 }, { "epoch": 0.013667884497317753, "grad_norm": 0.0006029874202795327, "learning_rate": 0.00012208366517714946, "loss": 46.0, "step": 679 }, { "epoch": 0.013688013929567117, "grad_norm": 0.0012924791080877185, "learning_rate": 0.00012188708235319565, "loss": 46.0, "step": 680 }, { "epoch": 0.01370814336181648, "grad_norm": 0.00043431291123852134, "learning_rate": 0.00012169041065104401, "loss": 46.0, "step": 681 }, { "epoch": 0.013728272794065844, "grad_norm": 0.000775394553784281, "learning_rate": 0.00012149365086933115, "loss": 46.0, "step": 682 }, { "epoch": 0.013748402226315206, "grad_norm": 0.0011056979419663548, "learning_rate": 0.00012129680380705144, "loss": 46.0, "step": 683 }, { "epoch": 0.01376853165856457, "grad_norm": 0.0011312337592244148, "learning_rate": 0.0001210998702635536, "loss": 46.0, "step": 684 }, { "epoch": 0.013788661090813933, "grad_norm": 0.0010967912385240197, "learning_rate": 0.00012090285103853764, "loss": 46.0, "step": 685 }, { "epoch": 0.013808790523063297, "grad_norm": 0.002103852340951562, "learning_rate": 0.00012070574693205138, "loss": 46.0, "step": 686 }, { "epoch": 0.01382891995531266, "grad_norm": 0.0009759899112395942, "learning_rate": 0.00012050855874448737, "loss": 46.0, "step": 687 }, { "epoch": 0.013849049387562025, "grad_norm": 0.0012610235717147589, "learning_rate": 0.00012031128727657963, "loss": 46.0, "step": 688 }, { "epoch": 0.013869178819811387, "grad_norm": 0.0022020682226866484, "learning_rate": 0.0001201139333294003, "loss": 46.0, "step": 689 }, { "epoch": 0.01388930825206075, "grad_norm": 0.0011697233421728015, "learning_rate": 0.0001199164977043565, "loss": 46.0, "step": 690 }, { "epoch": 0.013909437684310114, "grad_norm": 0.001423744368366897, "learning_rate": 0.00011971898120318699, "loss": 46.0, "step": 691 }, { "epoch": 0.013929567116559478, "grad_norm": 0.001964231953024864, "learning_rate": 0.00011952138462795897, "loss": 46.0, "step": 692 }, { "epoch": 0.013949696548808841, "grad_norm": 0.001953084603883326, "learning_rate": 0.00011932370878106477, "loss": 46.0, "step": 693 }, { "epoch": 0.013969825981058205, "grad_norm": 0.0017078432720154524, "learning_rate": 0.00011912595446521868, "loss": 46.0, "step": 694 }, { "epoch": 0.013989955413307567, "grad_norm": 0.0008826723205856979, "learning_rate": 0.00011892812248345358, "loss": 46.0, "step": 695 }, { "epoch": 0.01401008484555693, "grad_norm": 0.00104327907320112, "learning_rate": 0.00011873021363911779, "loss": 46.0, "step": 696 }, { "epoch": 0.014030214277806295, "grad_norm": 0.001443845801986754, "learning_rate": 0.00011853222873587167, "loss": 46.0, "step": 697 }, { "epoch": 0.014050343710055658, "grad_norm": 0.0008737000171095133, "learning_rate": 0.00011833416857768447, "loss": 46.0, "step": 698 }, { "epoch": 0.014070473142305022, "grad_norm": 0.001212298753671348, "learning_rate": 0.00011813603396883108, "loss": 46.0, "step": 699 }, { "epoch": 0.014090602574554384, "grad_norm": 0.0007996526546776295, "learning_rate": 0.00011793782571388865, "loss": 46.0, "step": 700 }, { "epoch": 0.014110732006803748, "grad_norm": 0.0007419726462103426, "learning_rate": 0.00011773954461773344, "loss": 46.0, "step": 701 }, { "epoch": 0.014130861439053111, "grad_norm": 0.001537975505925715, "learning_rate": 0.00011754119148553746, "loss": 46.0, "step": 702 }, { "epoch": 0.014150990871302475, "grad_norm": 0.001321911346167326, "learning_rate": 0.00011734276712276528, "loss": 46.0, "step": 703 }, { "epoch": 0.014171120303551839, "grad_norm": 0.0020747899543493986, "learning_rate": 0.00011714427233517069, "loss": 46.0, "step": 704 }, { "epoch": 0.014191249735801202, "grad_norm": 0.0013918217737227678, "learning_rate": 0.00011694570792879345, "loss": 46.0, "step": 705 }, { "epoch": 0.014211379168050564, "grad_norm": 0.0008428136934526265, "learning_rate": 0.00011674707470995608, "loss": 46.0, "step": 706 }, { "epoch": 0.014231508600299928, "grad_norm": 0.0007644314900971949, "learning_rate": 0.00011654837348526044, "loss": 46.0, "step": 707 }, { "epoch": 0.014251638032549292, "grad_norm": 0.001396226929500699, "learning_rate": 0.00011634960506158465, "loss": 46.0, "step": 708 }, { "epoch": 0.014271767464798656, "grad_norm": 0.0008691162220202386, "learning_rate": 0.00011615077024607965, "loss": 46.0, "step": 709 }, { "epoch": 0.01429189689704802, "grad_norm": 0.0011933896457776427, "learning_rate": 0.00011595186984616598, "loss": 46.0, "step": 710 }, { "epoch": 0.014312026329297383, "grad_norm": 0.001465336186811328, "learning_rate": 0.00011575290466953054, "loss": 46.0, "step": 711 }, { "epoch": 0.014332155761546745, "grad_norm": 0.0012663495726883411, "learning_rate": 0.0001155538755241232, "loss": 46.0, "step": 712 }, { "epoch": 0.014352285193796109, "grad_norm": 0.0017936511430889368, "learning_rate": 0.00011535478321815366, "loss": 46.0, "step": 713 }, { "epoch": 0.014372414626045472, "grad_norm": 0.0014043014962226152, "learning_rate": 0.00011515562856008808, "loss": 46.0, "step": 714 }, { "epoch": 0.014392544058294836, "grad_norm": 0.001723953988403082, "learning_rate": 0.00011495641235864581, "loss": 46.0, "step": 715 }, { "epoch": 0.0144126734905442, "grad_norm": 0.0010820517782121897, "learning_rate": 0.00011475713542279612, "loss": 46.0, "step": 716 }, { "epoch": 0.014432802922793562, "grad_norm": 0.001104168244637549, "learning_rate": 0.00011455779856175488, "loss": 46.0, "step": 717 }, { "epoch": 0.014452932355042926, "grad_norm": 0.0006916458951309323, "learning_rate": 0.00011435840258498139, "loss": 46.0, "step": 718 }, { "epoch": 0.01447306178729229, "grad_norm": 0.002727057319134474, "learning_rate": 0.00011415894830217486, "loss": 46.0, "step": 719 }, { "epoch": 0.014493191219541653, "grad_norm": 0.002052182564511895, "learning_rate": 0.00011395943652327141, "loss": 46.0, "step": 720 }, { "epoch": 0.014513320651791017, "grad_norm": 0.0010489120613783598, "learning_rate": 0.00011375986805844054, "loss": 46.0, "step": 721 }, { "epoch": 0.01453345008404038, "grad_norm": 0.0022547508124262094, "learning_rate": 0.000113560243718082, "loss": 46.0, "step": 722 }, { "epoch": 0.014553579516289742, "grad_norm": 0.0013953811721876264, "learning_rate": 0.00011336056431282238, "loss": 46.0, "step": 723 }, { "epoch": 0.014573708948539106, "grad_norm": 0.0013688750332221389, "learning_rate": 0.00011316083065351195, "loss": 46.0, "step": 724 }, { "epoch": 0.01459383838078847, "grad_norm": 0.0008434226620011032, "learning_rate": 0.00011296104355122126, "loss": 46.0, "step": 725 }, { "epoch": 0.014613967813037834, "grad_norm": 0.0006940681487321854, "learning_rate": 0.00011276120381723779, "loss": 46.0, "step": 726 }, { "epoch": 0.014634097245287197, "grad_norm": 0.0007419649627991021, "learning_rate": 0.00011256131226306288, "loss": 46.0, "step": 727 }, { "epoch": 0.014654226677536561, "grad_norm": 0.0008091644267551601, "learning_rate": 0.00011236136970040823, "loss": 46.0, "step": 728 }, { "epoch": 0.014674356109785923, "grad_norm": 0.0014601018046960235, "learning_rate": 0.00011216137694119271, "loss": 46.0, "step": 729 }, { "epoch": 0.014694485542035287, "grad_norm": 0.0016407629009336233, "learning_rate": 0.00011196133479753894, "loss": 46.0, "step": 730 }, { "epoch": 0.01471461497428465, "grad_norm": 0.0018916395492851734, "learning_rate": 0.0001117612440817702, "loss": 46.0, "step": 731 }, { "epoch": 0.014734744406534014, "grad_norm": 0.0015089900698512793, "learning_rate": 0.00011156110560640693, "loss": 46.0, "step": 732 }, { "epoch": 0.014754873838783378, "grad_norm": 0.0007338857976719737, "learning_rate": 0.00011136092018416356, "loss": 46.0, "step": 733 }, { "epoch": 0.01477500327103274, "grad_norm": 0.001960652880370617, "learning_rate": 0.00011116068862794506, "loss": 46.0, "step": 734 }, { "epoch": 0.014795132703282103, "grad_norm": 0.0007831032853573561, "learning_rate": 0.00011096041175084386, "loss": 46.0, "step": 735 }, { "epoch": 0.014815262135531467, "grad_norm": 0.0011528864270076156, "learning_rate": 0.00011076009036613637, "loss": 46.0, "step": 736 }, { "epoch": 0.014835391567780831, "grad_norm": 0.001367407850921154, "learning_rate": 0.00011055972528727973, "loss": 46.0, "step": 737 }, { "epoch": 0.014855521000030195, "grad_norm": 0.0011748921824619174, "learning_rate": 0.00011035931732790856, "loss": 46.0, "step": 738 }, { "epoch": 0.014875650432279558, "grad_norm": 0.0011586399050429463, "learning_rate": 0.00011015886730183152, "loss": 46.0, "step": 739 }, { "epoch": 0.01489577986452892, "grad_norm": 0.001220092410221696, "learning_rate": 0.00010995837602302819, "loss": 46.0, "step": 740 }, { "epoch": 0.014915909296778284, "grad_norm": 0.0012157183373346925, "learning_rate": 0.00010975784430564558, "loss": 46.0, "step": 741 }, { "epoch": 0.014936038729027648, "grad_norm": 0.000734594592358917, "learning_rate": 0.00010955727296399496, "loss": 46.0, "step": 742 }, { "epoch": 0.014956168161277011, "grad_norm": 0.0010155554627999663, "learning_rate": 0.00010935666281254853, "loss": 46.0, "step": 743 }, { "epoch": 0.014976297593526375, "grad_norm": 0.001596163841895759, "learning_rate": 0.00010915601466593604, "loss": 46.0, "step": 744 }, { "epoch": 0.014996427025775739, "grad_norm": 0.0010005077347159386, "learning_rate": 0.0001089553293389415, "loss": 46.0, "step": 745 }, { "epoch": 0.0150165564580251, "grad_norm": 0.0015270253643393517, "learning_rate": 0.00010875460764649998, "loss": 46.0, "step": 746 }, { "epoch": 0.015036685890274465, "grad_norm": 0.0010284383315593004, "learning_rate": 0.00010855385040369419, "loss": 46.0, "step": 747 }, { "epoch": 0.015056815322523828, "grad_norm": 0.0006261624512262642, "learning_rate": 0.00010835305842575119, "loss": 46.0, "step": 748 }, { "epoch": 0.015076944754773192, "grad_norm": 0.0009544830536469817, "learning_rate": 0.0001081522325280391, "loss": 46.0, "step": 749 }, { "epoch": 0.015097074187022556, "grad_norm": 0.0009903472382575274, "learning_rate": 0.00010795137352606377, "loss": 46.0, "step": 750 }, { "epoch": 0.015117203619271918, "grad_norm": 0.0013472349382936954, "learning_rate": 0.00010775048223546551, "loss": 46.0, "step": 751 }, { "epoch": 0.015137333051521281, "grad_norm": 0.0021951631642878056, "learning_rate": 0.00010754955947201571, "loss": 46.0, "step": 752 }, { "epoch": 0.015157462483770645, "grad_norm": 0.0011615699622780085, "learning_rate": 0.00010734860605161355, "loss": 46.0, "step": 753 }, { "epoch": 0.015177591916020009, "grad_norm": 0.001115267863497138, "learning_rate": 0.00010714762279028275, "loss": 46.0, "step": 754 }, { "epoch": 0.015197721348269373, "grad_norm": 0.0008176314877346158, "learning_rate": 0.00010694661050416819, "loss": 46.0, "step": 755 }, { "epoch": 0.015217850780518736, "grad_norm": 0.0013733267551288009, "learning_rate": 0.00010674557000953258, "loss": 46.0, "step": 756 }, { "epoch": 0.015237980212768098, "grad_norm": 0.0006811064085923135, "learning_rate": 0.00010654450212275324, "loss": 46.0, "step": 757 }, { "epoch": 0.015258109645017462, "grad_norm": 0.0011723422212526202, "learning_rate": 0.00010634340766031868, "loss": 46.0, "step": 758 }, { "epoch": 0.015278239077266826, "grad_norm": 0.0008587664924561977, "learning_rate": 0.0001061422874388253, "loss": 46.0, "step": 759 }, { "epoch": 0.01529836850951619, "grad_norm": 0.0007777991122566164, "learning_rate": 0.00010594114227497419, "loss": 46.0, "step": 760 }, { "epoch": 0.015318497941765553, "grad_norm": 0.0022410592064261436, "learning_rate": 0.00010573997298556762, "loss": 46.0, "step": 761 }, { "epoch": 0.015338627374014917, "grad_norm": 0.0009275775519199669, "learning_rate": 0.00010553878038750591, "loss": 46.0, "step": 762 }, { "epoch": 0.015358756806264279, "grad_norm": 0.0011703603668138385, "learning_rate": 0.000105337565297784, "loss": 46.0, "step": 763 }, { "epoch": 0.015378886238513642, "grad_norm": 0.0008710163529030979, "learning_rate": 0.00010513632853348817, "loss": 46.0, "step": 764 }, { "epoch": 0.015399015670763006, "grad_norm": 0.0014304480282589793, "learning_rate": 0.00010493507091179267, "loss": 46.0, "step": 765 }, { "epoch": 0.01541914510301237, "grad_norm": 0.0010722475126385689, "learning_rate": 0.00010473379324995654, "loss": 46.0, "step": 766 }, { "epoch": 0.015439274535261734, "grad_norm": 0.0015626356471329927, "learning_rate": 0.00010453249636532007, "loss": 46.0, "step": 767 }, { "epoch": 0.015459403967511096, "grad_norm": 0.0005589164211414754, "learning_rate": 0.00010433118107530175, "loss": 46.0, "step": 768 }, { "epoch": 0.01547953339976046, "grad_norm": 0.0016512125730514526, "learning_rate": 0.00010412984819739473, "loss": 46.0, "step": 769 }, { "epoch": 0.015499662832009823, "grad_norm": 0.0007763996836729348, "learning_rate": 0.0001039284985491636, "loss": 46.0, "step": 770 }, { "epoch": 0.015519792264259187, "grad_norm": 0.0013635704526677728, "learning_rate": 0.00010372713294824102, "loss": 46.0, "step": 771 }, { "epoch": 0.01553992169650855, "grad_norm": 0.0014162887819111347, "learning_rate": 0.00010352575221232443, "loss": 46.0, "step": 772 }, { "epoch": 0.015560051128757914, "grad_norm": 0.002015099162235856, "learning_rate": 0.00010332435715917282, "loss": 46.0, "step": 773 }, { "epoch": 0.015580180561007276, "grad_norm": 0.0012888460187241435, "learning_rate": 0.00010312294860660319, "loss": 46.0, "step": 774 }, { "epoch": 0.01560030999325664, "grad_norm": 0.0013061100617051125, "learning_rate": 0.00010292152737248746, "loss": 46.0, "step": 775 }, { "epoch": 0.015620439425506004, "grad_norm": 0.001647230121307075, "learning_rate": 0.00010272009427474898, "loss": 46.0, "step": 776 }, { "epoch": 0.015640568857755367, "grad_norm": 0.0006732527981512249, "learning_rate": 0.00010251865013135931, "loss": 46.0, "step": 777 }, { "epoch": 0.01566069829000473, "grad_norm": 0.0018044369062408805, "learning_rate": 0.00010231719576033487, "loss": 46.0, "step": 778 }, { "epoch": 0.015680827722254095, "grad_norm": 0.002040853723883629, "learning_rate": 0.00010211573197973356, "loss": 46.0, "step": 779 }, { "epoch": 0.015700957154503457, "grad_norm": 0.0008194477995857596, "learning_rate": 0.00010191425960765159, "loss": 46.0, "step": 780 }, { "epoch": 0.015721086586752822, "grad_norm": 0.0022002204786986113, "learning_rate": 0.00010171277946221998, "loss": 46.0, "step": 781 }, { "epoch": 0.015741216019002184, "grad_norm": 0.001327970647253096, "learning_rate": 0.00010151129236160126, "loss": 46.0, "step": 782 }, { "epoch": 0.015761345451251546, "grad_norm": 0.0012340659741312265, "learning_rate": 0.00010130979912398635, "loss": 46.0, "step": 783 }, { "epoch": 0.01578147488350091, "grad_norm": 0.0020124921575188637, "learning_rate": 0.000101108300567591, "loss": 46.0, "step": 784 }, { "epoch": 0.015801604315750273, "grad_norm": 0.0010386345675215125, "learning_rate": 0.00010090679751065255, "loss": 46.0, "step": 785 }, { "epoch": 0.01582173374799964, "grad_norm": 0.0010036143939942122, "learning_rate": 0.00010070529077142665, "loss": 46.0, "step": 786 }, { "epoch": 0.01582173374799964, "eval_loss": 11.5, "eval_runtime": 129.297, "eval_samples_per_second": 161.783, "eval_steps_per_second": 80.891, "step": 786 }, { "epoch": 0.015841863180249, "grad_norm": 0.0016748905181884766, "learning_rate": 0.00010050378116818391, "loss": 46.0, "step": 787 }, { "epoch": 0.015861992612498363, "grad_norm": 0.0021699341014027596, "learning_rate": 0.00010030226951920654, "loss": 46.0, "step": 788 }, { "epoch": 0.01588212204474773, "grad_norm": 0.0006511384854093194, "learning_rate": 0.00010010075664278507, "loss": 46.0, "step": 789 }, { "epoch": 0.01590225147699709, "grad_norm": 0.00087300396990031, "learning_rate": 9.9899243357215e-05, "loss": 46.0, "step": 790 }, { "epoch": 0.015922380909246456, "grad_norm": 0.0009463525493629277, "learning_rate": 9.969773048079351e-05, "loss": 46.0, "step": 791 }, { "epoch": 0.015942510341495818, "grad_norm": 0.0008198622381314635, "learning_rate": 9.949621883181612e-05, "loss": 46.0, "step": 792 }, { "epoch": 0.015962639773745183, "grad_norm": 0.0014198910212144256, "learning_rate": 9.929470922857337e-05, "loss": 46.0, "step": 793 }, { "epoch": 0.015982769205994545, "grad_norm": 0.000913235591724515, "learning_rate": 9.909320248934747e-05, "loss": 46.0, "step": 794 }, { "epoch": 0.016002898638243907, "grad_norm": 0.0006811967468820512, "learning_rate": 9.889169943240903e-05, "loss": 46.0, "step": 795 }, { "epoch": 0.016023028070493273, "grad_norm": 0.0009198164916597307, "learning_rate": 9.869020087601365e-05, "loss": 46.0, "step": 796 }, { "epoch": 0.016043157502742635, "grad_norm": 0.0016066880198195577, "learning_rate": 9.848870763839877e-05, "loss": 46.0, "step": 797 }, { "epoch": 0.016063286934992, "grad_norm": 0.0016591864405199885, "learning_rate": 9.828722053778008e-05, "loss": 46.0, "step": 798 }, { "epoch": 0.016083416367241362, "grad_norm": 0.0011810685973614454, "learning_rate": 9.808574039234843e-05, "loss": 46.0, "step": 799 }, { "epoch": 0.016103545799490724, "grad_norm": 0.0011113261571153998, "learning_rate": 9.788426802026645e-05, "loss": 46.0, "step": 800 }, { "epoch": 0.01612367523174009, "grad_norm": 0.0012657048646360636, "learning_rate": 9.768280423966516e-05, "loss": 46.0, "step": 801 }, { "epoch": 0.01614380466398945, "grad_norm": 0.0013318355195224285, "learning_rate": 9.748134986864072e-05, "loss": 46.0, "step": 802 }, { "epoch": 0.016163934096238817, "grad_norm": 0.0016351451631635427, "learning_rate": 9.727990572525105e-05, "loss": 46.0, "step": 803 }, { "epoch": 0.01618406352848818, "grad_norm": 0.0012661207001656294, "learning_rate": 9.707847262751257e-05, "loss": 46.0, "step": 804 }, { "epoch": 0.01620419296073754, "grad_norm": 0.0015301022212952375, "learning_rate": 9.687705139339685e-05, "loss": 46.0, "step": 805 }, { "epoch": 0.016224322392986906, "grad_norm": 0.0011811705771833658, "learning_rate": 9.667564284082723e-05, "loss": 46.0, "step": 806 }, { "epoch": 0.016244451825236268, "grad_norm": 0.0018039242131635547, "learning_rate": 9.64742477876756e-05, "loss": 46.0, "step": 807 }, { "epoch": 0.016264581257485634, "grad_norm": 0.0017712223343551159, "learning_rate": 9.627286705175902e-05, "loss": 46.0, "step": 808 }, { "epoch": 0.016284710689734996, "grad_norm": 0.0017983190482482314, "learning_rate": 9.607150145083642e-05, "loss": 46.0, "step": 809 }, { "epoch": 0.01630484012198436, "grad_norm": 0.0020130311604589224, "learning_rate": 9.587015180260526e-05, "loss": 46.0, "step": 810 }, { "epoch": 0.016324969554233723, "grad_norm": 0.001603461685590446, "learning_rate": 9.566881892469824e-05, "loss": 46.0, "step": 811 }, { "epoch": 0.016345098986483085, "grad_norm": 0.001214203075505793, "learning_rate": 9.546750363467997e-05, "loss": 46.0, "step": 812 }, { "epoch": 0.01636522841873245, "grad_norm": 0.0015112390974536538, "learning_rate": 9.526620675004352e-05, "loss": 46.0, "step": 813 }, { "epoch": 0.016385357850981812, "grad_norm": 0.001148148556239903, "learning_rate": 9.506492908820737e-05, "loss": 46.0, "step": 814 }, { "epoch": 0.016405487283231178, "grad_norm": 0.0008940810221247375, "learning_rate": 9.486367146651187e-05, "loss": 46.0, "step": 815 }, { "epoch": 0.01642561671548054, "grad_norm": 0.0011365159880369902, "learning_rate": 9.466243470221602e-05, "loss": 46.0, "step": 816 }, { "epoch": 0.016445746147729902, "grad_norm": 0.0012618922628462315, "learning_rate": 9.44612196124941e-05, "loss": 46.0, "step": 817 }, { "epoch": 0.016465875579979267, "grad_norm": 0.0018559535965323448, "learning_rate": 9.42600270144324e-05, "loss": 46.0, "step": 818 }, { "epoch": 0.01648600501222863, "grad_norm": 0.0010438922327011824, "learning_rate": 9.405885772502582e-05, "loss": 46.0, "step": 819 }, { "epoch": 0.016506134444477995, "grad_norm": 0.0019696117378771305, "learning_rate": 9.385771256117473e-05, "loss": 46.0, "step": 820 }, { "epoch": 0.016526263876727357, "grad_norm": 0.0009223187807947397, "learning_rate": 9.365659233968136e-05, "loss": 46.0, "step": 821 }, { "epoch": 0.01654639330897672, "grad_norm": 0.001138696214184165, "learning_rate": 9.345549787724679e-05, "loss": 46.0, "step": 822 }, { "epoch": 0.016566522741226084, "grad_norm": 0.0010714115342125297, "learning_rate": 9.325442999046744e-05, "loss": 46.0, "step": 823 }, { "epoch": 0.016586652173475446, "grad_norm": 0.0024443184956908226, "learning_rate": 9.305338949583183e-05, "loss": 46.0, "step": 824 }, { "epoch": 0.01660678160572481, "grad_norm": 0.0012600711779668927, "learning_rate": 9.285237720971726e-05, "loss": 46.0, "step": 825 }, { "epoch": 0.016626911037974174, "grad_norm": 0.001455603982321918, "learning_rate": 9.265139394838646e-05, "loss": 46.0, "step": 826 }, { "epoch": 0.01664704047022354, "grad_norm": 0.001236987765878439, "learning_rate": 9.245044052798435e-05, "loss": 46.0, "step": 827 }, { "epoch": 0.0166671699024729, "grad_norm": 0.0023520609829574823, "learning_rate": 9.224951776453454e-05, "loss": 46.0, "step": 828 }, { "epoch": 0.016687299334722263, "grad_norm": 0.0009862706065177917, "learning_rate": 9.204862647393625e-05, "loss": 46.0, "step": 829 }, { "epoch": 0.01670742876697163, "grad_norm": 0.000905146764125675, "learning_rate": 9.184776747196092e-05, "loss": 46.0, "step": 830 }, { "epoch": 0.01672755819922099, "grad_norm": 0.0013413660926744342, "learning_rate": 9.164694157424882e-05, "loss": 46.0, "step": 831 }, { "epoch": 0.016747687631470356, "grad_norm": 0.0010207323357462883, "learning_rate": 9.144614959630583e-05, "loss": 46.0, "step": 832 }, { "epoch": 0.016767817063719718, "grad_norm": 0.001262878649868071, "learning_rate": 9.124539235350004e-05, "loss": 46.0, "step": 833 }, { "epoch": 0.01678794649596908, "grad_norm": 0.0010532139567658305, "learning_rate": 9.104467066105855e-05, "loss": 46.0, "step": 834 }, { "epoch": 0.016808075928218445, "grad_norm": 0.0016255469527095556, "learning_rate": 9.084398533406401e-05, "loss": 46.0, "step": 835 }, { "epoch": 0.016828205360467807, "grad_norm": 0.001301302807405591, "learning_rate": 9.06433371874515e-05, "loss": 46.0, "step": 836 }, { "epoch": 0.016848334792717173, "grad_norm": 0.0009694418986327946, "learning_rate": 9.044272703600505e-05, "loss": 46.0, "step": 837 }, { "epoch": 0.016868464224966535, "grad_norm": 0.0006478687282651663, "learning_rate": 9.024215569435443e-05, "loss": 46.0, "step": 838 }, { "epoch": 0.016888593657215897, "grad_norm": 0.0010540804360061884, "learning_rate": 9.004162397697183e-05, "loss": 46.0, "step": 839 }, { "epoch": 0.016908723089465262, "grad_norm": 0.001823692349717021, "learning_rate": 8.984113269816849e-05, "loss": 46.0, "step": 840 }, { "epoch": 0.016928852521714624, "grad_norm": 0.0018427857430651784, "learning_rate": 8.964068267209145e-05, "loss": 46.0, "step": 841 }, { "epoch": 0.01694898195396399, "grad_norm": 0.0011904591228812933, "learning_rate": 8.94402747127203e-05, "loss": 46.0, "step": 842 }, { "epoch": 0.01696911138621335, "grad_norm": 0.001324215205386281, "learning_rate": 8.923990963386367e-05, "loss": 46.0, "step": 843 }, { "epoch": 0.016989240818462717, "grad_norm": 0.0005633292021229863, "learning_rate": 8.903958824915616e-05, "loss": 46.0, "step": 844 }, { "epoch": 0.01700937025071208, "grad_norm": 0.0008070293697528541, "learning_rate": 8.883931137205496e-05, "loss": 46.0, "step": 845 }, { "epoch": 0.01702949968296144, "grad_norm": 0.0009888982167467475, "learning_rate": 8.863907981583648e-05, "loss": 46.0, "step": 846 }, { "epoch": 0.017049629115210806, "grad_norm": 0.001973828999325633, "learning_rate": 8.843889439359308e-05, "loss": 46.0, "step": 847 }, { "epoch": 0.01706975854746017, "grad_norm": 0.0009145489893853664, "learning_rate": 8.82387559182298e-05, "loss": 46.0, "step": 848 }, { "epoch": 0.017089887979709534, "grad_norm": 0.0007107039564289153, "learning_rate": 8.803866520246111e-05, "loss": 46.0, "step": 849 }, { "epoch": 0.017110017411958896, "grad_norm": 0.0007881961646489799, "learning_rate": 8.783862305880734e-05, "loss": 46.0, "step": 850 }, { "epoch": 0.017130146844208258, "grad_norm": 0.00188835512381047, "learning_rate": 8.76386302995918e-05, "loss": 46.0, "step": 851 }, { "epoch": 0.017150276276457623, "grad_norm": 0.0015146104851737618, "learning_rate": 8.743868773693715e-05, "loss": 46.0, "step": 852 }, { "epoch": 0.017170405708706985, "grad_norm": 0.0020661610178649426, "learning_rate": 8.723879618276224e-05, "loss": 46.0, "step": 853 }, { "epoch": 0.01719053514095635, "grad_norm": 0.0013012840645387769, "learning_rate": 8.703895644877877e-05, "loss": 46.0, "step": 854 }, { "epoch": 0.017210664573205713, "grad_norm": 0.0008128953049890697, "learning_rate": 8.683916934648804e-05, "loss": 46.0, "step": 855 }, { "epoch": 0.017230794005455075, "grad_norm": 0.0026957583613693714, "learning_rate": 8.663943568717763e-05, "loss": 46.0, "step": 856 }, { "epoch": 0.01725092343770444, "grad_norm": 0.002083521569147706, "learning_rate": 8.643975628191802e-05, "loss": 46.0, "step": 857 }, { "epoch": 0.017271052869953802, "grad_norm": 0.0012452022638171911, "learning_rate": 8.624013194155949e-05, "loss": 46.0, "step": 858 }, { "epoch": 0.017291182302203167, "grad_norm": 0.0020928813610225916, "learning_rate": 8.604056347672862e-05, "loss": 46.0, "step": 859 }, { "epoch": 0.01731131173445253, "grad_norm": 0.0012156859738752246, "learning_rate": 8.584105169782516e-05, "loss": 46.0, "step": 860 }, { "epoch": 0.017331441166701895, "grad_norm": 0.000812519050668925, "learning_rate": 8.564159741501863e-05, "loss": 46.0, "step": 861 }, { "epoch": 0.017351570598951257, "grad_norm": 0.0012246136320754886, "learning_rate": 8.544220143824511e-05, "loss": 46.0, "step": 862 }, { "epoch": 0.01737170003120062, "grad_norm": 0.0006080910097807646, "learning_rate": 8.524286457720389e-05, "loss": 46.0, "step": 863 }, { "epoch": 0.017391829463449984, "grad_norm": 0.0011760718189179897, "learning_rate": 8.504358764135423e-05, "loss": 46.0, "step": 864 }, { "epoch": 0.017411958895699346, "grad_norm": 0.0011525904992595315, "learning_rate": 8.484437143991195e-05, "loss": 46.0, "step": 865 }, { "epoch": 0.01743208832794871, "grad_norm": 0.0016694519435986876, "learning_rate": 8.464521678184637e-05, "loss": 46.0, "step": 866 }, { "epoch": 0.017452217760198074, "grad_norm": 0.0010596023639664054, "learning_rate": 8.444612447587683e-05, "loss": 46.0, "step": 867 }, { "epoch": 0.017472347192447436, "grad_norm": 0.0011948344763368368, "learning_rate": 8.424709533046948e-05, "loss": 46.0, "step": 868 }, { "epoch": 0.0174924766246968, "grad_norm": 0.0011381066869944334, "learning_rate": 8.404813015383402e-05, "loss": 46.0, "step": 869 }, { "epoch": 0.017512606056946163, "grad_norm": 0.000726166705135256, "learning_rate": 8.384922975392035e-05, "loss": 46.0, "step": 870 }, { "epoch": 0.01753273548919553, "grad_norm": 0.0010629513999447227, "learning_rate": 8.365039493841537e-05, "loss": 46.0, "step": 871 }, { "epoch": 0.01755286492144489, "grad_norm": 0.0018847067840397358, "learning_rate": 8.345162651473958e-05, "loss": 46.0, "step": 872 }, { "epoch": 0.017572994353694252, "grad_norm": 0.001316647743806243, "learning_rate": 8.325292529004396e-05, "loss": 46.0, "step": 873 }, { "epoch": 0.017593123785943618, "grad_norm": 0.0013123692478984594, "learning_rate": 8.305429207120657e-05, "loss": 46.0, "step": 874 }, { "epoch": 0.01761325321819298, "grad_norm": 0.0015352462651208043, "learning_rate": 8.285572766482934e-05, "loss": 46.0, "step": 875 }, { "epoch": 0.017633382650442345, "grad_norm": 0.0008602976449765265, "learning_rate": 8.265723287723474e-05, "loss": 46.0, "step": 876 }, { "epoch": 0.017653512082691707, "grad_norm": 0.0016124986577779055, "learning_rate": 8.245880851446255e-05, "loss": 46.0, "step": 877 }, { "epoch": 0.017673641514941073, "grad_norm": 0.0013488165568560362, "learning_rate": 8.226045538226657e-05, "loss": 46.0, "step": 878 }, { "epoch": 0.017693770947190435, "grad_norm": 0.0016552746528759599, "learning_rate": 8.20621742861114e-05, "loss": 46.0, "step": 879 }, { "epoch": 0.017713900379439797, "grad_norm": 0.0012239515781402588, "learning_rate": 8.186396603116897e-05, "loss": 46.0, "step": 880 }, { "epoch": 0.017734029811689162, "grad_norm": 0.0016428582603111863, "learning_rate": 8.166583142231557e-05, "loss": 46.0, "step": 881 }, { "epoch": 0.017754159243938524, "grad_norm": 0.0007930579595267773, "learning_rate": 8.146777126412837e-05, "loss": 46.0, "step": 882 }, { "epoch": 0.01777428867618789, "grad_norm": 0.002032884396612644, "learning_rate": 8.126978636088222e-05, "loss": 46.0, "step": 883 }, { "epoch": 0.01779441810843725, "grad_norm": 0.0012445595348253846, "learning_rate": 8.107187751654642e-05, "loss": 46.0, "step": 884 }, { "epoch": 0.017814547540686614, "grad_norm": 0.0013359242584556341, "learning_rate": 8.087404553478132e-05, "loss": 46.0, "step": 885 }, { "epoch": 0.01783467697293598, "grad_norm": 0.0010678229155018926, "learning_rate": 8.067629121893525e-05, "loss": 46.0, "step": 886 }, { "epoch": 0.01785480640518534, "grad_norm": 0.0020257853902876377, "learning_rate": 8.047861537204107e-05, "loss": 46.0, "step": 887 }, { "epoch": 0.017874935837434706, "grad_norm": 0.001662117661908269, "learning_rate": 8.028101879681304e-05, "loss": 46.0, "step": 888 }, { "epoch": 0.01789506526968407, "grad_norm": 0.0010253286454826593, "learning_rate": 8.008350229564351e-05, "loss": 46.0, "step": 889 }, { "epoch": 0.01791519470193343, "grad_norm": 0.001274330890737474, "learning_rate": 7.988606667059972e-05, "loss": 46.0, "step": 890 }, { "epoch": 0.017935324134182796, "grad_norm": 0.0004866346425842494, "learning_rate": 7.968871272342038e-05, "loss": 46.0, "step": 891 }, { "epoch": 0.017955453566432158, "grad_norm": 0.0010090246796607971, "learning_rate": 7.949144125551263e-05, "loss": 46.0, "step": 892 }, { "epoch": 0.017975582998681523, "grad_norm": 0.0010872040875256062, "learning_rate": 7.929425306794867e-05, "loss": 46.0, "step": 893 }, { "epoch": 0.017995712430930885, "grad_norm": 0.0005780845531262457, "learning_rate": 7.909714896146239e-05, "loss": 46.0, "step": 894 }, { "epoch": 0.01801584186318025, "grad_norm": 0.0005967924953438342, "learning_rate": 7.89001297364464e-05, "loss": 46.0, "step": 895 }, { "epoch": 0.018035971295429613, "grad_norm": 0.001067324192263186, "learning_rate": 7.870319619294859e-05, "loss": 46.0, "step": 896 }, { "epoch": 0.018056100727678975, "grad_norm": 0.0020970788318663836, "learning_rate": 7.850634913066887e-05, "loss": 46.0, "step": 897 }, { "epoch": 0.01807623015992834, "grad_norm": 0.00045425730058923364, "learning_rate": 7.830958934895602e-05, "loss": 46.0, "step": 898 }, { "epoch": 0.018096359592177702, "grad_norm": 0.000873990764375776, "learning_rate": 7.811291764680436e-05, "loss": 46.0, "step": 899 }, { "epoch": 0.018116489024427068, "grad_norm": 0.0016305999597534537, "learning_rate": 7.791633482285056e-05, "loss": 46.0, "step": 900 }, { "epoch": 0.01813661845667643, "grad_norm": 0.0013768981443718076, "learning_rate": 7.771984167537041e-05, "loss": 46.0, "step": 901 }, { "epoch": 0.01815674788892579, "grad_norm": 0.0021705874241888523, "learning_rate": 7.752343900227545e-05, "loss": 46.0, "step": 902 }, { "epoch": 0.018176877321175157, "grad_norm": 0.0011179293505847454, "learning_rate": 7.73271276011099e-05, "loss": 46.0, "step": 903 }, { "epoch": 0.01819700675342452, "grad_norm": 0.0013696214882656932, "learning_rate": 7.713090826904732e-05, "loss": 46.0, "step": 904 }, { "epoch": 0.018217136185673884, "grad_norm": 0.0017531095072627068, "learning_rate": 7.693478180288745e-05, "loss": 46.0, "step": 905 }, { "epoch": 0.018237265617923246, "grad_norm": 0.0006764591089449823, "learning_rate": 7.673874899905284e-05, "loss": 46.0, "step": 906 }, { "epoch": 0.01825739505017261, "grad_norm": 0.0009293857146985829, "learning_rate": 7.654281065358575e-05, "loss": 46.0, "step": 907 }, { "epoch": 0.018277524482421974, "grad_norm": 0.0015665123937651515, "learning_rate": 7.634696756214492e-05, "loss": 46.0, "step": 908 }, { "epoch": 0.018297653914671336, "grad_norm": 0.0009497758583165705, "learning_rate": 7.615122052000212e-05, "loss": 46.0, "step": 909 }, { "epoch": 0.0183177833469207, "grad_norm": 0.0009189668344333768, "learning_rate": 7.595557032203924e-05, "loss": 46.0, "step": 910 }, { "epoch": 0.018337912779170063, "grad_norm": 0.0012546752113848925, "learning_rate": 7.576001776274488e-05, "loss": 46.0, "step": 911 }, { "epoch": 0.01835804221141943, "grad_norm": 0.000910087488591671, "learning_rate": 7.556456363621112e-05, "loss": 46.0, "step": 912 }, { "epoch": 0.01837817164366879, "grad_norm": 0.0017347291577607393, "learning_rate": 7.536920873613034e-05, "loss": 46.0, "step": 913 }, { "epoch": 0.018398301075918153, "grad_norm": 0.0019821105524897575, "learning_rate": 7.517395385579198e-05, "loss": 46.0, "step": 914 }, { "epoch": 0.018418430508167518, "grad_norm": 0.0015641407808288932, "learning_rate": 7.497879978807934e-05, "loss": 46.0, "step": 915 }, { "epoch": 0.01843855994041688, "grad_norm": 0.0009593809954822063, "learning_rate": 7.478374732546635e-05, "loss": 46.0, "step": 916 }, { "epoch": 0.018458689372666245, "grad_norm": 0.001673855702392757, "learning_rate": 7.458879726001431e-05, "loss": 46.0, "step": 917 }, { "epoch": 0.018478818804915607, "grad_norm": 0.0014266518410295248, "learning_rate": 7.439395038336871e-05, "loss": 46.0, "step": 918 }, { "epoch": 0.01849894823716497, "grad_norm": 0.0007388940430246294, "learning_rate": 7.41992074867561e-05, "loss": 46.0, "step": 919 }, { "epoch": 0.018519077669414335, "grad_norm": 0.0009342276025563478, "learning_rate": 7.400456936098066e-05, "loss": 46.0, "step": 920 }, { "epoch": 0.018539207101663697, "grad_norm": 0.0018599577015265822, "learning_rate": 7.381003679642124e-05, "loss": 46.0, "step": 921 }, { "epoch": 0.018559336533913062, "grad_norm": 0.0007527298876084387, "learning_rate": 7.361561058302795e-05, "loss": 46.0, "step": 922 }, { "epoch": 0.018579465966162424, "grad_norm": 0.0017750163096934557, "learning_rate": 7.342129151031911e-05, "loss": 46.0, "step": 923 }, { "epoch": 0.018599595398411786, "grad_norm": 0.0008317319443449378, "learning_rate": 7.322708036737784e-05, "loss": 46.0, "step": 924 }, { "epoch": 0.01861972483066115, "grad_norm": 0.0015617224853485823, "learning_rate": 7.303297794284911e-05, "loss": 46.0, "step": 925 }, { "epoch": 0.018639854262910514, "grad_norm": 0.0008347228285856545, "learning_rate": 7.283898502493637e-05, "loss": 46.0, "step": 926 }, { "epoch": 0.01865998369515988, "grad_norm": 0.0011498607927933335, "learning_rate": 7.264510240139836e-05, "loss": 46.0, "step": 927 }, { "epoch": 0.01868011312740924, "grad_norm": 0.001822000602260232, "learning_rate": 7.245133085954598e-05, "loss": 46.0, "step": 928 }, { "epoch": 0.018700242559658607, "grad_norm": 0.001454083132557571, "learning_rate": 7.225767118623906e-05, "loss": 46.0, "step": 929 }, { "epoch": 0.01872037199190797, "grad_norm": 0.0008696825243532658, "learning_rate": 7.206412416788311e-05, "loss": 46.0, "step": 930 }, { "epoch": 0.01874050142415733, "grad_norm": 0.002098069293424487, "learning_rate": 7.18706905904262e-05, "loss": 46.0, "step": 931 }, { "epoch": 0.018760630856406696, "grad_norm": 0.0014457725919783115, "learning_rate": 7.167737123935574e-05, "loss": 46.0, "step": 932 }, { "epoch": 0.018780760288656058, "grad_norm": 0.0008266555378213525, "learning_rate": 7.148416689969533e-05, "loss": 46.0, "step": 933 }, { "epoch": 0.018800889720905423, "grad_norm": 0.0019346848130226135, "learning_rate": 7.129107835600149e-05, "loss": 46.0, "step": 934 }, { "epoch": 0.018821019153154785, "grad_norm": 0.0005959367263130844, "learning_rate": 7.109810639236051e-05, "loss": 46.0, "step": 935 }, { "epoch": 0.018841148585404147, "grad_norm": 0.0015214644372463226, "learning_rate": 7.090525179238538e-05, "loss": 46.0, "step": 936 }, { "epoch": 0.018861278017653513, "grad_norm": 0.0008678233716636896, "learning_rate": 7.071251533921235e-05, "loss": 46.0, "step": 937 }, { "epoch": 0.018881407449902875, "grad_norm": 0.0013784753391519189, "learning_rate": 7.051989781549806e-05, "loss": 46.0, "step": 938 }, { "epoch": 0.01890153688215224, "grad_norm": 0.000639195553958416, "learning_rate": 7.032740000341604e-05, "loss": 46.0, "step": 939 }, { "epoch": 0.018921666314401602, "grad_norm": 0.002253234386444092, "learning_rate": 7.013502268465382e-05, "loss": 46.0, "step": 940 }, { "epoch": 0.018941795746650964, "grad_norm": 0.0008279394824057817, "learning_rate": 6.994276664040962e-05, "loss": 46.0, "step": 941 }, { "epoch": 0.01896192517890033, "grad_norm": 0.0014505049912258983, "learning_rate": 6.975063265138915e-05, "loss": 46.0, "step": 942 }, { "epoch": 0.01898205461114969, "grad_norm": 0.0012778080999851227, "learning_rate": 6.955862149780251e-05, "loss": 46.0, "step": 943 }, { "epoch": 0.019002184043399057, "grad_norm": 0.0007853744900785387, "learning_rate": 6.936673395936103e-05, "loss": 46.0, "step": 944 }, { "epoch": 0.01902231347564842, "grad_norm": 0.0013930387794971466, "learning_rate": 6.917497081527405e-05, "loss": 46.0, "step": 945 }, { "epoch": 0.019042442907897784, "grad_norm": 0.0013855951838195324, "learning_rate": 6.898333284424568e-05, "loss": 46.0, "step": 946 }, { "epoch": 0.019062572340147146, "grad_norm": 0.000777574663516134, "learning_rate": 6.879182082447185e-05, "loss": 46.0, "step": 947 }, { "epoch": 0.01908270177239651, "grad_norm": 0.0013940025819465518, "learning_rate": 6.860043553363697e-05, "loss": 46.0, "step": 948 }, { "epoch": 0.019102831204645874, "grad_norm": 0.0007605886785313487, "learning_rate": 6.840917774891089e-05, "loss": 46.0, "step": 949 }, { "epoch": 0.019122960636895236, "grad_norm": 0.0017117703100666404, "learning_rate": 6.821804824694564e-05, "loss": 46.0, "step": 950 }, { "epoch": 0.0191430900691446, "grad_norm": 0.0016022106865420938, "learning_rate": 6.802704780387233e-05, "loss": 46.0, "step": 951 }, { "epoch": 0.019163219501393963, "grad_norm": 0.0010787155479192734, "learning_rate": 6.7836177195298e-05, "loss": 46.0, "step": 952 }, { "epoch": 0.019183348933643325, "grad_norm": 0.0008270377875305712, "learning_rate": 6.764543719630247e-05, "loss": 46.0, "step": 953 }, { "epoch": 0.01920347836589269, "grad_norm": 0.0011401512892916799, "learning_rate": 6.745482858143519e-05, "loss": 46.0, "step": 954 }, { "epoch": 0.019223607798142053, "grad_norm": 0.0010015569860115647, "learning_rate": 6.726435212471205e-05, "loss": 46.0, "step": 955 }, { "epoch": 0.019243737230391418, "grad_norm": 0.0017946928273886442, "learning_rate": 6.707400859961233e-05, "loss": 46.0, "step": 956 }, { "epoch": 0.01926386666264078, "grad_norm": 0.0014648939250037074, "learning_rate": 6.688379877907548e-05, "loss": 46.0, "step": 957 }, { "epoch": 0.019283996094890142, "grad_norm": 0.0010645122965797782, "learning_rate": 6.6693723435498e-05, "loss": 46.0, "step": 958 }, { "epoch": 0.019304125527139507, "grad_norm": 0.0009420996648259461, "learning_rate": 6.650378334073036e-05, "loss": 46.0, "step": 959 }, { "epoch": 0.01932425495938887, "grad_norm": 0.0014943941496312618, "learning_rate": 6.631397926607376e-05, "loss": 46.0, "step": 960 }, { "epoch": 0.019344384391638235, "grad_norm": 0.001286246464587748, "learning_rate": 6.612431198227707e-05, "loss": 46.0, "step": 961 }, { "epoch": 0.019364513823887597, "grad_norm": 0.0008747805841267109, "learning_rate": 6.593478225953366e-05, "loss": 46.0, "step": 962 }, { "epoch": 0.019384643256136962, "grad_norm": 0.0008240886963903904, "learning_rate": 6.574539086747837e-05, "loss": 46.0, "step": 963 }, { "epoch": 0.019404772688386324, "grad_norm": 0.001010640524327755, "learning_rate": 6.555613857518425e-05, "loss": 46.0, "step": 964 }, { "epoch": 0.019424902120635686, "grad_norm": 0.0015888873022049665, "learning_rate": 6.536702615115954e-05, "loss": 46.0, "step": 965 }, { "epoch": 0.019445031552885052, "grad_norm": 0.0007431007106788456, "learning_rate": 6.517805436334447e-05, "loss": 46.0, "step": 966 }, { "epoch": 0.019465160985134414, "grad_norm": 0.0009085469646379352, "learning_rate": 6.498922397910822e-05, "loss": 46.0, "step": 967 }, { "epoch": 0.01948529041738378, "grad_norm": 0.0016039622714743018, "learning_rate": 6.48005357652457e-05, "loss": 46.0, "step": 968 }, { "epoch": 0.01950541984963314, "grad_norm": 0.0009189580450765789, "learning_rate": 6.461199048797457e-05, "loss": 46.0, "step": 969 }, { "epoch": 0.019525549281882503, "grad_norm": 0.0008581737638451159, "learning_rate": 6.442358891293201e-05, "loss": 46.0, "step": 970 }, { "epoch": 0.01954567871413187, "grad_norm": 0.001615122426301241, "learning_rate": 6.423533180517171e-05, "loss": 46.0, "step": 971 }, { "epoch": 0.01956580814638123, "grad_norm": 0.001174842705950141, "learning_rate": 6.404721992916068e-05, "loss": 46.0, "step": 972 }, { "epoch": 0.019585937578630596, "grad_norm": 0.002247781027108431, "learning_rate": 6.385925404877616e-05, "loss": 46.0, "step": 973 }, { "epoch": 0.019606067010879958, "grad_norm": 0.0019079920602962375, "learning_rate": 6.367143492730257e-05, "loss": 46.0, "step": 974 }, { "epoch": 0.01962619644312932, "grad_norm": 0.0024635521695017815, "learning_rate": 6.34837633274284e-05, "loss": 46.0, "step": 975 }, { "epoch": 0.019646325875378685, "grad_norm": 0.0010142981773242354, "learning_rate": 6.329624001124297e-05, "loss": 46.0, "step": 976 }, { "epoch": 0.019666455307628047, "grad_norm": 0.0005801988299936056, "learning_rate": 6.310886574023362e-05, "loss": 46.0, "step": 977 }, { "epoch": 0.019686584739877413, "grad_norm": 0.001695830374956131, "learning_rate": 6.292164127528232e-05, "loss": 46.0, "step": 978 }, { "epoch": 0.019706714172126775, "grad_norm": 0.0005539690027944744, "learning_rate": 6.273456737666281e-05, "loss": 46.0, "step": 979 }, { "epoch": 0.01972684360437614, "grad_norm": 0.0017788794357329607, "learning_rate": 6.254764480403736e-05, "loss": 46.0, "step": 980 }, { "epoch": 0.019746973036625502, "grad_norm": 0.0011399569921195507, "learning_rate": 6.236087431645376e-05, "loss": 46.0, "step": 981 }, { "epoch": 0.019767102468874864, "grad_norm": 0.000878597202245146, "learning_rate": 6.217425667234223e-05, "loss": 46.0, "step": 982 }, { "epoch": 0.01978723190112423, "grad_norm": 0.0008538950351066887, "learning_rate": 6.198779262951225e-05, "loss": 46.0, "step": 983 }, { "epoch": 0.01980736133337359, "grad_norm": 0.0015830093761906028, "learning_rate": 6.180148294514969e-05, "loss": 46.0, "step": 984 }, { "epoch": 0.019827490765622957, "grad_norm": 0.0015480596339330077, "learning_rate": 6.161532837581352e-05, "loss": 46.0, "step": 985 }, { "epoch": 0.01984762019787232, "grad_norm": 0.001011079759337008, "learning_rate": 6.142932967743284e-05, "loss": 46.0, "step": 986 }, { "epoch": 0.01986774963012168, "grad_norm": 0.0016792715759947896, "learning_rate": 6.124348760530383e-05, "loss": 46.0, "step": 987 }, { "epoch": 0.019887879062371046, "grad_norm": 0.002266524126753211, "learning_rate": 6.10578029140866e-05, "loss": 46.0, "step": 988 }, { "epoch": 0.01990800849462041, "grad_norm": 0.0016408918891102076, "learning_rate": 6.087227635780225e-05, "loss": 46.0, "step": 989 }, { "epoch": 0.019928137926869774, "grad_norm": 0.00135867390781641, "learning_rate": 6.068690868982958e-05, "loss": 46.0, "step": 990 }, { "epoch": 0.019948267359119136, "grad_norm": 0.0008750604465603828, "learning_rate": 6.050170066290234e-05, "loss": 46.0, "step": 991 }, { "epoch": 0.019968396791368498, "grad_norm": 0.0007627055747434497, "learning_rate": 6.031665302910594e-05, "loss": 46.0, "step": 992 }, { "epoch": 0.019988526223617863, "grad_norm": 0.0013612302718684077, "learning_rate": 6.01317665398745e-05, "loss": 46.0, "step": 993 }, { "epoch": 0.020008655655867225, "grad_norm": 0.0021670411806553602, "learning_rate": 5.994704194598775e-05, "loss": 46.0, "step": 994 }, { "epoch": 0.02002878508811659, "grad_norm": 0.0007712049409747124, "learning_rate": 5.976247999756802e-05, "loss": 46.0, "step": 995 }, { "epoch": 0.020048914520365953, "grad_norm": 0.0009516848367638886, "learning_rate": 5.957808144407716e-05, "loss": 46.0, "step": 996 }, { "epoch": 0.020069043952615318, "grad_norm": 0.0009539611055515707, "learning_rate": 5.9393847034313544e-05, "loss": 46.0, "step": 997 }, { "epoch": 0.02008917338486468, "grad_norm": 0.0009101739851757884, "learning_rate": 5.9209777516408924e-05, "loss": 46.0, "step": 998 }, { "epoch": 0.020109302817114042, "grad_norm": 0.0015371376648545265, "learning_rate": 5.902587363782553e-05, "loss": 46.0, "step": 999 }, { "epoch": 0.020129432249363408, "grad_norm": 0.0016428247326985002, "learning_rate": 5.884213614535296e-05, "loss": 46.0, "step": 1000 }, { "epoch": 0.02014956168161277, "grad_norm": 0.0010306787444278598, "learning_rate": 5.865856578510518e-05, "loss": 46.0, "step": 1001 }, { "epoch": 0.020169691113862135, "grad_norm": 0.0024218596518039703, "learning_rate": 5.847516330251741e-05, "loss": 46.0, "step": 1002 }, { "epoch": 0.020189820546111497, "grad_norm": 0.0018254693131893873, "learning_rate": 5.829192944234321e-05, "loss": 46.0, "step": 1003 }, { "epoch": 0.02020994997836086, "grad_norm": 0.00036783842369914055, "learning_rate": 5.8108864948651385e-05, "loss": 46.0, "step": 1004 }, { "epoch": 0.020230079410610224, "grad_norm": 0.0017898846417665482, "learning_rate": 5.792597056482294e-05, "loss": 46.0, "step": 1005 }, { "epoch": 0.020250208842859586, "grad_norm": 0.0014998508850112557, "learning_rate": 5.774324703354824e-05, "loss": 46.0, "step": 1006 }, { "epoch": 0.020270338275108952, "grad_norm": 0.001964285271242261, "learning_rate": 5.756069509682368e-05, "loss": 46.0, "step": 1007 }, { "epoch": 0.020290467707358314, "grad_norm": 0.00039214518619701266, "learning_rate": 5.737831549594903e-05, "loss": 46.0, "step": 1008 }, { "epoch": 0.020310597139607676, "grad_norm": 0.001849150750786066, "learning_rate": 5.719610897152405e-05, "loss": 46.0, "step": 1009 }, { "epoch": 0.02033072657185704, "grad_norm": 0.0016469095135107636, "learning_rate": 5.70140762634459e-05, "loss": 46.0, "step": 1010 }, { "epoch": 0.020350856004106403, "grad_norm": 0.0011177381966263056, "learning_rate": 5.683221811090569e-05, "loss": 46.0, "step": 1011 }, { "epoch": 0.02037098543635577, "grad_norm": 0.001804203144274652, "learning_rate": 5.665053525238595e-05, "loss": 46.0, "step": 1012 }, { "epoch": 0.02039111486860513, "grad_norm": 0.0011881906539201736, "learning_rate": 5.646902842565707e-05, "loss": 46.0, "step": 1013 }, { "epoch": 0.020411244300854496, "grad_norm": 0.0010538804344832897, "learning_rate": 5.6287698367774897e-05, "loss": 46.0, "step": 1014 }, { "epoch": 0.020431373733103858, "grad_norm": 0.0013392227701842785, "learning_rate": 5.610654581507728e-05, "loss": 46.0, "step": 1015 }, { "epoch": 0.02045150316535322, "grad_norm": 0.0014469471061602235, "learning_rate": 5.592557150318145e-05, "loss": 46.0, "step": 1016 }, { "epoch": 0.020471632597602585, "grad_norm": 0.0014158233534544706, "learning_rate": 5.574477616698061e-05, "loss": 46.0, "step": 1017 }, { "epoch": 0.020491762029851947, "grad_norm": 0.00027452100766822696, "learning_rate": 5.5564160540641394e-05, "loss": 46.0, "step": 1018 }, { "epoch": 0.020511891462101313, "grad_norm": 0.001543979742564261, "learning_rate": 5.538372535760057e-05, "loss": 46.0, "step": 1019 }, { "epoch": 0.020532020894350675, "grad_norm": 0.0013933605514466763, "learning_rate": 5.520347135056212e-05, "loss": 46.0, "step": 1020 }, { "epoch": 0.020552150326600037, "grad_norm": 0.0014277072623372078, "learning_rate": 5.502339925149449e-05, "loss": 46.0, "step": 1021 }, { "epoch": 0.020572279758849402, "grad_norm": 0.001016245107166469, "learning_rate": 5.484350979162724e-05, "loss": 46.0, "step": 1022 }, { "epoch": 0.020592409191098764, "grad_norm": 0.0023218076676130295, "learning_rate": 5.466380370144843e-05, "loss": 46.0, "step": 1023 }, { "epoch": 0.02061253862334813, "grad_norm": 0.0016895633889362216, "learning_rate": 5.448428171070141e-05, "loss": 46.0, "step": 1024 }, { "epoch": 0.02063266805559749, "grad_norm": 0.0021997210569679737, "learning_rate": 5.430494454838202e-05, "loss": 46.0, "step": 1025 }, { "epoch": 0.020652797487846854, "grad_norm": 0.0012985903304070234, "learning_rate": 5.412579294273547e-05, "loss": 46.0, "step": 1026 }, { "epoch": 0.02067292692009622, "grad_norm": 0.0007923256489448249, "learning_rate": 5.3946827621253514e-05, "loss": 46.0, "step": 1027 }, { "epoch": 0.02069305635234558, "grad_norm": 0.0030492981895804405, "learning_rate": 5.376804931067141e-05, "loss": 46.0, "step": 1028 }, { "epoch": 0.020713185784594947, "grad_norm": 0.0012466289335861802, "learning_rate": 5.358945873696514e-05, "loss": 46.0, "step": 1029 }, { "epoch": 0.02073331521684431, "grad_norm": 0.0008472254266962409, "learning_rate": 5.3411056625348135e-05, "loss": 46.0, "step": 1030 }, { "epoch": 0.020753444649093674, "grad_norm": 0.00038629811024293303, "learning_rate": 5.323284370026873e-05, "loss": 46.0, "step": 1031 }, { "epoch": 0.020773574081343036, "grad_norm": 0.001900954986922443, "learning_rate": 5.3054820685406817e-05, "loss": 46.0, "step": 1032 }, { "epoch": 0.020793703513592398, "grad_norm": 0.0016893133288249373, "learning_rate": 5.2876988303671316e-05, "loss": 46.0, "step": 1033 }, { "epoch": 0.020813832945841763, "grad_norm": 0.001547012128867209, "learning_rate": 5.269934727719685e-05, "loss": 46.0, "step": 1034 }, { "epoch": 0.020833962378091125, "grad_norm": 0.0023651174269616604, "learning_rate": 5.252189832734108e-05, "loss": 46.0, "step": 1035 }, { "epoch": 0.02085409181034049, "grad_norm": 0.0014698889572173357, "learning_rate": 5.2344642174681716e-05, "loss": 46.0, "step": 1036 }, { "epoch": 0.020874221242589853, "grad_norm": 0.0011334436712786555, "learning_rate": 5.2167579539013456e-05, "loss": 46.0, "step": 1037 }, { "epoch": 0.020894350674839215, "grad_norm": 0.0009258949430659413, "learning_rate": 5.199071113934533e-05, "loss": 46.0, "step": 1038 }, { "epoch": 0.02091448010708858, "grad_norm": 0.0010753805981948972, "learning_rate": 5.1814037693897464e-05, "loss": 46.0, "step": 1039 }, { "epoch": 0.020934609539337942, "grad_norm": 0.0010734342504292727, "learning_rate": 5.1637559920098466e-05, "loss": 46.0, "step": 1040 }, { "epoch": 0.020954738971587308, "grad_norm": 0.0009417047840543091, "learning_rate": 5.146127853458225e-05, "loss": 46.0, "step": 1041 }, { "epoch": 0.02097486840383667, "grad_norm": 0.0005359546048566699, "learning_rate": 5.1285194253185295e-05, "loss": 46.0, "step": 1042 }, { "epoch": 0.02099499783608603, "grad_norm": 0.0010907109826803207, "learning_rate": 5.110930779094365e-05, "loss": 46.0, "step": 1043 }, { "epoch": 0.021015127268335397, "grad_norm": 0.0011225123889744282, "learning_rate": 5.093361986209015e-05, "loss": 46.0, "step": 1044 }, { "epoch": 0.02103525670058476, "grad_norm": 0.0012630325509235263, "learning_rate": 5.075813118005135e-05, "loss": 46.0, "step": 1045 }, { "epoch": 0.021055386132834124, "grad_norm": 0.00048818206414580345, "learning_rate": 5.05828424574448e-05, "loss": 46.0, "step": 1046 }, { "epoch": 0.021075515565083486, "grad_norm": 0.0010798347648233175, "learning_rate": 5.0407754406075926e-05, "loss": 46.0, "step": 1047 }, { "epoch": 0.021095644997332852, "grad_norm": 0.0014009498991072178, "learning_rate": 5.023286773693541e-05, "loss": 46.0, "step": 1048 }, { "epoch": 0.021115774429582214, "grad_norm": 0.0009970440296456218, "learning_rate": 5.005818316019618e-05, "loss": 46.0, "step": 1049 }, { "epoch": 0.021135903861831576, "grad_norm": 0.0017505913274362683, "learning_rate": 4.988370138521031e-05, "loss": 46.0, "step": 1050 }, { "epoch": 0.02115603329408094, "grad_norm": 0.0009628281113691628, "learning_rate": 4.970942312050659e-05, "loss": 46.0, "step": 1051 }, { "epoch": 0.021176162726330303, "grad_norm": 0.0015546507202088833, "learning_rate": 4.953534907378724e-05, "loss": 46.0, "step": 1052 }, { "epoch": 0.02119629215857967, "grad_norm": 0.0013801100431010127, "learning_rate": 4.9361479951925314e-05, "loss": 46.0, "step": 1053 }, { "epoch": 0.02121642159082903, "grad_norm": 0.0011676892172545195, "learning_rate": 4.918781646096161e-05, "loss": 46.0, "step": 1054 }, { "epoch": 0.021236551023078393, "grad_norm": 0.0014288736274465919, "learning_rate": 4.901435930610202e-05, "loss": 46.0, "step": 1055 }, { "epoch": 0.021256680455327758, "grad_norm": 0.0012626959942281246, "learning_rate": 4.884110919171446e-05, "loss": 46.0, "step": 1056 }, { "epoch": 0.02127680988757712, "grad_norm": 0.0011516448576003313, "learning_rate": 4.866806682132611e-05, "loss": 46.0, "step": 1057 }, { "epoch": 0.021296939319826486, "grad_norm": 0.0008224432240240276, "learning_rate": 4.849523289762063e-05, "loss": 46.0, "step": 1058 }, { "epoch": 0.021317068752075848, "grad_norm": 0.0012658087071031332, "learning_rate": 4.832260812243513e-05, "loss": 46.0, "step": 1059 }, { "epoch": 0.02133719818432521, "grad_norm": 0.0008029589662328362, "learning_rate": 4.8150193196757533e-05, "loss": 46.0, "step": 1060 }, { "epoch": 0.021357327616574575, "grad_norm": 0.0006066640489734709, "learning_rate": 4.7977988820723454e-05, "loss": 46.0, "step": 1061 }, { "epoch": 0.021377457048823937, "grad_norm": 0.0006281784153543413, "learning_rate": 4.78059956936137e-05, "loss": 46.0, "step": 1062 }, { "epoch": 0.021397586481073302, "grad_norm": 0.0009794096695259213, "learning_rate": 4.7634214513851125e-05, "loss": 46.0, "step": 1063 }, { "epoch": 0.021417715913322664, "grad_norm": 0.0011547215981408954, "learning_rate": 4.746264597899792e-05, "loss": 46.0, "step": 1064 }, { "epoch": 0.02143784534557203, "grad_norm": 0.0010263699805364013, "learning_rate": 4.7291290785752795e-05, "loss": 46.0, "step": 1065 }, { "epoch": 0.021457974777821392, "grad_norm": 0.0010340644512325525, "learning_rate": 4.7120149629948185e-05, "loss": 46.0, "step": 1066 }, { "epoch": 0.021478104210070754, "grad_norm": 0.0010057146428152919, "learning_rate": 4.694922320654727e-05, "loss": 46.0, "step": 1067 }, { "epoch": 0.02149823364232012, "grad_norm": 0.0014495252398774028, "learning_rate": 4.677851220964136e-05, "loss": 46.0, "step": 1068 }, { "epoch": 0.02151836307456948, "grad_norm": 0.0006856170948594809, "learning_rate": 4.660801733244685e-05, "loss": 46.0, "step": 1069 }, { "epoch": 0.021538492506818847, "grad_norm": 0.0015510255470871925, "learning_rate": 4.643773926730267e-05, "loss": 46.0, "step": 1070 }, { "epoch": 0.02155862193906821, "grad_norm": 0.0007154226186685264, "learning_rate": 4.626767870566722e-05, "loss": 46.0, "step": 1071 }, { "epoch": 0.02157875137131757, "grad_norm": 0.0010833822889253497, "learning_rate": 4.6097836338115626e-05, "loss": 46.0, "step": 1072 }, { "epoch": 0.021598880803566936, "grad_norm": 0.001424678135663271, "learning_rate": 4.592821285433716e-05, "loss": 46.0, "step": 1073 }, { "epoch": 0.021619010235816298, "grad_norm": 0.0014538168907165527, "learning_rate": 4.575880894313207e-05, "loss": 46.0, "step": 1074 }, { "epoch": 0.021639139668065663, "grad_norm": 0.0019246512092649937, "learning_rate": 4.558962529240913e-05, "loss": 46.0, "step": 1075 }, { "epoch": 0.021659269100315025, "grad_norm": 0.0011248672381043434, "learning_rate": 4.5420662589182525e-05, "loss": 46.0, "step": 1076 }, { "epoch": 0.021679398532564387, "grad_norm": 0.0008184146718122065, "learning_rate": 4.5251921519569404e-05, "loss": 46.0, "step": 1077 }, { "epoch": 0.021699527964813753, "grad_norm": 0.0008564481395296752, "learning_rate": 4.508340276878679e-05, "loss": 46.0, "step": 1078 }, { "epoch": 0.021719657397063115, "grad_norm": 0.001509159803390503, "learning_rate": 4.491510702114894e-05, "loss": 46.0, "step": 1079 }, { "epoch": 0.02173978682931248, "grad_norm": 0.0011579337297007442, "learning_rate": 4.474703496006455e-05, "loss": 46.0, "step": 1080 }, { "epoch": 0.021759916261561842, "grad_norm": 0.0016495742602273822, "learning_rate": 4.457918726803404e-05, "loss": 46.0, "step": 1081 }, { "epoch": 0.021780045693811208, "grad_norm": 0.002713642781600356, "learning_rate": 4.441156462664663e-05, "loss": 46.0, "step": 1082 }, { "epoch": 0.02180017512606057, "grad_norm": 0.0008418328943662345, "learning_rate": 4.424416771657778e-05, "loss": 46.0, "step": 1083 }, { "epoch": 0.02182030455830993, "grad_norm": 0.0012767758453264832, "learning_rate": 4.407699721758614e-05, "loss": 46.0, "step": 1084 }, { "epoch": 0.021840433990559297, "grad_norm": 0.0016243146965280175, "learning_rate": 4.391005380851115e-05, "loss": 46.0, "step": 1085 }, { "epoch": 0.02186056342280866, "grad_norm": 0.0008095060475170612, "learning_rate": 4.3743338167269955e-05, "loss": 46.0, "step": 1086 }, { "epoch": 0.021880692855058025, "grad_norm": 0.0006309272139333189, "learning_rate": 4.35768509708548e-05, "loss": 46.0, "step": 1087 }, { "epoch": 0.021900822287307387, "grad_norm": 0.0012147061061114073, "learning_rate": 4.3410592895330385e-05, "loss": 46.0, "step": 1088 }, { "epoch": 0.02192095171955675, "grad_norm": 0.0011163427261635661, "learning_rate": 4.324456461583084e-05, "loss": 46.0, "step": 1089 }, { "epoch": 0.021941081151806114, "grad_norm": 0.0011350977001711726, "learning_rate": 4.30787668065573e-05, "loss": 46.0, "step": 1090 }, { "epoch": 0.021961210584055476, "grad_norm": 0.0018085506744682789, "learning_rate": 4.291320014077488e-05, "loss": 46.0, "step": 1091 }, { "epoch": 0.02198134001630484, "grad_norm": 0.0012762745609506965, "learning_rate": 4.274786529081023e-05, "loss": 46.0, "step": 1092 }, { "epoch": 0.022001469448554203, "grad_norm": 0.0016571565065532923, "learning_rate": 4.2582762928048516e-05, "loss": 46.0, "step": 1093 }, { "epoch": 0.022021598880803565, "grad_norm": 0.0008646156056784093, "learning_rate": 4.241789372293087e-05, "loss": 46.0, "step": 1094 }, { "epoch": 0.02204172831305293, "grad_norm": 0.0007551155867986381, "learning_rate": 4.2253258344951616e-05, "loss": 46.0, "step": 1095 }, { "epoch": 0.022061857745302293, "grad_norm": 0.0014457149663940072, "learning_rate": 4.208885746265565e-05, "loss": 46.0, "step": 1096 }, { "epoch": 0.022081987177551658, "grad_norm": 0.0011477636871859431, "learning_rate": 4.1924691743635504e-05, "loss": 46.0, "step": 1097 }, { "epoch": 0.02210211660980102, "grad_norm": 0.0014144850429147482, "learning_rate": 4.1760761854528886e-05, "loss": 46.0, "step": 1098 }, { "epoch": 0.022122246042050386, "grad_norm": 0.0015863333828747272, "learning_rate": 4.159706846101574e-05, "loss": 46.0, "step": 1099 }, { "epoch": 0.022142375474299748, "grad_norm": 0.0018363846465945244, "learning_rate": 4.14336122278158e-05, "loss": 46.0, "step": 1100 }, { "epoch": 0.02216250490654911, "grad_norm": 0.0014002359239384532, "learning_rate": 4.127039381868561e-05, "loss": 46.0, "step": 1101 }, { "epoch": 0.022182634338798475, "grad_norm": 0.0017210880760103464, "learning_rate": 4.1107413896416026e-05, "loss": 46.0, "step": 1102 }, { "epoch": 0.022202763771047837, "grad_norm": 0.0009532097028568387, "learning_rate": 4.0944673122829515e-05, "loss": 46.0, "step": 1103 }, { "epoch": 0.022222893203297202, "grad_norm": 0.0010593609185889363, "learning_rate": 4.0782172158777296e-05, "loss": 46.0, "step": 1104 }, { "epoch": 0.022243022635546564, "grad_norm": 0.0011959448456764221, "learning_rate": 4.0619911664136935e-05, "loss": 46.0, "step": 1105 }, { "epoch": 0.022263152067795926, "grad_norm": 0.0016879923641681671, "learning_rate": 4.045789229780935e-05, "loss": 46.0, "step": 1106 }, { "epoch": 0.022283281500045292, "grad_norm": 0.002184116980060935, "learning_rate": 4.029611471771646e-05, "loss": 46.0, "step": 1107 }, { "epoch": 0.022303410932294654, "grad_norm": 0.0009731932077556849, "learning_rate": 4.0134579580798196e-05, "loss": 46.0, "step": 1108 }, { "epoch": 0.02232354036454402, "grad_norm": 0.0014566800091415644, "learning_rate": 3.9973287543010064e-05, "loss": 46.0, "step": 1109 }, { "epoch": 0.02234366979679338, "grad_norm": 0.0013168570585548878, "learning_rate": 3.981223925932036e-05, "loss": 46.0, "step": 1110 }, { "epoch": 0.022363799229042743, "grad_norm": 0.0014629282522946596, "learning_rate": 3.965143538370763e-05, "loss": 46.0, "step": 1111 }, { "epoch": 0.02238392866129211, "grad_norm": 0.00042389618465676904, "learning_rate": 3.949087656915784e-05, "loss": 46.0, "step": 1112 }, { "epoch": 0.02240405809354147, "grad_norm": 0.0019659867975860834, "learning_rate": 3.933056346766194e-05, "loss": 46.0, "step": 1113 }, { "epoch": 0.022424187525790836, "grad_norm": 0.000607995898462832, "learning_rate": 3.9170496730212944e-05, "loss": 46.0, "step": 1114 }, { "epoch": 0.022444316958040198, "grad_norm": 0.0007831425755284727, "learning_rate": 3.901067700680361e-05, "loss": 46.0, "step": 1115 }, { "epoch": 0.022464446390289564, "grad_norm": 0.0012595128500834107, "learning_rate": 3.885110494642349e-05, "loss": 46.0, "step": 1116 }, { "epoch": 0.022484575822538926, "grad_norm": 0.0016614568885415792, "learning_rate": 3.869178119705648e-05, "loss": 46.0, "step": 1117 }, { "epoch": 0.022504705254788288, "grad_norm": 0.001990359975025058, "learning_rate": 3.853270640567821e-05, "loss": 46.0, "step": 1118 }, { "epoch": 0.022524834687037653, "grad_norm": 0.0012406391324475408, "learning_rate": 3.837388121825323e-05, "loss": 46.0, "step": 1119 }, { "epoch": 0.022544964119287015, "grad_norm": 0.0009122318006120622, "learning_rate": 3.8215306279732654e-05, "loss": 46.0, "step": 1120 }, { "epoch": 0.02256509355153638, "grad_norm": 0.0009612272842787206, "learning_rate": 3.805698223405124e-05, "loss": 46.0, "step": 1121 }, { "epoch": 0.022585222983785742, "grad_norm": 0.0005490859039127827, "learning_rate": 3.789890972412512e-05, "loss": 46.0, "step": 1122 }, { "epoch": 0.022605352416035104, "grad_norm": 0.0007892982102930546, "learning_rate": 3.774108939184874e-05, "loss": 46.0, "step": 1123 }, { "epoch": 0.02262548184828447, "grad_norm": 0.0013402944896370173, "learning_rate": 3.7583521878092766e-05, "loss": 46.0, "step": 1124 }, { "epoch": 0.022645611280533832, "grad_norm": 0.001181807485409081, "learning_rate": 3.7426207822701055e-05, "loss": 46.0, "step": 1125 }, { "epoch": 0.022665740712783197, "grad_norm": 0.0008717543096281588, "learning_rate": 3.7269147864488366e-05, "loss": 46.0, "step": 1126 }, { "epoch": 0.02268587014503256, "grad_norm": 0.0019186298595741391, "learning_rate": 3.711234264123747e-05, "loss": 46.0, "step": 1127 }, { "epoch": 0.02270599957728192, "grad_norm": 0.0011491653276607394, "learning_rate": 3.695579278969693e-05, "loss": 46.0, "step": 1128 }, { "epoch": 0.022726129009531287, "grad_norm": 0.0029499900992959738, "learning_rate": 3.679949894557808e-05, "loss": 46.0, "step": 1129 }, { "epoch": 0.02274625844178065, "grad_norm": 0.0024853902868926525, "learning_rate": 3.664346174355285e-05, "loss": 46.0, "step": 1130 }, { "epoch": 0.022766387874030014, "grad_norm": 0.0006707283901050687, "learning_rate": 3.648768181725093e-05, "loss": 46.0, "step": 1131 }, { "epoch": 0.022786517306279376, "grad_norm": 0.0017571687931194901, "learning_rate": 3.6332159799257205e-05, "loss": 46.0, "step": 1132 }, { "epoch": 0.02280664673852874, "grad_norm": 0.0012608080869540572, "learning_rate": 3.617689632110942e-05, "loss": 46.0, "step": 1133 }, { "epoch": 0.022826776170778103, "grad_norm": 0.002300672000274062, "learning_rate": 3.60218920132953e-05, "loss": 46.0, "step": 1134 }, { "epoch": 0.022846905603027465, "grad_norm": 0.0005551140056923032, "learning_rate": 3.586714750525026e-05, "loss": 46.0, "step": 1135 }, { "epoch": 0.02286703503527683, "grad_norm": 0.0018374003702774644, "learning_rate": 3.571266342535461e-05, "loss": 46.0, "step": 1136 }, { "epoch": 0.022887164467526193, "grad_norm": 0.0016562079545110464, "learning_rate": 3.555844040093129e-05, "loss": 46.0, "step": 1137 }, { "epoch": 0.02290729389977556, "grad_norm": 0.0018418794497847557, "learning_rate": 3.540447905824293e-05, "loss": 46.0, "step": 1138 }, { "epoch": 0.02292742333202492, "grad_norm": 0.00215694191865623, "learning_rate": 3.525078002248974e-05, "loss": 46.0, "step": 1139 }, { "epoch": 0.022947552764274282, "grad_norm": 0.0012009814381599426, "learning_rate": 3.509734391780663e-05, "loss": 46.0, "step": 1140 }, { "epoch": 0.022967682196523648, "grad_norm": 0.0014138978440314531, "learning_rate": 3.494417136726091e-05, "loss": 46.0, "step": 1141 }, { "epoch": 0.02298781162877301, "grad_norm": 0.0006015094695612788, "learning_rate": 3.479126299284957e-05, "loss": 46.0, "step": 1142 }, { "epoch": 0.023007941061022375, "grad_norm": 0.0021540354937314987, "learning_rate": 3.463861941549693e-05, "loss": 46.0, "step": 1143 }, { "epoch": 0.023028070493271737, "grad_norm": 0.0009588591638021171, "learning_rate": 3.448624125505194e-05, "loss": 46.0, "step": 1144 }, { "epoch": 0.0230481999255211, "grad_norm": 0.0016799316508695483, "learning_rate": 3.433412913028585e-05, "loss": 46.0, "step": 1145 }, { "epoch": 0.023068329357770465, "grad_norm": 0.0027795052155852318, "learning_rate": 3.418228365888955e-05, "loss": 46.0, "step": 1146 }, { "epoch": 0.023088458790019827, "grad_norm": 0.001736775622703135, "learning_rate": 3.403070545747107e-05, "loss": 46.0, "step": 1147 }, { "epoch": 0.023108588222269192, "grad_norm": 0.0009725184645503759, "learning_rate": 3.3879395141553225e-05, "loss": 46.0, "step": 1148 }, { "epoch": 0.023128717654518554, "grad_norm": 0.001233744085766375, "learning_rate": 3.3728353325570915e-05, "loss": 46.0, "step": 1149 }, { "epoch": 0.02314884708676792, "grad_norm": 0.0011711488477885723, "learning_rate": 3.357758062286883e-05, "loss": 46.0, "step": 1150 }, { "epoch": 0.02316897651901728, "grad_norm": 0.0012906735064461827, "learning_rate": 3.342707764569873e-05, "loss": 46.0, "step": 1151 }, { "epoch": 0.023189105951266643, "grad_norm": 0.0016261962009593844, "learning_rate": 3.327684500521724e-05, "loss": 46.0, "step": 1152 }, { "epoch": 0.02320923538351601, "grad_norm": 0.0019309535855427384, "learning_rate": 3.312688331148299e-05, "loss": 46.0, "step": 1153 }, { "epoch": 0.02322936481576537, "grad_norm": 0.0018105946946889162, "learning_rate": 3.29771931734546e-05, "loss": 46.0, "step": 1154 }, { "epoch": 0.023249494248014736, "grad_norm": 0.0008983553270809352, "learning_rate": 3.282777519898779e-05, "loss": 46.0, "step": 1155 }, { "epoch": 0.023269623680264098, "grad_norm": 0.0006004321039654315, "learning_rate": 3.267862999483318e-05, "loss": 46.0, "step": 1156 }, { "epoch": 0.02328975311251346, "grad_norm": 0.0009507157956250012, "learning_rate": 3.252975816663375e-05, "loss": 46.0, "step": 1157 }, { "epoch": 0.023309882544762826, "grad_norm": 0.0011066242586821318, "learning_rate": 3.238116031892227e-05, "loss": 46.0, "step": 1158 }, { "epoch": 0.023330011977012188, "grad_norm": 0.0016599443042650819, "learning_rate": 3.223283705511908e-05, "loss": 46.0, "step": 1159 }, { "epoch": 0.023350141409261553, "grad_norm": 0.0013380105374380946, "learning_rate": 3.208478897752931e-05, "loss": 46.0, "step": 1160 }, { "epoch": 0.023370270841510915, "grad_norm": 0.0011801571818068624, "learning_rate": 3.193701668734083e-05, "loss": 46.0, "step": 1161 }, { "epoch": 0.023390400273760277, "grad_norm": 0.00122344761621207, "learning_rate": 3.178952078462143e-05, "loss": 46.0, "step": 1162 }, { "epoch": 0.023410529706009642, "grad_norm": 0.0011970382183790207, "learning_rate": 3.164230186831671e-05, "loss": 46.0, "step": 1163 }, { "epoch": 0.023430659138259004, "grad_norm": 0.0010325321927666664, "learning_rate": 3.149536053624735e-05, "loss": 46.0, "step": 1164 }, { "epoch": 0.02345078857050837, "grad_norm": 0.0016847345978021622, "learning_rate": 3.134869738510697e-05, "loss": 46.0, "step": 1165 }, { "epoch": 0.023470918002757732, "grad_norm": 0.0009486636845394969, "learning_rate": 3.1202313010459414e-05, "loss": 46.0, "step": 1166 }, { "epoch": 0.023491047435007097, "grad_norm": 0.0013974226312711835, "learning_rate": 3.1056208006736634e-05, "loss": 46.0, "step": 1167 }, { "epoch": 0.02351117686725646, "grad_norm": 0.002067785942927003, "learning_rate": 3.0910382967235995e-05, "loss": 46.0, "step": 1168 }, { "epoch": 0.02353130629950582, "grad_norm": 0.0010473356815055013, "learning_rate": 3.076483848411803e-05, "loss": 46.0, "step": 1169 }, { "epoch": 0.023551435731755187, "grad_norm": 0.0015565203502774239, "learning_rate": 3.061957514840408e-05, "loss": 46.0, "step": 1170 }, { "epoch": 0.02357156516400455, "grad_norm": 0.002033325145021081, "learning_rate": 3.0474593549973673e-05, "loss": 46.0, "step": 1171 }, { "epoch": 0.023591694596253914, "grad_norm": 0.001210788730531931, "learning_rate": 3.032989427756243e-05, "loss": 46.0, "step": 1172 }, { "epoch": 0.023611824028503276, "grad_norm": 0.0010564669501036406, "learning_rate": 3.0185477918759353e-05, "loss": 46.0, "step": 1173 }, { "epoch": 0.023631953460752638, "grad_norm": 0.0009370659245178103, "learning_rate": 3.0041345060004776e-05, "loss": 46.0, "step": 1174 }, { "epoch": 0.023652082893002004, "grad_norm": 0.0006624764064326882, "learning_rate": 2.989749628658759e-05, "loss": 46.0, "step": 1175 }, { "epoch": 0.023672212325251366, "grad_norm": 0.002800694201141596, "learning_rate": 2.9753932182643272e-05, "loss": 46.0, "step": 1176 }, { "epoch": 0.02369234175750073, "grad_norm": 0.001191929099150002, "learning_rate": 2.9610653331151216e-05, "loss": 46.0, "step": 1177 }, { "epoch": 0.023712471189750093, "grad_norm": 0.0008834595791995525, "learning_rate": 2.9467660313932565e-05, "loss": 46.0, "step": 1178 }, { "epoch": 0.023732600621999455, "grad_norm": 0.0013140215305611491, "learning_rate": 2.932495371164764e-05, "loss": 46.0, "step": 1179 }, { "epoch": 0.023732600621999455, "eval_loss": 11.5, "eval_runtime": 126.7638, "eval_samples_per_second": 165.016, "eval_steps_per_second": 82.508, "step": 1179 }, { "epoch": 0.02375273005424882, "grad_norm": 0.00120261637493968, "learning_rate": 2.9182534103793825e-05, "loss": 46.0, "step": 1180 }, { "epoch": 0.023772859486498182, "grad_norm": 0.0013184483395889401, "learning_rate": 2.9040402068702977e-05, "loss": 46.0, "step": 1181 }, { "epoch": 0.023792988918747548, "grad_norm": 0.0016656328225508332, "learning_rate": 2.8898558183539283e-05, "loss": 46.0, "step": 1182 }, { "epoch": 0.02381311835099691, "grad_norm": 0.0012708749854937196, "learning_rate": 2.8757003024296768e-05, "loss": 46.0, "step": 1183 }, { "epoch": 0.023833247783246275, "grad_norm": 0.0014254259876906872, "learning_rate": 2.8615737165796974e-05, "loss": 46.0, "step": 1184 }, { "epoch": 0.023853377215495637, "grad_norm": 0.0012592601124197245, "learning_rate": 2.8474761181686772e-05, "loss": 46.0, "step": 1185 }, { "epoch": 0.023873506647745, "grad_norm": 0.001736863050609827, "learning_rate": 2.8334075644435807e-05, "loss": 46.0, "step": 1186 }, { "epoch": 0.023893636079994365, "grad_norm": 0.0012428145855665207, "learning_rate": 2.8193681125334393e-05, "loss": 46.0, "step": 1187 }, { "epoch": 0.023913765512243727, "grad_norm": 0.001219391357153654, "learning_rate": 2.805357819449098e-05, "loss": 46.0, "step": 1188 }, { "epoch": 0.023933894944493092, "grad_norm": 0.0018028883496299386, "learning_rate": 2.7913767420830105e-05, "loss": 46.0, "step": 1189 }, { "epoch": 0.023954024376742454, "grad_norm": 0.001996217295527458, "learning_rate": 2.7774249372089688e-05, "loss": 46.0, "step": 1190 }, { "epoch": 0.023974153808991816, "grad_norm": 0.0012688592541962862, "learning_rate": 2.7635024614819205e-05, "loss": 46.0, "step": 1191 }, { "epoch": 0.02399428324124118, "grad_norm": 0.0006761057302355766, "learning_rate": 2.749609371437697e-05, "loss": 46.0, "step": 1192 }, { "epoch": 0.024014412673490543, "grad_norm": 0.0008217204012908041, "learning_rate": 2.735745723492815e-05, "loss": 46.0, "step": 1193 }, { "epoch": 0.02403454210573991, "grad_norm": 0.0008044593851082027, "learning_rate": 2.7219115739442215e-05, "loss": 46.0, "step": 1194 }, { "epoch": 0.02405467153798927, "grad_norm": 0.0011257351143285632, "learning_rate": 2.7081069789690883e-05, "loss": 46.0, "step": 1195 }, { "epoch": 0.024074800970238633, "grad_norm": 0.001486622029915452, "learning_rate": 2.694331994624567e-05, "loss": 46.0, "step": 1196 }, { "epoch": 0.024094930402488, "grad_norm": 0.0016462607309222221, "learning_rate": 2.6805866768475663e-05, "loss": 46.0, "step": 1197 }, { "epoch": 0.02411505983473736, "grad_norm": 0.0006136257434263825, "learning_rate": 2.666871081454533e-05, "loss": 46.0, "step": 1198 }, { "epoch": 0.024135189266986726, "grad_norm": 0.0008609068463556468, "learning_rate": 2.6531852641412082e-05, "loss": 46.0, "step": 1199 }, { "epoch": 0.024155318699236088, "grad_norm": 0.0013050955021753907, "learning_rate": 2.6395292804824244e-05, "loss": 46.0, "step": 1200 }, { "epoch": 0.024175448131485453, "grad_norm": 0.00126195780467242, "learning_rate": 2.625903185931853e-05, "loss": 46.0, "step": 1201 }, { "epoch": 0.024195577563734815, "grad_norm": 0.001260231714695692, "learning_rate": 2.612307035821805e-05, "loss": 46.0, "step": 1202 }, { "epoch": 0.024215706995984177, "grad_norm": 0.0008337153121829033, "learning_rate": 2.5987408853629846e-05, "loss": 46.0, "step": 1203 }, { "epoch": 0.024235836428233543, "grad_norm": 0.0017989326734095812, "learning_rate": 2.5852047896442853e-05, "loss": 46.0, "step": 1204 }, { "epoch": 0.024255965860482905, "grad_norm": 0.0008393581956624985, "learning_rate": 2.5716988036325386e-05, "loss": 46.0, "step": 1205 }, { "epoch": 0.02427609529273227, "grad_norm": 0.001092239166609943, "learning_rate": 2.5582229821723257e-05, "loss": 46.0, "step": 1206 }, { "epoch": 0.024296224724981632, "grad_norm": 0.0011140386341139674, "learning_rate": 2.5447773799857244e-05, "loss": 46.0, "step": 1207 }, { "epoch": 0.024316354157230994, "grad_norm": 0.0014232645044103265, "learning_rate": 2.5313620516721105e-05, "loss": 46.0, "step": 1208 }, { "epoch": 0.02433648358948036, "grad_norm": 0.0015523895854130387, "learning_rate": 2.5179770517079093e-05, "loss": 46.0, "step": 1209 }, { "epoch": 0.02435661302172972, "grad_norm": 0.0020029693841934204, "learning_rate": 2.5046224344464074e-05, "loss": 46.0, "step": 1210 }, { "epoch": 0.024376742453979087, "grad_norm": 0.0009358267998322845, "learning_rate": 2.4912982541175033e-05, "loss": 46.0, "step": 1211 }, { "epoch": 0.02439687188622845, "grad_norm": 0.0009875150863081217, "learning_rate": 2.4780045648274975e-05, "loss": 46.0, "step": 1212 }, { "epoch": 0.02441700131847781, "grad_norm": 0.0015209922567009926, "learning_rate": 2.4647414205588827e-05, "loss": 46.0, "step": 1213 }, { "epoch": 0.024437130750727176, "grad_norm": 0.0008846839773468673, "learning_rate": 2.451508875170104e-05, "loss": 46.0, "step": 1214 }, { "epoch": 0.024457260182976538, "grad_norm": 0.0009960222523659468, "learning_rate": 2.4383069823953663e-05, "loss": 46.0, "step": 1215 }, { "epoch": 0.024477389615225904, "grad_norm": 0.001352517050690949, "learning_rate": 2.4251357958443867e-05, "loss": 46.0, "step": 1216 }, { "epoch": 0.024497519047475266, "grad_norm": 0.0013506961986422539, "learning_rate": 2.4119953690022025e-05, "loss": 46.0, "step": 1217 }, { "epoch": 0.02451764847972463, "grad_norm": 0.0009574260911904275, "learning_rate": 2.3988857552289333e-05, "loss": 46.0, "step": 1218 }, { "epoch": 0.024537777911973993, "grad_norm": 0.0007269601919688284, "learning_rate": 2.3858070077595908e-05, "loss": 46.0, "step": 1219 }, { "epoch": 0.024557907344223355, "grad_norm": 0.0006699137156829238, "learning_rate": 2.372759179703822e-05, "loss": 46.0, "step": 1220 }, { "epoch": 0.02457803677647272, "grad_norm": 0.0015291008166968822, "learning_rate": 2.3597423240457395e-05, "loss": 46.0, "step": 1221 }, { "epoch": 0.024598166208722082, "grad_norm": 0.002010623225942254, "learning_rate": 2.3467564936436703e-05, "loss": 46.0, "step": 1222 }, { "epoch": 0.024618295640971448, "grad_norm": 0.0008198167197406292, "learning_rate": 2.3338017412299663e-05, "loss": 46.0, "step": 1223 }, { "epoch": 0.02463842507322081, "grad_norm": 0.0006754023488610983, "learning_rate": 2.3208781194107664e-05, "loss": 46.0, "step": 1224 }, { "epoch": 0.024658554505470172, "grad_norm": 0.0009974197018891573, "learning_rate": 2.3079856806658107e-05, "loss": 46.0, "step": 1225 }, { "epoch": 0.024678683937719537, "grad_norm": 0.0012742577819153666, "learning_rate": 2.2951244773481995e-05, "loss": 46.0, "step": 1226 }, { "epoch": 0.0246988133699689, "grad_norm": 0.0013213262427598238, "learning_rate": 2.2822945616841963e-05, "loss": 46.0, "step": 1227 }, { "epoch": 0.024718942802218265, "grad_norm": 0.0006177601171657443, "learning_rate": 2.2694959857730204e-05, "loss": 46.0, "step": 1228 }, { "epoch": 0.024739072234467627, "grad_norm": 0.0009409788763150573, "learning_rate": 2.256728801586616e-05, "loss": 46.0, "step": 1229 }, { "epoch": 0.02475920166671699, "grad_norm": 0.001638015266507864, "learning_rate": 2.2439930609694658e-05, "loss": 46.0, "step": 1230 }, { "epoch": 0.024779331098966354, "grad_norm": 0.000653863069601357, "learning_rate": 2.231288815638356e-05, "loss": 46.0, "step": 1231 }, { "epoch": 0.024799460531215716, "grad_norm": 0.001285205944441259, "learning_rate": 2.2186161171821885e-05, "loss": 46.0, "step": 1232 }, { "epoch": 0.02481958996346508, "grad_norm": 0.001490448834374547, "learning_rate": 2.2059750170617523e-05, "loss": 46.0, "step": 1233 }, { "epoch": 0.024839719395714444, "grad_norm": 0.0018423368455842137, "learning_rate": 2.1933655666095275e-05, "loss": 46.0, "step": 1234 }, { "epoch": 0.02485984882796381, "grad_norm": 0.0011196645209565759, "learning_rate": 2.180787817029468e-05, "loss": 46.0, "step": 1235 }, { "epoch": 0.02487997826021317, "grad_norm": 0.000826796458568424, "learning_rate": 2.168241819396808e-05, "loss": 46.0, "step": 1236 }, { "epoch": 0.024900107692462533, "grad_norm": 0.0005586759652942419, "learning_rate": 2.1557276246578307e-05, "loss": 46.0, "step": 1237 }, { "epoch": 0.0249202371247119, "grad_norm": 0.0011236952850595117, "learning_rate": 2.1432452836296914e-05, "loss": 46.0, "step": 1238 }, { "epoch": 0.02494036655696126, "grad_norm": 0.001449284260161221, "learning_rate": 2.1307948470001782e-05, "loss": 46.0, "step": 1239 }, { "epoch": 0.024960495989210626, "grad_norm": 0.0007015886367298663, "learning_rate": 2.1183763653275378e-05, "loss": 46.0, "step": 1240 }, { "epoch": 0.024980625421459988, "grad_norm": 0.0011342605575919151, "learning_rate": 2.1059898890402473e-05, "loss": 46.0, "step": 1241 }, { "epoch": 0.02500075485370935, "grad_norm": 0.0012565052602440119, "learning_rate": 2.0936354684368163e-05, "loss": 46.0, "step": 1242 }, { "epoch": 0.025020884285958715, "grad_norm": 0.001035231165587902, "learning_rate": 2.0813131536855913e-05, "loss": 46.0, "step": 1243 }, { "epoch": 0.025041013718208077, "grad_norm": 0.0019417139701545238, "learning_rate": 2.0690229948245365e-05, "loss": 46.0, "step": 1244 }, { "epoch": 0.025061143150457443, "grad_norm": 0.0015144539065659046, "learning_rate": 2.0567650417610485e-05, "loss": 46.0, "step": 1245 }, { "epoch": 0.025081272582706805, "grad_norm": 0.0014989918563514948, "learning_rate": 2.0445393442717308e-05, "loss": 46.0, "step": 1246 }, { "epoch": 0.025101402014956167, "grad_norm": 0.0018054774263873696, "learning_rate": 2.032345952002218e-05, "loss": 46.0, "step": 1247 }, { "epoch": 0.025121531447205532, "grad_norm": 0.0009529809467494488, "learning_rate": 2.0201849144669525e-05, "loss": 46.0, "step": 1248 }, { "epoch": 0.025141660879454894, "grad_norm": 0.002208658494055271, "learning_rate": 2.0080562810489935e-05, "loss": 46.0, "step": 1249 }, { "epoch": 0.02516179031170426, "grad_norm": 0.0007078711641952395, "learning_rate": 1.995960100999814e-05, "loss": 46.0, "step": 1250 }, { "epoch": 0.02518191974395362, "grad_norm": 0.0010136510245501995, "learning_rate": 1.983896423439109e-05, "loss": 46.0, "step": 1251 }, { "epoch": 0.025202049176202987, "grad_norm": 0.0018538066651672125, "learning_rate": 1.9718652973545758e-05, "loss": 46.0, "step": 1252 }, { "epoch": 0.02522217860845235, "grad_norm": 0.0011417543282732368, "learning_rate": 1.9598667716017417e-05, "loss": 46.0, "step": 1253 }, { "epoch": 0.02524230804070171, "grad_norm": 0.000996634247712791, "learning_rate": 1.947900894903739e-05, "loss": 46.0, "step": 1254 }, { "epoch": 0.025262437472951076, "grad_norm": 0.001392628182657063, "learning_rate": 1.9359677158511334e-05, "loss": 46.0, "step": 1255 }, { "epoch": 0.02528256690520044, "grad_norm": 0.000720780692063272, "learning_rate": 1.9240672829017014e-05, "loss": 46.0, "step": 1256 }, { "epoch": 0.025302696337449804, "grad_norm": 0.001160190673545003, "learning_rate": 1.9121996443802482e-05, "loss": 46.0, "step": 1257 }, { "epoch": 0.025322825769699166, "grad_norm": 0.002251293743029237, "learning_rate": 1.9003648484784166e-05, "loss": 46.0, "step": 1258 }, { "epoch": 0.025342955201948528, "grad_norm": 0.00043235233169980347, "learning_rate": 1.8885629432544717e-05, "loss": 46.0, "step": 1259 }, { "epoch": 0.025363084634197893, "grad_norm": 0.0007145190611481667, "learning_rate": 1.8767939766331287e-05, "loss": 46.0, "step": 1260 }, { "epoch": 0.025383214066447255, "grad_norm": 0.0006261473754420877, "learning_rate": 1.865057996405336e-05, "loss": 46.0, "step": 1261 }, { "epoch": 0.02540334349869662, "grad_norm": 0.0010795745765790343, "learning_rate": 1.8533550502281015e-05, "loss": 46.0, "step": 1262 }, { "epoch": 0.025423472930945983, "grad_norm": 0.0009830056224018335, "learning_rate": 1.8416851856242868e-05, "loss": 46.0, "step": 1263 }, { "epoch": 0.025443602363195345, "grad_norm": 0.0014814147725701332, "learning_rate": 1.83004844998241e-05, "loss": 46.0, "step": 1264 }, { "epoch": 0.02546373179544471, "grad_norm": 0.0011380594223737717, "learning_rate": 1.8184448905564743e-05, "loss": 46.0, "step": 1265 }, { "epoch": 0.025483861227694072, "grad_norm": 0.0006726674037054181, "learning_rate": 1.8068745544657484e-05, "loss": 46.0, "step": 1266 }, { "epoch": 0.025503990659943437, "grad_norm": 0.0017750106053426862, "learning_rate": 1.7953374886946006e-05, "loss": 46.0, "step": 1267 }, { "epoch": 0.0255241200921928, "grad_norm": 0.0011626784689724445, "learning_rate": 1.7838337400922855e-05, "loss": 46.0, "step": 1268 }, { "epoch": 0.025544249524442165, "grad_norm": 0.0009647434344515204, "learning_rate": 1.772363355372776e-05, "loss": 46.0, "step": 1269 }, { "epoch": 0.025564378956691527, "grad_norm": 0.002181010087952018, "learning_rate": 1.7609263811145537e-05, "loss": 46.0, "step": 1270 }, { "epoch": 0.02558450838894089, "grad_norm": 0.0012630668934434652, "learning_rate": 1.74952286376043e-05, "loss": 46.0, "step": 1271 }, { "epoch": 0.025604637821190254, "grad_norm": 0.0009720325469970703, "learning_rate": 1.738152849617356e-05, "loss": 46.0, "step": 1272 }, { "epoch": 0.025624767253439616, "grad_norm": 0.0007137717329896986, "learning_rate": 1.72681638485624e-05, "loss": 46.0, "step": 1273 }, { "epoch": 0.02564489668568898, "grad_norm": 0.0014153624651953578, "learning_rate": 1.715513515511743e-05, "loss": 46.0, "step": 1274 }, { "epoch": 0.025665026117938344, "grad_norm": 0.001465531880967319, "learning_rate": 1.7042442874821164e-05, "loss": 46.0, "step": 1275 }, { "epoch": 0.025685155550187706, "grad_norm": 0.0011013116454705596, "learning_rate": 1.693008746528988e-05, "loss": 46.0, "step": 1276 }, { "epoch": 0.02570528498243707, "grad_norm": 0.0005420059314928949, "learning_rate": 1.681806938277205e-05, "loss": 46.0, "step": 1277 }, { "epoch": 0.025725414414686433, "grad_norm": 0.0007358815055340528, "learning_rate": 1.6706389082146244e-05, "loss": 46.0, "step": 1278 }, { "epoch": 0.0257455438469358, "grad_norm": 0.00043846582411788404, "learning_rate": 1.6595047016919373e-05, "loss": 46.0, "step": 1279 }, { "epoch": 0.02576567327918516, "grad_norm": 0.0007551101734861732, "learning_rate": 1.6484043639224955e-05, "loss": 46.0, "step": 1280 }, { "epoch": 0.025785802711434522, "grad_norm": 0.0011386704863980412, "learning_rate": 1.6373379399821033e-05, "loss": 46.0, "step": 1281 }, { "epoch": 0.025805932143683888, "grad_norm": 0.0014110167976468801, "learning_rate": 1.6263054748088658e-05, "loss": 46.0, "step": 1282 }, { "epoch": 0.02582606157593325, "grad_norm": 0.0017660473240539432, "learning_rate": 1.6153070132029723e-05, "loss": 46.0, "step": 1283 }, { "epoch": 0.025846191008182615, "grad_norm": 0.0021693487651646137, "learning_rate": 1.604342599826548e-05, "loss": 46.0, "step": 1284 }, { "epoch": 0.025866320440431977, "grad_norm": 0.001375765772536397, "learning_rate": 1.593412279203447e-05, "loss": 46.0, "step": 1285 }, { "epoch": 0.025886449872681343, "grad_norm": 0.001402435707859695, "learning_rate": 1.5825160957190798e-05, "loss": 46.0, "step": 1286 }, { "epoch": 0.025906579304930705, "grad_norm": 0.002132172929123044, "learning_rate": 1.5716540936202363e-05, "loss": 46.0, "step": 1287 }, { "epoch": 0.025926708737180067, "grad_norm": 0.00138960184995085, "learning_rate": 1.5608263170149095e-05, "loss": 46.0, "step": 1288 }, { "epoch": 0.025946838169429432, "grad_norm": 0.0014584781602025032, "learning_rate": 1.5500328098721017e-05, "loss": 46.0, "step": 1289 }, { "epoch": 0.025966967601678794, "grad_norm": 0.0017797322943806648, "learning_rate": 1.5392736160216635e-05, "loss": 46.0, "step": 1290 }, { "epoch": 0.02598709703392816, "grad_norm": 0.002001388929784298, "learning_rate": 1.5285487791541e-05, "loss": 46.0, "step": 1291 }, { "epoch": 0.02600722646617752, "grad_norm": 0.0019802197348326445, "learning_rate": 1.5178583428204085e-05, "loss": 46.0, "step": 1292 }, { "epoch": 0.026027355898426884, "grad_norm": 0.0018057803390547633, "learning_rate": 1.5072023504318867e-05, "loss": 46.0, "step": 1293 }, { "epoch": 0.02604748533067625, "grad_norm": 0.0009447969496250153, "learning_rate": 1.496580845259965e-05, "loss": 46.0, "step": 1294 }, { "epoch": 0.02606761476292561, "grad_norm": 0.0007863205391913652, "learning_rate": 1.4859938704360365e-05, "loss": 46.0, "step": 1295 }, { "epoch": 0.026087744195174976, "grad_norm": 0.0012318964581936598, "learning_rate": 1.475441468951263e-05, "loss": 46.0, "step": 1296 }, { "epoch": 0.02610787362742434, "grad_norm": 0.001262698438949883, "learning_rate": 1.4649236836564263e-05, "loss": 46.0, "step": 1297 }, { "epoch": 0.0261280030596737, "grad_norm": 0.0010360804153606296, "learning_rate": 1.4544405572617259e-05, "loss": 46.0, "step": 1298 }, { "epoch": 0.026148132491923066, "grad_norm": 0.001501108636148274, "learning_rate": 1.4439921323366323e-05, "loss": 46.0, "step": 1299 }, { "epoch": 0.026168261924172428, "grad_norm": 0.0006216936744749546, "learning_rate": 1.4335784513096929e-05, "loss": 46.0, "step": 1300 }, { "epoch": 0.026188391356421793, "grad_norm": 0.0012842519208788872, "learning_rate": 1.4231995564683732e-05, "loss": 46.0, "step": 1301 }, { "epoch": 0.026208520788671155, "grad_norm": 0.0013369874795898795, "learning_rate": 1.412855489958873e-05, "loss": 46.0, "step": 1302 }, { "epoch": 0.02622865022092052, "grad_norm": 0.0011299944017082453, "learning_rate": 1.4025462937859768e-05, "loss": 46.0, "step": 1303 }, { "epoch": 0.026248779653169883, "grad_norm": 0.0018812668276950717, "learning_rate": 1.3922720098128527e-05, "loss": 46.0, "step": 1304 }, { "epoch": 0.026268909085419245, "grad_norm": 0.001090607256628573, "learning_rate": 1.3820326797609129e-05, "loss": 46.0, "step": 1305 }, { "epoch": 0.02628903851766861, "grad_norm": 0.0012601654743775725, "learning_rate": 1.371828345209618e-05, "loss": 46.0, "step": 1306 }, { "epoch": 0.026309167949917972, "grad_norm": 0.0014517259551212192, "learning_rate": 1.361659047596332e-05, "loss": 46.0, "step": 1307 }, { "epoch": 0.026329297382167337, "grad_norm": 0.0019068201072514057, "learning_rate": 1.3515248282161319e-05, "loss": 46.0, "step": 1308 }, { "epoch": 0.0263494268144167, "grad_norm": 0.0016664571594446898, "learning_rate": 1.3414257282216535e-05, "loss": 46.0, "step": 1309 }, { "epoch": 0.02636955624666606, "grad_norm": 0.0009046939085237682, "learning_rate": 1.3313617886229269e-05, "loss": 46.0, "step": 1310 }, { "epoch": 0.026389685678915427, "grad_norm": 0.0005408989964053035, "learning_rate": 1.3213330502871956e-05, "loss": 46.0, "step": 1311 }, { "epoch": 0.02640981511116479, "grad_norm": 0.0008995769894681871, "learning_rate": 1.3113395539387674e-05, "loss": 46.0, "step": 1312 }, { "epoch": 0.026429944543414154, "grad_norm": 0.0012701263185590506, "learning_rate": 1.3013813401588315e-05, "loss": 46.0, "step": 1313 }, { "epoch": 0.026450073975663516, "grad_norm": 0.001226755790412426, "learning_rate": 1.2914584493853144e-05, "loss": 46.0, "step": 1314 }, { "epoch": 0.026470203407912878, "grad_norm": 0.0015894804382696748, "learning_rate": 1.2815709219126959e-05, "loss": 46.0, "step": 1315 }, { "epoch": 0.026490332840162244, "grad_norm": 0.0014253195840865374, "learning_rate": 1.2717187978918544e-05, "loss": 46.0, "step": 1316 }, { "epoch": 0.026510462272411606, "grad_norm": 0.0009140470647253096, "learning_rate": 1.2619021173299051e-05, "loss": 46.0, "step": 1317 }, { "epoch": 0.02653059170466097, "grad_norm": 0.0010910599958151579, "learning_rate": 1.2521209200900397e-05, "loss": 46.0, "step": 1318 }, { "epoch": 0.026550721136910333, "grad_norm": 0.000989207299426198, "learning_rate": 1.2423752458913518e-05, "loss": 46.0, "step": 1319 }, { "epoch": 0.0265708505691597, "grad_norm": 0.0009602408390492201, "learning_rate": 1.2326651343086937e-05, "loss": 46.0, "step": 1320 }, { "epoch": 0.02659098000140906, "grad_norm": 0.0004408732638694346, "learning_rate": 1.2229906247724998e-05, "loss": 46.0, "step": 1321 }, { "epoch": 0.026611109433658423, "grad_norm": 0.0010518889175727963, "learning_rate": 1.2133517565686381e-05, "loss": 46.0, "step": 1322 }, { "epoch": 0.026631238865907788, "grad_norm": 0.002272763755172491, "learning_rate": 1.2037485688382421e-05, "loss": 46.0, "step": 1323 }, { "epoch": 0.02665136829815715, "grad_norm": 0.0020480677485466003, "learning_rate": 1.1941811005775538e-05, "loss": 46.0, "step": 1324 }, { "epoch": 0.026671497730406515, "grad_norm": 0.002253229497000575, "learning_rate": 1.1846493906377743e-05, "loss": 46.0, "step": 1325 }, { "epoch": 0.026691627162655877, "grad_norm": 0.0012804159196093678, "learning_rate": 1.1751534777248885e-05, "loss": 46.0, "step": 1326 }, { "epoch": 0.02671175659490524, "grad_norm": 0.0019482570933178067, "learning_rate": 1.1656934003995302e-05, "loss": 46.0, "step": 1327 }, { "epoch": 0.026731886027154605, "grad_norm": 0.0008674189448356628, "learning_rate": 1.1562691970768014e-05, "loss": 46.0, "step": 1328 }, { "epoch": 0.026752015459403967, "grad_norm": 0.0017704269848763943, "learning_rate": 1.1468809060261399e-05, "loss": 46.0, "step": 1329 }, { "epoch": 0.026772144891653332, "grad_norm": 0.0014249221421778202, "learning_rate": 1.1375285653711399e-05, "loss": 46.0, "step": 1330 }, { "epoch": 0.026792274323902694, "grad_norm": 0.0009925938211381435, "learning_rate": 1.1282122130894202e-05, "loss": 46.0, "step": 1331 }, { "epoch": 0.026812403756152056, "grad_norm": 0.0008217705762945116, "learning_rate": 1.1189318870124531e-05, "loss": 46.0, "step": 1332 }, { "epoch": 0.02683253318840142, "grad_norm": 0.002136779949069023, "learning_rate": 1.1096876248254228e-05, "loss": 46.0, "step": 1333 }, { "epoch": 0.026852662620650784, "grad_norm": 0.0014067484298720956, "learning_rate": 1.1004794640670602e-05, "loss": 46.0, "step": 1334 }, { "epoch": 0.02687279205290015, "grad_norm": 0.0008450828609056771, "learning_rate": 1.0913074421295022e-05, "loss": 46.0, "step": 1335 }, { "epoch": 0.02689292148514951, "grad_norm": 0.0005786415422335267, "learning_rate": 1.0821715962581302e-05, "loss": 46.0, "step": 1336 }, { "epoch": 0.026913050917398876, "grad_norm": 0.0012445749016478658, "learning_rate": 1.0730719635514296e-05, "loss": 46.0, "step": 1337 }, { "epoch": 0.02693318034964824, "grad_norm": 0.0017225752817466855, "learning_rate": 1.0640085809608257e-05, "loss": 46.0, "step": 1338 }, { "epoch": 0.0269533097818976, "grad_norm": 0.0006431869696825743, "learning_rate": 1.0549814852905427e-05, "loss": 46.0, "step": 1339 }, { "epoch": 0.026973439214146966, "grad_norm": 0.0006889837677590549, "learning_rate": 1.0459907131974578e-05, "loss": 46.0, "step": 1340 }, { "epoch": 0.026993568646396328, "grad_norm": 0.0007062877994030714, "learning_rate": 1.0370363011909368e-05, "loss": 46.0, "step": 1341 }, { "epoch": 0.027013698078645693, "grad_norm": 0.0020865912083536386, "learning_rate": 1.0281182856327075e-05, "loss": 46.0, "step": 1342 }, { "epoch": 0.027033827510895055, "grad_norm": 0.000669913541059941, "learning_rate": 1.019236702736689e-05, "loss": 46.0, "step": 1343 }, { "epoch": 0.027053956943144417, "grad_norm": 0.001416582614183426, "learning_rate": 1.0103915885688686e-05, "loss": 46.0, "step": 1344 }, { "epoch": 0.027074086375393783, "grad_norm": 0.0017289246898144484, "learning_rate": 1.0015829790471288e-05, "loss": 46.0, "step": 1345 }, { "epoch": 0.027094215807643145, "grad_norm": 0.0016691044438630342, "learning_rate": 9.928109099411265e-06, "loss": 46.0, "step": 1346 }, { "epoch": 0.02711434523989251, "grad_norm": 0.0008188973879441619, "learning_rate": 9.840754168721289e-06, "loss": 46.0, "step": 1347 }, { "epoch": 0.027134474672141872, "grad_norm": 0.0006958742160350084, "learning_rate": 9.753765353128863e-06, "loss": 46.0, "step": 1348 }, { "epoch": 0.027154604104391234, "grad_norm": 0.0026792276185005903, "learning_rate": 9.667143005874679e-06, "loss": 46.0, "step": 1349 }, { "epoch": 0.0271747335366406, "grad_norm": 0.0012797150993719697, "learning_rate": 9.580887478711376e-06, "loss": 46.0, "step": 1350 }, { "epoch": 0.02719486296888996, "grad_norm": 0.0014746271772310138, "learning_rate": 9.494999121901948e-06, "loss": 46.0, "step": 1351 }, { "epoch": 0.027214992401139327, "grad_norm": 0.0014366628602147102, "learning_rate": 9.409478284218465e-06, "loss": 46.0, "step": 1352 }, { "epoch": 0.02723512183338869, "grad_norm": 0.0005030794418416917, "learning_rate": 9.32432531294054e-06, "loss": 46.0, "step": 1353 }, { "epoch": 0.027255251265638054, "grad_norm": 0.001364423893392086, "learning_rate": 9.239540553853987e-06, "loss": 46.0, "step": 1354 }, { "epoch": 0.027275380697887416, "grad_norm": 0.002308462280780077, "learning_rate": 9.155124351249434e-06, "loss": 46.0, "step": 1355 }, { "epoch": 0.02729551013013678, "grad_norm": 0.0006031619850546122, "learning_rate": 9.071077047920807e-06, "loss": 46.0, "step": 1356 }, { "epoch": 0.027315639562386144, "grad_norm": 0.0008249651291407645, "learning_rate": 8.987398985164108e-06, "loss": 46.0, "step": 1357 }, { "epoch": 0.027335768994635506, "grad_norm": 0.0013380757300183177, "learning_rate": 8.904090502775875e-06, "loss": 46.0, "step": 1358 }, { "epoch": 0.02735589842688487, "grad_norm": 0.0014677924336865544, "learning_rate": 8.821151939051953e-06, "loss": 46.0, "step": 1359 }, { "epoch": 0.027376027859134233, "grad_norm": 0.0010561698582023382, "learning_rate": 8.73858363078589e-06, "loss": 46.0, "step": 1360 }, { "epoch": 0.027396157291383595, "grad_norm": 0.0006301281973719597, "learning_rate": 8.656385913267872e-06, "loss": 46.0, "step": 1361 }, { "epoch": 0.02741628672363296, "grad_norm": 0.0013005957007408142, "learning_rate": 8.574559120283099e-06, "loss": 46.0, "step": 1362 }, { "epoch": 0.027436416155882323, "grad_norm": 0.0025409117806702852, "learning_rate": 8.493103584110595e-06, "loss": 46.0, "step": 1363 }, { "epoch": 0.027456545588131688, "grad_norm": 0.0010066272225230932, "learning_rate": 8.412019635521784e-06, "loss": 46.0, "step": 1364 }, { "epoch": 0.02747667502038105, "grad_norm": 0.0008767693652771413, "learning_rate": 8.331307603779137e-06, "loss": 46.0, "step": 1365 }, { "epoch": 0.027496804452630412, "grad_norm": 0.0005324966041371226, "learning_rate": 8.250967816634914e-06, "loss": 46.0, "step": 1366 }, { "epoch": 0.027516933884879777, "grad_norm": 0.0013536482583731413, "learning_rate": 8.171000600329682e-06, "loss": 46.0, "step": 1367 }, { "epoch": 0.02753706331712914, "grad_norm": 0.0011457927757874131, "learning_rate": 8.091406279591207e-06, "loss": 46.0, "step": 1368 }, { "epoch": 0.027557192749378505, "grad_norm": 0.0013843755004927516, "learning_rate": 8.012185177632914e-06, "loss": 46.0, "step": 1369 }, { "epoch": 0.027577322181627867, "grad_norm": 0.0013142600655555725, "learning_rate": 7.933337616152747e-06, "loss": 46.0, "step": 1370 }, { "epoch": 0.027597451613877232, "grad_norm": 0.0014753196155652404, "learning_rate": 7.854863915331745e-06, "loss": 46.0, "step": 1371 }, { "epoch": 0.027617581046126594, "grad_norm": 0.0005986293544992805, "learning_rate": 7.776764393832825e-06, "loss": 46.0, "step": 1372 }, { "epoch": 0.027637710478375956, "grad_norm": 0.001749454764649272, "learning_rate": 7.69903936879941e-06, "loss": 46.0, "step": 1373 }, { "epoch": 0.02765783991062532, "grad_norm": 0.0005876508657820523, "learning_rate": 7.6216891558542395e-06, "loss": 46.0, "step": 1374 }, { "epoch": 0.027677969342874684, "grad_norm": 0.0010496970498934388, "learning_rate": 7.54471406909798e-06, "loss": 46.0, "step": 1375 }, { "epoch": 0.02769809877512405, "grad_norm": 0.0011588014895096421, "learning_rate": 7.468114421107997e-06, "loss": 46.0, "step": 1376 }, { "epoch": 0.02771822820737341, "grad_norm": 0.002383692190051079, "learning_rate": 7.391890522937139e-06, "loss": 46.0, "step": 1377 }, { "epoch": 0.027738357639622773, "grad_norm": 0.0012233968591317534, "learning_rate": 7.3160426841123676e-06, "loss": 46.0, "step": 1378 }, { "epoch": 0.02775848707187214, "grad_norm": 0.0014729154063388705, "learning_rate": 7.240571212633618e-06, "loss": 46.0, "step": 1379 }, { "epoch": 0.0277786165041215, "grad_norm": 0.0009423243463970721, "learning_rate": 7.165476414972416e-06, "loss": 46.0, "step": 1380 }, { "epoch": 0.027798745936370866, "grad_norm": 0.0012143378844484687, "learning_rate": 7.090758596070801e-06, "loss": 46.0, "step": 1381 }, { "epoch": 0.027818875368620228, "grad_norm": 0.0015902521554380655, "learning_rate": 7.016418059339879e-06, "loss": 46.0, "step": 1382 }, { "epoch": 0.02783900480086959, "grad_norm": 0.001080973306670785, "learning_rate": 6.942455106658785e-06, "loss": 46.0, "step": 1383 }, { "epoch": 0.027859134233118955, "grad_norm": 0.0013885076623409986, "learning_rate": 6.868870038373332e-06, "loss": 46.0, "step": 1384 }, { "epoch": 0.027879263665368317, "grad_norm": 0.0010777899296954274, "learning_rate": 6.795663153294896e-06, "loss": 46.0, "step": 1385 }, { "epoch": 0.027899393097617683, "grad_norm": 0.0009947263170033693, "learning_rate": 6.7228347486990365e-06, "loss": 46.0, "step": 1386 }, { "epoch": 0.027919522529867045, "grad_norm": 0.0010311356745660305, "learning_rate": 6.6503851203245205e-06, "loss": 46.0, "step": 1387 }, { "epoch": 0.02793965196211641, "grad_norm": 0.0013068821281194687, "learning_rate": 6.57831456237189e-06, "loss": 46.0, "step": 1388 }, { "epoch": 0.027959781394365772, "grad_norm": 0.0010487777180969715, "learning_rate": 6.506623367502418e-06, "loss": 46.0, "step": 1389 }, { "epoch": 0.027979910826615134, "grad_norm": 0.0008867370779626071, "learning_rate": 6.4353118268368986e-06, "loss": 46.0, "step": 1390 }, { "epoch": 0.0280000402588645, "grad_norm": 0.0019279102561995387, "learning_rate": 6.3643802299543696e-06, "loss": 46.0, "step": 1391 }, { "epoch": 0.02802016969111386, "grad_norm": 0.001558710471726954, "learning_rate": 6.293828864891105e-06, "loss": 46.0, "step": 1392 }, { "epoch": 0.028040299123363227, "grad_norm": 0.0016826376086100936, "learning_rate": 6.223658018139245e-06, "loss": 46.0, "step": 1393 }, { "epoch": 0.02806042855561259, "grad_norm": 0.0012350878678262234, "learning_rate": 6.153867974645833e-06, "loss": 46.0, "step": 1394 }, { "epoch": 0.02808055798786195, "grad_norm": 0.0007092293817549944, "learning_rate": 6.084459017811473e-06, "loss": 46.0, "step": 1395 }, { "epoch": 0.028100687420111316, "grad_norm": 0.0008784400415606797, "learning_rate": 6.015431429489371e-06, "loss": 46.0, "step": 1396 }, { "epoch": 0.02812081685236068, "grad_norm": 0.0008223768672905862, "learning_rate": 5.946785489983941e-06, "loss": 46.0, "step": 1397 }, { "epoch": 0.028140946284610044, "grad_norm": 0.0015569966053590178, "learning_rate": 5.87852147804997e-06, "loss": 46.0, "step": 1398 }, { "epoch": 0.028161075716859406, "grad_norm": 0.002495410619303584, "learning_rate": 5.810639670891216e-06, "loss": 46.0, "step": 1399 }, { "epoch": 0.028181205149108768, "grad_norm": 0.0011495595099404454, "learning_rate": 5.743140344159459e-06, "loss": 46.0, "step": 1400 }, { "epoch": 0.028201334581358133, "grad_norm": 0.0013125301338732243, "learning_rate": 5.676023771953265e-06, "loss": 46.0, "step": 1401 }, { "epoch": 0.028221464013607495, "grad_norm": 0.0009252792806364596, "learning_rate": 5.6092902268169986e-06, "loss": 46.0, "step": 1402 }, { "epoch": 0.02824159344585686, "grad_norm": 0.0010239135008305311, "learning_rate": 5.542939979739559e-06, "loss": 46.0, "step": 1403 }, { "epoch": 0.028261722878106223, "grad_norm": 0.0005345203098841012, "learning_rate": 5.47697330015341e-06, "loss": 46.0, "step": 1404 }, { "epoch": 0.028281852310355588, "grad_norm": 0.0007038481417112052, "learning_rate": 5.411390455933463e-06, "loss": 46.0, "step": 1405 }, { "epoch": 0.02830198174260495, "grad_norm": 0.0007712701335549355, "learning_rate": 5.346191713395888e-06, "loss": 46.0, "step": 1406 }, { "epoch": 0.028322111174854312, "grad_norm": 0.0020393729209899902, "learning_rate": 5.2813773372971995e-06, "loss": 46.0, "step": 1407 }, { "epoch": 0.028342240607103678, "grad_norm": 0.0008605642360635102, "learning_rate": 5.216947590833032e-06, "loss": 46.0, "step": 1408 }, { "epoch": 0.02836237003935304, "grad_norm": 0.0014623169554397464, "learning_rate": 5.152902735637166e-06, "loss": 46.0, "step": 1409 }, { "epoch": 0.028382499471602405, "grad_norm": 0.0009752597543410957, "learning_rate": 5.089243031780389e-06, "loss": 46.0, "step": 1410 }, { "epoch": 0.028402628903851767, "grad_norm": 0.0009389633196406066, "learning_rate": 5.025968737769548e-06, "loss": 46.0, "step": 1411 }, { "epoch": 0.02842275833610113, "grad_norm": 0.0007833832642063498, "learning_rate": 4.963080110546336e-06, "loss": 46.0, "step": 1412 }, { "epoch": 0.028442887768350494, "grad_norm": 0.0010727515909820795, "learning_rate": 4.90057740548645e-06, "loss": 46.0, "step": 1413 }, { "epoch": 0.028463017200599856, "grad_norm": 0.0007540370570495725, "learning_rate": 4.838460876398365e-06, "loss": 46.0, "step": 1414 }, { "epoch": 0.028483146632849222, "grad_norm": 0.0013027754612267017, "learning_rate": 4.776730775522464e-06, "loss": 46.0, "step": 1415 }, { "epoch": 0.028503276065098584, "grad_norm": 0.0007448008400388062, "learning_rate": 4.715387353529855e-06, "loss": 46.0, "step": 1416 }, { "epoch": 0.028523405497347946, "grad_norm": 0.0008857371867634356, "learning_rate": 4.654430859521519e-06, "loss": 46.0, "step": 1417 }, { "epoch": 0.02854353492959731, "grad_norm": 0.002097605960443616, "learning_rate": 4.593861541027155e-06, "loss": 46.0, "step": 1418 }, { "epoch": 0.028563664361846673, "grad_norm": 0.0014931777259334922, "learning_rate": 4.53367964400423e-06, "loss": 46.0, "step": 1419 }, { "epoch": 0.02858379379409604, "grad_norm": 0.001164833316579461, "learning_rate": 4.473885412837065e-06, "loss": 46.0, "step": 1420 }, { "epoch": 0.0286039232263454, "grad_norm": 0.0007305769831873477, "learning_rate": 4.414479090335644e-06, "loss": 46.0, "step": 1421 }, { "epoch": 0.028624052658594766, "grad_norm": 0.0013536449987441301, "learning_rate": 4.355460917734866e-06, "loss": 46.0, "step": 1422 }, { "epoch": 0.028644182090844128, "grad_norm": 0.0007573326583951712, "learning_rate": 4.296831134693358e-06, "loss": 46.0, "step": 1423 }, { "epoch": 0.02866431152309349, "grad_norm": 0.0009904114995151758, "learning_rate": 4.238589979292651e-06, "loss": 46.0, "step": 1424 }, { "epoch": 0.028684440955342855, "grad_norm": 0.0010607121512293816, "learning_rate": 4.180737688036096e-06, "loss": 46.0, "step": 1425 }, { "epoch": 0.028704570387592217, "grad_norm": 0.001276315306313336, "learning_rate": 4.1232744958479955e-06, "loss": 46.0, "step": 1426 }, { "epoch": 0.028724699819841583, "grad_norm": 0.0012834541266784072, "learning_rate": 4.066200636072604e-06, "loss": 46.0, "step": 1427 }, { "epoch": 0.028744829252090945, "grad_norm": 0.001088009332306683, "learning_rate": 4.0095163404732075e-06, "loss": 46.0, "step": 1428 }, { "epoch": 0.028764958684340307, "grad_norm": 0.0008485732250846922, "learning_rate": 3.953221839231125e-06, "loss": 46.0, "step": 1429 }, { "epoch": 0.028785088116589672, "grad_norm": 0.00294103124178946, "learning_rate": 3.897317360944874e-06, "loss": 46.0, "step": 1430 }, { "epoch": 0.028805217548839034, "grad_norm": 0.0009056427516043186, "learning_rate": 3.841803132629107e-06, "loss": 46.0, "step": 1431 }, { "epoch": 0.0288253469810884, "grad_norm": 0.001108710654079914, "learning_rate": 3.786679379713842e-06, "loss": 46.0, "step": 1432 }, { "epoch": 0.02884547641333776, "grad_norm": 0.0013181203976273537, "learning_rate": 3.731946326043423e-06, "loss": 46.0, "step": 1433 }, { "epoch": 0.028865605845587124, "grad_norm": 0.0015622148057445884, "learning_rate": 3.677604193875639e-06, "loss": 46.0, "step": 1434 }, { "epoch": 0.02888573527783649, "grad_norm": 0.001430216245353222, "learning_rate": 3.6236532038809167e-06, "loss": 46.0, "step": 1435 }, { "epoch": 0.02890586471008585, "grad_norm": 0.0011702035553753376, "learning_rate": 3.5700935751412644e-06, "loss": 46.0, "step": 1436 }, { "epoch": 0.028925994142335217, "grad_norm": 0.0011749324621632695, "learning_rate": 3.5169255251495283e-06, "loss": 46.0, "step": 1437 }, { "epoch": 0.02894612357458458, "grad_norm": 0.0011062580160796642, "learning_rate": 3.464149269808392e-06, "loss": 46.0, "step": 1438 }, { "epoch": 0.028966253006833944, "grad_norm": 0.001277309376746416, "learning_rate": 3.411765023429625e-06, "loss": 46.0, "step": 1439 }, { "epoch": 0.028986382439083306, "grad_norm": 0.002218514448031783, "learning_rate": 3.3597729987330796e-06, "loss": 46.0, "step": 1440 }, { "epoch": 0.029006511871332668, "grad_norm": 0.0007218050304800272, "learning_rate": 3.3081734068459045e-06, "loss": 46.0, "step": 1441 }, { "epoch": 0.029026641303582033, "grad_norm": 0.0006786247249692678, "learning_rate": 3.25696645730168e-06, "loss": 46.0, "step": 1442 }, { "epoch": 0.029046770735831395, "grad_norm": 0.0008958657272160053, "learning_rate": 3.2061523580395824e-06, "loss": 46.0, "step": 1443 }, { "epoch": 0.02906690016808076, "grad_norm": 0.0011994513915851712, "learning_rate": 3.155731315403465e-06, "loss": 46.0, "step": 1444 }, { "epoch": 0.029087029600330123, "grad_norm": 0.001030144514515996, "learning_rate": 3.1057035341411357e-06, "loss": 46.0, "step": 1445 }, { "epoch": 0.029107159032579485, "grad_norm": 0.0007214623037725687, "learning_rate": 3.056069217403401e-06, "loss": 46.0, "step": 1446 }, { "epoch": 0.02912728846482885, "grad_norm": 0.001022842712700367, "learning_rate": 3.006828566743358e-06, "loss": 46.0, "step": 1447 }, { "epoch": 0.029147417897078212, "grad_norm": 0.0007378848385997117, "learning_rate": 2.957981782115471e-06, "loss": 46.0, "step": 1448 }, { "epoch": 0.029167547329327578, "grad_norm": 0.001484018168412149, "learning_rate": 2.909529061874816e-06, "loss": 46.0, "step": 1449 }, { "epoch": 0.02918767676157694, "grad_norm": 0.0008505574078299105, "learning_rate": 2.861470602776317e-06, "loss": 46.0, "step": 1450 }, { "epoch": 0.0292078061938263, "grad_norm": 0.0009992974810302258, "learning_rate": 2.8138065999738337e-06, "loss": 46.0, "step": 1451 }, { "epoch": 0.029227935626075667, "grad_norm": 0.000925807689782232, "learning_rate": 2.766537247019485e-06, "loss": 46.0, "step": 1452 }, { "epoch": 0.02924806505832503, "grad_norm": 0.001936123939231038, "learning_rate": 2.7196627358627713e-06, "loss": 46.0, "step": 1453 }, { "epoch": 0.029268194490574394, "grad_norm": 0.0008978885016404092, "learning_rate": 2.673183256849876e-06, "loss": 46.0, "step": 1454 }, { "epoch": 0.029288323922823756, "grad_norm": 0.001972075318917632, "learning_rate": 2.627098998722799e-06, "loss": 46.0, "step": 1455 }, { "epoch": 0.029308453355073122, "grad_norm": 0.001669483259320259, "learning_rate": 2.58141014861869e-06, "loss": 46.0, "step": 1456 }, { "epoch": 0.029328582787322484, "grad_norm": 0.0009523354819975793, "learning_rate": 2.536116892069007e-06, "loss": 46.0, "step": 1457 }, { "epoch": 0.029348712219571846, "grad_norm": 0.0025416603311896324, "learning_rate": 2.4912194129988353e-06, "loss": 46.0, "step": 1458 }, { "epoch": 0.02936884165182121, "grad_norm": 0.0017256122082471848, "learning_rate": 2.4467178937260692e-06, "loss": 46.0, "step": 1459 }, { "epoch": 0.029388971084070573, "grad_norm": 0.0008214665576815605, "learning_rate": 2.4026125149607225e-06, "loss": 46.0, "step": 1460 }, { "epoch": 0.02940910051631994, "grad_norm": 0.0010338842403143644, "learning_rate": 2.3589034558041624e-06, "loss": 46.0, "step": 1461 }, { "epoch": 0.0294292299485693, "grad_norm": 0.001692043850198388, "learning_rate": 2.3155908937484093e-06, "loss": 46.0, "step": 1462 }, { "epoch": 0.029449359380818663, "grad_norm": 0.0017285742796957493, "learning_rate": 2.2726750046754175e-06, "loss": 46.0, "step": 1463 }, { "epoch": 0.029469488813068028, "grad_norm": 0.0008371649892069399, "learning_rate": 2.2301559628563062e-06, "loss": 46.0, "step": 1464 }, { "epoch": 0.02948961824531739, "grad_norm": 0.0009613548754714429, "learning_rate": 2.1880339409507288e-06, "loss": 46.0, "step": 1465 }, { "epoch": 0.029509747677566756, "grad_norm": 0.0014171084621921182, "learning_rate": 2.146309110006128e-06, "loss": 46.0, "step": 1466 }, { "epoch": 0.029529877109816118, "grad_norm": 0.0011761389905586839, "learning_rate": 2.1049816394570486e-06, "loss": 46.0, "step": 1467 }, { "epoch": 0.02955000654206548, "grad_norm": 0.0011704186908900738, "learning_rate": 2.064051697124425e-06, "loss": 46.0, "step": 1468 }, { "epoch": 0.029570135974314845, "grad_norm": 0.0009983095806092024, "learning_rate": 2.0235194492149832e-06, "loss": 46.0, "step": 1469 }, { "epoch": 0.029590265406564207, "grad_norm": 0.0015483457827940583, "learning_rate": 1.983385060320453e-06, "loss": 46.0, "step": 1470 }, { "epoch": 0.029610394838813572, "grad_norm": 0.0015588031383231282, "learning_rate": 1.943648693416966e-06, "loss": 46.0, "step": 1471 }, { "epoch": 0.029630524271062934, "grad_norm": 0.00174389174208045, "learning_rate": 1.9043105098643931e-06, "loss": 46.0, "step": 1472 }, { "epoch": 0.0296506537033123, "grad_norm": 0.001446812180802226, "learning_rate": 1.865370669405675e-06, "loss": 46.0, "step": 1473 }, { "epoch": 0.029670783135561662, "grad_norm": 0.000889250251930207, "learning_rate": 1.82682933016618e-06, "loss": 46.0, "step": 1474 }, { "epoch": 0.029690912567811024, "grad_norm": 0.0005816498887725174, "learning_rate": 1.7886866486530374e-06, "loss": 46.0, "step": 1475 }, { "epoch": 0.02971104200006039, "grad_norm": 0.0008788988925516605, "learning_rate": 1.7509427797545718e-06, "loss": 46.0, "step": 1476 }, { "epoch": 0.02973117143230975, "grad_norm": 0.0010405541397631168, "learning_rate": 1.7135978767395588e-06, "loss": 46.0, "step": 1477 }, { "epoch": 0.029751300864559117, "grad_norm": 0.0011400578077882528, "learning_rate": 1.676652091256714e-06, "loss": 46.0, "step": 1478 }, { "epoch": 0.02977143029680848, "grad_norm": 0.0015322790713980794, "learning_rate": 1.6401055733340164e-06, "loss": 46.0, "step": 1479 }, { "epoch": 0.02979155972905784, "grad_norm": 0.0007389390957541764, "learning_rate": 1.6039584713781308e-06, "loss": 46.0, "step": 1480 }, { "epoch": 0.029811689161307206, "grad_norm": 0.0011101323179900646, "learning_rate": 1.5682109321737637e-06, "loss": 46.0, "step": 1481 }, { "epoch": 0.029831818593556568, "grad_norm": 0.0012494990369305015, "learning_rate": 1.5328631008831197e-06, "loss": 46.0, "step": 1482 }, { "epoch": 0.029851948025805933, "grad_norm": 0.0019527755212038755, "learning_rate": 1.497915121045268e-06, "loss": 46.0, "step": 1483 }, { "epoch": 0.029872077458055295, "grad_norm": 0.0015624084044247866, "learning_rate": 1.4633671345755884e-06, "loss": 46.0, "step": 1484 }, { "epoch": 0.029892206890304657, "grad_norm": 0.0013834653655067086, "learning_rate": 1.4292192817651706e-06, "loss": 46.0, "step": 1485 }, { "epoch": 0.029912336322554023, "grad_norm": 0.0015040615107864141, "learning_rate": 1.3954717012802599e-06, "loss": 46.0, "step": 1486 }, { "epoch": 0.029932465754803385, "grad_norm": 0.001884009805507958, "learning_rate": 1.3621245301617014e-06, "loss": 46.0, "step": 1487 }, { "epoch": 0.02995259518705275, "grad_norm": 0.0003439519787207246, "learning_rate": 1.3291779038243745e-06, "loss": 46.0, "step": 1488 }, { "epoch": 0.029972724619302112, "grad_norm": 0.0009911386296153069, "learning_rate": 1.2966319560566264e-06, "loss": 46.0, "step": 1489 }, { "epoch": 0.029992854051551478, "grad_norm": 0.001606732839718461, "learning_rate": 1.2644868190197501e-06, "loss": 46.0, "step": 1490 }, { "epoch": 0.03001298348380084, "grad_norm": 0.0009569000103510916, "learning_rate": 1.2327426232474626e-06, "loss": 46.0, "step": 1491 }, { "epoch": 0.0300331129160502, "grad_norm": 0.002008062554523349, "learning_rate": 1.201399497645328e-06, "loss": 46.0, "step": 1492 }, { "epoch": 0.030053242348299567, "grad_norm": 0.0015580368926748633, "learning_rate": 1.1704575694902686e-06, "loss": 46.0, "step": 1493 }, { "epoch": 0.03007337178054893, "grad_norm": 0.0017130931373685598, "learning_rate": 1.1399169644300323e-06, "loss": 46.0, "step": 1494 }, { "epoch": 0.030093501212798295, "grad_norm": 0.0015125928912311792, "learning_rate": 1.1097778064827257e-06, "loss": 46.0, "step": 1495 }, { "epoch": 0.030113630645047657, "grad_norm": 0.0013810923555865884, "learning_rate": 1.0800402180362158e-06, "loss": 46.0, "step": 1496 }, { "epoch": 0.03013376007729702, "grad_norm": 0.0014871679013594985, "learning_rate": 1.0507043198477617e-06, "loss": 46.0, "step": 1497 }, { "epoch": 0.030153889509546384, "grad_norm": 0.0008668963564559817, "learning_rate": 1.0217702310433842e-06, "loss": 46.0, "step": 1498 }, { "epoch": 0.030174018941795746, "grad_norm": 0.00135446572676301, "learning_rate": 9.93238069117508e-07, "loss": 46.0, "step": 1499 }, { "epoch": 0.03019414837404511, "grad_norm": 0.0012777193915098906, "learning_rate": 9.651079499323978e-07, "loss": 46.0, "step": 1500 }, { "epoch": 0.030214277806294473, "grad_norm": 0.0014395661419257522, "learning_rate": 9.373799877177236e-07, "loss": 46.0, "step": 1501 }, { "epoch": 0.030234407238543835, "grad_norm": 0.0019440649775788188, "learning_rate": 9.100542950701063e-07, "loss": 46.0, "step": 1502 }, { "epoch": 0.0302545366707932, "grad_norm": 0.0007341218297369778, "learning_rate": 8.831309829526291e-07, "loss": 46.0, "step": 1503 }, { "epoch": 0.030274666103042563, "grad_norm": 0.0010124749969691038, "learning_rate": 8.566101606944266e-07, "loss": 46.0, "step": 1504 }, { "epoch": 0.030294795535291928, "grad_norm": 0.0011337018804624677, "learning_rate": 8.304919359901963e-07, "loss": 46.0, "step": 1505 }, { "epoch": 0.03031492496754129, "grad_norm": 0.0006525327335111797, "learning_rate": 8.047764148997883e-07, "loss": 46.0, "step": 1506 }, { "epoch": 0.030335054399790656, "grad_norm": 0.0014475996140390635, "learning_rate": 7.794637018477824e-07, "loss": 46.0, "step": 1507 }, { "epoch": 0.030355183832040018, "grad_norm": 0.0006679428042843938, "learning_rate": 7.545538996230228e-07, "loss": 46.0, "step": 1508 }, { "epoch": 0.03037531326428938, "grad_norm": 0.001389230601489544, "learning_rate": 7.300471093782624e-07, "loss": 46.0, "step": 1509 }, { "epoch": 0.030395442696538745, "grad_norm": 0.0013642680132761598, "learning_rate": 7.059434306297075e-07, "loss": 46.0, "step": 1510 }, { "epoch": 0.030415572128788107, "grad_norm": 0.000978952506557107, "learning_rate": 6.822429612566184e-07, "loss": 46.0, "step": 1511 }, { "epoch": 0.030435701561037472, "grad_norm": 0.0009029002394527197, "learning_rate": 6.589457975009205e-07, "loss": 46.0, "step": 1512 }, { "epoch": 0.030455830993286834, "grad_norm": 0.002438169904053211, "learning_rate": 6.360520339668163e-07, "loss": 46.0, "step": 1513 }, { "epoch": 0.030475960425536196, "grad_norm": 0.0020010732114315033, "learning_rate": 6.135617636204072e-07, "loss": 46.0, "step": 1514 }, { "epoch": 0.030496089857785562, "grad_norm": 0.0014994231751188636, "learning_rate": 5.91475077789272e-07, "loss": 46.0, "step": 1515 }, { "epoch": 0.030516219290034924, "grad_norm": 0.0007911003194749355, "learning_rate": 5.697920661621558e-07, "loss": 46.0, "step": 1516 }, { "epoch": 0.03053634872228429, "grad_norm": 0.0011950345942750573, "learning_rate": 5.485128167885933e-07, "loss": 46.0, "step": 1517 }, { "epoch": 0.03055647815453365, "grad_norm": 0.0012883899034932256, "learning_rate": 5.276374160784858e-07, "loss": 46.0, "step": 1518 }, { "epoch": 0.030576607586783013, "grad_norm": 0.002167344558984041, "learning_rate": 5.071659488018688e-07, "loss": 46.0, "step": 1519 }, { "epoch": 0.03059673701903238, "grad_norm": 0.0008158805430866778, "learning_rate": 4.870984980884341e-07, "loss": 46.0, "step": 1520 }, { "epoch": 0.03061686645128174, "grad_norm": 0.00043665210250765085, "learning_rate": 4.674351454273307e-07, "loss": 46.0, "step": 1521 }, { "epoch": 0.030636995883531106, "grad_norm": 0.0012343135895207524, "learning_rate": 4.481759706666755e-07, "loss": 46.0, "step": 1522 }, { "epoch": 0.030657125315780468, "grad_norm": 0.0007814933778718114, "learning_rate": 4.2932105201339835e-07, "loss": 46.0, "step": 1523 }, { "epoch": 0.030677254748029834, "grad_norm": 0.0008475257782265544, "learning_rate": 4.1087046603279777e-07, "loss": 46.0, "step": 1524 }, { "epoch": 0.030697384180279196, "grad_norm": 0.0010306923650205135, "learning_rate": 3.9282428764827463e-07, "loss": 46.0, "step": 1525 }, { "epoch": 0.030717513612528557, "grad_norm": 0.0033900176640599966, "learning_rate": 3.751825901410433e-07, "loss": 46.0, "step": 1526 }, { "epoch": 0.030737643044777923, "grad_norm": 0.0020439354702830315, "learning_rate": 3.579454451498099e-07, "loss": 46.0, "step": 1527 }, { "epoch": 0.030757772477027285, "grad_norm": 0.0013795166742056608, "learning_rate": 3.411129226704945e-07, "loss": 46.0, "step": 1528 }, { "epoch": 0.03077790190927665, "grad_norm": 0.0019550782162696123, "learning_rate": 3.246850910559318e-07, "loss": 46.0, "step": 1529 }, { "epoch": 0.030798031341526012, "grad_norm": 0.001795038697309792, "learning_rate": 3.0866201701560406e-07, "loss": 46.0, "step": 1530 }, { "epoch": 0.030818160773775374, "grad_norm": 0.0019376088166609406, "learning_rate": 2.9304376561539726e-07, "loss": 46.0, "step": 1531 }, { "epoch": 0.03083829020602474, "grad_norm": 0.0016611182363703847, "learning_rate": 2.7783040027726804e-07, "loss": 46.0, "step": 1532 }, { "epoch": 0.030858419638274102, "grad_norm": 0.0016700802370905876, "learning_rate": 2.630219827790659e-07, "loss": 46.0, "step": 1533 }, { "epoch": 0.030878549070523467, "grad_norm": 0.0005473219207488, "learning_rate": 2.4861857325421123e-07, "loss": 46.0, "step": 1534 }, { "epoch": 0.03089867850277283, "grad_norm": 0.0012385062873363495, "learning_rate": 2.346202301915068e-07, "loss": 46.0, "step": 1535 }, { "epoch": 0.03091880793502219, "grad_norm": 0.0020921400282531977, "learning_rate": 2.2102701043487105e-07, "loss": 46.0, "step": 1536 }, { "epoch": 0.030938937367271557, "grad_norm": 0.0009741144021973014, "learning_rate": 2.0783896918310508e-07, "loss": 46.0, "step": 1537 }, { "epoch": 0.03095906679952092, "grad_norm": 0.0017884114058688283, "learning_rate": 1.9505615998969274e-07, "loss": 46.0, "step": 1538 }, { "epoch": 0.030979196231770284, "grad_norm": 0.0012170026311650872, "learning_rate": 1.8267863476255643e-07, "loss": 46.0, "step": 1539 }, { "epoch": 0.030999325664019646, "grad_norm": 0.0015093215042725205, "learning_rate": 1.7070644376386835e-07, "loss": 46.0, "step": 1540 }, { "epoch": 0.03101945509626901, "grad_norm": 0.0010898308828473091, "learning_rate": 1.5913963560981738e-07, "loss": 46.0, "step": 1541 }, { "epoch": 0.031039584528518373, "grad_norm": 0.000764137483201921, "learning_rate": 1.4797825727044246e-07, "loss": 46.0, "step": 1542 }, { "epoch": 0.031059713960767735, "grad_norm": 0.0012036709813401103, "learning_rate": 1.3722235406943285e-07, "loss": 46.0, "step": 1543 }, { "epoch": 0.0310798433930171, "grad_norm": 0.0012527679791674018, "learning_rate": 1.2687196968392822e-07, "loss": 46.0, "step": 1544 }, { "epoch": 0.031099972825266463, "grad_norm": 0.0017057860968634486, "learning_rate": 1.1692714614436329e-07, "loss": 46.0, "step": 1545 }, { "epoch": 0.03112010225751583, "grad_norm": 0.0012995371362194419, "learning_rate": 1.0738792383427898e-07, "loss": 46.0, "step": 1546 }, { "epoch": 0.03114023168976519, "grad_norm": 0.0005156396073289216, "learning_rate": 9.82543414901782e-08, "loss": 46.0, "step": 1547 }, { "epoch": 0.031160361122014552, "grad_norm": 0.0023463049437850714, "learning_rate": 8.952643620134815e-08, "loss": 46.0, "step": 1548 }, { "epoch": 0.031180490554263918, "grad_norm": 0.0005802233936265111, "learning_rate": 8.1204243409716e-08, "loss": 46.0, "step": 1549 }, { "epoch": 0.03120061998651328, "grad_norm": 0.0014365284005180001, "learning_rate": 7.328779690972675e-08, "loss": 46.0, "step": 1550 }, { "epoch": 0.031220749418762645, "grad_norm": 0.0009548702510073781, "learning_rate": 6.577712884816566e-08, "loss": 46.0, "step": 1551 }, { "epoch": 0.031240878851012007, "grad_norm": 0.000912398740183562, "learning_rate": 5.867226972404716e-08, "loss": 46.0, "step": 1552 }, { "epoch": 0.03126100828326137, "grad_norm": 0.0013671774649992585, "learning_rate": 5.197324838851492e-08, "loss": 46.0, "step": 1553 }, { "epoch": 0.031281137715510735, "grad_norm": 0.001228424021974206, "learning_rate": 4.5680092044686486e-08, "loss": 46.0, "step": 1554 }, { "epoch": 0.031301267147760096, "grad_norm": 0.0009602979407645762, "learning_rate": 3.9792826247553315e-08, "loss": 46.0, "step": 1555 }, { "epoch": 0.03132139658000946, "grad_norm": 0.0012437499826774001, "learning_rate": 3.431147490390307e-08, "loss": 46.0, "step": 1556 }, { "epoch": 0.03134152601225883, "grad_norm": 0.0018415412632748485, "learning_rate": 2.9236060272186395e-08, "loss": 46.0, "step": 1557 }, { "epoch": 0.03136165544450819, "grad_norm": 0.0008638726430945098, "learning_rate": 2.4566602962450282e-08, "loss": 46.0, "step": 1558 }, { "epoch": 0.03138178487675755, "grad_norm": 0.001755962148308754, "learning_rate": 2.0303121936227077e-08, "loss": 46.0, "step": 1559 }, { "epoch": 0.03140191430900691, "grad_norm": 0.000949330220464617, "learning_rate": 1.6445634506512265e-08, "loss": 46.0, "step": 1560 }, { "epoch": 0.031422043741256275, "grad_norm": 0.000980414217337966, "learning_rate": 1.2994156337620134e-08, "loss": 46.0, "step": 1561 }, { "epoch": 0.031442173173505644, "grad_norm": 0.001756619312800467, "learning_rate": 9.948701445194885e-09, "loss": 46.0, "step": 1562 }, { "epoch": 0.031462302605755006, "grad_norm": 0.0016652131453156471, "learning_rate": 7.3092821960774046e-09, "loss": 46.0, "step": 1563 }, { "epoch": 0.03148243203800437, "grad_norm": 0.0008927117451094091, "learning_rate": 5.0759093083385665e-09, "loss": 46.0, "step": 1564 }, { "epoch": 0.03150256147025373, "grad_norm": 0.0012494467664510012, "learning_rate": 3.24859185114601e-09, "loss": 46.0, "step": 1565 }, { "epoch": 0.03152269090250309, "grad_norm": 0.0012330285971984267, "learning_rate": 1.8273372448307512e-09, "loss": 46.0, "step": 1566 }, { "epoch": 0.03154282033475246, "grad_norm": 0.002677972661331296, "learning_rate": 8.121512607317528e-10, "loss": 46.0, "step": 1567 }, { "epoch": 0.03156294976700182, "grad_norm": 0.0020114348735660315, "learning_rate": 2.0303802130694493e-10, "loss": 46.0, "step": 1568 }, { "epoch": 0.031583079199251185, "grad_norm": 0.0010916402097791433, "learning_rate": 0.0, "loss": 46.0, "step": 1569 } ], "logging_steps": 1, "max_steps": 1569, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 393, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 36155043692544.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }