Training in progress, step 400, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +713 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b2d83d5f901eae11145cacd89c72231a010785bff7439a1ba5f4a5e94b8052b4
 size 161533192

 version https://git-lfs.github.com/spec/v1
+oid sha256:068b3b1bc314a43d0eee3fa3201a994ea1f37fea65c47f3391566c7f615e4324
 size 161533192

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1daa0c2c790a56b21b4c6c41ad6a3c0d997b0d94c85b459b9bd582601d994cca
 size 82461044

 version https://git-lfs.github.com/spec/v1
+oid sha256:4dba59ffe60af080b3920ea163a5a588299c3bb8bd5e6e66a912a79abcc7ef34
 size 82461044

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:89f940dbe392540bac79082367808c07a597a4f60dff698b2c1ee1c27e1aabed
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:ba9c3a034fe91f3e6fd2e0635e959f990d8cb75b401553f86500be7643d0a95f
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fd8039035e9f22cc9e9230f66e4f1f1db2add3119ff8edfe450ef63eb16e5439
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:9b230d39ff18e054306fe88dd158c885b4e4aab2378a582b64a99349a20eb587
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.700640082359314,
-  "best_model_checkpoint": "miner_id_24/checkpoint-300",
-  "epoch": 0.11954274898513187,
   "eval_steps": 100,
-  "global_step": 300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2139,6 +2139,714 @@
       "eval_samples_per_second": 14.018,
       "eval_steps_per_second": 3.505,
       "step": 300
     }
   ],
   "logging_steps": 1,
@@ -2167,7 +2875,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 8.388464319922176e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.6927062273025513,
+  "best_model_checkpoint": "miner_id_24/checkpoint-400",
+  "epoch": 0.15939033198017583,
   "eval_steps": 100,
+  "global_step": 400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 14.018,
       "eval_steps_per_second": 3.505,
       "step": 300
+    },
+    {
+      "epoch": 0.1199412248150823,
+      "grad_norm": 0.5043081045150757,
+      "learning_rate": 5.020767189299369e-05,
+      "loss": 1.6154,
+      "step": 301
+    },
+    {
+      "epoch": 0.12033970064503274,
+      "grad_norm": 0.5781189203262329,
+      "learning_rate": 4.9585518766315496e-05,
+      "loss": 1.7757,
+      "step": 302
+    },
+    {
+      "epoch": 0.1207381764749832,
+      "grad_norm": 0.5455333590507507,
+      "learning_rate": 4.896597113499479e-05,
+      "loss": 1.5694,
+      "step": 303
+    },
+    {
+      "epoch": 0.12113665230493363,
+      "grad_norm": 0.5005789399147034,
+      "learning_rate": 4.834906101817438e-05,
+      "loss": 1.716,
+      "step": 304
+    },
+    {
+      "epoch": 0.12153512813488407,
+      "grad_norm": 0.5123348832130432,
+      "learning_rate": 4.773482029868657e-05,
+      "loss": 1.7465,
+      "step": 305
+    },
+    {
+      "epoch": 0.1219336039648345,
+      "grad_norm": 0.5470724105834961,
+      "learning_rate": 4.712328072140505e-05,
+      "loss": 1.7012,
+      "step": 306
+    },
+    {
+      "epoch": 0.12233207979478494,
+      "grad_norm": 0.47325006127357483,
+      "learning_rate": 4.651447389160458e-05,
+      "loss": 1.6349,
+      "step": 307
+    },
+    {
+      "epoch": 0.1227305556247354,
+      "grad_norm": 0.4630093276500702,
+      "learning_rate": 4.5908431273327436e-05,
+      "loss": 1.5126,
+      "step": 308
+    },
+    {
+      "epoch": 0.12312903145468583,
+      "grad_norm": 0.5252417922019958,
+      "learning_rate": 4.530518418775733e-05,
+      "loss": 1.7035,
+      "step": 309
+    },
+    {
+      "epoch": 0.12352750728463627,
+      "grad_norm": 0.5543851256370544,
+      "learning_rate": 4.470476381160065e-05,
+      "loss": 1.9395,
+      "step": 310
+    },
+    {
+      "epoch": 0.1239259831145867,
+      "grad_norm": 0.4824533462524414,
+      "learning_rate": 4.4107201175475275e-05,
+      "loss": 1.5935,
+      "step": 311
+    },
+    {
+      "epoch": 0.12432445894453714,
+      "grad_norm": 0.5243806838989258,
+      "learning_rate": 4.351252716230685e-05,
+      "loss": 1.7277,
+      "step": 312
+    },
+    {
+      "epoch": 0.12472293477448758,
+      "grad_norm": 0.501171886920929,
+      "learning_rate": 4.292077250573266e-05,
+      "loss": 1.594,
+      "step": 313
+    },
+    {
+      "epoch": 0.12512141060443802,
+      "grad_norm": 0.47692808508872986,
+      "learning_rate": 4.2331967788513295e-05,
+      "loss": 1.4772,
+      "step": 314
+    },
+    {
+      "epoch": 0.12551988643438847,
+      "grad_norm": 0.5134193897247314,
+      "learning_rate": 4.174614344095213e-05,
+      "loss": 1.643,
+      "step": 315
+    },
+    {
+      "epoch": 0.1259183622643389,
+      "grad_norm": 0.5090487003326416,
+      "learning_rate": 4.116332973932256e-05,
+      "loss": 1.6696,
+      "step": 316
+    },
+    {
+      "epoch": 0.12631683809428934,
+      "grad_norm": 0.5596434473991394,
+      "learning_rate": 4.058355680430337e-05,
+      "loss": 1.4942,
+      "step": 317
+    },
+    {
+      "epoch": 0.1267153139242398,
+      "grad_norm": 0.5060478448867798,
+      "learning_rate": 4.0006854599421926e-05,
+      "loss": 1.7277,
+      "step": 318
+    },
+    {
+      "epoch": 0.12711378975419022,
+      "grad_norm": 0.5043231248855591,
+      "learning_rate": 3.943325292950579e-05,
+      "loss": 1.653,
+      "step": 319
+    },
+    {
+      "epoch": 0.12751226558414067,
+      "grad_norm": 0.49735555052757263,
+      "learning_rate": 3.886278143914219e-05,
+      "loss": 1.6637,
+      "step": 320
+    },
+    {
+      "epoch": 0.1279107414140911,
+      "grad_norm": 0.5129334926605225,
+      "learning_rate": 3.829546961114607e-05,
+      "loss": 1.7365,
+      "step": 321
+    },
+    {
+      "epoch": 0.12830921724404154,
+      "grad_norm": 0.5209783911705017,
+      "learning_rate": 3.773134676503629e-05,
+      "loss": 1.7903,
+      "step": 322
+    },
+    {
+      "epoch": 0.128707693073992,
+      "grad_norm": 0.5349249243736267,
+      "learning_rate": 3.7170442055520415e-05,
+      "loss": 1.7308,
+      "step": 323
+    },
+    {
+      "epoch": 0.12910616890394241,
+      "grad_norm": 0.5156399011611938,
+      "learning_rate": 3.661278447098789e-05,
+      "loss": 1.5822,
+      "step": 324
+    },
+    {
+      "epoch": 0.12950464473389287,
+      "grad_norm": 0.5426967144012451,
+      "learning_rate": 3.605840283201195e-05,
+      "loss": 1.6398,
+      "step": 325
+    },
+    {
+      "epoch": 0.1299031205638433,
+      "grad_norm": 0.5003894567489624,
+      "learning_rate": 3.550732578986006e-05,
+      "loss": 1.745,
+      "step": 326
+    },
+    {
+      "epoch": 0.13030159639379374,
+      "grad_norm": 0.5355332493782043,
+      "learning_rate": 3.495958182501325e-05,
+      "loss": 1.6628,
+      "step": 327
+    },
+    {
+      "epoch": 0.1307000722237442,
+      "grad_norm": 0.5347525477409363,
+      "learning_rate": 3.441519924569408e-05,
+      "loss": 1.7257,
+      "step": 328
+    },
+    {
+      "epoch": 0.1310985480536946,
+      "grad_norm": 0.5936457514762878,
+      "learning_rate": 3.387420618640379e-05,
+      "loss": 1.7605,
+      "step": 329
+    },
+    {
+      "epoch": 0.13149702388364506,
+      "grad_norm": 0.4799719750881195,
+      "learning_rate": 3.3336630606468134e-05,
+      "loss": 1.5599,
+      "step": 330
+    },
+    {
+      "epoch": 0.1318954997135955,
+      "grad_norm": 0.534975528717041,
+      "learning_rate": 3.280250028859248e-05,
+      "loss": 1.741,
+      "step": 331
+    },
+    {
+      "epoch": 0.13229397554354594,
+      "grad_norm": 0.5082612037658691,
+      "learning_rate": 3.227184283742591e-05,
+      "loss": 1.6139,
+      "step": 332
+    },
+    {
+      "epoch": 0.1326924513734964,
+      "grad_norm": 0.5057691335678101,
+      "learning_rate": 3.174468567813461e-05,
+      "loss": 1.7092,
+      "step": 333
+    },
+    {
+      "epoch": 0.1330909272034468,
+      "grad_norm": 0.4517490863800049,
+      "learning_rate": 3.122105605498442e-05,
+      "loss": 1.5781,
+      "step": 334
+    },
+    {
+      "epoch": 0.13348940303339726,
+      "grad_norm": 0.4567318558692932,
+      "learning_rate": 3.070098102993302e-05,
+      "loss": 1.5871,
+      "step": 335
+    },
+    {
+      "epoch": 0.1338878788633477,
+      "grad_norm": 0.5688018202781677,
+      "learning_rate": 3.018448748123097e-05,
+      "loss": 1.8721,
+      "step": 336
+    },
+    {
+      "epoch": 0.13428635469329814,
+      "grad_norm": 0.5304664969444275,
+      "learning_rate": 2.9671602102032926e-05,
+      "loss": 1.7813,
+      "step": 337
+    },
+    {
+      "epoch": 0.13468483052324856,
+      "grad_norm": 0.4832111597061157,
+      "learning_rate": 2.9162351399017963e-05,
+      "loss": 1.6609,
+      "step": 338
+    },
+    {
+      "epoch": 0.135083306353199,
+      "grad_norm": 0.5615909695625305,
+      "learning_rate": 2.8656761691019673e-05,
+      "loss": 1.7713,
+      "step": 339
+    },
+    {
+      "epoch": 0.13548178218314946,
+      "grad_norm": 0.4976556599140167,
+      "learning_rate": 2.8154859107665987e-05,
+      "loss": 1.6278,
+      "step": 340
+    },
+    {
+      "epoch": 0.13588025801309989,
+      "grad_norm": 0.48554837703704834,
+      "learning_rate": 2.7656669588028762e-05,
+      "loss": 1.6038,
+      "step": 341
+    },
+    {
+      "epoch": 0.13627873384305034,
+      "grad_norm": 0.50529545545578,
+      "learning_rate": 2.7162218879283176e-05,
+      "loss": 1.515,
+      "step": 342
+    },
+    {
+      "epoch": 0.13667720967300076,
+      "grad_norm": 0.5264208912849426,
+      "learning_rate": 2.667153253537713e-05,
+      "loss": 1.7788,
+      "step": 343
+    },
+    {
+      "epoch": 0.1370756855029512,
+      "grad_norm": 0.5166341066360474,
+      "learning_rate": 2.618463591571052e-05,
+      "loss": 1.7624,
+      "step": 344
+    },
+    {
+      "epoch": 0.13747416133290166,
+      "grad_norm": 0.4670686721801758,
+      "learning_rate": 2.570155418382473e-05,
+      "loss": 1.5216,
+      "step": 345
+    },
+    {
+      "epoch": 0.13787263716285209,
+      "grad_norm": 0.5010607242584229,
+      "learning_rate": 2.5222312306101925e-05,
+      "loss": 1.6297,
+      "step": 346
+    },
+    {
+      "epoch": 0.13827111299280254,
+      "grad_norm": 0.44925105571746826,
+      "learning_rate": 2.474693505047504e-05,
+      "loss": 1.4302,
+      "step": 347
+    },
+    {
+      "epoch": 0.13866958882275296,
+      "grad_norm": 0.44039008021354675,
+      "learning_rate": 2.427544698514753e-05,
+      "loss": 1.4163,
+      "step": 348
+    },
+    {
+      "epoch": 0.1390680646527034,
+      "grad_norm": 0.5106916427612305,
+      "learning_rate": 2.3807872477323733e-05,
+      "loss": 1.5566,
+      "step": 349
+    },
+    {
+      "epoch": 0.13946654048265386,
+      "grad_norm": 0.5118552446365356,
+      "learning_rate": 2.334423569194948e-05,
+      "loss": 1.6767,
+      "step": 350
+    },
+    {
+      "epoch": 0.13986501631260428,
+      "grad_norm": 0.5088701248168945,
+      "learning_rate": 2.288456059046331e-05,
+      "loss": 1.6389,
+      "step": 351
+    },
+    {
+      "epoch": 0.14026349214255474,
+      "grad_norm": 0.5484685301780701,
+      "learning_rate": 2.242887092955801e-05,
+      "loss": 1.6978,
+      "step": 352
+    },
+    {
+      "epoch": 0.14066196797250516,
+      "grad_norm": 0.5057936906814575,
+      "learning_rate": 2.1977190259952883e-05,
+      "loss": 1.7238,
+      "step": 353
+    },
+    {
+      "epoch": 0.1410604438024556,
+      "grad_norm": 0.4977273643016815,
+      "learning_rate": 2.1529541925176555e-05,
+      "loss": 1.589,
+      "step": 354
+    },
+    {
+      "epoch": 0.14145891963240606,
+      "grad_norm": 0.556425929069519,
+      "learning_rate": 2.1085949060360654e-05,
+      "loss": 1.784,
+      "step": 355
+    },
+    {
+      "epoch": 0.14185739546235648,
+      "grad_norm": 0.48254185914993286,
+      "learning_rate": 2.064643459104405e-05,
+      "loss": 1.5242,
+      "step": 356
+    },
+    {
+      "epoch": 0.14225587129230693,
+      "grad_norm": 0.5121050477027893,
+      "learning_rate": 2.0211021231988102e-05,
+      "loss": 1.6523,
+      "step": 357
+    },
+    {
+      "epoch": 0.14265434712225736,
+      "grad_norm": 0.5412747263908386,
+      "learning_rate": 1.977973148600266e-05,
+      "loss": 1.7816,
+      "step": 358
+    },
+    {
+      "epoch": 0.1430528229522078,
+      "grad_norm": 0.5190417170524597,
+      "learning_rate": 1.935258764278314e-05,
+      "loss": 1.7215,
+      "step": 359
+    },
+    {
+      "epoch": 0.14345129878215823,
+      "grad_norm": 0.5047377347946167,
+      "learning_rate": 1.8929611777758526e-05,
+      "loss": 1.6061,
+      "step": 360
+    },
+    {
+      "epoch": 0.14384977461210868,
+      "grad_norm": 0.5179762840270996,
+      "learning_rate": 1.851082575095051e-05,
+      "loss": 1.6054,
+      "step": 361
+    },
+    {
+      "epoch": 0.14424825044205913,
+      "grad_norm": 0.533320963382721,
+      "learning_rate": 1.8096251205843684e-05,
+      "loss": 1.6372,
+      "step": 362
+    },
+    {
+      "epoch": 0.14464672627200956,
+      "grad_norm": 0.4938521981239319,
+      "learning_rate": 1.7685909568267033e-05,
+      "loss": 1.7578,
+      "step": 363
+    },
+    {
+      "epoch": 0.14504520210196,
+      "grad_norm": 0.5236971974372864,
+      "learning_rate": 1.7279822045286576e-05,
+      "loss": 1.7821,
+      "step": 364
+    },
+    {
+      "epoch": 0.14544367793191043,
+      "grad_norm": 0.548584520816803,
+      "learning_rate": 1.6878009624109313e-05,
+      "loss": 1.7914,
+      "step": 365
+    },
+    {
+      "epoch": 0.14584215376186088,
+      "grad_norm": 0.5472350716590881,
+      "learning_rate": 1.648049307099874e-05,
+      "loss": 1.5642,
+      "step": 366
+    },
+    {
+      "epoch": 0.14624062959181133,
+      "grad_norm": 0.5605772733688354,
+      "learning_rate": 1.6087292930201394e-05,
+      "loss": 1.7474,
+      "step": 367
+    },
+    {
+      "epoch": 0.14663910542176176,
+      "grad_norm": 0.47889217734336853,
+      "learning_rate": 1.569842952288527e-05,
+      "loss": 1.5833,
+      "step": 368
+    },
+    {
+      "epoch": 0.1470375812517122,
+      "grad_norm": 0.5094882845878601,
+      "learning_rate": 1.5313922946089486e-05,
+      "loss": 1.702,
+      "step": 369
+    },
+    {
+      "epoch": 0.14743605708166263,
+      "grad_norm": 0.48179590702056885,
+      "learning_rate": 1.4933793071685732e-05,
+      "loss": 1.5017,
+      "step": 370
+    },
+    {
+      "epoch": 0.14783453291161308,
+      "grad_norm": 0.5010417699813843,
+      "learning_rate": 1.4558059545351143e-05,
+      "loss": 1.5445,
+      "step": 371
+    },
+    {
+      "epoch": 0.14823300874156353,
+      "grad_norm": 0.43870919942855835,
+      "learning_rate": 1.4186741785553115e-05,
+      "loss": 1.5428,
+      "step": 372
+    },
+    {
+      "epoch": 0.14863148457151396,
+      "grad_norm": 0.4662686288356781,
+      "learning_rate": 1.3819858982545598e-05,
+      "loss": 1.4941,
+      "step": 373
+    },
+    {
+      "epoch": 0.1490299604014644,
+      "grad_norm": 0.5589818358421326,
+      "learning_rate": 1.3457430097377421e-05,
+      "loss": 1.7253,
+      "step": 374
+    },
+    {
+      "epoch": 0.14942843623141483,
+      "grad_norm": 0.5113766193389893,
+      "learning_rate": 1.3099473860912326e-05,
+      "loss": 1.5904,
+      "step": 375
+    },
+    {
+      "epoch": 0.14982691206136528,
+      "grad_norm": 0.5116039514541626,
+      "learning_rate": 1.2746008772860884e-05,
+      "loss": 1.6287,
+      "step": 376
+    },
+    {
+      "epoch": 0.1502253878913157,
+      "grad_norm": 0.5835950970649719,
+      "learning_rate": 1.2397053100824463e-05,
+      "loss": 1.4924,
+      "step": 377
+    },
+    {
+      "epoch": 0.15062386372126615,
+      "grad_norm": 0.5390010476112366,
+      "learning_rate": 1.2052624879351104e-05,
+      "loss": 1.5715,
+      "step": 378
+    },
+    {
+      "epoch": 0.1510223395512166,
+      "grad_norm": 0.5362573266029358,
+      "learning_rate": 1.1712741909003444e-05,
+      "loss": 1.7802,
+      "step": 379
+    },
+    {
+      "epoch": 0.15142081538116703,
+      "grad_norm": 0.49706023931503296,
+      "learning_rate": 1.1377421755438832e-05,
+      "loss": 1.7128,
+      "step": 380
+    },
+    {
+      "epoch": 0.15181929121111748,
+      "grad_norm": 0.5436707735061646,
+      "learning_rate": 1.1046681748501408e-05,
+      "loss": 1.7381,
+      "step": 381
+    },
+    {
+      "epoch": 0.1522177670410679,
+      "grad_norm": 0.51026850938797,
+      "learning_rate": 1.0720538981326556e-05,
+      "loss": 1.6813,
+      "step": 382
+    },
+    {
+      "epoch": 0.15261624287101835,
+      "grad_norm": 0.5093562006950378,
+      "learning_rate": 1.0399010309457457e-05,
+      "loss": 1.6643,
+      "step": 383
+    },
+    {
+      "epoch": 0.1530147187009688,
+      "grad_norm": 0.5157011151313782,
+      "learning_rate": 1.0082112349974016e-05,
+      "loss": 1.7036,
+      "step": 384
+    },
+    {
+      "epoch": 0.15341319453091923,
+      "grad_norm": 0.46138855814933777,
+      "learning_rate": 9.76986148063398e-06,
+      "loss": 1.3899,
+      "step": 385
+    },
+    {
+      "epoch": 0.15381167036086968,
+      "grad_norm": 0.48433151841163635,
+      "learning_rate": 9.462273839026624e-06,
+      "loss": 1.5909,
+      "step": 386
+    },
+    {
+      "epoch": 0.1542101461908201,
+      "grad_norm": 0.5252178907394409,
+      "learning_rate": 9.159365321738655e-06,
+      "loss": 1.7776,
+      "step": 387
+    },
+    {
+      "epoch": 0.15460862202077055,
+      "grad_norm": 0.5377585291862488,
+      "learning_rate": 8.861151583532656e-06,
+      "loss": 1.7147,
+      "step": 388
+    },
+    {
+      "epoch": 0.155007097850721,
+      "grad_norm": 0.5604181885719299,
+      "learning_rate": 8.56764803653809e-06,
+      "loss": 1.8522,
+      "step": 389
+    },
+    {
+      "epoch": 0.15540557368067143,
+      "grad_norm": 0.5364322662353516,
+      "learning_rate": 8.278869849454718e-06,
+      "loss": 1.6899,
+      "step": 390
+    },
+    {
+      "epoch": 0.15580404951062188,
+      "grad_norm": 0.4611172676086426,
+      "learning_rate": 7.994831946768622e-06,
+      "loss": 1.5186,
+      "step": 391
+    },
+    {
+      "epoch": 0.1562025253405723,
+      "grad_norm": 0.5083454251289368,
+      "learning_rate": 7.715549007981027e-06,
+      "loss": 1.6604,
+      "step": 392
+    },
+    {
+      "epoch": 0.15660100117052275,
+      "grad_norm": 0.5075681209564209,
+      "learning_rate": 7.441035466849489e-06,
+      "loss": 1.5289,
+      "step": 393
+    },
+    {
+      "epoch": 0.1569994770004732,
+      "grad_norm": 0.5153861045837402,
+      "learning_rate": 7.171305510642023e-06,
+      "loss": 1.6241,
+      "step": 394
+    },
+    {
+      "epoch": 0.15739795283042363,
+      "grad_norm": 0.5271784663200378,
+      "learning_rate": 6.906373079403849e-06,
+      "loss": 1.7079,
+      "step": 395
+    },
+    {
+      "epoch": 0.15779642866037408,
+      "grad_norm": 0.49165791273117065,
+      "learning_rate": 6.646251865236997e-06,
+      "loss": 1.7268,
+      "step": 396
+    },
+    {
+      "epoch": 0.1581949044903245,
+      "grad_norm": 0.5324728488922119,
+      "learning_rate": 6.390955311592617e-06,
+      "loss": 1.6794,
+      "step": 397
+    },
+    {
+      "epoch": 0.15859338032027495,
+      "grad_norm": 0.5144858956336975,
+      "learning_rate": 6.140496612576241e-06,
+      "loss": 1.7149,
+      "step": 398
+    },
+    {
+      "epoch": 0.15899185615022537,
+      "grad_norm": 0.5114040374755859,
+      "learning_rate": 5.8948887122658335e-06,
+      "loss": 1.561,
+      "step": 399
+    },
+    {
+      "epoch": 0.15939033198017583,
+      "grad_norm": 0.5673094391822815,
+      "learning_rate": 5.65414430404293e-06,
+      "loss": 1.873,
+      "step": 400
+    },
+    {
+      "epoch": 0.15939033198017583,
+      "eval_loss": 1.6927062273025513,
+      "eval_runtime": 603.1583,
+      "eval_samples_per_second": 14.016,
+      "eval_steps_per_second": 3.505,
+      "step": 400
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 1.1184619093229568e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null