Training in progress, step 500, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +714 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5588eecd5f769c52f3cac67754e46118c439ef12bf4d2f87a1b696bd31da1343
 size 295488936

 version https://git-lfs.github.com/spec/v1
+oid sha256:cc28f05c0d1a72cded4da44d22c5e1fd9c3915082481859ab9ec771b64423971
 size 295488936

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a8f7e8822cc8390b87737761643cf4e0181e15a8a99cd2d9b1b21a2898e6b42c
 size 150487412

 version https://git-lfs.github.com/spec/v1
+oid sha256:5baf7e3c77cdbe23608dd8b54540056a02d99afe2fd7e5d577c4f448b6ddbe00
 size 150487412

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:68d3930dbe611394fff50a68e382c9b5d534f18b66320a76dedcdfda36d41cdb
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:ab7c329c15f2bf24d1a6f79ee2f9a91d61a52ff5abf0aa0411faa38c088aae79
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:505f9225762b105f8ca5168f44d99b2f8467174f4ade85f1cc95f684fbd828e0
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:fe1d153de177b356f9e3a70d6e4ec979560b0c300994e71ca4cb89afc74c5b3a
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.8149161338806152,
-  "best_model_checkpoint": "miner_id_24/checkpoint-400",
-  "epoch": 0.02359882005899705,
   "eval_steps": 100,
-  "global_step": 400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2847,6 +2847,714 @@
       "eval_samples_per_second": 34.265,
       "eval_steps_per_second": 8.566,
       "step": 400
     }
   ],
   "logging_steps": 1,
@@ -2870,12 +3578,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.08857892274176e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.8056918382644653,
+  "best_model_checkpoint": "miner_id_24/checkpoint-500",
+  "epoch": 0.029498525073746312,
   "eval_steps": 100,
+  "global_step": 500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 34.265,
       "eval_steps_per_second": 8.566,
       "step": 400
+    },
+    {
+      "epoch": 0.023657817109144542,
+      "grad_norm": 0.9209845662117004,
+      "learning_rate": 1.013396731136465e-05,
+      "loss": 2.1722,
+      "step": 401
+    },
+    {
+      "epoch": 0.023716814159292034,
+      "grad_norm": 1.1392157077789307,
+      "learning_rate": 9.937309365446973e-06,
+      "loss": 2.1656,
+      "step": 402
+    },
+    {
+      "epoch": 0.02377581120943953,
+      "grad_norm": 1.0543746948242188,
+      "learning_rate": 9.742367571857091e-06,
+      "loss": 2.0298,
+      "step": 403
+    },
+    {
+      "epoch": 0.02383480825958702,
+      "grad_norm": 1.2574312686920166,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 2.3652,
+      "step": 404
+    },
+    {
+      "epoch": 0.023893805309734513,
+      "grad_norm": 1.1854058504104614,
+      "learning_rate": 9.357665770419244e-06,
+      "loss": 2.0376,
+      "step": 405
+    },
+    {
+      "epoch": 0.023952802359882005,
+      "grad_norm": 1.2661393880844116,
+      "learning_rate": 9.167922241916055e-06,
+      "loss": 2.018,
+      "step": 406
+    },
+    {
+      "epoch": 0.024011799410029497,
+      "grad_norm": 1.4815001487731934,
+      "learning_rate": 8.97992782372432e-06,
+      "loss": 1.9833,
+      "step": 407
+    },
+    {
+      "epoch": 0.024070796460176992,
+      "grad_norm": 1.2577320337295532,
+      "learning_rate": 8.793690568899216e-06,
+      "loss": 1.8977,
+      "step": 408
+    },
+    {
+      "epoch": 0.024129793510324484,
+      "grad_norm": 1.3636155128479004,
+      "learning_rate": 8.609218455224893e-06,
+      "loss": 1.9632,
+      "step": 409
+    },
+    {
+      "epoch": 0.024188790560471976,
+      "grad_norm": 1.4056997299194336,
+      "learning_rate": 8.426519384872733e-06,
+      "loss": 2.0869,
+      "step": 410
+    },
+    {
+      "epoch": 0.024247787610619468,
+      "grad_norm": 1.3155086040496826,
+      "learning_rate": 8.245601184062852e-06,
+      "loss": 1.798,
+      "step": 411
+    },
+    {
+      "epoch": 0.02430678466076696,
+      "grad_norm": 1.3223779201507568,
+      "learning_rate": 8.066471602728803e-06,
+      "loss": 1.9267,
+      "step": 412
+    },
+    {
+      "epoch": 0.024365781710914455,
+      "grad_norm": 1.2776771783828735,
+      "learning_rate": 7.889138314185678e-06,
+      "loss": 2.007,
+      "step": 413
+    },
+    {
+      "epoch": 0.024424778761061947,
+      "grad_norm": 1.4912304878234863,
+      "learning_rate": 7.71360891480134e-06,
+      "loss": 1.8495,
+      "step": 414
+    },
+    {
+      "epoch": 0.02448377581120944,
+      "grad_norm": 1.433796763420105,
+      "learning_rate": 7.539890923671062e-06,
+      "loss": 1.7718,
+      "step": 415
+    },
+    {
+      "epoch": 0.02454277286135693,
+      "grad_norm": 1.4976009130477905,
+      "learning_rate": 7.367991782295391e-06,
+      "loss": 1.9716,
+      "step": 416
+    },
+    {
+      "epoch": 0.024601769911504423,
+      "grad_norm": 1.6325736045837402,
+      "learning_rate": 7.197918854261432e-06,
+      "loss": 1.8368,
+      "step": 417
+    },
+    {
+      "epoch": 0.02466076696165192,
+      "grad_norm": 1.4927877187728882,
+      "learning_rate": 7.029679424927365e-06,
+      "loss": 1.9722,
+      "step": 418
+    },
+    {
+      "epoch": 0.02471976401179941,
+      "grad_norm": 1.4721519947052002,
+      "learning_rate": 6.863280701110408e-06,
+      "loss": 1.8419,
+      "step": 419
+    },
+    {
+      "epoch": 0.024778761061946902,
+      "grad_norm": 1.5104364156723022,
+      "learning_rate": 6.698729810778065e-06,
+      "loss": 1.74,
+      "step": 420
+    },
+    {
+      "epoch": 0.024837758112094394,
+      "grad_norm": 1.548914909362793,
+      "learning_rate": 6.536033802742813e-06,
+      "loss": 1.9283,
+      "step": 421
+    },
+    {
+      "epoch": 0.024896755162241886,
+      "grad_norm": 1.8828091621398926,
+      "learning_rate": 6.375199646360142e-06,
+      "loss": 1.5957,
+      "step": 422
+    },
+    {
+      "epoch": 0.02495575221238938,
+      "grad_norm": 1.548220157623291,
+      "learning_rate": 6.216234231230012e-06,
+      "loss": 1.8291,
+      "step": 423
+    },
+    {
+      "epoch": 0.025014749262536874,
+      "grad_norm": 1.3501148223876953,
+      "learning_rate": 6.059144366901736e-06,
+      "loss": 1.4787,
+      "step": 424
+    },
+    {
+      "epoch": 0.025073746312684365,
+      "grad_norm": 1.7426213026046753,
+      "learning_rate": 5.903936782582253e-06,
+      "loss": 1.9869,
+      "step": 425
+    },
+    {
+      "epoch": 0.025132743362831857,
+      "grad_norm": 1.5662567615509033,
+      "learning_rate": 5.750618126847912e-06,
+      "loss": 1.8013,
+      "step": 426
+    },
+    {
+      "epoch": 0.025191740412979353,
+      "grad_norm": 2.0955779552459717,
+      "learning_rate": 5.599194967359639e-06,
+      "loss": 1.7219,
+      "step": 427
+    },
+    {
+      "epoch": 0.025250737463126845,
+      "grad_norm": 1.7120258808135986,
+      "learning_rate": 5.449673790581611e-06,
+      "loss": 1.6197,
+      "step": 428
+    },
+    {
+      "epoch": 0.025309734513274337,
+      "grad_norm": 1.6571422815322876,
+      "learning_rate": 5.302061001503394e-06,
+      "loss": 1.6886,
+      "step": 429
+    },
+    {
+      "epoch": 0.02536873156342183,
+      "grad_norm": 1.5991970300674438,
+      "learning_rate": 5.156362923365588e-06,
+      "loss": 1.7477,
+      "step": 430
+    },
+    {
+      "epoch": 0.02542772861356932,
+      "grad_norm": 1.7963250875473022,
+      "learning_rate": 5.012585797388936e-06,
+      "loss": 1.695,
+      "step": 431
+    },
+    {
+      "epoch": 0.025486725663716816,
+      "grad_norm": 1.865119457244873,
+      "learning_rate": 4.87073578250698e-06,
+      "loss": 1.9642,
+      "step": 432
+    },
+    {
+      "epoch": 0.025545722713864308,
+      "grad_norm": 1.6518311500549316,
+      "learning_rate": 4.730818955102234e-06,
+      "loss": 1.6379,
+      "step": 433
+    },
+    {
+      "epoch": 0.0256047197640118,
+      "grad_norm": 1.5884329080581665,
+      "learning_rate": 4.592841308745932e-06,
+      "loss": 1.6334,
+      "step": 434
+    },
+    {
+      "epoch": 0.02566371681415929,
+      "grad_norm": 1.5620722770690918,
+      "learning_rate": 4.456808753941205e-06,
+      "loss": 1.6801,
+      "step": 435
+    },
+    {
+      "epoch": 0.025722713864306784,
+      "grad_norm": 1.9980131387710571,
+      "learning_rate": 4.322727117869951e-06,
+      "loss": 1.7142,
+      "step": 436
+    },
+    {
+      "epoch": 0.02578171091445428,
+      "grad_norm": 1.7783011198043823,
+      "learning_rate": 4.190602144143207e-06,
+      "loss": 1.676,
+      "step": 437
+    },
+    {
+      "epoch": 0.02584070796460177,
+      "grad_norm": 1.5452146530151367,
+      "learning_rate": 4.06043949255509e-06,
+      "loss": 1.5975,
+      "step": 438
+    },
+    {
+      "epoch": 0.025899705014749263,
+      "grad_norm": 9.242511749267578,
+      "learning_rate": 3.932244738840379e-06,
+      "loss": 1.7141,
+      "step": 439
+    },
+    {
+      "epoch": 0.025958702064896755,
+      "grad_norm": 1.899537444114685,
+      "learning_rate": 3.8060233744356633e-06,
+      "loss": 1.442,
+      "step": 440
+    },
+    {
+      "epoch": 0.026017699115044247,
+      "grad_norm": 2.1592230796813965,
+      "learning_rate": 3.681780806244095e-06,
+      "loss": 1.678,
+      "step": 441
+    },
+    {
+      "epoch": 0.026076696165191742,
+      "grad_norm": 2.1297240257263184,
+      "learning_rate": 3.5595223564037884e-06,
+      "loss": 1.7924,
+      "step": 442
+    },
+    {
+      "epoch": 0.026135693215339234,
+      "grad_norm": 1.8116285800933838,
+      "learning_rate": 3.4392532620598216e-06,
+      "loss": 1.9063,
+      "step": 443
+    },
+    {
+      "epoch": 0.026194690265486726,
+      "grad_norm": 3.00453782081604,
+      "learning_rate": 3.3209786751399187e-06,
+      "loss": 1.7412,
+      "step": 444
+    },
+    {
+      "epoch": 0.026253687315634218,
+      "grad_norm": 1.8771898746490479,
+      "learning_rate": 3.2047036621337236e-06,
+      "loss": 1.6172,
+      "step": 445
+    },
+    {
+      "epoch": 0.02631268436578171,
+      "grad_norm": 2.164557695388794,
+      "learning_rate": 3.0904332038757977e-06,
+      "loss": 1.6289,
+      "step": 446
+    },
+    {
+      "epoch": 0.026371681415929205,
+      "grad_norm": 2.2664170265197754,
+      "learning_rate": 2.978172195332263e-06,
+      "loss": 1.5912,
+      "step": 447
+    },
+    {
+      "epoch": 0.026430678466076697,
+      "grad_norm": 2.546576976776123,
+      "learning_rate": 2.8679254453910785e-06,
+      "loss": 1.4073,
+      "step": 448
+    },
+    {
+      "epoch": 0.02648967551622419,
+      "grad_norm": 2.776029586791992,
+      "learning_rate": 2.759697676656098e-06,
+      "loss": 1.9999,
+      "step": 449
+    },
+    {
+      "epoch": 0.02654867256637168,
+      "grad_norm": 3.360868215560913,
+      "learning_rate": 2.653493525244721e-06,
+      "loss": 1.877,
+      "step": 450
+    },
+    {
+      "epoch": 0.026607669616519173,
+      "grad_norm": 0.9646655917167664,
+      "learning_rate": 2.549317540589308e-06,
+      "loss": 2.3042,
+      "step": 451
+    },
+    {
+      "epoch": 0.02666666666666667,
+      "grad_norm": 1.0232704877853394,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 2.0728,
+      "step": 452
+    },
+    {
+      "epoch": 0.02672566371681416,
+      "grad_norm": 1.1683145761489868,
+      "learning_rate": 2.3470678346851518e-06,
+      "loss": 2.2397,
+      "step": 453
+    },
+    {
+      "epoch": 0.026784660766961652,
+      "grad_norm": 1.223487377166748,
+      "learning_rate": 2.2490027771406687e-06,
+      "loss": 2.4104,
+      "step": 454
+    },
+    {
+      "epoch": 0.026843657817109144,
+      "grad_norm": 1.2041693925857544,
+      "learning_rate": 2.152983213389559e-06,
+      "loss": 2.1414,
+      "step": 455
+    },
+    {
+      "epoch": 0.026902654867256636,
+      "grad_norm": 1.2345439195632935,
+      "learning_rate": 2.0590132565903476e-06,
+      "loss": 2.0854,
+      "step": 456
+    },
+    {
+      "epoch": 0.02696165191740413,
+      "grad_norm": 1.2510029077529907,
+      "learning_rate": 1.9670969321032407e-06,
+      "loss": 2.0812,
+      "step": 457
+    },
+    {
+      "epoch": 0.027020648967551623,
+      "grad_norm": 1.3654087781906128,
+      "learning_rate": 1.8772381773176417e-06,
+      "loss": 2.0686,
+      "step": 458
+    },
+    {
+      "epoch": 0.027079646017699115,
+      "grad_norm": 1.1924909353256226,
+      "learning_rate": 1.7894408414835362e-06,
+      "loss": 1.8719,
+      "step": 459
+    },
+    {
+      "epoch": 0.027138643067846607,
+      "grad_norm": 1.2497165203094482,
+      "learning_rate": 1.70370868554659e-06,
+      "loss": 1.974,
+      "step": 460
+    },
+    {
+      "epoch": 0.0271976401179941,
+      "grad_norm": 1.3291590213775635,
+      "learning_rate": 1.620045381987012e-06,
+      "loss": 2.0367,
+      "step": 461
+    },
+    {
+      "epoch": 0.027256637168141595,
+      "grad_norm": 1.3052842617034912,
+      "learning_rate": 1.5384545146622852e-06,
+      "loss": 1.7768,
+      "step": 462
+    },
+    {
+      "epoch": 0.027315634218289087,
+      "grad_norm": 1.3033207654953003,
+      "learning_rate": 1.4589395786535953e-06,
+      "loss": 1.5886,
+      "step": 463
+    },
+    {
+      "epoch": 0.02737463126843658,
+      "grad_norm": 1.3222646713256836,
+      "learning_rate": 1.3815039801161721e-06,
+      "loss": 1.8032,
+      "step": 464
+    },
+    {
+      "epoch": 0.02743362831858407,
+      "grad_norm": 1.3655725717544556,
+      "learning_rate": 1.3061510361333185e-06,
+      "loss": 1.743,
+      "step": 465
+    },
+    {
+      "epoch": 0.027492625368731562,
+      "grad_norm": 1.355893850326538,
+      "learning_rate": 1.232883974574367e-06,
+      "loss": 1.8282,
+      "step": 466
+    },
+    {
+      "epoch": 0.027551622418879058,
+      "grad_norm": 1.4596573114395142,
+      "learning_rate": 1.1617059339563807e-06,
+      "loss": 1.8079,
+      "step": 467
+    },
+    {
+      "epoch": 0.02761061946902655,
+      "grad_norm": 1.344362497329712,
+      "learning_rate": 1.0926199633097157e-06,
+      "loss": 1.8753,
+      "step": 468
+    },
+    {
+      "epoch": 0.02766961651917404,
+      "grad_norm": 1.6230723857879639,
+      "learning_rate": 1.0256290220474307e-06,
+      "loss": 1.8656,
+      "step": 469
+    },
+    {
+      "epoch": 0.027728613569321534,
+      "grad_norm": 1.4650503396987915,
+      "learning_rate": 9.607359798384785e-07,
+      "loss": 1.6753,
+      "step": 470
+    },
+    {
+      "epoch": 0.027787610619469025,
+      "grad_norm": 11.219602584838867,
+      "learning_rate": 8.979436164848088e-07,
+      "loss": 1.7268,
+      "step": 471
+    },
+    {
+      "epoch": 0.02784660766961652,
+      "grad_norm": 1.4899492263793945,
+      "learning_rate": 8.372546218022747e-07,
+      "loss": 1.9279,
+      "step": 472
+    },
+    {
+      "epoch": 0.027905604719764013,
+      "grad_norm": 1.4614150524139404,
+      "learning_rate": 7.786715955054203e-07,
+      "loss": 1.6597,
+      "step": 473
+    },
+    {
+      "epoch": 0.027964601769911505,
+      "grad_norm": 1.3974477052688599,
+      "learning_rate": 7.221970470961125e-07,
+      "loss": 1.5856,
+      "step": 474
+    },
+    {
+      "epoch": 0.028023598820058997,
+      "grad_norm": 1.4127671718597412,
+      "learning_rate": 6.678333957560512e-07,
+      "loss": 1.7202,
+      "step": 475
+    },
+    {
+      "epoch": 0.02808259587020649,
+      "grad_norm": 1.4668078422546387,
+      "learning_rate": 6.15582970243117e-07,
+      "loss": 1.8222,
+      "step": 476
+    },
+    {
+      "epoch": 0.028141592920353984,
+      "grad_norm": 1.4601526260375977,
+      "learning_rate": 5.654480087916303e-07,
+      "loss": 1.7513,
+      "step": 477
+    },
+    {
+      "epoch": 0.028200589970501476,
+      "grad_norm": 1.5982226133346558,
+      "learning_rate": 5.174306590164879e-07,
+      "loss": 1.7809,
+      "step": 478
+    },
+    {
+      "epoch": 0.028259587020648968,
+      "grad_norm": 1.6426746845245361,
+      "learning_rate": 4.715329778211375e-07,
+      "loss": 1.6782,
+      "step": 479
+    },
+    {
+      "epoch": 0.02831858407079646,
+      "grad_norm": 1.628758430480957,
+      "learning_rate": 4.277569313094809e-07,
+      "loss": 1.8743,
+      "step": 480
+    },
+    {
+      "epoch": 0.02837758112094395,
+      "grad_norm": 3.4329941272735596,
+      "learning_rate": 3.8610439470164737e-07,
+      "loss": 1.9262,
+      "step": 481
+    },
+    {
+      "epoch": 0.028436578171091447,
+      "grad_norm": 2.058459758758545,
+      "learning_rate": 3.465771522536854e-07,
+      "loss": 1.723,
+      "step": 482
+    },
+    {
+      "epoch": 0.02849557522123894,
+      "grad_norm": 1.820152759552002,
+      "learning_rate": 3.09176897181096e-07,
+      "loss": 1.9772,
+      "step": 483
+    },
+    {
+      "epoch": 0.02855457227138643,
+      "grad_norm": 1.711161494255066,
+      "learning_rate": 2.7390523158633554e-07,
+      "loss": 1.4173,
+      "step": 484
+    },
+    {
+      "epoch": 0.028613569321533923,
+      "grad_norm": 1.9426226615905762,
+      "learning_rate": 2.407636663901591e-07,
+      "loss": 1.8578,
+      "step": 485
+    },
+    {
+      "epoch": 0.028672566371681415,
+      "grad_norm": 1.735135555267334,
+      "learning_rate": 2.0975362126691712e-07,
+      "loss": 1.6647,
+      "step": 486
+    },
+    {
+      "epoch": 0.02873156342182891,
+      "grad_norm": 1.8284307718276978,
+      "learning_rate": 1.8087642458373134e-07,
+      "loss": 1.4557,
+      "step": 487
+    },
+    {
+      "epoch": 0.028790560471976402,
+      "grad_norm": 1.755139946937561,
+      "learning_rate": 1.5413331334360182e-07,
+      "loss": 1.5418,
+      "step": 488
+    },
+    {
+      "epoch": 0.028849557522123894,
+      "grad_norm": 1.9075005054473877,
+      "learning_rate": 1.2952543313240472e-07,
+      "loss": 1.6149,
+      "step": 489
+    },
+    {
+      "epoch": 0.028908554572271386,
+      "grad_norm": 1.7076497077941895,
+      "learning_rate": 1.0705383806982606e-07,
+      "loss": 1.7904,
+      "step": 490
+    },
+    {
+      "epoch": 0.028967551622418878,
+      "grad_norm": 1.776504397392273,
+      "learning_rate": 8.671949076420882e-08,
+      "loss": 1.6125,
+      "step": 491
+    },
+    {
+      "epoch": 0.029026548672566373,
+      "grad_norm": 1.845313549041748,
+      "learning_rate": 6.852326227130834e-08,
+      "loss": 1.7453,
+      "step": 492
+    },
+    {
+      "epoch": 0.029085545722713865,
+      "grad_norm": 2.104560613632202,
+      "learning_rate": 5.246593205699424e-08,
+      "loss": 1.831,
+      "step": 493
+    },
+    {
+      "epoch": 0.029144542772861357,
+      "grad_norm": 2.0944643020629883,
+      "learning_rate": 3.8548187963854956e-08,
+      "loss": 1.8021,
+      "step": 494
+    },
+    {
+      "epoch": 0.02920353982300885,
+      "grad_norm": 2.2614376544952393,
+      "learning_rate": 2.6770626181715773e-08,
+      "loss": 1.726,
+      "step": 495
+    },
+    {
+      "epoch": 0.02926253687315634,
+      "grad_norm": 2.8416197299957275,
+      "learning_rate": 1.7133751222137007e-08,
+      "loss": 1.861,
+      "step": 496
+    },
+    {
+      "epoch": 0.029321533923303836,
+      "grad_norm": 2.1933250427246094,
+      "learning_rate": 9.637975896759077e-09,
+      "loss": 1.7771,
+      "step": 497
+    },
+    {
+      "epoch": 0.02938053097345133,
+      "grad_norm": 2.6819872856140137,
+      "learning_rate": 4.2836212996499865e-09,
+      "loss": 1.9063,
+      "step": 498
+    },
+    {
+      "epoch": 0.02943952802359882,
+      "grad_norm": 2.429462194442749,
+      "learning_rate": 1.0709167935385455e-09,
+      "loss": 1.7014,
+      "step": 499
+    },
+    {
+      "epoch": 0.029498525073746312,
+      "grad_norm": 4.0091729164123535,
+      "learning_rate": 0.0,
+      "loss": 1.9165,
+      "step": 500
+    },
+    {
+      "epoch": 0.029498525073746312,
+      "eval_loss": 1.8056918382644653,
+      "eval_runtime": 833.0398,
+      "eval_samples_per_second": 34.27,
+      "eval_steps_per_second": 8.567,
+      "step": 500
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.3607236534272e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null