Training in progress, step 500, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +714 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9501ca0dc62f26a9e50b24460176b770e108566a2025f4ff8194bef05f53c830
 size 1101095848

 version https://git-lfs.github.com/spec/v1
+oid sha256:4025038b5fc072b67a253532a93f2c43ab40670d4217061e4a6daa59e30b3ccc
 size 1101095848

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b4db5723c2a10e0cf144300d8c0335c3fa0a77daa08ec346348b56bbb479ee55
 size 559894868

 version https://git-lfs.github.com/spec/v1
+oid sha256:afd0aa37c4e4e0262f4af92c8504635dc3e4072f65cf732e1505468f47f8918d
 size 559894868

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1b88f51ea4aca4967e21304a13c704d24a2fa7c4771693bc7f684dcd43cda380
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:ca098dbeb5a213030d4a95169c8516a56537cd48b941391407ec14b935852cb6
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:505f9225762b105f8ca5168f44d99b2f8467174f4ade85f1cc95f684fbd828e0
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:fe1d153de177b356f9e3a70d6e4ec979560b0c300994e71ca4cb89afc74c5b3a
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.8380851745605469,
-  "best_model_checkpoint": "miner_id_24/checkpoint-400",
-  "epoch": 0.5772005772005772,
   "eval_steps": 100,
-  "global_step": 400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2847,6 +2847,714 @@
       "eval_samples_per_second": 7.437,
       "eval_steps_per_second": 1.861,
       "step": 400
     }
   ],
   "logging_steps": 1,
@@ -2870,12 +3578,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.1261877189429166e+18,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.8335492610931396,
+  "best_model_checkpoint": "miner_id_24/checkpoint-500",
+  "epoch": 0.7215007215007215,
   "eval_steps": 100,
+  "global_step": 500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 7.437,
       "eval_steps_per_second": 1.861,
       "step": 400
+    },
+    {
+      "epoch": 0.5786435786435786,
+      "grad_norm": 0.22474224865436554,
+      "learning_rate": 1.013396731136465e-05,
+      "loss": 1.7378,
+      "step": 401
+    },
+    {
+      "epoch": 0.5800865800865801,
+      "grad_norm": 0.20946919918060303,
+      "learning_rate": 9.937309365446973e-06,
+      "loss": 1.8411,
+      "step": 402
+    },
+    {
+      "epoch": 0.5815295815295816,
+      "grad_norm": 0.203902006149292,
+      "learning_rate": 9.742367571857091e-06,
+      "loss": 1.6986,
+      "step": 403
+    },
+    {
+      "epoch": 0.5829725829725829,
+      "grad_norm": 0.2520330548286438,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 1.7644,
+      "step": 404
+    },
+    {
+      "epoch": 0.5844155844155844,
+      "grad_norm": 0.23222126066684723,
+      "learning_rate": 9.357665770419244e-06,
+      "loss": 1.8559,
+      "step": 405
+    },
+    {
+      "epoch": 0.5858585858585859,
+      "grad_norm": 0.25231245160102844,
+      "learning_rate": 9.167922241916055e-06,
+      "loss": 1.9885,
+      "step": 406
+    },
+    {
+      "epoch": 0.5873015873015873,
+      "grad_norm": 0.24505528807640076,
+      "learning_rate": 8.97992782372432e-06,
+      "loss": 1.8296,
+      "step": 407
+    },
+    {
+      "epoch": 0.5887445887445888,
+      "grad_norm": 0.32213860750198364,
+      "learning_rate": 8.793690568899216e-06,
+      "loss": 1.928,
+      "step": 408
+    },
+    {
+      "epoch": 0.5901875901875901,
+      "grad_norm": 0.28556668758392334,
+      "learning_rate": 8.609218455224893e-06,
+      "loss": 1.9806,
+      "step": 409
+    },
+    {
+      "epoch": 0.5916305916305916,
+      "grad_norm": 0.28748056292533875,
+      "learning_rate": 8.426519384872733e-06,
+      "loss": 2.0535,
+      "step": 410
+    },
+    {
+      "epoch": 0.5930735930735931,
+      "grad_norm": 0.3097786605358124,
+      "learning_rate": 8.245601184062852e-06,
+      "loss": 1.8546,
+      "step": 411
+    },
+    {
+      "epoch": 0.5945165945165946,
+      "grad_norm": 0.34151002764701843,
+      "learning_rate": 8.066471602728803e-06,
+      "loss": 2.0569,
+      "step": 412
+    },
+    {
+      "epoch": 0.5959595959595959,
+      "grad_norm": 0.31291815638542175,
+      "learning_rate": 7.889138314185678e-06,
+      "loss": 1.7969,
+      "step": 413
+    },
+    {
+      "epoch": 0.5974025974025974,
+      "grad_norm": 0.3612555265426636,
+      "learning_rate": 7.71360891480134e-06,
+      "loss": 1.7247,
+      "step": 414
+    },
+    {
+      "epoch": 0.5988455988455988,
+      "grad_norm": 0.3746615946292877,
+      "learning_rate": 7.539890923671062e-06,
+      "loss": 1.9217,
+      "step": 415
+    },
+    {
+      "epoch": 0.6002886002886003,
+      "grad_norm": 0.4344548285007477,
+      "learning_rate": 7.367991782295391e-06,
+      "loss": 2.1042,
+      "step": 416
+    },
+    {
+      "epoch": 0.6017316017316018,
+      "grad_norm": 0.31650206446647644,
+      "learning_rate": 7.197918854261432e-06,
+      "loss": 1.814,
+      "step": 417
+    },
+    {
+      "epoch": 0.6031746031746031,
+      "grad_norm": 0.37703970074653625,
+      "learning_rate": 7.029679424927365e-06,
+      "loss": 1.9636,
+      "step": 418
+    },
+    {
+      "epoch": 0.6046176046176046,
+      "grad_norm": 0.37509685754776,
+      "learning_rate": 6.863280701110408e-06,
+      "loss": 1.7446,
+      "step": 419
+    },
+    {
+      "epoch": 0.6060606060606061,
+      "grad_norm": 0.33785712718963623,
+      "learning_rate": 6.698729810778065e-06,
+      "loss": 1.8171,
+      "step": 420
+    },
+    {
+      "epoch": 0.6075036075036075,
+      "grad_norm": 0.36930790543556213,
+      "learning_rate": 6.536033802742813e-06,
+      "loss": 1.9212,
+      "step": 421
+    },
+    {
+      "epoch": 0.6089466089466089,
+      "grad_norm": 0.41896510124206543,
+      "learning_rate": 6.375199646360142e-06,
+      "loss": 1.9301,
+      "step": 422
+    },
+    {
+      "epoch": 0.6103896103896104,
+      "grad_norm": 0.3452630639076233,
+      "learning_rate": 6.216234231230012e-06,
+      "loss": 2.0033,
+      "step": 423
+    },
+    {
+      "epoch": 0.6118326118326118,
+      "grad_norm": 0.4596862196922302,
+      "learning_rate": 6.059144366901736e-06,
+      "loss": 1.9092,
+      "step": 424
+    },
+    {
+      "epoch": 0.6132756132756133,
+      "grad_norm": 0.42444470524787903,
+      "learning_rate": 5.903936782582253e-06,
+      "loss": 1.9446,
+      "step": 425
+    },
+    {
+      "epoch": 0.6147186147186147,
+      "grad_norm": 0.45510318875312805,
+      "learning_rate": 5.750618126847912e-06,
+      "loss": 1.9616,
+      "step": 426
+    },
+    {
+      "epoch": 0.6161616161616161,
+      "grad_norm": 0.4246458113193512,
+      "learning_rate": 5.599194967359639e-06,
+      "loss": 2.0316,
+      "step": 427
+    },
+    {
+      "epoch": 0.6176046176046176,
+      "grad_norm": 0.43000173568725586,
+      "learning_rate": 5.449673790581611e-06,
+      "loss": 1.8851,
+      "step": 428
+    },
+    {
+      "epoch": 0.6190476190476191,
+      "grad_norm": 0.4248652160167694,
+      "learning_rate": 5.302061001503394e-06,
+      "loss": 2.0064,
+      "step": 429
+    },
+    {
+      "epoch": 0.6204906204906205,
+      "grad_norm": 0.4813087284564972,
+      "learning_rate": 5.156362923365588e-06,
+      "loss": 1.9639,
+      "step": 430
+    },
+    {
+      "epoch": 0.6219336219336219,
+      "grad_norm": 0.42517709732055664,
+      "learning_rate": 5.012585797388936e-06,
+      "loss": 1.9537,
+      "step": 431
+    },
+    {
+      "epoch": 0.6233766233766234,
+      "grad_norm": 0.413674920797348,
+      "learning_rate": 4.87073578250698e-06,
+      "loss": 1.911,
+      "step": 432
+    },
+    {
+      "epoch": 0.6248196248196248,
+      "grad_norm": 0.44890064001083374,
+      "learning_rate": 4.730818955102234e-06,
+      "loss": 1.8757,
+      "step": 433
+    },
+    {
+      "epoch": 0.6262626262626263,
+      "grad_norm": 0.5599820613861084,
+      "learning_rate": 4.592841308745932e-06,
+      "loss": 1.7325,
+      "step": 434
+    },
+    {
+      "epoch": 0.6277056277056277,
+      "grad_norm": 0.504962682723999,
+      "learning_rate": 4.456808753941205e-06,
+      "loss": 1.9556,
+      "step": 435
+    },
+    {
+      "epoch": 0.6291486291486291,
+      "grad_norm": 0.4306437373161316,
+      "learning_rate": 4.322727117869951e-06,
+      "loss": 1.8258,
+      "step": 436
+    },
+    {
+      "epoch": 0.6305916305916306,
+      "grad_norm": 0.481152206659317,
+      "learning_rate": 4.190602144143207e-06,
+      "loss": 1.9023,
+      "step": 437
+    },
+    {
+      "epoch": 0.6320346320346321,
+      "grad_norm": 0.43929627537727356,
+      "learning_rate": 4.06043949255509e-06,
+      "loss": 1.6293,
+      "step": 438
+    },
+    {
+      "epoch": 0.6334776334776335,
+      "grad_norm": 0.5508288145065308,
+      "learning_rate": 3.932244738840379e-06,
+      "loss": 1.8766,
+      "step": 439
+    },
+    {
+      "epoch": 0.6349206349206349,
+      "grad_norm": 0.503813624382019,
+      "learning_rate": 3.8060233744356633e-06,
+      "loss": 1.7276,
+      "step": 440
+    },
+    {
+      "epoch": 0.6363636363636364,
+      "grad_norm": 0.5829843282699585,
+      "learning_rate": 3.681780806244095e-06,
+      "loss": 1.7256,
+      "step": 441
+    },
+    {
+      "epoch": 0.6378066378066378,
+      "grad_norm": 0.5285375714302063,
+      "learning_rate": 3.5595223564037884e-06,
+      "loss": 1.7845,
+      "step": 442
+    },
+    {
+      "epoch": 0.6392496392496393,
+      "grad_norm": 0.5721433758735657,
+      "learning_rate": 3.4392532620598216e-06,
+      "loss": 1.6747,
+      "step": 443
+    },
+    {
+      "epoch": 0.6406926406926406,
+      "grad_norm": 0.5801364183425903,
+      "learning_rate": 3.3209786751399187e-06,
+      "loss": 1.9831,
+      "step": 444
+    },
+    {
+      "epoch": 0.6421356421356421,
+      "grad_norm": 0.6420133113861084,
+      "learning_rate": 3.2047036621337236e-06,
+      "loss": 1.842,
+      "step": 445
+    },
+    {
+      "epoch": 0.6435786435786436,
+      "grad_norm": 0.657759428024292,
+      "learning_rate": 3.0904332038757977e-06,
+      "loss": 1.732,
+      "step": 446
+    },
+    {
+      "epoch": 0.645021645021645,
+      "grad_norm": 0.6824421882629395,
+      "learning_rate": 2.978172195332263e-06,
+      "loss": 1.5208,
+      "step": 447
+    },
+    {
+      "epoch": 0.6464646464646465,
+      "grad_norm": 0.7443853616714478,
+      "learning_rate": 2.8679254453910785e-06,
+      "loss": 1.7159,
+      "step": 448
+    },
+    {
+      "epoch": 0.6479076479076479,
+      "grad_norm": 0.8423395156860352,
+      "learning_rate": 2.759697676656098e-06,
+      "loss": 1.6525,
+      "step": 449
+    },
+    {
+      "epoch": 0.6493506493506493,
+      "grad_norm": 1.326777696609497,
+      "learning_rate": 2.653493525244721e-06,
+      "loss": 1.8648,
+      "step": 450
+    },
+    {
+      "epoch": 0.6507936507936508,
+      "grad_norm": 0.20779266953468323,
+      "learning_rate": 2.549317540589308e-06,
+      "loss": 1.6705,
+      "step": 451
+    },
+    {
+      "epoch": 0.6522366522366523,
+      "grad_norm": 0.21033185720443726,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 1.7687,
+      "step": 452
+    },
+    {
+      "epoch": 0.6536796536796536,
+      "grad_norm": 0.2421332150697708,
+      "learning_rate": 2.3470678346851518e-06,
+      "loss": 1.689,
+      "step": 453
+    },
+    {
+      "epoch": 0.6551226551226551,
+      "grad_norm": 0.26601821184158325,
+      "learning_rate": 2.2490027771406687e-06,
+      "loss": 1.8311,
+      "step": 454
+    },
+    {
+      "epoch": 0.6565656565656566,
+      "grad_norm": 0.2268337607383728,
+      "learning_rate": 2.152983213389559e-06,
+      "loss": 1.8122,
+      "step": 455
+    },
+    {
+      "epoch": 0.658008658008658,
+      "grad_norm": 0.27295178174972534,
+      "learning_rate": 2.0590132565903476e-06,
+      "loss": 1.8871,
+      "step": 456
+    },
+    {
+      "epoch": 0.6594516594516594,
+      "grad_norm": 0.25046512484550476,
+      "learning_rate": 1.9670969321032407e-06,
+      "loss": 1.9112,
+      "step": 457
+    },
+    {
+      "epoch": 0.6608946608946609,
+      "grad_norm": 0.2787816822528839,
+      "learning_rate": 1.8772381773176417e-06,
+      "loss": 1.8829,
+      "step": 458
+    },
+    {
+      "epoch": 0.6623376623376623,
+      "grad_norm": 0.28614646196365356,
+      "learning_rate": 1.7894408414835362e-06,
+      "loss": 1.9613,
+      "step": 459
+    },
+    {
+      "epoch": 0.6637806637806638,
+      "grad_norm": 0.29104024171829224,
+      "learning_rate": 1.70370868554659e-06,
+      "loss": 1.883,
+      "step": 460
+    },
+    {
+      "epoch": 0.6652236652236653,
+      "grad_norm": 0.3024556040763855,
+      "learning_rate": 1.620045381987012e-06,
+      "loss": 1.7852,
+      "step": 461
+    },
+    {
+      "epoch": 0.6666666666666666,
+      "grad_norm": 0.3180261552333832,
+      "learning_rate": 1.5384545146622852e-06,
+      "loss": 1.8743,
+      "step": 462
+    },
+    {
+      "epoch": 0.6681096681096681,
+      "grad_norm": 0.34793442487716675,
+      "learning_rate": 1.4589395786535953e-06,
+      "loss": 1.7692,
+      "step": 463
+    },
+    {
+      "epoch": 0.6695526695526696,
+      "grad_norm": 0.3252454698085785,
+      "learning_rate": 1.3815039801161721e-06,
+      "loss": 1.9941,
+      "step": 464
+    },
+    {
+      "epoch": 0.670995670995671,
+      "grad_norm": 0.38873234391212463,
+      "learning_rate": 1.3061510361333185e-06,
+      "loss": 1.8367,
+      "step": 465
+    },
+    {
+      "epoch": 0.6724386724386724,
+      "grad_norm": 0.3405764102935791,
+      "learning_rate": 1.232883974574367e-06,
+      "loss": 1.9568,
+      "step": 466
+    },
+    {
+      "epoch": 0.6738816738816739,
+      "grad_norm": 0.4072246253490448,
+      "learning_rate": 1.1617059339563807e-06,
+      "loss": 1.9118,
+      "step": 467
+    },
+    {
+      "epoch": 0.6753246753246753,
+      "grad_norm": 0.38311657309532166,
+      "learning_rate": 1.0926199633097157e-06,
+      "loss": 1.9212,
+      "step": 468
+    },
+    {
+      "epoch": 0.6767676767676768,
+      "grad_norm": 0.372346431016922,
+      "learning_rate": 1.0256290220474307e-06,
+      "loss": 1.8625,
+      "step": 469
+    },
+    {
+      "epoch": 0.6782106782106783,
+      "grad_norm": 0.32480254769325256,
+      "learning_rate": 9.607359798384785e-07,
+      "loss": 2.0927,
+      "step": 470
+    },
+    {
+      "epoch": 0.6796536796536796,
+      "grad_norm": 0.349486380815506,
+      "learning_rate": 8.979436164848088e-07,
+      "loss": 1.8977,
+      "step": 471
+    },
+    {
+      "epoch": 0.6810966810966811,
+      "grad_norm": 0.5002724528312683,
+      "learning_rate": 8.372546218022747e-07,
+      "loss": 1.7969,
+      "step": 472
+    },
+    {
+      "epoch": 0.6825396825396826,
+      "grad_norm": 0.43671897053718567,
+      "learning_rate": 7.786715955054203e-07,
+      "loss": 2.0032,
+      "step": 473
+    },
+    {
+      "epoch": 0.683982683982684,
+      "grad_norm": 0.43542805314064026,
+      "learning_rate": 7.221970470961125e-07,
+      "loss": 1.9024,
+      "step": 474
+    },
+    {
+      "epoch": 0.6854256854256854,
+      "grad_norm": 0.4874102771282196,
+      "learning_rate": 6.678333957560512e-07,
+      "loss": 1.6566,
+      "step": 475
+    },
+    {
+      "epoch": 0.6868686868686869,
+      "grad_norm": 0.4170205891132355,
+      "learning_rate": 6.15582970243117e-07,
+      "loss": 1.8643,
+      "step": 476
+    },
+    {
+      "epoch": 0.6883116883116883,
+      "grad_norm": 0.4337291717529297,
+      "learning_rate": 5.654480087916303e-07,
+      "loss": 2.0587,
+      "step": 477
+    },
+    {
+      "epoch": 0.6897546897546898,
+      "grad_norm": 0.4315203130245209,
+      "learning_rate": 5.174306590164879e-07,
+      "loss": 2.0489,
+      "step": 478
+    },
+    {
+      "epoch": 0.6911976911976911,
+      "grad_norm": 0.43969807028770447,
+      "learning_rate": 4.715329778211375e-07,
+      "loss": 1.9063,
+      "step": 479
+    },
+    {
+      "epoch": 0.6926406926406926,
+      "grad_norm": 0.39184656739234924,
+      "learning_rate": 4.277569313094809e-07,
+      "loss": 1.7943,
+      "step": 480
+    },
+    {
+      "epoch": 0.6940836940836941,
+      "grad_norm": 0.43797311186790466,
+      "learning_rate": 3.8610439470164737e-07,
+      "loss": 1.9579,
+      "step": 481
+    },
+    {
+      "epoch": 0.6955266955266955,
+      "grad_norm": 0.45759716629981995,
+      "learning_rate": 3.465771522536854e-07,
+      "loss": 1.7868,
+      "step": 482
+    },
+    {
+      "epoch": 0.696969696969697,
+      "grad_norm": 0.530158281326294,
+      "learning_rate": 3.09176897181096e-07,
+      "loss": 1.9113,
+      "step": 483
+    },
+    {
+      "epoch": 0.6984126984126984,
+      "grad_norm": 0.4709303081035614,
+      "learning_rate": 2.7390523158633554e-07,
+      "loss": 1.9702,
+      "step": 484
+    },
+    {
+      "epoch": 0.6998556998556998,
+      "grad_norm": 0.5338622331619263,
+      "learning_rate": 2.407636663901591e-07,
+      "loss": 1.8965,
+      "step": 485
+    },
+    {
+      "epoch": 0.7012987012987013,
+      "grad_norm": 0.48375779390335083,
+      "learning_rate": 2.0975362126691712e-07,
+      "loss": 1.6047,
+      "step": 486
+    },
+    {
+      "epoch": 0.7027417027417028,
+      "grad_norm": 0.4987216293811798,
+      "learning_rate": 1.8087642458373134e-07,
+      "loss": 1.8085,
+      "step": 487
+    },
+    {
+      "epoch": 0.7041847041847041,
+      "grad_norm": 0.5824238657951355,
+      "learning_rate": 1.5413331334360182e-07,
+      "loss": 1.6619,
+      "step": 488
+    },
+    {
+      "epoch": 0.7056277056277056,
+      "grad_norm": 0.5118747353553772,
+      "learning_rate": 1.2952543313240472e-07,
+      "loss": 1.7263,
+      "step": 489
+    },
+    {
+      "epoch": 0.7070707070707071,
+      "grad_norm": 0.6156334280967712,
+      "learning_rate": 1.0705383806982606e-07,
+      "loss": 1.8636,
+      "step": 490
+    },
+    {
+      "epoch": 0.7085137085137085,
+      "grad_norm": 0.587309718132019,
+      "learning_rate": 8.671949076420882e-08,
+      "loss": 1.6529,
+      "step": 491
+    },
+    {
+      "epoch": 0.70995670995671,
+      "grad_norm": 0.5975311398506165,
+      "learning_rate": 6.852326227130834e-08,
+      "loss": 1.875,
+      "step": 492
+    },
+    {
+      "epoch": 0.7113997113997114,
+      "grad_norm": 0.5370944142341614,
+      "learning_rate": 5.246593205699424e-08,
+      "loss": 1.8962,
+      "step": 493
+    },
+    {
+      "epoch": 0.7128427128427128,
+      "grad_norm": 0.6013877391815186,
+      "learning_rate": 3.8548187963854956e-08,
+      "loss": 1.9036,
+      "step": 494
+    },
+    {
+      "epoch": 0.7142857142857143,
+      "grad_norm": 0.6089028120040894,
+      "learning_rate": 2.6770626181715773e-08,
+      "loss": 1.7561,
+      "step": 495
+    },
+    {
+      "epoch": 0.7157287157287158,
+      "grad_norm": 0.6195142865180969,
+      "learning_rate": 1.7133751222137007e-08,
+      "loss": 1.8843,
+      "step": 496
+    },
+    {
+      "epoch": 0.7171717171717171,
+      "grad_norm": 0.7459947466850281,
+      "learning_rate": 9.637975896759077e-09,
+      "loss": 1.5594,
+      "step": 497
+    },
+    {
+      "epoch": 0.7186147186147186,
+      "grad_norm": 0.8303995132446289,
+      "learning_rate": 4.2836212996499865e-09,
+      "loss": 1.6858,
+      "step": 498
+    },
+    {
+      "epoch": 0.7200577200577201,
+      "grad_norm": 0.9504781365394592,
+      "learning_rate": 1.0709167935385455e-09,
+      "loss": 1.4528,
+      "step": 499
+    },
+    {
+      "epoch": 0.7215007215007215,
+      "grad_norm": 1.5718448162078857,
+      "learning_rate": 0.0,
+      "loss": 1.6021,
+      "step": 500
+    },
+    {
+      "epoch": 0.7215007215007215,
+      "eval_loss": 1.8335492610931396,
+      "eval_runtime": 156.8048,
+      "eval_samples_per_second": 7.442,
+      "eval_steps_per_second": 1.862,
+      "step": 500
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.4073840298371318e+18,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null