Training in progress, step 500, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +714 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fd7ec7ed8621950c4f4ce3df1a465257096ff664ab6591a01a76f40a81de5452
 size 138995824

 version https://git-lfs.github.com/spec/v1
+oid sha256:91a92f0d5fa53563c8e3cb59e75f4f4a435459031430247e4e1068cf29e87b03
 size 138995824

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:acabe7c12083d0e3e16bf3d884a9c9e8c0bbb0afb6d20443b3ad6b4d1ecda373
 size 71078228

 version https://git-lfs.github.com/spec/v1
+oid sha256:9aa2fe4f4524584b283769f498f2c0b482df9fbe9667610a56eb1c754f549414
 size 71078228

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dbe0fb4c7c173c17f1ec9c17f771c84df701a98081e25c9b53f8fced8c3bbca6
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:eb8ed34c03405748b8b867d86edd4e42a95d2cdebd3d6b517ab6062bc6a8540d
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:505f9225762b105f8ca5168f44d99b2f8467174f4ade85f1cc95f684fbd828e0
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:fe1d153de177b356f9e3a70d6e4ec979560b0c300994e71ca4cb89afc74c5b3a
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.6988234519958496,
-  "best_model_checkpoint": "miner_id_24/checkpoint-400",
-  "epoch": 0.13499831252109348,
   "eval_steps": 100,
-  "global_step": 400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2847,6 +2847,714 @@
       "eval_samples_per_second": 60.213,
       "eval_steps_per_second": 15.059,
       "step": 400
     }
   ],
   "logging_steps": 1,
@@ -2870,12 +3578,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.7475518160896e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.692722201347351,
+  "best_model_checkpoint": "miner_id_24/checkpoint-500",
+  "epoch": 0.16874789065136686,
   "eval_steps": 100,
+  "global_step": 500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 60.213,
       "eval_steps_per_second": 15.059,
       "step": 400
+    },
+    {
+      "epoch": 0.13533580830239622,
+      "grad_norm": 0.3129079043865204,
+      "learning_rate": 1.013396731136465e-05,
+      "loss": 0.9035,
+      "step": 401
+    },
+    {
+      "epoch": 0.13567330408369896,
+      "grad_norm": 0.37935182452201843,
+      "learning_rate": 9.937309365446973e-06,
+      "loss": 1.0468,
+      "step": 402
+    },
+    {
+      "epoch": 0.1360107998650017,
+      "grad_norm": 0.4280663728713989,
+      "learning_rate": 9.742367571857091e-06,
+      "loss": 1.1142,
+      "step": 403
+    },
+    {
+      "epoch": 0.13634829564630443,
+      "grad_norm": 0.47628381848335266,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 1.2826,
+      "step": 404
+    },
+    {
+      "epoch": 0.13668579142760715,
+      "grad_norm": 0.44870856404304504,
+      "learning_rate": 9.357665770419244e-06,
+      "loss": 1.2352,
+      "step": 405
+    },
+    {
+      "epoch": 0.13702328720890988,
+      "grad_norm": 0.45660126209259033,
+      "learning_rate": 9.167922241916055e-06,
+      "loss": 1.4218,
+      "step": 406
+    },
+    {
+      "epoch": 0.13736078299021262,
+      "grad_norm": 0.4797450304031372,
+      "learning_rate": 8.97992782372432e-06,
+      "loss": 1.5542,
+      "step": 407
+    },
+    {
+      "epoch": 0.13769827877151536,
+      "grad_norm": 0.5301920175552368,
+      "learning_rate": 8.793690568899216e-06,
+      "loss": 1.4398,
+      "step": 408
+    },
+    {
+      "epoch": 0.1380357745528181,
+      "grad_norm": 0.5419598817825317,
+      "learning_rate": 8.609218455224893e-06,
+      "loss": 1.5041,
+      "step": 409
+    },
+    {
+      "epoch": 0.13837327033412083,
+      "grad_norm": 0.5022268891334534,
+      "learning_rate": 8.426519384872733e-06,
+      "loss": 1.4239,
+      "step": 410
+    },
+    {
+      "epoch": 0.13871076611542354,
+      "grad_norm": 0.5344182848930359,
+      "learning_rate": 8.245601184062852e-06,
+      "loss": 1.3749,
+      "step": 411
+    },
+    {
+      "epoch": 0.13904826189672628,
+      "grad_norm": 0.49099403619766235,
+      "learning_rate": 8.066471602728803e-06,
+      "loss": 1.564,
+      "step": 412
+    },
+    {
+      "epoch": 0.13938575767802902,
+      "grad_norm": 0.5761291980743408,
+      "learning_rate": 7.889138314185678e-06,
+      "loss": 1.6728,
+      "step": 413
+    },
+    {
+      "epoch": 0.13972325345933176,
+      "grad_norm": 0.5480265021324158,
+      "learning_rate": 7.71360891480134e-06,
+      "loss": 1.391,
+      "step": 414
+    },
+    {
+      "epoch": 0.1400607492406345,
+      "grad_norm": 0.5651693344116211,
+      "learning_rate": 7.539890923671062e-06,
+      "loss": 1.7986,
+      "step": 415
+    },
+    {
+      "epoch": 0.14039824502193723,
+      "grad_norm": 0.5342073440551758,
+      "learning_rate": 7.367991782295391e-06,
+      "loss": 1.628,
+      "step": 416
+    },
+    {
+      "epoch": 0.14073574080323997,
+      "grad_norm": 0.5717581510543823,
+      "learning_rate": 7.197918854261432e-06,
+      "loss": 1.7378,
+      "step": 417
+    },
+    {
+      "epoch": 0.14107323658454268,
+      "grad_norm": 0.5637682676315308,
+      "learning_rate": 7.029679424927365e-06,
+      "loss": 1.7038,
+      "step": 418
+    },
+    {
+      "epoch": 0.14141073236584542,
+      "grad_norm": 0.5095825791358948,
+      "learning_rate": 6.863280701110408e-06,
+      "loss": 1.7386,
+      "step": 419
+    },
+    {
+      "epoch": 0.14174822814714816,
+      "grad_norm": 0.5181745886802673,
+      "learning_rate": 6.698729810778065e-06,
+      "loss": 1.4854,
+      "step": 420
+    },
+    {
+      "epoch": 0.1420857239284509,
+      "grad_norm": 0.6405823826789856,
+      "learning_rate": 6.536033802742813e-06,
+      "loss": 1.8479,
+      "step": 421
+    },
+    {
+      "epoch": 0.14242321970975363,
+      "grad_norm": 0.5425795316696167,
+      "learning_rate": 6.375199646360142e-06,
+      "loss": 1.5897,
+      "step": 422
+    },
+    {
+      "epoch": 0.14276071549105637,
+      "grad_norm": 0.5734001994132996,
+      "learning_rate": 6.216234231230012e-06,
+      "loss": 1.8683,
+      "step": 423
+    },
+    {
+      "epoch": 0.14309821127235908,
+      "grad_norm": 0.5597679615020752,
+      "learning_rate": 6.059144366901736e-06,
+      "loss": 1.706,
+      "step": 424
+    },
+    {
+      "epoch": 0.14343570705366182,
+      "grad_norm": 0.5667785406112671,
+      "learning_rate": 5.903936782582253e-06,
+      "loss": 1.6797,
+      "step": 425
+    },
+    {
+      "epoch": 0.14377320283496456,
+      "grad_norm": 0.5886920690536499,
+      "learning_rate": 5.750618126847912e-06,
+      "loss": 1.5788,
+      "step": 426
+    },
+    {
+      "epoch": 0.1441106986162673,
+      "grad_norm": 0.5989837050437927,
+      "learning_rate": 5.599194967359639e-06,
+      "loss": 1.7581,
+      "step": 427
+    },
+    {
+      "epoch": 0.14444819439757003,
+      "grad_norm": 0.6294358372688293,
+      "learning_rate": 5.449673790581611e-06,
+      "loss": 1.5623,
+      "step": 428
+    },
+    {
+      "epoch": 0.14478569017887277,
+      "grad_norm": 0.6019811034202576,
+      "learning_rate": 5.302061001503394e-06,
+      "loss": 1.8442,
+      "step": 429
+    },
+    {
+      "epoch": 0.1451231859601755,
+      "grad_norm": 0.6037694811820984,
+      "learning_rate": 5.156362923365588e-06,
+      "loss": 1.8926,
+      "step": 430
+    },
+    {
+      "epoch": 0.14546068174147822,
+      "grad_norm": 0.6271533966064453,
+      "learning_rate": 5.012585797388936e-06,
+      "loss": 1.7361,
+      "step": 431
+    },
+    {
+      "epoch": 0.14579817752278096,
+      "grad_norm": 0.6006637215614319,
+      "learning_rate": 4.87073578250698e-06,
+      "loss": 2.1113,
+      "step": 432
+    },
+    {
+      "epoch": 0.1461356733040837,
+      "grad_norm": 0.6098272204399109,
+      "learning_rate": 4.730818955102234e-06,
+      "loss": 1.8291,
+      "step": 433
+    },
+    {
+      "epoch": 0.14647316908538643,
+      "grad_norm": 0.5911081433296204,
+      "learning_rate": 4.592841308745932e-06,
+      "loss": 1.8071,
+      "step": 434
+    },
+    {
+      "epoch": 0.14681066486668917,
+      "grad_norm": 0.6096124649047852,
+      "learning_rate": 4.456808753941205e-06,
+      "loss": 1.8495,
+      "step": 435
+    },
+    {
+      "epoch": 0.1471481606479919,
+      "grad_norm": 0.6468681693077087,
+      "learning_rate": 4.322727117869951e-06,
+      "loss": 1.9272,
+      "step": 436
+    },
+    {
+      "epoch": 0.14748565642929462,
+      "grad_norm": 0.691624641418457,
+      "learning_rate": 4.190602144143207e-06,
+      "loss": 1.9704,
+      "step": 437
+    },
+    {
+      "epoch": 0.14782315221059736,
+      "grad_norm": 0.6680425405502319,
+      "learning_rate": 4.06043949255509e-06,
+      "loss": 2.0347,
+      "step": 438
+    },
+    {
+      "epoch": 0.1481606479919001,
+      "grad_norm": 0.6429935693740845,
+      "learning_rate": 3.932244738840379e-06,
+      "loss": 1.9344,
+      "step": 439
+    },
+    {
+      "epoch": 0.14849814377320283,
+      "grad_norm": 0.6537317633628845,
+      "learning_rate": 3.8060233744356633e-06,
+      "loss": 2.1843,
+      "step": 440
+    },
+    {
+      "epoch": 0.14883563955450557,
+      "grad_norm": 0.6376103758811951,
+      "learning_rate": 3.681780806244095e-06,
+      "loss": 1.9123,
+      "step": 441
+    },
+    {
+      "epoch": 0.1491731353358083,
+      "grad_norm": 0.6572866439819336,
+      "learning_rate": 3.5595223564037884e-06,
+      "loss": 1.9031,
+      "step": 442
+    },
+    {
+      "epoch": 0.14951063111711105,
+      "grad_norm": 0.6777390837669373,
+      "learning_rate": 3.4392532620598216e-06,
+      "loss": 2.0248,
+      "step": 443
+    },
+    {
+      "epoch": 0.14984812689841376,
+      "grad_norm": 0.6614691019058228,
+      "learning_rate": 3.3209786751399187e-06,
+      "loss": 1.8491,
+      "step": 444
+    },
+    {
+      "epoch": 0.1501856226797165,
+      "grad_norm": 0.6615676879882812,
+      "learning_rate": 3.2047036621337236e-06,
+      "loss": 2.0458,
+      "step": 445
+    },
+    {
+      "epoch": 0.15052311846101923,
+      "grad_norm": 0.6535036563873291,
+      "learning_rate": 3.0904332038757977e-06,
+      "loss": 1.9352,
+      "step": 446
+    },
+    {
+      "epoch": 0.15086061424232197,
+      "grad_norm": 0.7253483533859253,
+      "learning_rate": 2.978172195332263e-06,
+      "loss": 1.9182,
+      "step": 447
+    },
+    {
+      "epoch": 0.1511981100236247,
+      "grad_norm": 0.7578319907188416,
+      "learning_rate": 2.8679254453910785e-06,
+      "loss": 2.186,
+      "step": 448
+    },
+    {
+      "epoch": 0.15153560580492745,
+      "grad_norm": 0.7149704098701477,
+      "learning_rate": 2.759697676656098e-06,
+      "loss": 2.0296,
+      "step": 449
+    },
+    {
+      "epoch": 0.15187310158623019,
+      "grad_norm": 0.9842973351478577,
+      "learning_rate": 2.653493525244721e-06,
+      "loss": 1.9824,
+      "step": 450
+    },
+    {
+      "epoch": 0.1522105973675329,
+      "grad_norm": 0.3274257481098175,
+      "learning_rate": 2.549317540589308e-06,
+      "loss": 0.7707,
+      "step": 451
+    },
+    {
+      "epoch": 0.15254809314883563,
+      "grad_norm": 0.3513686954975128,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 1.2193,
+      "step": 452
+    },
+    {
+      "epoch": 0.15288558893013837,
+      "grad_norm": 0.3720841705799103,
+      "learning_rate": 2.3470678346851518e-06,
+      "loss": 1.0917,
+      "step": 453
+    },
+    {
+      "epoch": 0.1532230847114411,
+      "grad_norm": 0.40632250905036926,
+      "learning_rate": 2.2490027771406687e-06,
+      "loss": 1.3549,
+      "step": 454
+    },
+    {
+      "epoch": 0.15356058049274385,
+      "grad_norm": 0.4259728193283081,
+      "learning_rate": 2.152983213389559e-06,
+      "loss": 1.2863,
+      "step": 455
+    },
+    {
+      "epoch": 0.15389807627404659,
+      "grad_norm": 0.44335639476776123,
+      "learning_rate": 2.0590132565903476e-06,
+      "loss": 1.6179,
+      "step": 456
+    },
+    {
+      "epoch": 0.1542355720553493,
+      "grad_norm": 0.5289120674133301,
+      "learning_rate": 1.9670969321032407e-06,
+      "loss": 1.5035,
+      "step": 457
+    },
+    {
+      "epoch": 0.15457306783665203,
+      "grad_norm": 0.48128408193588257,
+      "learning_rate": 1.8772381773176417e-06,
+      "loss": 1.5238,
+      "step": 458
+    },
+    {
+      "epoch": 0.15491056361795477,
+      "grad_norm": 0.48134317994117737,
+      "learning_rate": 1.7894408414835362e-06,
+      "loss": 1.3862,
+      "step": 459
+    },
+    {
+      "epoch": 0.1552480593992575,
+      "grad_norm": 0.47726309299468994,
+      "learning_rate": 1.70370868554659e-06,
+      "loss": 1.6318,
+      "step": 460
+    },
+    {
+      "epoch": 0.15558555518056025,
+      "grad_norm": 0.5632562637329102,
+      "learning_rate": 1.620045381987012e-06,
+      "loss": 1.6729,
+      "step": 461
+    },
+    {
+      "epoch": 0.15592305096186299,
+      "grad_norm": 0.5401945114135742,
+      "learning_rate": 1.5384545146622852e-06,
+      "loss": 1.5796,
+      "step": 462
+    },
+    {
+      "epoch": 0.15626054674316572,
+      "grad_norm": 0.5418731570243835,
+      "learning_rate": 1.4589395786535953e-06,
+      "loss": 1.7439,
+      "step": 463
+    },
+    {
+      "epoch": 0.15659804252446843,
+      "grad_norm": 0.5528028011322021,
+      "learning_rate": 1.3815039801161721e-06,
+      "loss": 1.7827,
+      "step": 464
+    },
+    {
+      "epoch": 0.15693553830577117,
+      "grad_norm": 0.5368233323097229,
+      "learning_rate": 1.3061510361333185e-06,
+      "loss": 1.5638,
+      "step": 465
+    },
+    {
+      "epoch": 0.1572730340870739,
+      "grad_norm": 0.5752639174461365,
+      "learning_rate": 1.232883974574367e-06,
+      "loss": 1.711,
+      "step": 466
+    },
+    {
+      "epoch": 0.15761052986837665,
+      "grad_norm": 0.570741593837738,
+      "learning_rate": 1.1617059339563807e-06,
+      "loss": 1.8203,
+      "step": 467
+    },
+    {
+      "epoch": 0.15794802564967939,
+      "grad_norm": 0.5896568298339844,
+      "learning_rate": 1.0926199633097157e-06,
+      "loss": 1.8287,
+      "step": 468
+    },
+    {
+      "epoch": 0.15828552143098212,
+      "grad_norm": 0.5372335910797119,
+      "learning_rate": 1.0256290220474307e-06,
+      "loss": 1.4701,
+      "step": 469
+    },
+    {
+      "epoch": 0.15862301721228483,
+      "grad_norm": 0.5423702001571655,
+      "learning_rate": 9.607359798384785e-07,
+      "loss": 1.6354,
+      "step": 470
+    },
+    {
+      "epoch": 0.15896051299358757,
+      "grad_norm": 0.5899948477745056,
+      "learning_rate": 8.979436164848088e-07,
+      "loss": 1.8108,
+      "step": 471
+    },
+    {
+      "epoch": 0.1592980087748903,
+      "grad_norm": 0.5420827269554138,
+      "learning_rate": 8.372546218022747e-07,
+      "loss": 1.9779,
+      "step": 472
+    },
+    {
+      "epoch": 0.15963550455619305,
+      "grad_norm": 0.5416197180747986,
+      "learning_rate": 7.786715955054203e-07,
+      "loss": 1.7092,
+      "step": 473
+    },
+    {
+      "epoch": 0.15997300033749579,
+      "grad_norm": 0.6021366119384766,
+      "learning_rate": 7.221970470961125e-07,
+      "loss": 1.6724,
+      "step": 474
+    },
+    {
+      "epoch": 0.16031049611879852,
+      "grad_norm": 0.5433163642883301,
+      "learning_rate": 6.678333957560512e-07,
+      "loss": 1.8533,
+      "step": 475
+    },
+    {
+      "epoch": 0.16064799190010126,
+      "grad_norm": 0.5783931016921997,
+      "learning_rate": 6.15582970243117e-07,
+      "loss": 1.6125,
+      "step": 476
+    },
+    {
+      "epoch": 0.16098548768140397,
+      "grad_norm": 0.6139048337936401,
+      "learning_rate": 5.654480087916303e-07,
+      "loss": 1.8255,
+      "step": 477
+    },
+    {
+      "epoch": 0.1613229834627067,
+      "grad_norm": 0.5656695365905762,
+      "learning_rate": 5.174306590164879e-07,
+      "loss": 2.0079,
+      "step": 478
+    },
+    {
+      "epoch": 0.16166047924400945,
+      "grad_norm": 0.6840364336967468,
+      "learning_rate": 4.715329778211375e-07,
+      "loss": 1.9251,
+      "step": 479
+    },
+    {
+      "epoch": 0.16199797502531219,
+      "grad_norm": 0.552597165107727,
+      "learning_rate": 4.277569313094809e-07,
+      "loss": 1.8384,
+      "step": 480
+    },
+    {
+      "epoch": 0.16233547080661492,
+      "grad_norm": 0.635677695274353,
+      "learning_rate": 3.8610439470164737e-07,
+      "loss": 1.9796,
+      "step": 481
+    },
+    {
+      "epoch": 0.16267296658791766,
+      "grad_norm": 0.6121070981025696,
+      "learning_rate": 3.465771522536854e-07,
+      "loss": 1.7527,
+      "step": 482
+    },
+    {
+      "epoch": 0.16301046236922037,
+      "grad_norm": 0.6411792039871216,
+      "learning_rate": 3.09176897181096e-07,
+      "loss": 1.7262,
+      "step": 483
+    },
+    {
+      "epoch": 0.1633479581505231,
+      "grad_norm": 0.5451667904853821,
+      "learning_rate": 2.7390523158633554e-07,
+      "loss": 1.9697,
+      "step": 484
+    },
+    {
+      "epoch": 0.16368545393182585,
+      "grad_norm": 0.5873634815216064,
+      "learning_rate": 2.407636663901591e-07,
+      "loss": 1.601,
+      "step": 485
+    },
+    {
+      "epoch": 0.16402294971312859,
+      "grad_norm": 0.6943976879119873,
+      "learning_rate": 2.0975362126691712e-07,
+      "loss": 2.1144,
+      "step": 486
+    },
+    {
+      "epoch": 0.16436044549443132,
+      "grad_norm": 0.6562677025794983,
+      "learning_rate": 1.8087642458373134e-07,
+      "loss": 2.0762,
+      "step": 487
+    },
+    {
+      "epoch": 0.16469794127573406,
+      "grad_norm": 0.6708371043205261,
+      "learning_rate": 1.5413331334360182e-07,
+      "loss": 1.9978,
+      "step": 488
+    },
+    {
+      "epoch": 0.1650354370570368,
+      "grad_norm": 0.6782956719398499,
+      "learning_rate": 1.2952543313240472e-07,
+      "loss": 1.8231,
+      "step": 489
+    },
+    {
+      "epoch": 0.1653729328383395,
+      "grad_norm": 0.6593528985977173,
+      "learning_rate": 1.0705383806982606e-07,
+      "loss": 2.0718,
+      "step": 490
+    },
+    {
+      "epoch": 0.16571042861964225,
+      "grad_norm": 0.6144323348999023,
+      "learning_rate": 8.671949076420882e-08,
+      "loss": 1.8841,
+      "step": 491
+    },
+    {
+      "epoch": 0.16604792440094499,
+      "grad_norm": 0.6653460264205933,
+      "learning_rate": 6.852326227130834e-08,
+      "loss": 2.095,
+      "step": 492
+    },
+    {
+      "epoch": 0.16638542018224772,
+      "grad_norm": 0.7157003283500671,
+      "learning_rate": 5.246593205699424e-08,
+      "loss": 2.1404,
+      "step": 493
+    },
+    {
+      "epoch": 0.16672291596355046,
+      "grad_norm": 0.6483384966850281,
+      "learning_rate": 3.8548187963854956e-08,
+      "loss": 2.1051,
+      "step": 494
+    },
+    {
+      "epoch": 0.1670604117448532,
+      "grad_norm": 0.6570152640342712,
+      "learning_rate": 2.6770626181715773e-08,
+      "loss": 2.1723,
+      "step": 495
+    },
+    {
+      "epoch": 0.16739790752615594,
+      "grad_norm": 0.692857027053833,
+      "learning_rate": 1.7133751222137007e-08,
+      "loss": 1.936,
+      "step": 496
+    },
+    {
+      "epoch": 0.16773540330745865,
+      "grad_norm": 0.7435788512229919,
+      "learning_rate": 9.637975896759077e-09,
+      "loss": 2.2764,
+      "step": 497
+    },
+    {
+      "epoch": 0.16807289908876138,
+      "grad_norm": 0.6761675477027893,
+      "learning_rate": 4.2836212996499865e-09,
+      "loss": 2.0686,
+      "step": 498
+    },
+    {
+      "epoch": 0.16841039487006412,
+      "grad_norm": 0.6835579872131348,
+      "learning_rate": 1.0709167935385455e-09,
+      "loss": 2.0321,
+      "step": 499
+    },
+    {
+      "epoch": 0.16874789065136686,
+      "grad_norm": 0.8246026635169983,
+      "learning_rate": 0.0,
+      "loss": 2.0082,
+      "step": 500
+    },
+    {
+      "epoch": 0.16874789065136686,
+      "eval_loss": 1.692722201347351,
+      "eval_runtime": 82.8216,
+      "eval_samples_per_second": 60.25,
+      "eval_steps_per_second": 15.069,
+      "step": 500
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 3.434439770112e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null