Training in progress, step 500, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +714 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9e653775cc249175678648a653044416209f6475bf29e43394e2ab2b03341ab5
 size 411094576

 version https://git-lfs.github.com/spec/v1
+oid sha256:465b9b0af0608697e8682f3b66a5e25c67857bc0a9b3d36dcd98756946a1ee10
 size 411094576

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f2fe895de504a208490a1cf2ef58a932627f92684431ae1bdba71258bdcbb6bf
 size 209193780

 version https://git-lfs.github.com/spec/v1
+oid sha256:d9a33185e51ca90faded8c9cd34e0fd37a2543b6404e4251329fbe029148d9a8
 size 209193780

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:22087d1b9076daa7f9d9ca391795de63314bd5b40201ba03c337e4e05bebf4a3
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:92edbed6f58c24253fd12cccbdbe7533f3c8eb8d6d1eea3a09457ef78e54318a
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:505f9225762b105f8ca5168f44d99b2f8467174f4ade85f1cc95f684fbd828e0
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:fe1d153de177b356f9e3a70d6e4ec979560b0c300994e71ca4cb89afc74c5b3a
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.8248791694641113,
-  "best_model_checkpoint": "miner_id_24/checkpoint-400",
-  "epoch": 0.005317363518233572,
   "eval_steps": 100,
-  "global_step": 400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2847,6 +2847,714 @@
       "eval_samples_per_second": 19.64,
       "eval_steps_per_second": 4.91,
       "step": 400
     }
   ],
   "logging_steps": 1,
@@ -2870,12 +3578,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.506940810621092e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.8191825747489929,
+  "best_model_checkpoint": "miner_id_24/checkpoint-500",
+  "epoch": 0.006646704397791965,
   "eval_steps": 100,
+  "global_step": 500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 19.64,
       "eval_steps_per_second": 4.91,
       "step": 400
+    },
+    {
+      "epoch": 0.005330656927029156,
+      "grad_norm": 1.0595228672027588,
+      "learning_rate": 1.013396731136465e-05,
+      "loss": 1.017,
+      "step": 401
+    },
+    {
+      "epoch": 0.00534395033582474,
+      "grad_norm": 1.4388854503631592,
+      "learning_rate": 9.937309365446973e-06,
+      "loss": 0.918,
+      "step": 402
+    },
+    {
+      "epoch": 0.005357243744620324,
+      "grad_norm": 1.3190706968307495,
+      "learning_rate": 9.742367571857091e-06,
+      "loss": 0.8082,
+      "step": 403
+    },
+    {
+      "epoch": 0.0053705371534159075,
+      "grad_norm": 1.449982762336731,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 0.6463,
+      "step": 404
+    },
+    {
+      "epoch": 0.005383830562211492,
+      "grad_norm": 1.861377477645874,
+      "learning_rate": 9.357665770419244e-06,
+      "loss": 0.8151,
+      "step": 405
+    },
+    {
+      "epoch": 0.005397123971007075,
+      "grad_norm": 1.519214391708374,
+      "learning_rate": 9.167922241916055e-06,
+      "loss": 0.7792,
+      "step": 406
+    },
+    {
+      "epoch": 0.00541041737980266,
+      "grad_norm": 1.5293023586273193,
+      "learning_rate": 8.97992782372432e-06,
+      "loss": 0.9019,
+      "step": 407
+    },
+    {
+      "epoch": 0.005423710788598243,
+      "grad_norm": 1.5244901180267334,
+      "learning_rate": 8.793690568899216e-06,
+      "loss": 0.9023,
+      "step": 408
+    },
+    {
+      "epoch": 0.0054370041973938275,
+      "grad_norm": 1.7790530920028687,
+      "learning_rate": 8.609218455224893e-06,
+      "loss": 0.9439,
+      "step": 409
+    },
+    {
+      "epoch": 0.005450297606189411,
+      "grad_norm": 1.5161616802215576,
+      "learning_rate": 8.426519384872733e-06,
+      "loss": 0.8852,
+      "step": 410
+    },
+    {
+      "epoch": 0.005463591014984995,
+      "grad_norm": 1.6171609163284302,
+      "learning_rate": 8.245601184062852e-06,
+      "loss": 0.8912,
+      "step": 411
+    },
+    {
+      "epoch": 0.005476884423780579,
+      "grad_norm": 1.899787425994873,
+      "learning_rate": 8.066471602728803e-06,
+      "loss": 1.0067,
+      "step": 412
+    },
+    {
+      "epoch": 0.005490177832576163,
+      "grad_norm": 1.5935102701187134,
+      "learning_rate": 7.889138314185678e-06,
+      "loss": 0.8857,
+      "step": 413
+    },
+    {
+      "epoch": 0.005503471241371747,
+      "grad_norm": 1.7188307046890259,
+      "learning_rate": 7.71360891480134e-06,
+      "loss": 1.0062,
+      "step": 414
+    },
+    {
+      "epoch": 0.005516764650167331,
+      "grad_norm": 1.8137848377227783,
+      "learning_rate": 7.539890923671062e-06,
+      "loss": 0.8886,
+      "step": 415
+    },
+    {
+      "epoch": 0.0055300580589629144,
+      "grad_norm": 1.8021348714828491,
+      "learning_rate": 7.367991782295391e-06,
+      "loss": 0.8139,
+      "step": 416
+    },
+    {
+      "epoch": 0.005543351467758499,
+      "grad_norm": 1.692603588104248,
+      "learning_rate": 7.197918854261432e-06,
+      "loss": 0.7903,
+      "step": 417
+    },
+    {
+      "epoch": 0.005556644876554082,
+      "grad_norm": 1.8016855716705322,
+      "learning_rate": 7.029679424927365e-06,
+      "loss": 0.8315,
+      "step": 418
+    },
+    {
+      "epoch": 0.005569938285349667,
+      "grad_norm": 1.8316609859466553,
+      "learning_rate": 6.863280701110408e-06,
+      "loss": 0.7959,
+      "step": 419
+    },
+    {
+      "epoch": 0.00558323169414525,
+      "grad_norm": 1.9248038530349731,
+      "learning_rate": 6.698729810778065e-06,
+      "loss": 0.9514,
+      "step": 420
+    },
+    {
+      "epoch": 0.0055965251029408344,
+      "grad_norm": 1.8567143678665161,
+      "learning_rate": 6.536033802742813e-06,
+      "loss": 0.8723,
+      "step": 421
+    },
+    {
+      "epoch": 0.005609818511736418,
+      "grad_norm": 1.8281995058059692,
+      "learning_rate": 6.375199646360142e-06,
+      "loss": 0.881,
+      "step": 422
+    },
+    {
+      "epoch": 0.005623111920532002,
+      "grad_norm": 2.1290082931518555,
+      "learning_rate": 6.216234231230012e-06,
+      "loss": 0.7979,
+      "step": 423
+    },
+    {
+      "epoch": 0.005636405329327586,
+      "grad_norm": 2.67155122756958,
+      "learning_rate": 6.059144366901736e-06,
+      "loss": 0.9295,
+      "step": 424
+    },
+    {
+      "epoch": 0.00564969873812317,
+      "grad_norm": 2.3634464740753174,
+      "learning_rate": 5.903936782582253e-06,
+      "loss": 0.9214,
+      "step": 425
+    },
+    {
+      "epoch": 0.005662992146918754,
+      "grad_norm": 2.1651251316070557,
+      "learning_rate": 5.750618126847912e-06,
+      "loss": 0.8525,
+      "step": 426
+    },
+    {
+      "epoch": 0.005676285555714338,
+      "grad_norm": 1.97309410572052,
+      "learning_rate": 5.599194967359639e-06,
+      "loss": 0.9601,
+      "step": 427
+    },
+    {
+      "epoch": 0.005689578964509922,
+      "grad_norm": 2.1008105278015137,
+      "learning_rate": 5.449673790581611e-06,
+      "loss": 0.7276,
+      "step": 428
+    },
+    {
+      "epoch": 0.005702872373305506,
+      "grad_norm": 2.0735974311828613,
+      "learning_rate": 5.302061001503394e-06,
+      "loss": 0.7779,
+      "step": 429
+    },
+    {
+      "epoch": 0.00571616578210109,
+      "grad_norm": 1.9534869194030762,
+      "learning_rate": 5.156362923365588e-06,
+      "loss": 0.7884,
+      "step": 430
+    },
+    {
+      "epoch": 0.005729459190896674,
+      "grad_norm": 2.201770544052124,
+      "learning_rate": 5.012585797388936e-06,
+      "loss": 0.7961,
+      "step": 431
+    },
+    {
+      "epoch": 0.005742752599692258,
+      "grad_norm": 2.4963033199310303,
+      "learning_rate": 4.87073578250698e-06,
+      "loss": 0.9411,
+      "step": 432
+    },
+    {
+      "epoch": 0.005756046008487841,
+      "grad_norm": 2.3531322479248047,
+      "learning_rate": 4.730818955102234e-06,
+      "loss": 0.8704,
+      "step": 433
+    },
+    {
+      "epoch": 0.005769339417283426,
+      "grad_norm": 2.5668766498565674,
+      "learning_rate": 4.592841308745932e-06,
+      "loss": 0.7668,
+      "step": 434
+    },
+    {
+      "epoch": 0.005782632826079009,
+      "grad_norm": 2.429396152496338,
+      "learning_rate": 4.456808753941205e-06,
+      "loss": 0.7915,
+      "step": 435
+    },
+    {
+      "epoch": 0.005795926234874594,
+      "grad_norm": 2.1826658248901367,
+      "learning_rate": 4.322727117869951e-06,
+      "loss": 0.8068,
+      "step": 436
+    },
+    {
+      "epoch": 0.005809219643670177,
+      "grad_norm": 2.3497233390808105,
+      "learning_rate": 4.190602144143207e-06,
+      "loss": 0.7689,
+      "step": 437
+    },
+    {
+      "epoch": 0.005822513052465761,
+      "grad_norm": 2.6154377460479736,
+      "learning_rate": 4.06043949255509e-06,
+      "loss": 0.6609,
+      "step": 438
+    },
+    {
+      "epoch": 0.005835806461261345,
+      "grad_norm": 2.4827382564544678,
+      "learning_rate": 3.932244738840379e-06,
+      "loss": 0.6727,
+      "step": 439
+    },
+    {
+      "epoch": 0.005849099870056929,
+      "grad_norm": 2.957174777984619,
+      "learning_rate": 3.8060233744356633e-06,
+      "loss": 0.7521,
+      "step": 440
+    },
+    {
+      "epoch": 0.005862393278852513,
+      "grad_norm": 2.6731929779052734,
+      "learning_rate": 3.681780806244095e-06,
+      "loss": 0.8004,
+      "step": 441
+    },
+    {
+      "epoch": 0.005875686687648097,
+      "grad_norm": 2.5539755821228027,
+      "learning_rate": 3.5595223564037884e-06,
+      "loss": 0.7855,
+      "step": 442
+    },
+    {
+      "epoch": 0.0058889800964436806,
+      "grad_norm": 2.6112401485443115,
+      "learning_rate": 3.4392532620598216e-06,
+      "loss": 0.6713,
+      "step": 443
+    },
+    {
+      "epoch": 0.005902273505239265,
+      "grad_norm": 3.1589066982269287,
+      "learning_rate": 3.3209786751399187e-06,
+      "loss": 0.8434,
+      "step": 444
+    },
+    {
+      "epoch": 0.005915566914034848,
+      "grad_norm": 3.0150818824768066,
+      "learning_rate": 3.2047036621337236e-06,
+      "loss": 0.6647,
+      "step": 445
+    },
+    {
+      "epoch": 0.005928860322830433,
+      "grad_norm": 2.906002998352051,
+      "learning_rate": 3.0904332038757977e-06,
+      "loss": 0.7664,
+      "step": 446
+    },
+    {
+      "epoch": 0.005942153731626016,
+      "grad_norm": 3.820185661315918,
+      "learning_rate": 2.978172195332263e-06,
+      "loss": 0.8387,
+      "step": 447
+    },
+    {
+      "epoch": 0.0059554471404216006,
+      "grad_norm": 3.4882965087890625,
+      "learning_rate": 2.8679254453910785e-06,
+      "loss": 0.9115,
+      "step": 448
+    },
+    {
+      "epoch": 0.005968740549217184,
+      "grad_norm": 4.0936174392700195,
+      "learning_rate": 2.759697676656098e-06,
+      "loss": 0.7122,
+      "step": 449
+    },
+    {
+      "epoch": 0.005982033958012768,
+      "grad_norm": 6.082996845245361,
+      "learning_rate": 2.653493525244721e-06,
+      "loss": 0.9206,
+      "step": 450
+    },
+    {
+      "epoch": 0.005995327366808352,
+      "grad_norm": 0.9247643351554871,
+      "learning_rate": 2.549317540589308e-06,
+      "loss": 0.9231,
+      "step": 451
+    },
+    {
+      "epoch": 0.006008620775603936,
+      "grad_norm": 1.2527554035186768,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 0.9569,
+      "step": 452
+    },
+    {
+      "epoch": 0.00602191418439952,
+      "grad_norm": 1.998666763305664,
+      "learning_rate": 2.3470678346851518e-06,
+      "loss": 0.809,
+      "step": 453
+    },
+    {
+      "epoch": 0.006035207593195104,
+      "grad_norm": 1.6121776103973389,
+      "learning_rate": 2.2490027771406687e-06,
+      "loss": 1.0757,
+      "step": 454
+    },
+    {
+      "epoch": 0.006048501001990688,
+      "grad_norm": 1.6252521276474,
+      "learning_rate": 2.152983213389559e-06,
+      "loss": 1.0179,
+      "step": 455
+    },
+    {
+      "epoch": 0.006061794410786272,
+      "grad_norm": 1.5165375471115112,
+      "learning_rate": 2.0590132565903476e-06,
+      "loss": 0.9696,
+      "step": 456
+    },
+    {
+      "epoch": 0.006075087819581856,
+      "grad_norm": 1.8923349380493164,
+      "learning_rate": 1.9670969321032407e-06,
+      "loss": 0.7702,
+      "step": 457
+    },
+    {
+      "epoch": 0.00608838122837744,
+      "grad_norm": 1.4962996244430542,
+      "learning_rate": 1.8772381773176417e-06,
+      "loss": 1.0016,
+      "step": 458
+    },
+    {
+      "epoch": 0.006101674637173024,
+      "grad_norm": 1.6529314517974854,
+      "learning_rate": 1.7894408414835362e-06,
+      "loss": 0.7777,
+      "step": 459
+    },
+    {
+      "epoch": 0.0061149680459686075,
+      "grad_norm": 1.5695680379867554,
+      "learning_rate": 1.70370868554659e-06,
+      "loss": 0.9964,
+      "step": 460
+    },
+    {
+      "epoch": 0.006128261454764192,
+      "grad_norm": 1.4540382623672485,
+      "learning_rate": 1.620045381987012e-06,
+      "loss": 0.8244,
+      "step": 461
+    },
+    {
+      "epoch": 0.006141554863559775,
+      "grad_norm": 1.6551907062530518,
+      "learning_rate": 1.5384545146622852e-06,
+      "loss": 0.8744,
+      "step": 462
+    },
+    {
+      "epoch": 0.00615484827235536,
+      "grad_norm": 1.7763545513153076,
+      "learning_rate": 1.4589395786535953e-06,
+      "loss": 0.9266,
+      "step": 463
+    },
+    {
+      "epoch": 0.006168141681150943,
+      "grad_norm": 1.7592118978500366,
+      "learning_rate": 1.3815039801161721e-06,
+      "loss": 0.8165,
+      "step": 464
+    },
+    {
+      "epoch": 0.0061814350899465275,
+      "grad_norm": 1.6234304904937744,
+      "learning_rate": 1.3061510361333185e-06,
+      "loss": 0.9257,
+      "step": 465
+    },
+    {
+      "epoch": 0.006194728498742111,
+      "grad_norm": 1.7104973793029785,
+      "learning_rate": 1.232883974574367e-06,
+      "loss": 0.8413,
+      "step": 466
+    },
+    {
+      "epoch": 0.006208021907537695,
+      "grad_norm": 1.8358904123306274,
+      "learning_rate": 1.1617059339563807e-06,
+      "loss": 0.9262,
+      "step": 467
+    },
+    {
+      "epoch": 0.006221315316333279,
+      "grad_norm": 2.0369277000427246,
+      "learning_rate": 1.0926199633097157e-06,
+      "loss": 0.9344,
+      "step": 468
+    },
+    {
+      "epoch": 0.006234608725128863,
+      "grad_norm": 1.7839794158935547,
+      "learning_rate": 1.0256290220474307e-06,
+      "loss": 0.9029,
+      "step": 469
+    },
+    {
+      "epoch": 0.006247902133924447,
+      "grad_norm": 1.9375988245010376,
+      "learning_rate": 9.607359798384785e-07,
+      "loss": 0.7894,
+      "step": 470
+    },
+    {
+      "epoch": 0.006261195542720031,
+      "grad_norm": 1.928093671798706,
+      "learning_rate": 8.979436164848088e-07,
+      "loss": 0.7991,
+      "step": 471
+    },
+    {
+      "epoch": 0.0062744889515156145,
+      "grad_norm": 1.8025486469268799,
+      "learning_rate": 8.372546218022747e-07,
+      "loss": 0.7644,
+      "step": 472
+    },
+    {
+      "epoch": 0.006287782360311199,
+      "grad_norm": 2.0792739391326904,
+      "learning_rate": 7.786715955054203e-07,
+      "loss": 0.6734,
+      "step": 473
+    },
+    {
+      "epoch": 0.006301075769106782,
+      "grad_norm": 1.997489094734192,
+      "learning_rate": 7.221970470961125e-07,
+      "loss": 0.8299,
+      "step": 474
+    },
+    {
+      "epoch": 0.006314369177902367,
+      "grad_norm": 2.252878427505493,
+      "learning_rate": 6.678333957560512e-07,
+      "loss": 0.8351,
+      "step": 475
+    },
+    {
+      "epoch": 0.00632766258669795,
+      "grad_norm": 1.966784954071045,
+      "learning_rate": 6.15582970243117e-07,
+      "loss": 0.8078,
+      "step": 476
+    },
+    {
+      "epoch": 0.0063409559954935345,
+      "grad_norm": 1.9468001127243042,
+      "learning_rate": 5.654480087916303e-07,
+      "loss": 0.8398,
+      "step": 477
+    },
+    {
+      "epoch": 0.006354249404289118,
+      "grad_norm": 2.1014404296875,
+      "learning_rate": 5.174306590164879e-07,
+      "loss": 0.883,
+      "step": 478
+    },
+    {
+      "epoch": 0.006367542813084702,
+      "grad_norm": 1.949217677116394,
+      "learning_rate": 4.715329778211375e-07,
+      "loss": 0.7912,
+      "step": 479
+    },
+    {
+      "epoch": 0.006380836221880286,
+      "grad_norm": 2.206620931625366,
+      "learning_rate": 4.277569313094809e-07,
+      "loss": 0.9008,
+      "step": 480
+    },
+    {
+      "epoch": 0.00639412963067587,
+      "grad_norm": 2.2743523120880127,
+      "learning_rate": 3.8610439470164737e-07,
+      "loss": 0.7626,
+      "step": 481
+    },
+    {
+      "epoch": 0.006407423039471454,
+      "grad_norm": 2.57737135887146,
+      "learning_rate": 3.465771522536854e-07,
+      "loss": 0.9584,
+      "step": 482
+    },
+    {
+      "epoch": 0.006420716448267038,
+      "grad_norm": 2.0969834327697754,
+      "learning_rate": 3.09176897181096e-07,
+      "loss": 0.7054,
+      "step": 483
+    },
+    {
+      "epoch": 0.006434009857062622,
+      "grad_norm": 2.5706839561462402,
+      "learning_rate": 2.7390523158633554e-07,
+      "loss": 0.8623,
+      "step": 484
+    },
+    {
+      "epoch": 0.006447303265858206,
+      "grad_norm": 2.5481481552124023,
+      "learning_rate": 2.407636663901591e-07,
+      "loss": 0.8337,
+      "step": 485
+    },
+    {
+      "epoch": 0.00646059667465379,
+      "grad_norm": 2.2276320457458496,
+      "learning_rate": 2.0975362126691712e-07,
+      "loss": 0.8068,
+      "step": 486
+    },
+    {
+      "epoch": 0.006473890083449374,
+      "grad_norm": 2.349933624267578,
+      "learning_rate": 1.8087642458373134e-07,
+      "loss": 0.7385,
+      "step": 487
+    },
+    {
+      "epoch": 0.006487183492244958,
+      "grad_norm": 2.462743043899536,
+      "learning_rate": 1.5413331334360182e-07,
+      "loss": 0.892,
+      "step": 488
+    },
+    {
+      "epoch": 0.0065004769010405415,
+      "grad_norm": 2.3636093139648438,
+      "learning_rate": 1.2952543313240472e-07,
+      "loss": 0.8818,
+      "step": 489
+    },
+    {
+      "epoch": 0.006513770309836126,
+      "grad_norm": 2.944607734680176,
+      "learning_rate": 1.0705383806982606e-07,
+      "loss": 1.079,
+      "step": 490
+    },
+    {
+      "epoch": 0.006527063718631709,
+      "grad_norm": 2.7098374366760254,
+      "learning_rate": 8.671949076420882e-08,
+      "loss": 0.8077,
+      "step": 491
+    },
+    {
+      "epoch": 0.006540357127427294,
+      "grad_norm": 3.112506866455078,
+      "learning_rate": 6.852326227130834e-08,
+      "loss": 0.8186,
+      "step": 492
+    },
+    {
+      "epoch": 0.006553650536222877,
+      "grad_norm": 2.717796802520752,
+      "learning_rate": 5.246593205699424e-08,
+      "loss": 0.6142,
+      "step": 493
+    },
+    {
+      "epoch": 0.0065669439450184615,
+      "grad_norm": 3.020770788192749,
+      "learning_rate": 3.8548187963854956e-08,
+      "loss": 0.8441,
+      "step": 494
+    },
+    {
+      "epoch": 0.006580237353814045,
+      "grad_norm": 2.710313081741333,
+      "learning_rate": 2.6770626181715773e-08,
+      "loss": 0.7852,
+      "step": 495
+    },
+    {
+      "epoch": 0.006593530762609629,
+      "grad_norm": 3.541020154953003,
+      "learning_rate": 1.7133751222137007e-08,
+      "loss": 0.7705,
+      "step": 496
+    },
+    {
+      "epoch": 0.006606824171405213,
+      "grad_norm": 3.2380614280700684,
+      "learning_rate": 9.637975896759077e-09,
+      "loss": 0.9778,
+      "step": 497
+    },
+    {
+      "epoch": 0.006620117580200797,
+      "grad_norm": 3.6511924266815186,
+      "learning_rate": 4.2836212996499865e-09,
+      "loss": 0.8976,
+      "step": 498
+    },
+    {
+      "epoch": 0.006633410988996381,
+      "grad_norm": 4.02791690826416,
+      "learning_rate": 1.0709167935385455e-09,
+      "loss": 0.8247,
+      "step": 499
+    },
+    {
+      "epoch": 0.006646704397791965,
+      "grad_norm": 5.131423473358154,
+      "learning_rate": 0.0,
+      "loss": 0.7257,
+      "step": 500
+    },
+    {
+      "epoch": 0.006646704397791965,
+      "eval_loss": 0.8191825747489929,
+      "eval_runtime": 6452.7093,
+      "eval_samples_per_second": 19.635,
+      "eval_steps_per_second": 4.909,
+      "step": 500
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 3.133284548440228e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null