aleegis12 committed
Commit 1e578ba · verified · 1 Parent(s): 33ef68e

Training in progress, step 300, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c2ef37e66cf698968f2fda51a39cd65be386075e1290cd6d28862b2396af8133
+ oid sha256:c6998651a4f6221c924c5f14c9847c85d7f4ff286da87b492a2c183aab4f8ae3
  size 411094576
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:991167d60948ccbda32d8519d13ed559556631258d027dad0b6ebf3160db254c
- size 209193332
+ oid sha256:3c709930fcd73234c8fd0636ff4ab5c31426d68d32e9fb3a01ebf09b79f50d61
+ size 209193780
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:57857a501037bef88058a756500631b1ca55e504df91ef1fa8582774fac1d6f2
+ oid sha256:5d4c3d45bd5aae62c92c9ff393cc15dabbde2961fe98f16264940444bac07041
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:530505d607699f384741067a5f9139d72f043713adb680898a3f1b5714170c97
+ oid sha256:ebdb14d51e77eb18f9d6184de19bfac710da5493717593749289db85474b6091
  size 1064
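
Each pointer diff above follows the Git LFS spec: the repo tracks only a version line, a sha256 oid, and a byte size, while the actual blob lives in LFS storage. A minimal Python sketch of how one could confirm a downloaded blob against its pointer (the helper names sha256_of and matches_pointer are illustrative, not part of this repo; only the hashlib/os standard library is used):

import hashlib
import os

def sha256_of(path, chunk_size=1 << 20):
    # Stream in 1 MiB chunks so the ~400 MB adapter blob need not fit in memory.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

def matches_pointer(path, pointer_text):
    # An LFS pointer is "key value" lines: version, oid sha256:<hex>, size <bytes>.
    fields = dict(line.split(" ", 1) for line in pointer_text.strip().splitlines())
    expected_oid = fields["oid"].split(":", 1)[1]
    return sha256_of(path) == expected_oid and os.path.getsize(path) == int(fields["size"])

For example, after this commit adapter_model.safetensors should hash to the new oid (c6998651…) and come to 411094576 bytes.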
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
- "best_metric": 0.9358128905296326,
- "best_model_checkpoint": "miner_id_24/checkpoint-200",
- "epoch": 0.002658681759116786,
+ "best_metric": 0.8697348237037659,
+ "best_model_checkpoint": "miner_id_24/checkpoint-300",
+ "epoch": 0.003988022638675179,
  "eval_steps": 100,
- "global_step": 200,
+ "global_step": 300,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -1431,6 +1431,714 @@
  "eval_samples_per_second": 19.651,
  "eval_steps_per_second": 4.913,
  "step": 200
+ },
+ {
+ "epoch": 0.00267197516791237,
+ "grad_norm": 1.469771385192871,
+ "learning_rate": 6.883142508466054e-05,
+ "loss": 1.1812,
+ "step": 201
+ },
+ {
+ "epoch": 0.0026852685767079537,
+ "grad_norm": 1.9593838453292847,
+ "learning_rate": 6.852787187549182e-05,
+ "loss": 0.9595,
+ "step": 202
+ },
+ {
+ "epoch": 0.0026985619855035376,
+ "grad_norm": 1.881966471672058,
+ "learning_rate": 6.82235249939575e-05,
+ "loss": 1.0891,
+ "step": 203
+ },
+ {
+ "epoch": 0.0027118553942991216,
+ "grad_norm": 1.5613300800323486,
+ "learning_rate": 6.7918397477265e-05,
+ "loss": 1.1484,
+ "step": 204
+ },
+ {
+ "epoch": 0.0027251488030947055,
+ "grad_norm": 1.6164745092391968,
+ "learning_rate": 6.761250239606169e-05,
+ "loss": 0.8214,
+ "step": 205
+ },
+ {
+ "epoch": 0.0027384422118902894,
+ "grad_norm": 1.4947420358657837,
+ "learning_rate": 6.730585285387465e-05,
+ "loss": 0.9956,
+ "step": 206
+ },
+ {
+ "epoch": 0.0027517356206858733,
+ "grad_norm": 1.6902318000793457,
+ "learning_rate": 6.699846198654971e-05,
+ "loss": 0.8679,
+ "step": 207
+ },
+ {
+ "epoch": 0.0027650290294814572,
+ "grad_norm": 1.6245845556259155,
+ "learning_rate": 6.669034296168855e-05,
+ "loss": 0.9362,
+ "step": 208
+ },
+ {
+ "epoch": 0.002778322438277041,
+ "grad_norm": 1.687902569770813,
+ "learning_rate": 6.638150897808468e-05,
+ "loss": 0.9086,
+ "step": 209
+ },
+ {
+ "epoch": 0.002791615847072625,
+ "grad_norm": 1.6738238334655762,
+ "learning_rate": 6.607197326515808e-05,
+ "loss": 1.0715,
+ "step": 210
+ },
+ {
+ "epoch": 0.002804909255868209,
+ "grad_norm": 1.8981541395187378,
+ "learning_rate": 6.57617490823885e-05,
+ "loss": 1.0964,
+ "step": 211
+ },
+ {
+ "epoch": 0.002818202664663793,
+ "grad_norm": 1.994768500328064,
+ "learning_rate": 6.545084971874738e-05,
+ "loss": 1.0326,
+ "step": 212
+ },
+ {
+ "epoch": 0.002831496073459377,
+ "grad_norm": 1.953397274017334,
+ "learning_rate": 6.513928849212873e-05,
+ "loss": 0.9181,
+ "step": 213
+ },
+ {
+ "epoch": 0.002844789482254961,
+ "grad_norm": 1.5838758945465088,
+ "learning_rate": 6.482707874877854e-05,
+ "loss": 1.0109,
+ "step": 214
+ },
+ {
+ "epoch": 0.002858082891050545,
+ "grad_norm": 2.204489231109619,
+ "learning_rate": 6.451423386272312e-05,
+ "loss": 0.9583,
+ "step": 215
+ },
+ {
+ "epoch": 0.002871376299846129,
+ "grad_norm": 1.679416537284851,
+ "learning_rate": 6.420076723519614e-05,
+ "loss": 1.0152,
+ "step": 216
+ },
+ {
+ "epoch": 0.002884669708641713,
+ "grad_norm": 2.062034845352173,
+ "learning_rate": 6.388669229406462e-05,
+ "loss": 0.9058,
+ "step": 217
+ },
+ {
+ "epoch": 0.002897963117437297,
+ "grad_norm": 1.9006659984588623,
+ "learning_rate": 6.357202249325371e-05,
+ "loss": 0.8065,
+ "step": 218
+ },
+ {
+ "epoch": 0.0029112565262328807,
+ "grad_norm": 1.9091770648956299,
+ "learning_rate": 6.32567713121704e-05,
+ "loss": 0.8315,
+ "step": 219
+ },
+ {
+ "epoch": 0.0029245499350284646,
+ "grad_norm": 2.309170722961426,
+ "learning_rate": 6.294095225512603e-05,
+ "loss": 0.9633,
+ "step": 220
+ },
+ {
+ "epoch": 0.0029378433438240485,
+ "grad_norm": 2.036591053009033,
+ "learning_rate": 6.26245788507579e-05,
+ "loss": 0.9114,
+ "step": 221
+ },
+ {
+ "epoch": 0.0029511367526196325,
+ "grad_norm": 2.540407419204712,
+ "learning_rate": 6.230766465144967e-05,
+ "loss": 0.8928,
+ "step": 222
+ },
+ {
+ "epoch": 0.0029644301614152164,
+ "grad_norm": 2.447540044784546,
+ "learning_rate": 6.199022323275083e-05,
+ "loss": 0.8958,
+ "step": 223
+ },
+ {
+ "epoch": 0.0029777235702108003,
+ "grad_norm": 2.1421115398406982,
+ "learning_rate": 6.167226819279528e-05,
+ "loss": 0.9212,
+ "step": 224
+ },
+ {
+ "epoch": 0.002991016979006384,
+ "grad_norm": 2.2318172454833984,
+ "learning_rate": 6.135381315171867e-05,
+ "loss": 0.7849,
+ "step": 225
+ },
+ {
+ "epoch": 0.003004310387801968,
+ "grad_norm": 2.107386350631714,
+ "learning_rate": 6.103487175107507e-05,
+ "loss": 0.8173,
+ "step": 226
+ },
+ {
+ "epoch": 0.003017603796597552,
+ "grad_norm": 2.08105206489563,
+ "learning_rate": 6.071545765325254e-05,
+ "loss": 0.9439,
+ "step": 227
+ },
+ {
+ "epoch": 0.003030897205393136,
+ "grad_norm": 2.376014471054077,
+ "learning_rate": 6.0395584540887963e-05,
+ "loss": 0.9148,
+ "step": 228
+ },
+ {
+ "epoch": 0.00304419061418872,
+ "grad_norm": 2.1427810192108154,
+ "learning_rate": 6.007526611628086e-05,
+ "loss": 0.8044,
+ "step": 229
+ },
+ {
+ "epoch": 0.0030574840229843038,
+ "grad_norm": 2.6007277965545654,
+ "learning_rate": 5.9754516100806423e-05,
+ "loss": 0.8667,
+ "step": 230
+ },
+ {
+ "epoch": 0.0030707774317798877,
+ "grad_norm": 2.6047568321228027,
+ "learning_rate": 5.9433348234327765e-05,
+ "loss": 0.8761,
+ "step": 231
+ },
+ {
+ "epoch": 0.0030840708405754716,
+ "grad_norm": 2.6537301540374756,
+ "learning_rate": 5.911177627460739e-05,
+ "loss": 1.0008,
+ "step": 232
+ },
+ {
+ "epoch": 0.0030973642493710555,
+ "grad_norm": 2.5320510864257812,
+ "learning_rate": 5.8789813996717736e-05,
+ "loss": 0.9914,
+ "step": 233
+ },
+ {
+ "epoch": 0.0031106576581666394,
+ "grad_norm": 2.1913628578186035,
+ "learning_rate": 5.8467475192451226e-05,
+ "loss": 0.8323,
+ "step": 234
+ },
+ {
+ "epoch": 0.0031239510669622233,
+ "grad_norm": 2.716449737548828,
+ "learning_rate": 5.814477366972945e-05,
+ "loss": 0.885,
+ "step": 235
+ },
+ {
+ "epoch": 0.0031372444757578073,
+ "grad_norm": 3.720883846282959,
+ "learning_rate": 5.782172325201155e-05,
+ "loss": 1.0299,
+ "step": 236
+ },
+ {
+ "epoch": 0.003150537884553391,
+ "grad_norm": 2.6181509494781494,
+ "learning_rate": 5.749833777770225e-05,
+ "loss": 0.8591,
+ "step": 237
+ },
+ {
+ "epoch": 0.003163831293348975,
+ "grad_norm": 2.382664918899536,
+ "learning_rate": 5.717463109955896e-05,
+ "loss": 0.8851,
+ "step": 238
+ },
+ {
+ "epoch": 0.003177124702144559,
+ "grad_norm": 2.3294265270233154,
+ "learning_rate": 5.685061708409841e-05,
+ "loss": 0.7961,
+ "step": 239
+ },
+ {
+ "epoch": 0.003190418110940143,
+ "grad_norm": 3.5375571250915527,
+ "learning_rate": 5.6526309611002594e-05,
+ "loss": 0.9115,
+ "step": 240
+ },
+ {
+ "epoch": 0.003203711519735727,
+ "grad_norm": 2.9233639240264893,
+ "learning_rate": 5.6201722572524275e-05,
+ "loss": 0.7504,
+ "step": 241
+ },
+ {
+ "epoch": 0.003217004928531311,
+ "grad_norm": 3.2264790534973145,
+ "learning_rate": 5.587686987289189e-05,
+ "loss": 0.7414,
+ "step": 242
+ },
+ {
+ "epoch": 0.003230298337326895,
+ "grad_norm": 2.921107530593872,
+ "learning_rate": 5.5551765427713884e-05,
+ "loss": 0.5761,
+ "step": 243
+ },
+ {
+ "epoch": 0.003243591746122479,
+ "grad_norm": 3.3718862533569336,
+ "learning_rate": 5.522642316338268e-05,
+ "loss": 0.8256,
+ "step": 244
+ },
+ {
+ "epoch": 0.003256885154918063,
+ "grad_norm": 2.911158800125122,
+ "learning_rate": 5.490085701647805e-05,
+ "loss": 0.7299,
+ "step": 245
+ },
+ {
+ "epoch": 0.003270178563713647,
+ "grad_norm": 3.8199920654296875,
+ "learning_rate": 5.457508093317013e-05,
+ "loss": 0.7782,
+ "step": 246
+ },
+ {
+ "epoch": 0.0032834719725092307,
+ "grad_norm": 3.6611592769622803,
+ "learning_rate": 5.4249108868622086e-05,
+ "loss": 0.7346,
+ "step": 247
+ },
+ {
+ "epoch": 0.0032967653813048147,
+ "grad_norm": 3.3983826637268066,
+ "learning_rate": 5.392295478639225e-05,
+ "loss": 0.68,
+ "step": 248
+ },
+ {
+ "epoch": 0.0033100587901003986,
+ "grad_norm": 4.487311840057373,
+ "learning_rate": 5.359663265783598e-05,
+ "loss": 0.7677,
+ "step": 249
+ },
+ {
+ "epoch": 0.0033233521988959825,
+ "grad_norm": 5.948139667510986,
+ "learning_rate": 5.327015646150716e-05,
+ "loss": 0.7702,
+ "step": 250
+ },
+ {
+ "epoch": 0.0033366456076915664,
+ "grad_norm": 1.1044285297393799,
+ "learning_rate": 5.294354018255945e-05,
+ "loss": 0.9482,
+ "step": 251
+ },
+ {
+ "epoch": 0.0033499390164871503,
+ "grad_norm": 1.6621804237365723,
+ "learning_rate": 5.26167978121472e-05,
+ "loss": 0.8978,
+ "step": 252
+ },
+ {
+ "epoch": 0.0033632324252827342,
+ "grad_norm": 1.7893524169921875,
+ "learning_rate": 5.228994334682604e-05,
+ "loss": 0.9667,
+ "step": 253
+ },
+ {
+ "epoch": 0.003376525834078318,
+ "grad_norm": 1.6742503643035889,
+ "learning_rate": 5.196299078795344e-05,
+ "loss": 0.8508,
+ "step": 254
+ },
+ {
+ "epoch": 0.003389819242873902,
+ "grad_norm": 1.6336678266525269,
+ "learning_rate": 5.1635954141088813e-05,
+ "loss": 1.0183,
+ "step": 255
+ },
+ {
+ "epoch": 0.003403112651669486,
+ "grad_norm": 1.4634300470352173,
+ "learning_rate": 5.1308847415393666e-05,
+ "loss": 0.8325,
+ "step": 256
+ },
+ {
+ "epoch": 0.00341640606046507,
+ "grad_norm": 1.8752628564834595,
+ "learning_rate": 5.0981684623031415e-05,
+ "loss": 0.8682,
+ "step": 257
+ },
+ {
+ "epoch": 0.003429699469260654,
+ "grad_norm": 1.516921043395996,
+ "learning_rate": 5.0654479778567223e-05,
+ "loss": 0.87,
+ "step": 258
+ },
+ {
+ "epoch": 0.0034429928780562377,
+ "grad_norm": 1.953859567642212,
+ "learning_rate": 5.0327246898367597e-05,
+ "loss": 1.0406,
+ "step": 259
+ },
+ {
+ "epoch": 0.0034562862868518216,
+ "grad_norm": 1.692104697227478,
+ "learning_rate": 5e-05,
+ "loss": 1.0228,
+ "step": 260
+ },
+ {
+ "epoch": 0.0034695796956474055,
+ "grad_norm": 1.6310532093048096,
+ "learning_rate": 4.9672753101632415e-05,
+ "loss": 1.0342,
+ "step": 261
+ },
+ {
+ "epoch": 0.0034828731044429895,
+ "grad_norm": 1.8367486000061035,
+ "learning_rate": 4.934552022143279e-05,
+ "loss": 0.8968,
+ "step": 262
+ },
+ {
+ "epoch": 0.0034961665132385734,
+ "grad_norm": 1.6751683950424194,
+ "learning_rate": 4.901831537696859e-05,
+ "loss": 0.9189,
+ "step": 263
+ },
+ {
+ "epoch": 0.0035094599220341573,
+ "grad_norm": 1.8059558868408203,
+ "learning_rate": 4.869115258460635e-05,
+ "loss": 0.9239,
+ "step": 264
+ },
+ {
+ "epoch": 0.003522753330829741,
+ "grad_norm": 1.7002429962158203,
+ "learning_rate": 4.83640458589112e-05,
+ "loss": 0.8102,
+ "step": 265
+ },
+ {
+ "epoch": 0.003536046739625325,
+ "grad_norm": 1.738466501235962,
+ "learning_rate": 4.8037009212046586e-05,
+ "loss": 1.0022,
+ "step": 266
+ },
+ {
+ "epoch": 0.003549340148420909,
+ "grad_norm": 2.1397879123687744,
+ "learning_rate": 4.7710056653173976e-05,
+ "loss": 0.8882,
+ "step": 267
+ },
+ {
+ "epoch": 0.003562633557216493,
+ "grad_norm": 2.1561543941497803,
+ "learning_rate": 4.738320218785281e-05,
+ "loss": 0.9589,
+ "step": 268
+ },
+ {
+ "epoch": 0.003575926966012077,
+ "grad_norm": 2.116396427154541,
+ "learning_rate": 4.7056459817440544e-05,
+ "loss": 0.8827,
+ "step": 269
+ },
+ {
+ "epoch": 0.003589220374807661,
+ "grad_norm": 2.172356367111206,
+ "learning_rate": 4.6729843538492847e-05,
+ "loss": 0.9613,
+ "step": 270
+ },
+ {
+ "epoch": 0.003602513783603245,
+ "grad_norm": 1.7960728406906128,
+ "learning_rate": 4.640336734216403e-05,
+ "loss": 0.8781,
+ "step": 271
+ },
+ {
+ "epoch": 0.003615807192398829,
+ "grad_norm": 1.8511698246002197,
+ "learning_rate": 4.607704521360776e-05,
+ "loss": 0.8647,
+ "step": 272
+ },
+ {
+ "epoch": 0.003629100601194413,
+ "grad_norm": 2.0229597091674805,
+ "learning_rate": 4.575089113137792e-05,
+ "loss": 1.004,
+ "step": 273
+ },
+ {
+ "epoch": 0.003642394009989997,
+ "grad_norm": 2.3480939865112305,
+ "learning_rate": 4.542491906682989e-05,
+ "loss": 1.0119,
+ "step": 274
+ },
+ {
+ "epoch": 0.0036556874187855808,
+ "grad_norm": 1.9140769243240356,
+ "learning_rate": 4.509914298352197e-05,
+ "loss": 0.8089,
+ "step": 275
+ },
+ {
+ "epoch": 0.0036689808275811647,
+ "grad_norm": 1.9026778936386108,
+ "learning_rate": 4.477357683661734e-05,
+ "loss": 0.7569,
+ "step": 276
+ },
+ {
+ "epoch": 0.0036822742363767486,
+ "grad_norm": 2.2026915550231934,
+ "learning_rate": 4.444823457228612e-05,
+ "loss": 0.8855,
+ "step": 277
+ },
+ {
+ "epoch": 0.0036955676451723325,
+ "grad_norm": 2.2045536041259766,
+ "learning_rate": 4.412313012710813e-05,
+ "loss": 0.8306,
+ "step": 278
+ },
+ {
+ "epoch": 0.0037088610539679164,
+ "grad_norm": 2.3194525241851807,
+ "learning_rate": 4.379827742747575e-05,
+ "loss": 0.8154,
+ "step": 279
+ },
+ {
+ "epoch": 0.0037221544627635004,
+ "grad_norm": 2.6129322052001953,
+ "learning_rate": 4.347369038899744e-05,
+ "loss": 0.9187,
+ "step": 280
+ },
+ {
+ "epoch": 0.0037354478715590843,
+ "grad_norm": 2.2725565433502197,
+ "learning_rate": 4.3149382915901606e-05,
+ "loss": 0.793,
+ "step": 281
+ },
+ {
+ "epoch": 0.003748741280354668,
+ "grad_norm": 2.23557186126709,
+ "learning_rate": 4.282536890044104e-05,
+ "loss": 0.8578,
+ "step": 282
+ },
+ {
+ "epoch": 0.003762034689150252,
+ "grad_norm": 2.394350290298462,
+ "learning_rate": 4.250166222229774e-05,
+ "loss": 0.8555,
+ "step": 283
+ },
+ {
+ "epoch": 0.003775328097945836,
+ "grad_norm": 2.935340642929077,
+ "learning_rate": 4.2178276747988446e-05,
+ "loss": 0.852,
+ "step": 284
+ },
+ {
+ "epoch": 0.00378862150674142,
+ "grad_norm": 3.061005115509033,
+ "learning_rate": 4.185522633027057e-05,
+ "loss": 0.8782,
+ "step": 285
+ },
+ {
+ "epoch": 0.003801914915537004,
+ "grad_norm": 2.7278201580047607,
+ "learning_rate": 4.153252480754877e-05,
+ "loss": 0.8451,
+ "step": 286
+ },
+ {
+ "epoch": 0.0038152083243325878,
+ "grad_norm": 2.643934488296509,
+ "learning_rate": 4.1210186003282275e-05,
+ "loss": 0.7744,
+ "step": 287
+ },
+ {
+ "epoch": 0.0038285017331281717,
+ "grad_norm": 3.218041181564331,
+ "learning_rate": 4.088822372539263e-05,
+ "loss": 0.8261,
+ "step": 288
+ },
+ {
+ "epoch": 0.0038417951419237556,
+ "grad_norm": 2.650805950164795,
+ "learning_rate": 4.0566651765672246e-05,
+ "loss": 0.8542,
+ "step": 289
+ },
+ {
+ "epoch": 0.0038550885507193395,
+ "grad_norm": 2.679455041885376,
+ "learning_rate": 4.0245483899193595e-05,
+ "loss": 0.8374,
+ "step": 290
+ },
+ {
+ "epoch": 0.0038683819595149234,
+ "grad_norm": 3.0015034675598145,
+ "learning_rate": 3.992473388371915e-05,
+ "loss": 0.9829,
+ "step": 291
+ },
+ {
+ "epoch": 0.0038816753683105073,
+ "grad_norm": 3.549671173095703,
+ "learning_rate": 3.960441545911204e-05,
+ "loss": 0.8356,
+ "step": 292
+ },
+ {
+ "epoch": 0.0038949687771060912,
+ "grad_norm": 3.1017885208129883,
+ "learning_rate": 3.928454234674747e-05,
+ "loss": 0.8398,
+ "step": 293
+ },
+ {
+ "epoch": 0.003908262185901675,
+ "grad_norm": 3.0585556030273438,
+ "learning_rate": 3.896512824892495e-05,
+ "loss": 0.7647,
+ "step": 294
+ },
+ {
+ "epoch": 0.0039215555946972595,
+ "grad_norm": 3.385533571243286,
+ "learning_rate": 3.864618684828134e-05,
+ "loss": 0.6782,
+ "step": 295
+ },
+ {
+ "epoch": 0.003934849003492843,
+ "grad_norm": 5.169503211975098,
+ "learning_rate": 3.832773180720475e-05,
+ "loss": 0.7389,
+ "step": 296
+ },
+ {
+ "epoch": 0.003948142412288427,
+ "grad_norm": 2.9151158332824707,
+ "learning_rate": 3.800977676724919e-05,
+ "loss": 0.6878,
+ "step": 297
+ },
+ {
+ "epoch": 0.003961435821084011,
+ "grad_norm": 3.42378568649292,
+ "learning_rate": 3.769233534855035e-05,
+ "loss": 0.6473,
+ "step": 298
+ },
+ {
+ "epoch": 0.003974729229879595,
+ "grad_norm": 4.071504592895508,
+ "learning_rate": 3.73754211492421e-05,
+ "loss": 0.8104,
+ "step": 299
+ },
+ {
+ "epoch": 0.003988022638675179,
+ "grad_norm": 5.344113349914551,
+ "learning_rate": 3.705904774487396e-05,
+ "loss": 0.9578,
+ "step": 300
+ },
+ {
+ "epoch": 0.003988022638675179,
+ "eval_loss": 0.8697348237037659,
+ "eval_runtime": 6447.6628,
+ "eval_samples_per_second": 19.65,
+ "eval_steps_per_second": 4.912,
+ "step": 300
  }
  ],
  "logging_steps": 1,
 
@@ -1459,7 +2167,7 @@
  "attributes": {}
  }
  },
- "total_flos": 1.2542533349828198e+17,
+ "total_flos": 1.880597072801956e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null