Training in progress, step 300, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +2 -2
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +713 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2f5ab6b7d01e357faec8d24f5eeab288da927aeb0b4ae82502046b4bd35b750a
 size 161533192

 version https://git-lfs.github.com/spec/v1
+oid sha256:b2d83d5f901eae11145cacd89c72231a010785bff7439a1ba5f4a5e94b8052b4
 size 161533192

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b3beddf3d49e219d1affc71de13c23d27956429a4cabfa1b9a37828e9f9664d0
-size 82460660

 version https://git-lfs.github.com/spec/v1
+oid sha256:1daa0c2c790a56b21b4c6c41ad6a3c0d997b0d94c85b459b9bd582601d994cca
+size 82461044

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:204817fe3f4bf35aa4941a46f59c51c1b43bbff8a2da0f4838279ae11c91a700
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:89f940dbe392540bac79082367808c07a597a4f60dff698b2c1ee1c27e1aabed
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:83ebe7e36e83c1b5914c2a6daae1c4f326cee5b8b90231fe486f937787ce3706
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:fd8039035e9f22cc9e9230f66e4f1f1db2add3119ff8edfe450ef63eb16e5439
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.7198740243911743,
-  "best_model_checkpoint": "miner_id_24/checkpoint-200",
-  "epoch": 0.07969516599008791,
   "eval_steps": 100,
-  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1431,6 +1431,714 @@
       "eval_samples_per_second": 14.017,
       "eval_steps_per_second": 3.505,
       "step": 200
     }
   ],
   "logging_steps": 1,
@@ -1459,7 +2167,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.592309546614784e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.700640082359314,
+  "best_model_checkpoint": "miner_id_24/checkpoint-300",
+  "epoch": 0.11954274898513187,
   "eval_steps": 100,
+  "global_step": 300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 14.017,
       "eval_steps_per_second": 3.505,
       "step": 200
+    },
+    {
+      "epoch": 0.08009364182003835,
+      "grad_norm": 0.5120438933372498,
+      "learning_rate": 0.00011964121814957137,
+      "loss": 1.7626,
+      "step": 201
+    },
+    {
+      "epoch": 0.08049211764998879,
+      "grad_norm": 0.5081213712692261,
+      "learning_rate": 0.00011893581990651848,
+      "loss": 1.7664,
+      "step": 202
+    },
+    {
+      "epoch": 0.08089059347993924,
+      "grad_norm": 0.45303136110305786,
+      "learning_rate": 0.00011822944303213486,
+      "loss": 1.4845,
+      "step": 203
+    },
+    {
+      "epoch": 0.08128906930988967,
+      "grad_norm": 0.4521328806877136,
+      "learning_rate": 0.00011752212403302784,
+      "loss": 1.4534,
+      "step": 204
+    },
+    {
+      "epoch": 0.08168754513984011,
+      "grad_norm": 0.4870966970920563,
+      "learning_rate": 0.00011681389946449504,
+      "loss": 1.568,
+      "step": 205
+    },
+    {
+      "epoch": 0.08208602096979055,
+      "grad_norm": 0.5694287419319153,
+      "learning_rate": 0.00011610480592863531,
+      "loss": 1.9597,
+      "step": 206
+    },
+    {
+      "epoch": 0.08248449679974099,
+      "grad_norm": 0.5484179258346558,
+      "learning_rate": 0.00011539488007245702,
+      "loss": 1.8557,
+      "step": 207
+    },
+    {
+      "epoch": 0.08288297262969144,
+      "grad_norm": 0.44166100025177,
+      "learning_rate": 0.00011468415858598411,
+      "loss": 1.5371,
+      "step": 208
+    },
+    {
+      "epoch": 0.08328144845964187,
+      "grad_norm": 0.5365661978721619,
+      "learning_rate": 0.00011397267820035986,
+      "loss": 1.7778,
+      "step": 209
+    },
+    {
+      "epoch": 0.08367992428959231,
+      "grad_norm": 0.45911017060279846,
+      "learning_rate": 0.00011326047568594851,
+      "loss": 1.5729,
+      "step": 210
+    },
+    {
+      "epoch": 0.08407840011954275,
+      "grad_norm": 0.5417599678039551,
+      "learning_rate": 0.00011254758785043515,
+      "loss": 1.8296,
+      "step": 211
+    },
+    {
+      "epoch": 0.08447687594949319,
+      "grad_norm": 0.48594942688941956,
+      "learning_rate": 0.0001118340515369232,
+      "loss": 1.7837,
+      "step": 212
+    },
+    {
+      "epoch": 0.08487535177944362,
+      "grad_norm": 0.4888298511505127,
+      "learning_rate": 0.00011111990362203033,
+      "loss": 1.6575,
+      "step": 213
+    },
+    {
+      "epoch": 0.08527382760939407,
+      "grad_norm": 0.5313907265663147,
+      "learning_rate": 0.00011040518101398276,
+      "loss": 1.7803,
+      "step": 214
+    },
+    {
+      "epoch": 0.08567230343934451,
+      "grad_norm": 0.5065906643867493,
+      "learning_rate": 0.00010968992065070769,
+      "loss": 1.6539,
+      "step": 215
+    },
+    {
+      "epoch": 0.08607077926929495,
+      "grad_norm": 0.46294957399368286,
+      "learning_rate": 0.00010897415949792427,
+      "loss": 1.6412,
+      "step": 216
+    },
+    {
+      "epoch": 0.08646925509924538,
+      "grad_norm": 0.5068647861480713,
+      "learning_rate": 0.00010825793454723325,
+      "loss": 1.7173,
+      "step": 217
+    },
+    {
+      "epoch": 0.08686773092919582,
+      "grad_norm": 0.45219966769218445,
+      "learning_rate": 0.0001075412828142051,
+      "loss": 1.4531,
+      "step": 218
+    },
+    {
+      "epoch": 0.08726620675914626,
+      "grad_norm": 0.48035022616386414,
+      "learning_rate": 0.0001068242413364671,
+      "loss": 1.6187,
+      "step": 219
+    },
+    {
+      "epoch": 0.08766468258909671,
+      "grad_norm": 0.5463985204696655,
+      "learning_rate": 0.00010610684717178905,
+      "loss": 1.749,
+      "step": 220
+    },
+    {
+      "epoch": 0.08806315841904715,
+      "grad_norm": 0.4818764626979828,
+      "learning_rate": 0.00010538913739616816,
+      "loss": 1.4508,
+      "step": 221
+    },
+    {
+      "epoch": 0.08846163424899758,
+      "grad_norm": 0.5018213987350464,
+      "learning_rate": 0.00010467114910191289,
+      "loss": 1.6853,
+      "step": 222
+    },
+    {
+      "epoch": 0.08886011007894802,
+      "grad_norm": 0.5122075080871582,
+      "learning_rate": 0.00010395291939572593,
+      "loss": 1.6991,
+      "step": 223
+    },
+    {
+      "epoch": 0.08925858590889846,
+      "grad_norm": 0.48191148042678833,
+      "learning_rate": 0.00010323448539678653,
+      "loss": 1.6428,
+      "step": 224
+    },
+    {
+      "epoch": 0.08965706173884891,
+      "grad_norm": 0.4748276472091675,
+      "learning_rate": 0.00010251588423483205,
+      "loss": 1.7059,
+      "step": 225
+    },
+    {
+      "epoch": 0.09005553756879935,
+      "grad_norm": 0.5150067806243896,
+      "learning_rate": 0.0001017971530482392,
+      "loss": 1.7409,
+      "step": 226
+    },
+    {
+      "epoch": 0.09045401339874978,
+      "grad_norm": 0.5893855094909668,
+      "learning_rate": 0.00010107832898210439,
+      "loss": 1.7183,
+      "step": 227
+    },
+    {
+      "epoch": 0.09085248922870022,
+      "grad_norm": 0.5195055603981018,
+      "learning_rate": 0.00010035944918632429,
+      "loss": 1.8396,
+      "step": 228
+    },
+    {
+      "epoch": 0.09125096505865066,
+      "grad_norm": 0.5996953845024109,
+      "learning_rate": 9.96405508136757e-05,
+      "loss": 1.9944,
+      "step": 229
+    },
+    {
+      "epoch": 0.0916494408886011,
+      "grad_norm": 0.5057780146598816,
+      "learning_rate": 9.892167101789564e-05,
+      "loss": 1.6473,
+      "step": 230
+    },
+    {
+      "epoch": 0.09204791671855155,
+      "grad_norm": 0.46774283051490784,
+      "learning_rate": 9.820284695176082e-05,
+      "loss": 1.5973,
+      "step": 231
+    },
+    {
+      "epoch": 0.09244639254850198,
+      "grad_norm": 0.46982142329216003,
+      "learning_rate": 9.748411576516794e-05,
+      "loss": 1.6464,
+      "step": 232
+    },
+    {
+      "epoch": 0.09284486837845242,
+      "grad_norm": 0.4873621165752411,
+      "learning_rate": 9.676551460321349e-05,
+      "loss": 1.6629,
+      "step": 233
+    },
+    {
+      "epoch": 0.09324334420840286,
+      "grad_norm": 0.4866909682750702,
+      "learning_rate": 9.60470806042741e-05,
+      "loss": 1.6262,
+      "step": 234
+    },
+    {
+      "epoch": 0.0936418200383533,
+      "grad_norm": 0.5320809483528137,
+      "learning_rate": 9.532885089808713e-05,
+      "loss": 1.7158,
+      "step": 235
+    },
+    {
+      "epoch": 0.09404029586830374,
+      "grad_norm": 0.47346270084381104,
+      "learning_rate": 9.461086260383187e-05,
+      "loss": 1.6044,
+      "step": 236
+    },
+    {
+      "epoch": 0.09443877169825418,
+      "grad_norm": 0.5696609616279602,
+      "learning_rate": 9.389315282821097e-05,
+      "loss": 1.7883,
+      "step": 237
+    },
+    {
+      "epoch": 0.09483724752820462,
+      "grad_norm": 0.4926949441432953,
+      "learning_rate": 9.317575866353292e-05,
+      "loss": 1.7306,
+      "step": 238
+    },
+    {
+      "epoch": 0.09523572335815506,
+      "grad_norm": 0.5241943001747131,
+      "learning_rate": 9.245871718579491e-05,
+      "loss": 1.732,
+      "step": 239
+    },
+    {
+      "epoch": 0.09563419918810549,
+      "grad_norm": 0.5425236225128174,
+      "learning_rate": 9.174206545276677e-05,
+      "loss": 1.6209,
+      "step": 240
+    },
+    {
+      "epoch": 0.09603267501805593,
+      "grad_norm": 0.5216458439826965,
+      "learning_rate": 9.102584050207578e-05,
+      "loss": 1.74,
+      "step": 241
+    },
+    {
+      "epoch": 0.09643115084800638,
+      "grad_norm": 0.5082316994667053,
+      "learning_rate": 9.031007934929236e-05,
+      "loss": 1.6836,
+      "step": 242
+    },
+    {
+      "epoch": 0.09682962667795682,
+      "grad_norm": 0.48965132236480713,
+      "learning_rate": 8.959481898601728e-05,
+      "loss": 1.7055,
+      "step": 243
+    },
+    {
+      "epoch": 0.09722810250790725,
+      "grad_norm": 0.514946699142456,
+      "learning_rate": 8.888009637796968e-05,
+      "loss": 1.684,
+      "step": 244
+    },
+    {
+      "epoch": 0.09762657833785769,
+      "grad_norm": 0.551802396774292,
+      "learning_rate": 8.81659484630768e-05,
+      "loss": 1.8566,
+      "step": 245
+    },
+    {
+      "epoch": 0.09802505416780813,
+      "grad_norm": 0.4790934920310974,
+      "learning_rate": 8.745241214956483e-05,
+      "loss": 1.6461,
+      "step": 246
+    },
+    {
+      "epoch": 0.09842352999775858,
+      "grad_norm": 0.5450412631034851,
+      "learning_rate": 8.673952431405148e-05,
+      "loss": 1.7215,
+      "step": 247
+    },
+    {
+      "epoch": 0.09882200582770902,
+      "grad_norm": 0.5299497842788696,
+      "learning_rate": 8.602732179964017e-05,
+      "loss": 1.7454,
+      "step": 248
+    },
+    {
+      "epoch": 0.09922048165765945,
+      "grad_norm": 0.5010784268379211,
+      "learning_rate": 8.531584141401591e-05,
+      "loss": 1.6028,
+      "step": 249
+    },
+    {
+      "epoch": 0.09961895748760989,
+      "grad_norm": 0.4926188886165619,
+      "learning_rate": 8.4605119927543e-05,
+      "loss": 1.6837,
+      "step": 250
+    },
+    {
+      "epoch": 0.10001743331756033,
+      "grad_norm": 0.5703017115592957,
+      "learning_rate": 8.38951940713647e-05,
+      "loss": 1.8639,
+      "step": 251
+    },
+    {
+      "epoch": 0.10041590914751077,
+      "grad_norm": 0.5429261326789856,
+      "learning_rate": 8.318610053550497e-05,
+      "loss": 1.7258,
+      "step": 252
+    },
+    {
+      "epoch": 0.10081438497746122,
+      "grad_norm": 0.48338782787323,
+      "learning_rate": 8.247787596697218e-05,
+      "loss": 1.5873,
+      "step": 253
+    },
+    {
+      "epoch": 0.10121286080741165,
+      "grad_norm": 0.506877601146698,
+      "learning_rate": 8.177055696786516e-05,
+      "loss": 1.6736,
+      "step": 254
+    },
+    {
+      "epoch": 0.10161133663736209,
+      "grad_norm": 0.537820041179657,
+      "learning_rate": 8.106418009348157e-05,
+      "loss": 1.9075,
+      "step": 255
+    },
+    {
+      "epoch": 0.10200981246731253,
+      "grad_norm": 0.4729152023792267,
+      "learning_rate": 8.035878185042868e-05,
+      "loss": 1.5359,
+      "step": 256
+    },
+    {
+      "epoch": 0.10240828829726296,
+      "grad_norm": 0.4413747191429138,
+      "learning_rate": 7.965439869473664e-05,
+      "loss": 1.6245,
+      "step": 257
+    },
+    {
+      "epoch": 0.10280676412721342,
+      "grad_norm": 0.5398510694503784,
+      "learning_rate": 7.895106702997437e-05,
+      "loss": 1.6318,
+      "step": 258
+    },
+    {
+      "epoch": 0.10320523995716385,
+      "grad_norm": 0.5172785520553589,
+      "learning_rate": 7.824882320536814e-05,
+      "loss": 1.6601,
+      "step": 259
+    },
+    {
+      "epoch": 0.10360371578711429,
+      "grad_norm": 0.4824993908405304,
+      "learning_rate": 7.754770351392311e-05,
+      "loss": 1.5672,
+      "step": 260
+    },
+    {
+      "epoch": 0.10400219161706473,
+      "grad_norm": 0.4745709300041199,
+      "learning_rate": 7.684774419054747e-05,
+      "loss": 1.7128,
+      "step": 261
+    },
+    {
+      "epoch": 0.10440066744701516,
+      "grad_norm": 0.5071855783462524,
+      "learning_rate": 7.614898141017996e-05,
+      "loss": 1.7368,
+      "step": 262
+    },
+    {
+      "epoch": 0.1047991432769656,
+      "grad_norm": 0.5377690196037292,
+      "learning_rate": 7.54514512859201e-05,
+      "loss": 1.8659,
+      "step": 263
+    },
+    {
+      "epoch": 0.10519761910691605,
+      "grad_norm": 0.4762866199016571,
+      "learning_rate": 7.475518986716194e-05,
+      "loss": 1.6012,
+      "step": 264
+    },
+    {
+      "epoch": 0.10559609493686649,
+      "grad_norm": 0.46296924352645874,
+      "learning_rate": 7.406023313773097e-05,
+      "loss": 1.5484,
+      "step": 265
+    },
+    {
+      "epoch": 0.10599457076681693,
+      "grad_norm": 0.47845426201820374,
+      "learning_rate": 7.336661701402439e-05,
+      "loss": 1.6248,
+      "step": 266
+    },
+    {
+      "epoch": 0.10639304659676736,
+      "grad_norm": 0.48351001739501953,
+      "learning_rate": 7.267437734315492e-05,
+      "loss": 1.5549,
+      "step": 267
+    },
+    {
+      "epoch": 0.1067915224267178,
+      "grad_norm": 0.48554375767707825,
+      "learning_rate": 7.198354990109805e-05,
+      "loss": 1.5708,
+      "step": 268
+    },
+    {
+      "epoch": 0.10718999825666825,
+      "grad_norm": 0.47755077481269836,
+      "learning_rate": 7.129417039084333e-05,
+      "loss": 1.5864,
+      "step": 269
+    },
+    {
+      "epoch": 0.10758847408661869,
+      "grad_norm": 0.4970269799232483,
+      "learning_rate": 7.060627444054893e-05,
+      "loss": 1.6373,
+      "step": 270
+    },
+    {
+      "epoch": 0.10798694991656912,
+      "grad_norm": 0.47547978162765503,
+      "learning_rate": 6.99198976017005e-05,
+      "loss": 1.7433,
+      "step": 271
+    },
+    {
+      "epoch": 0.10838542574651956,
+      "grad_norm": 0.5408848524093628,
+      "learning_rate": 6.923507534727373e-05,
+      "loss": 1.77,
+      "step": 272
+    },
+    {
+      "epoch": 0.10878390157647,
+      "grad_norm": 0.49777430295944214,
+      "learning_rate": 6.855184306990106e-05,
+      "loss": 1.6071,
+      "step": 273
+    },
+    {
+      "epoch": 0.10918237740642044,
+      "grad_norm": 0.4691534638404846,
+      "learning_rate": 6.78702360800425e-05,
+      "loss": 1.5913,
+      "step": 274
+    },
+    {
+      "epoch": 0.10958085323637089,
+      "grad_norm": 0.5284269452095032,
+      "learning_rate": 6.719028960416098e-05,
+      "loss": 1.8038,
+      "step": 275
+    },
+    {
+      "epoch": 0.10997932906632132,
+      "grad_norm": 0.49061042070388794,
+      "learning_rate": 6.651203878290139e-05,
+      "loss": 1.5991,
+      "step": 276
+    },
+    {
+      "epoch": 0.11037780489627176,
+      "grad_norm": 0.5676330327987671,
+      "learning_rate": 6.583551866927475e-05,
+      "loss": 1.8924,
+      "step": 277
+    },
+    {
+      "epoch": 0.1107762807262222,
+      "grad_norm": 0.5392544865608215,
+      "learning_rate": 6.516076422684654e-05,
+      "loss": 1.7611,
+      "step": 278
+    },
+    {
+      "epoch": 0.11117475655617264,
+      "grad_norm": 0.5719506740570068,
+      "learning_rate": 6.448781032792972e-05,
+      "loss": 1.756,
+      "step": 279
+    },
+    {
+      "epoch": 0.11157323238612307,
+      "grad_norm": 0.4809233248233795,
+      "learning_rate": 6.381669175178248e-05,
+      "loss": 1.641,
+      "step": 280
+    },
+    {
+      "epoch": 0.11197170821607352,
+      "grad_norm": 0.48434188961982727,
+      "learning_rate": 6.31474431828108e-05,
+      "loss": 1.579,
+      "step": 281
+    },
+    {
+      "epoch": 0.11237018404602396,
+      "grad_norm": 0.5024405717849731,
+      "learning_rate": 6.248009920877592e-05,
+      "loss": 1.6653,
+      "step": 282
+    },
+    {
+      "epoch": 0.1127686598759744,
+      "grad_norm": 0.441279798746109,
+      "learning_rate": 6.181469431900672e-05,
+      "loss": 1.5105,
+      "step": 283
+    },
+    {
+      "epoch": 0.11316713570592483,
+      "grad_norm": 0.5233234763145447,
+      "learning_rate": 6.115126290261745e-05,
+      "loss": 1.7695,
+      "step": 284
+    },
+    {
+      "epoch": 0.11356561153587527,
+      "grad_norm": 0.5281261801719666,
+      "learning_rate": 6.048983924673022e-05,
+      "loss": 1.76,
+      "step": 285
+    },
+    {
+      "epoch": 0.11396408736582572,
+      "grad_norm": 0.534590482711792,
+      "learning_rate": 5.983045753470308e-05,
+      "loss": 1.8155,
+      "step": 286
+    },
+    {
+      "epoch": 0.11436256319577616,
+      "grad_norm": 0.5247072577476501,
+      "learning_rate": 5.917315184436345e-05,
+      "loss": 1.6073,
+      "step": 287
+    },
+    {
+      "epoch": 0.1147610390257266,
+      "grad_norm": 0.4829355776309967,
+      "learning_rate": 5.851795614624682e-05,
+      "loss": 1.5224,
+      "step": 288
+    },
+    {
+      "epoch": 0.11515951485567703,
+      "grad_norm": 0.516015887260437,
+      "learning_rate": 5.786490430184115e-05,
+      "loss": 1.6813,
+      "step": 289
+    },
+    {
+      "epoch": 0.11555799068562747,
+      "grad_norm": 0.48894891142845154,
+      "learning_rate": 5.72140300618369e-05,
+      "loss": 1.7965,
+      "step": 290
+    },
+    {
+      "epoch": 0.11595646651557791,
+      "grad_norm": 0.49149996042251587,
+      "learning_rate": 5.656536706438267e-05,
+      "loss": 1.6388,
+      "step": 291
+    },
+    {
+      "epoch": 0.11635494234552836,
+      "grad_norm": 0.4835774898529053,
+      "learning_rate": 5.591894883334667e-05,
+      "loss": 1.6856,
+      "step": 292
+    },
+    {
+      "epoch": 0.1167534181754788,
+      "grad_norm": 0.5278857946395874,
+      "learning_rate": 5.5274808776584367e-05,
+      "loss": 1.6883,
+      "step": 293
+    },
+    {
+      "epoch": 0.11715189400542923,
+      "grad_norm": 0.4995588958263397,
+      "learning_rate": 5.463298018421171e-05,
+      "loss": 1.519,
+      "step": 294
+    },
+    {
+      "epoch": 0.11755036983537967,
+      "grad_norm": 0.5236543416976929,
+      "learning_rate": 5.399349622688479e-05,
+      "loss": 1.7372,
+      "step": 295
+    },
+    {
+      "epoch": 0.11794884566533011,
+      "grad_norm": 0.45699524879455566,
+      "learning_rate": 5.335638995408545e-05,
+      "loss": 1.6082,
+      "step": 296
+    },
+    {
+      "epoch": 0.11834732149528056,
+      "grad_norm": 0.5191316604614258,
+      "learning_rate": 5.272169429241325e-05,
+      "loss": 1.7123,
+      "step": 297
+    },
+    {
+      "epoch": 0.118745797325231,
+      "grad_norm": 0.42880895733833313,
+      "learning_rate": 5.208944204388377e-05,
+      "loss": 1.4809,
+      "step": 298
+    },
+    {
+      "epoch": 0.11914427315518143,
+      "grad_norm": 0.5574065446853638,
+      "learning_rate": 5.145966588423341e-05,
+      "loss": 1.8128,
+      "step": 299
+    },
+    {
+      "epoch": 0.11954274898513187,
+      "grad_norm": 0.47847244143486023,
+      "learning_rate": 5.0832398361230596e-05,
+      "loss": 1.5699,
+      "step": 300
+    },
+    {
+      "epoch": 0.11954274898513187,
+      "eval_loss": 1.700640082359314,
+      "eval_runtime": 603.0633,
+      "eval_samples_per_second": 14.018,
+      "eval_steps_per_second": 3.505,
+      "step": 300
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 8.388464319922176e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null