Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c5291ec521c4ffa0f6091cddc747315cbf4c9a929679ec85bd1e336e78285313
 size 671149168

 version https://git-lfs.github.com/spec/v1
+oid sha256:7993d945186d48109808dd9e39074172419bdd0ce292e09bc25998ffa17c76eb
 size 671149168

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:97abbfb5bdbe157285163dc12c6ab1fb09f57b6499d8424fee0d83db4b7d10fb
 size 341314196

 version https://git-lfs.github.com/spec/v1
+oid sha256:409fd39598fff084809512962341d0d9ce9406426b93a8dc3f2ae3a406c9b46e
 size 341314196

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cb3272b647dedb061d06dda8a662cd98ad3318b7d10b09d9a8cd15bb2de3fbb6
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:8dc9b9d51b2c8f62b25764e8e867e5bdc935ba8dea15d53055f09a65c0b97475
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7d606eeb1aa97b417de3c30d0a970be83ac979e2c7cc0fa41135c63d459909e5
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:ba4af3b1b4fa156d60adeec70df709d1741ac2f3147c676ab2805007313fc707
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.42113959789276123,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.011503949689393358,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 8.917,
       "eval_steps_per_second": 2.23,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 5.563011021196493e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.41263335943222046,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.015338599585857812,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 8.917,
       "eval_steps_per_second": 2.23,
       "step": 150
+    },
+    {
+      "epoch": 0.011580642687322647,
+      "grad_norm": 1.7774934768676758,
+      "learning_rate": 2.6047368421052634e-05,
+      "loss": 0.6266,
+      "step": 151
+    },
+    {
+      "epoch": 0.011657335685251936,
+      "grad_norm": 1.7610536813735962,
+      "learning_rate": 2.5515789473684213e-05,
+      "loss": 0.6174,
+      "step": 152
+    },
+    {
+      "epoch": 0.011734028683181226,
+      "grad_norm": 1.7590079307556152,
+      "learning_rate": 2.4984210526315788e-05,
+      "loss": 0.6718,
+      "step": 153
+    },
+    {
+      "epoch": 0.011810721681110515,
+      "grad_norm": 1.7000463008880615,
+      "learning_rate": 2.445263157894737e-05,
+      "loss": 0.6889,
+      "step": 154
+    },
+    {
+      "epoch": 0.011887414679039804,
+      "grad_norm": 1.9639273881912231,
+      "learning_rate": 2.3921052631578946e-05,
+      "loss": 0.6887,
+      "step": 155
+    },
+    {
+      "epoch": 0.011964107676969093,
+      "grad_norm": 1.995015263557434,
+      "learning_rate": 2.3389473684210528e-05,
+      "loss": 0.79,
+      "step": 156
+    },
+    {
+      "epoch": 0.012040800674898381,
+      "grad_norm": 2.2741172313690186,
+      "learning_rate": 2.2857894736842106e-05,
+      "loss": 0.9187,
+      "step": 157
+    },
+    {
+      "epoch": 0.01211749367282767,
+      "grad_norm": 2.2824699878692627,
+      "learning_rate": 2.2326315789473685e-05,
+      "loss": 0.8648,
+      "step": 158
+    },
+    {
+      "epoch": 0.01219418667075696,
+      "grad_norm": 1.9985893964767456,
+      "learning_rate": 2.1794736842105264e-05,
+      "loss": 0.8466,
+      "step": 159
+    },
+    {
+      "epoch": 0.012270879668686249,
+      "grad_norm": 2.1604766845703125,
+      "learning_rate": 2.1263157894736842e-05,
+      "loss": 0.9342,
+      "step": 160
+    },
+    {
+      "epoch": 0.012347572666615538,
+      "grad_norm": 2.151719570159912,
+      "learning_rate": 2.073157894736842e-05,
+      "loss": 0.9231,
+      "step": 161
+    },
+    {
+      "epoch": 0.012424265664544827,
+      "grad_norm": 2.173888921737671,
+      "learning_rate": 2.0200000000000003e-05,
+      "loss": 0.8891,
+      "step": 162
+    },
+    {
+      "epoch": 0.012500958662474117,
+      "grad_norm": 2.26214337348938,
+      "learning_rate": 1.966842105263158e-05,
+      "loss": 0.8486,
+      "step": 163
+    },
+    {
+      "epoch": 0.012577651660403406,
+      "grad_norm": 2.2833595275878906,
+      "learning_rate": 1.913684210526316e-05,
+      "loss": 0.8788,
+      "step": 164
+    },
+    {
+      "epoch": 0.012654344658332694,
+      "grad_norm": 2.260359048843384,
+      "learning_rate": 1.8605263157894736e-05,
+      "loss": 0.8988,
+      "step": 165
+    },
+    {
+      "epoch": 0.012731037656261983,
+      "grad_norm": 2.4860446453094482,
+      "learning_rate": 1.8073684210526318e-05,
+      "loss": 1.0125,
+      "step": 166
+    },
+    {
+      "epoch": 0.012807730654191272,
+      "grad_norm": 2.1822967529296875,
+      "learning_rate": 1.7542105263157897e-05,
+      "loss": 0.8164,
+      "step": 167
+    },
+    {
+      "epoch": 0.012884423652120561,
+      "grad_norm": 2.27885103225708,
+      "learning_rate": 1.7010526315789475e-05,
+      "loss": 0.8992,
+      "step": 168
+    },
+    {
+      "epoch": 0.01296111665004985,
+      "grad_norm": 2.3960371017456055,
+      "learning_rate": 1.6478947368421054e-05,
+      "loss": 0.9645,
+      "step": 169
+    },
+    {
+      "epoch": 0.01303780964797914,
+      "grad_norm": 2.3640167713165283,
+      "learning_rate": 1.5947368421052633e-05,
+      "loss": 0.9532,
+      "step": 170
+    },
+    {
+      "epoch": 0.01311450264590843,
+      "grad_norm": 2.4090967178344727,
+      "learning_rate": 1.541578947368421e-05,
+      "loss": 0.96,
+      "step": 171
+    },
+    {
+      "epoch": 0.013191195643837717,
+      "grad_norm": 2.4853227138519287,
+      "learning_rate": 1.4884210526315788e-05,
+      "loss": 0.8816,
+      "step": 172
+    },
+    {
+      "epoch": 0.013267888641767006,
+      "grad_norm": 2.5192534923553467,
+      "learning_rate": 1.4352631578947369e-05,
+      "loss": 1.1604,
+      "step": 173
+    },
+    {
+      "epoch": 0.013344581639696295,
+      "grad_norm": 2.189826726913452,
+      "learning_rate": 1.3821052631578949e-05,
+      "loss": 0.8103,
+      "step": 174
+    },
+    {
+      "epoch": 0.013421274637625585,
+      "grad_norm": 2.375955104827881,
+      "learning_rate": 1.3289473684210526e-05,
+      "loss": 0.8048,
+      "step": 175
+    },
+    {
+      "epoch": 0.013497967635554874,
+      "grad_norm": 2.3395192623138428,
+      "learning_rate": 1.2757894736842106e-05,
+      "loss": 0.729,
+      "step": 176
+    },
+    {
+      "epoch": 0.013574660633484163,
+      "grad_norm": 2.595853567123413,
+      "learning_rate": 1.2226315789473685e-05,
+      "loss": 0.9764,
+      "step": 177
+    },
+    {
+      "epoch": 0.013651353631413453,
+      "grad_norm": 2.513397693634033,
+      "learning_rate": 1.1694736842105264e-05,
+      "loss": 0.9254,
+      "step": 178
+    },
+    {
+      "epoch": 0.013728046629342742,
+      "grad_norm": 2.5810961723327637,
+      "learning_rate": 1.1163157894736842e-05,
+      "loss": 0.9575,
+      "step": 179
+    },
+    {
+      "epoch": 0.01380473962727203,
+      "grad_norm": 2.5363872051239014,
+      "learning_rate": 1.0631578947368421e-05,
+      "loss": 0.9717,
+      "step": 180
+    },
+    {
+      "epoch": 0.013881432625201319,
+      "grad_norm": 2.658205032348633,
+      "learning_rate": 1.0100000000000002e-05,
+      "loss": 1.0027,
+      "step": 181
+    },
+    {
+      "epoch": 0.013958125623130608,
+      "grad_norm": 2.4423038959503174,
+      "learning_rate": 9.56842105263158e-06,
+      "loss": 0.8482,
+      "step": 182
+    },
+    {
+      "epoch": 0.014034818621059897,
+      "grad_norm": 2.4603989124298096,
+      "learning_rate": 9.036842105263159e-06,
+      "loss": 0.8098,
+      "step": 183
+    },
+    {
+      "epoch": 0.014111511618989186,
+      "grad_norm": 2.3027265071868896,
+      "learning_rate": 8.505263157894738e-06,
+      "loss": 0.684,
+      "step": 184
+    },
+    {
+      "epoch": 0.014188204616918476,
+      "grad_norm": 2.7949798107147217,
+      "learning_rate": 7.973684210526316e-06,
+      "loss": 0.824,
+      "step": 185
+    },
+    {
+      "epoch": 0.014264897614847765,
+      "grad_norm": 2.814291477203369,
+      "learning_rate": 7.442105263157894e-06,
+      "loss": 0.8379,
+      "step": 186
+    },
+    {
+      "epoch": 0.014341590612777053,
+      "grad_norm": 2.4458296298980713,
+      "learning_rate": 6.9105263157894745e-06,
+      "loss": 0.6741,
+      "step": 187
+    },
+    {
+      "epoch": 0.014418283610706342,
+      "grad_norm": 3.0586514472961426,
+      "learning_rate": 6.378947368421053e-06,
+      "loss": 0.9712,
+      "step": 188
+    },
+    {
+      "epoch": 0.014494976608635631,
+      "grad_norm": 2.7947068214416504,
+      "learning_rate": 5.847368421052632e-06,
+      "loss": 0.8372,
+      "step": 189
+    },
+    {
+      "epoch": 0.01457166960656492,
+      "grad_norm": 2.613374948501587,
+      "learning_rate": 5.315789473684211e-06,
+      "loss": 0.6013,
+      "step": 190
+    },
+    {
+      "epoch": 0.01464836260449421,
+      "grad_norm": 2.9723777770996094,
+      "learning_rate": 4.78421052631579e-06,
+      "loss": 0.6484,
+      "step": 191
+    },
+    {
+      "epoch": 0.014725055602423499,
+      "grad_norm": 2.829347848892212,
+      "learning_rate": 4.252631578947369e-06,
+      "loss": 0.7399,
+      "step": 192
+    },
+    {
+      "epoch": 0.014801748600352788,
+      "grad_norm": 2.903120517730713,
+      "learning_rate": 3.721052631578947e-06,
+      "loss": 0.5897,
+      "step": 193
+    },
+    {
+      "epoch": 0.014878441598282078,
+      "grad_norm": 3.4690754413604736,
+      "learning_rate": 3.1894736842105266e-06,
+      "loss": 0.8253,
+      "step": 194
+    },
+    {
+      "epoch": 0.014955134596211365,
+      "grad_norm": 3.187955617904663,
+      "learning_rate": 2.6578947368421053e-06,
+      "loss": 0.779,
+      "step": 195
+    },
+    {
+      "epoch": 0.015031827594140654,
+      "grad_norm": 3.7387547492980957,
+      "learning_rate": 2.1263157894736844e-06,
+      "loss": 0.526,
+      "step": 196
+    },
+    {
+      "epoch": 0.015108520592069944,
+      "grad_norm": 4.0911335945129395,
+      "learning_rate": 1.5947368421052633e-06,
+      "loss": 0.7947,
+      "step": 197
+    },
+    {
+      "epoch": 0.015185213589999233,
+      "grad_norm": 4.285370349884033,
+      "learning_rate": 1.0631578947368422e-06,
+      "loss": 0.7217,
+      "step": 198
+    },
+    {
+      "epoch": 0.015261906587928522,
+      "grad_norm": 4.095808029174805,
+      "learning_rate": 5.315789473684211e-07,
+      "loss": 0.5874,
+      "step": 199
+    },
+    {
+      "epoch": 0.015338599585857812,
+      "grad_norm": 7.362918376922607,
+      "learning_rate": 0.0,
+      "loss": 0.933,
+      "step": 200
+    },
+    {
+      "epoch": 0.015338599585857812,
+      "eval_loss": 0.41263335943222046,
+      "eval_runtime": 614.809,
+      "eval_samples_per_second": 8.93,
+      "eval_steps_per_second": 2.233,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 7.423310526676992e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null