Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ff6777d9d462812754ab4b67903ad12bc650727105fc9e11940d3b440fbda53c
 size 671149168

 version https://git-lfs.github.com/spec/v1
+oid sha256:708c798d1ac1eb1587970713ca88b8f251db861dbb29e101a10b4a6a97eb4e14
 size 671149168

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:066847901b0ceb796db9602639891496c798163d3d355daba67dd6b06e6cf705
 size 341314196

 version https://git-lfs.github.com/spec/v1
+oid sha256:80695967c7294a869e72f8feea1e80b63efda1d546374ee9b82103037a892bbe
 size 341314196

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:62b0167b39b17fc77c599e8f75068d49be9c0c98d2af3e042c9b059b3175fb4d
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:00d2508e3aa2a7256203a06140520161452c7874056d11c270d8a0f8a70c287f
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:22c9dfa784729c93db12d225bfc25c64e7ae0e1e9f4be7b45dc255fae6ea42c4
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:3e90d1a9917ccbc9819e77d754e81c093aaa1bcc13b46bc6c7bb4bcae17159bb
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 2.761136770248413,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.04657661853749418,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 7.778,
       "eval_steps_per_second": 1.945,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 6.528928416006144e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 2.710070848464966,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.06210215804999224,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 7.778,
       "eval_steps_per_second": 1.945,
       "step": 150
+    },
+    {
+      "epoch": 0.04688712932774414,
+      "grad_norm": 8.627881050109863,
+      "learning_rate": 2.586684210526316e-05,
+      "loss": 5.0342,
+      "step": 151
+    },
+    {
+      "epoch": 0.0471976401179941,
+      "grad_norm": 3.0994157791137695,
+      "learning_rate": 2.5338947368421054e-05,
+      "loss": 5.6128,
+      "step": 152
+    },
+    {
+      "epoch": 0.04750815090824406,
+      "grad_norm": 7.781631946563721,
+      "learning_rate": 2.4811052631578945e-05,
+      "loss": 5.6382,
+      "step": 153
+    },
+    {
+      "epoch": 0.047818661698494025,
+      "grad_norm": 8.706255912780762,
+      "learning_rate": 2.428315789473684e-05,
+      "loss": 5.369,
+      "step": 154
+    },
+    {
+      "epoch": 0.04812917248874398,
+      "grad_norm": 10.600417137145996,
+      "learning_rate": 2.3755263157894736e-05,
+      "loss": 5.1998,
+      "step": 155
+    },
+    {
+      "epoch": 0.04843968327899394,
+      "grad_norm": 3.304457664489746,
+      "learning_rate": 2.322736842105263e-05,
+      "loss": 5.8636,
+      "step": 156
+    },
+    {
+      "epoch": 0.048750194069243906,
+      "grad_norm": 3.3973822593688965,
+      "learning_rate": 2.2699473684210526e-05,
+      "loss": 5.9097,
+      "step": 157
+    },
+    {
+      "epoch": 0.04906070485949387,
+      "grad_norm": 6.2585039138793945,
+      "learning_rate": 2.217157894736842e-05,
+      "loss": 6.0155,
+      "step": 158
+    },
+    {
+      "epoch": 0.04937121564974383,
+      "grad_norm": 4.313554286956787,
+      "learning_rate": 2.1643684210526316e-05,
+      "loss": 5.977,
+      "step": 159
+    },
+    {
+      "epoch": 0.04968172643999379,
+      "grad_norm": 6.310354232788086,
+      "learning_rate": 2.1115789473684208e-05,
+      "loss": 5.8565,
+      "step": 160
+    },
+    {
+      "epoch": 0.04999223723024375,
+      "grad_norm": 18.50254249572754,
+      "learning_rate": 2.0587894736842106e-05,
+      "loss": 5.9798,
+      "step": 161
+    },
+    {
+      "epoch": 0.05030274802049371,
+      "grad_norm": 11.727089881896973,
+      "learning_rate": 2.006e-05,
+      "loss": 6.3748,
+      "step": 162
+    },
+    {
+      "epoch": 0.050613258810743675,
+      "grad_norm": 4.172285556793213,
+      "learning_rate": 1.9532105263157893e-05,
+      "loss": 6.3266,
+      "step": 163
+    },
+    {
+      "epoch": 0.05092376960099364,
+      "grad_norm": 3.4230823516845703,
+      "learning_rate": 1.900421052631579e-05,
+      "loss": 5.8562,
+      "step": 164
+    },
+    {
+      "epoch": 0.05123428039124359,
+      "grad_norm": 3.503084421157837,
+      "learning_rate": 1.8476315789473683e-05,
+      "loss": 5.3421,
+      "step": 165
+    },
+    {
+      "epoch": 0.051544791181493556,
+      "grad_norm": 3.9270567893981934,
+      "learning_rate": 1.7948421052631578e-05,
+      "loss": 5.8137,
+      "step": 166
+    },
+    {
+      "epoch": 0.05185530197174352,
+      "grad_norm": 5.9435882568359375,
+      "learning_rate": 1.7420526315789473e-05,
+      "loss": 5.5091,
+      "step": 167
+    },
+    {
+      "epoch": 0.05216581276199348,
+      "grad_norm": 3.307124137878418,
+      "learning_rate": 1.6892631578947368e-05,
+      "loss": 6.1505,
+      "step": 168
+    },
+    {
+      "epoch": 0.052476323552243444,
+      "grad_norm": 3.08250093460083,
+      "learning_rate": 1.6364736842105263e-05,
+      "loss": 5.857,
+      "step": 169
+    },
+    {
+      "epoch": 0.0527868343424934,
+      "grad_norm": 3.2450950145721436,
+      "learning_rate": 1.5836842105263158e-05,
+      "loss": 5.7891,
+      "step": 170
+    },
+    {
+      "epoch": 0.05309734513274336,
+      "grad_norm": 3.96562123298645,
+      "learning_rate": 1.5308947368421053e-05,
+      "loss": 5.5198,
+      "step": 171
+    },
+    {
+      "epoch": 0.053407855922993325,
+      "grad_norm": 3.0473573207855225,
+      "learning_rate": 1.4781052631578945e-05,
+      "loss": 5.7508,
+      "step": 172
+    },
+    {
+      "epoch": 0.05371836671324329,
+      "grad_norm": 6.961787223815918,
+      "learning_rate": 1.4253157894736842e-05,
+      "loss": 5.6795,
+      "step": 173
+    },
+    {
+      "epoch": 0.05402887750349324,
+      "grad_norm": 3.1459827423095703,
+      "learning_rate": 1.3725263157894737e-05,
+      "loss": 5.4152,
+      "step": 174
+    },
+    {
+      "epoch": 0.054339388293743206,
+      "grad_norm": 4.134361267089844,
+      "learning_rate": 1.319736842105263e-05,
+      "loss": 5.8967,
+      "step": 175
+    },
+    {
+      "epoch": 0.05464989908399317,
+      "grad_norm": 8.840166091918945,
+      "learning_rate": 1.2669473684210527e-05,
+      "loss": 5.2314,
+      "step": 176
+    },
+    {
+      "epoch": 0.05496040987424313,
+      "grad_norm": 3.1707940101623535,
+      "learning_rate": 1.214157894736842e-05,
+      "loss": 5.8824,
+      "step": 177
+    },
+    {
+      "epoch": 0.055270920664493094,
+      "grad_norm": 2.967905044555664,
+      "learning_rate": 1.1613684210526315e-05,
+      "loss": 5.9145,
+      "step": 178
+    },
+    {
+      "epoch": 0.05558143145474305,
+      "grad_norm": 4.332707405090332,
+      "learning_rate": 1.108578947368421e-05,
+      "loss": 5.7882,
+      "step": 179
+    },
+    {
+      "epoch": 0.05589194224499301,
+      "grad_norm": 3.2459423542022705,
+      "learning_rate": 1.0557894736842104e-05,
+      "loss": 5.0901,
+      "step": 180
+    },
+    {
+      "epoch": 0.056202453035242975,
+      "grad_norm": 2.94150710105896,
+      "learning_rate": 1.003e-05,
+      "loss": 5.3414,
+      "step": 181
+    },
+    {
+      "epoch": 0.05651296382549294,
+      "grad_norm": 5.878596305847168,
+      "learning_rate": 9.502105263157896e-06,
+      "loss": 4.6183,
+      "step": 182
+    },
+    {
+      "epoch": 0.0568234746157429,
+      "grad_norm": 4.656161308288574,
+      "learning_rate": 8.974210526315789e-06,
+      "loss": 5.2575,
+      "step": 183
+    },
+    {
+      "epoch": 0.057133985405992856,
+      "grad_norm": 3.8339030742645264,
+      "learning_rate": 8.446315789473684e-06,
+      "loss": 5.2496,
+      "step": 184
+    },
+    {
+      "epoch": 0.05744449619624282,
+      "grad_norm": 4.35939884185791,
+      "learning_rate": 7.918421052631579e-06,
+      "loss": 5.0508,
+      "step": 185
+    },
+    {
+      "epoch": 0.05775500698649278,
+      "grad_norm": 4.872880935668945,
+      "learning_rate": 7.3905263157894725e-06,
+      "loss": 5.2661,
+      "step": 186
+    },
+    {
+      "epoch": 0.058065517776742744,
+      "grad_norm": 3.8898978233337402,
+      "learning_rate": 6.862631578947368e-06,
+      "loss": 5.0692,
+      "step": 187
+    },
+    {
+      "epoch": 0.058376028566992706,
+      "grad_norm": 9.882028579711914,
+      "learning_rate": 6.3347368421052634e-06,
+      "loss": 5.5958,
+      "step": 188
+    },
+    {
+      "epoch": 0.05868653935724266,
+      "grad_norm": 6.079442024230957,
+      "learning_rate": 5.806842105263158e-06,
+      "loss": 5.6492,
+      "step": 189
+    },
+    {
+      "epoch": 0.058997050147492625,
+      "grad_norm": 5.305960655212402,
+      "learning_rate": 5.278947368421052e-06,
+      "loss": 5.9404,
+      "step": 190
+    },
+    {
+      "epoch": 0.05930756093774259,
+      "grad_norm": 9.125515937805176,
+      "learning_rate": 4.751052631578948e-06,
+      "loss": 5.1715,
+      "step": 191
+    },
+    {
+      "epoch": 0.05961807172799255,
+      "grad_norm": 13.075247764587402,
+      "learning_rate": 4.223157894736842e-06,
+      "loss": 5.723,
+      "step": 192
+    },
+    {
+      "epoch": 0.059928582518242506,
+      "grad_norm": 11.328529357910156,
+      "learning_rate": 3.6952631578947362e-06,
+      "loss": 5.354,
+      "step": 193
+    },
+    {
+      "epoch": 0.06023909330849247,
+      "grad_norm": 7.932187080383301,
+      "learning_rate": 3.1673684210526317e-06,
+      "loss": 4.9868,
+      "step": 194
+    },
+    {
+      "epoch": 0.06054960409874243,
+      "grad_norm": 4.846871852874756,
+      "learning_rate": 2.639473684210526e-06,
+      "loss": 5.9433,
+      "step": 195
+    },
+    {
+      "epoch": 0.06086011488899239,
+      "grad_norm": 7.914638519287109,
+      "learning_rate": 2.111578947368421e-06,
+      "loss": 5.3406,
+      "step": 196
+    },
+    {
+      "epoch": 0.061170625679242356,
+      "grad_norm": 11.115379333496094,
+      "learning_rate": 1.5836842105263159e-06,
+      "loss": 4.7257,
+      "step": 197
+    },
+    {
+      "epoch": 0.06148113646949231,
+      "grad_norm": 12.03275203704834,
+      "learning_rate": 1.0557894736842105e-06,
+      "loss": 3.4278,
+      "step": 198
+    },
+    {
+      "epoch": 0.061791647259742274,
+      "grad_norm": 8.894533157348633,
+      "learning_rate": 5.278947368421053e-07,
+      "loss": 3.2262,
+      "step": 199
+    },
+    {
+      "epoch": 0.06210215804999224,
+      "grad_norm": 5.871228218078613,
+      "learning_rate": 0.0,
+      "loss": 4.6102,
+      "step": 200
+    },
+    {
+      "epoch": 0.06210215804999224,
+      "eval_loss": 2.710070848464966,
+      "eval_runtime": 174.2701,
+      "eval_samples_per_second": 7.781,
+      "eval_steps_per_second": 1.945,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 8.78275028016169e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null