Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +363 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ef81cbdb59a8c470cace88c899af8d7871756273d5160a82eb917c9c68b9a0d3
 size 34456

 version https://git-lfs.github.com/spec/v1
+oid sha256:7e4d0f54b1f3106cbd3519766da7a08e6cdcc123bdba88033376082f33b67ad6
 size 34456

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b8446a3caf87bb3a830d8e42add8ab24dd1984f45869b97b4b97df96bf4a6c3b
 size 73222

 version https://git-lfs.github.com/spec/v1
+oid sha256:4f942e934133617a13c489b8cb2108b0e45657e35673be933bc69e9df09db2b9
 size 73222

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c4eec9317659153fe5e4a7695448cf380647f00608ba3fad024c38f12a8ca027
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:1c9592af05cb226b7004e5a32aa07d54ff305e99ba624e24daed0944207570d6
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d8ce05761f46e7cf72fb17a02e3a0ca15c9d25ce3babf590eeb40568923b8bac
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 11.5,
   "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.01693910392140256,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 220.467,
       "eval_steps_per_second": 55.121,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1103,7 +1461,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 2
       }
     },
     "TrainerControl": {
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 24118847078400.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 11.5,
   "best_model_checkpoint": "miner_id_24/checkpoint-50",
+  "epoch": 0.02258547189520341,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 220.467,
       "eval_steps_per_second": 55.121,
       "step": 150
+    },
+    {
+      "epoch": 0.017052031280878576,
+      "grad_norm": 0.00022687959426548332,
+      "learning_rate": 1.553232954407171e-05,
+      "loss": 46.0,
+      "step": 151
+    },
+    {
+      "epoch": 0.017164958640354593,
+      "grad_norm": 0.00025120441569015384,
+      "learning_rate": 1.4938160786375572e-05,
+      "loss": 46.0,
+      "step": 152
+    },
+    {
+      "epoch": 0.01727788599983061,
+      "grad_norm": 0.0002398672659182921,
+      "learning_rate": 1.435357758543015e-05,
+      "loss": 46.0,
+      "step": 153
+    },
+    {
+      "epoch": 0.017390813359306628,
+      "grad_norm": 0.00023704004706814885,
+      "learning_rate": 1.3778739760445552e-05,
+      "loss": 46.0,
+      "step": 154
+    },
+    {
+      "epoch": 0.017503740718782645,
+      "grad_norm": 0.00026754479040391743,
+      "learning_rate": 1.3213804466343421e-05,
+      "loss": 46.0,
+      "step": 155
+    },
+    {
+      "epoch": 0.01761666807825866,
+      "grad_norm": 0.0002529373741708696,
+      "learning_rate": 1.2658926150792322e-05,
+      "loss": 46.0,
+      "step": 156
+    },
+    {
+      "epoch": 0.017729595437734676,
+      "grad_norm": 0.00032713040127418935,
+      "learning_rate": 1.2114256511983274e-05,
+      "loss": 46.0,
+      "step": 157
+    },
+    {
+      "epoch": 0.017842522797210693,
+      "grad_norm": 0.00031030309037305415,
+      "learning_rate": 1.157994445715706e-05,
+      "loss": 46.0,
+      "step": 158
+    },
+    {
+      "epoch": 0.01795545015668671,
+      "grad_norm": 0.00032333206036128104,
+      "learning_rate": 1.1056136061894384e-05,
+      "loss": 46.0,
+      "step": 159
+    },
+    {
+      "epoch": 0.018068377516162727,
+      "grad_norm": 0.00028134314925409853,
+      "learning_rate": 1.0542974530180327e-05,
+      "loss": 46.0,
+      "step": 160
+    },
+    {
+      "epoch": 0.018181304875638744,
+      "grad_norm": 0.000390295113902539,
+      "learning_rate": 1.0040600155253765e-05,
+      "loss": 46.0,
+      "step": 161
+    },
+    {
+      "epoch": 0.01829423223511476,
+      "grad_norm": 0.0002561201108619571,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 46.0,
+      "step": 162
+    },
+    {
+      "epoch": 0.01840715959459078,
+      "grad_norm": 0.000311757146846503,
+      "learning_rate": 9.068759265665384e-06,
+      "loss": 46.0,
+      "step": 163
+    },
+    {
+      "epoch": 0.018520086954066796,
+      "grad_norm": 0.00025277171516790986,
+      "learning_rate": 8.599558442598998e-06,
+      "loss": 46.0,
+      "step": 164
+    },
+    {
+      "epoch": 0.018633014313542813,
+      "grad_norm": 0.0003587704268284142,
+      "learning_rate": 8.141676086873572e-06,
+      "loss": 46.0,
+      "step": 165
+    },
+    {
+      "epoch": 0.01874594167301883,
+      "grad_norm": 0.00032000569626688957,
+      "learning_rate": 7.695237378953223e-06,
+      "loss": 46.0,
+      "step": 166
+    },
+    {
+      "epoch": 0.018858869032494847,
+      "grad_norm": 0.0003866164479404688,
+      "learning_rate": 7.260364370723044e-06,
+      "loss": 46.0,
+      "step": 167
+    },
+    {
+      "epoch": 0.018971796391970865,
+      "grad_norm": 0.00030903815058991313,
+      "learning_rate": 6.837175952121306e-06,
+      "loss": 46.0,
+      "step": 168
+    },
+    {
+      "epoch": 0.019084723751446882,
+      "grad_norm": 0.00035700431908480823,
+      "learning_rate": 6.425787818636131e-06,
+      "loss": 46.0,
+      "step": 169
+    },
+    {
+      "epoch": 0.0191976511109229,
+      "grad_norm": 0.00036900988197885454,
+      "learning_rate": 6.026312439675552e-06,
+      "loss": 46.0,
+      "step": 170
+    },
+    {
+      "epoch": 0.019310578470398916,
+      "grad_norm": 0.0003007377963513136,
+      "learning_rate": 5.6388590278194096e-06,
+      "loss": 46.0,
+      "step": 171
+    },
+    {
+      "epoch": 0.019423505829874933,
+      "grad_norm": 0.0003097676963079721,
+      "learning_rate": 5.263533508961827e-06,
+      "loss": 46.0,
+      "step": 172
+    },
+    {
+      "epoch": 0.01953643318935095,
+      "grad_norm": 0.00035511297755874693,
+      "learning_rate": 4.900438493352055e-06,
+      "loss": 46.0,
+      "step": 173
+    },
+    {
+      "epoch": 0.019649360548826968,
+      "grad_norm": 0.0003163871879223734,
+      "learning_rate": 4.549673247541875e-06,
+      "loss": 46.0,
+      "step": 174
+    },
+    {
+      "epoch": 0.019762287908302985,
+      "grad_norm": 0.00040754114161245525,
+      "learning_rate": 4.2113336672471245e-06,
+      "loss": 46.0,
+      "step": 175
+    },
+    {
+      "epoch": 0.019875215267779002,
+      "grad_norm": 0.00029164121951907873,
+      "learning_rate": 3.885512251130763e-06,
+      "loss": 46.0,
+      "step": 176
+    },
+    {
+      "epoch": 0.01998814262725502,
+      "grad_norm": 0.0003299713716842234,
+      "learning_rate": 3.5722980755146517e-06,
+      "loss": 46.0,
+      "step": 177
+    },
+    {
+      "epoch": 0.020101069986731036,
+      "grad_norm": 0.0004000996705144644,
+      "learning_rate": 3.271776770026963e-06,
+      "loss": 46.0,
+      "step": 178
+    },
+    {
+      "epoch": 0.020213997346207054,
+      "grad_norm": 0.0002446115540806204,
+      "learning_rate": 2.9840304941919415e-06,
+      "loss": 46.0,
+      "step": 179
+    },
+    {
+      "epoch": 0.02032692470568307,
+      "grad_norm": 0.0003760512627195567,
+      "learning_rate": 2.7091379149682685e-06,
+      "loss": 46.0,
+      "step": 180
+    },
+    {
+      "epoch": 0.020439852065159088,
+      "grad_norm": 0.0003339052200317383,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 46.0,
+      "step": 181
+    },
+    {
+      "epoch": 0.020552779424635105,
+      "grad_norm": 0.00032041073427535594,
+      "learning_rate": 2.1982109232821178e-06,
+      "loss": 46.0,
+      "step": 182
+    },
+    {
+      "epoch": 0.020665706784111122,
+      "grad_norm": 0.00033732884912751615,
+      "learning_rate": 1.962316193157593e-06,
+      "loss": 46.0,
+      "step": 183
+    },
+    {
+      "epoch": 0.020778634143587136,
+      "grad_norm": 0.0002904120774473995,
+      "learning_rate": 1.7395544861325718e-06,
+      "loss": 46.0,
+      "step": 184
+    },
+    {
+      "epoch": 0.020891561503063153,
+      "grad_norm": 0.0002944996813312173,
+      "learning_rate": 1.5299867030334814e-06,
+      "loss": 46.0,
+      "step": 185
+    },
+    {
+      "epoch": 0.02100448886253917,
+      "grad_norm": 0.0003913106338586658,
+      "learning_rate": 1.333670137599713e-06,
+      "loss": 46.0,
+      "step": 186
+    },
+    {
+      "epoch": 0.021117416222015187,
+      "grad_norm": 0.00030264805536717176,
+      "learning_rate": 1.1506584608200367e-06,
+      "loss": 46.0,
+      "step": 187
+    },
+    {
+      "epoch": 0.021230343581491205,
+      "grad_norm": 0.0003101128386333585,
+      "learning_rate": 9.810017062595322e-07,
+      "loss": 46.0,
+      "step": 188
+    },
+    {
+      "epoch": 0.021343270940967222,
+      "grad_norm": 0.00043780505075119436,
+      "learning_rate": 8.247462563808817e-07,
+      "loss": 46.0,
+      "step": 189
+    },
+    {
+      "epoch": 0.02145619830044324,
+      "grad_norm": 0.0003100104513578117,
+      "learning_rate": 6.819348298638839e-07,
+      "loss": 46.0,
+      "step": 190
+    },
+    {
+      "epoch": 0.021569125659919256,
+      "grad_norm": 0.0003175575693603605,
+      "learning_rate": 5.526064699265753e-07,
+      "loss": 46.0,
+      "step": 191
+    },
+    {
+      "epoch": 0.021682053019395273,
+      "grad_norm": 0.00034154922468587756,
+      "learning_rate": 4.367965336512403e-07,
+      "loss": 46.0,
+      "step": 192
+    },
+    {
+      "epoch": 0.02179498037887129,
+      "grad_norm": 0.00029154884396120906,
+      "learning_rate": 3.3453668231809286e-07,
+      "loss": 46.0,
+      "step": 193
+    },
+    {
+      "epoch": 0.021907907738347308,
+      "grad_norm": 0.0002526903699617833,
+      "learning_rate": 2.458548727494292e-07,
+      "loss": 46.0,
+      "step": 194
+    },
+    {
+      "epoch": 0.022020835097823325,
+      "grad_norm": 0.00047206023009493947,
+      "learning_rate": 1.7077534966650766e-07,
+      "loss": 46.0,
+      "step": 195
+    },
+    {
+      "epoch": 0.022133762457299342,
+      "grad_norm": 0.0002853488549590111,
+      "learning_rate": 1.0931863906127327e-07,
+      "loss": 46.0,
+      "step": 196
+    },
+    {
+      "epoch": 0.02224668981677536,
+      "grad_norm": 0.0003255183401051909,
+      "learning_rate": 6.150154258476315e-08,
+      "loss": 46.0,
+      "step": 197
+    },
+    {
+      "epoch": 0.022359617176251376,
+      "grad_norm": 0.00031697956728748977,
+      "learning_rate": 2.7337132953697554e-08,
+      "loss": 46.0,
+      "step": 198
+    },
+    {
+      "epoch": 0.022472544535727394,
+      "grad_norm": 0.00035115343052893877,
+      "learning_rate": 6.834750376549792e-09,
+      "loss": 46.0,
+      "step": 199
+    },
+    {
+      "epoch": 0.02258547189520341,
+      "grad_norm": 0.00033498730044811964,
+      "learning_rate": 0.0,
+      "loss": 46.0,
+      "step": 200
+    },
+    {
+      "epoch": 0.02258547189520341,
+      "eval_loss": 11.5,
+      "eval_runtime": 67.5521,
+      "eval_samples_per_second": 220.792,
+      "eval_steps_per_second": 55.202,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 3
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 32158462771200.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null