Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6f7d6a320a3e50511491e410ce1b92ab6609fc6b63f4899678a1920b4ebaabbe
 size 891333320

 version https://git-lfs.github.com/spec/v1
+oid sha256:6818522492ff9270a93b640c773c153a0553453704366c769c67f21e1cbe2980
 size 891333320

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ca96afc210261a977d9a57b5d9126920814459272c707cf06210a6ee8e176cd6
 size 452984596

 version https://git-lfs.github.com/spec/v1
+oid sha256:6e9c802edc0f13b2cb08cb17ac67d11bd8df02cd6fb84b5681a704793c6c3847
 size 452984596

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:57645f2c6130b0019a3d70e4f7a375692f5e849e44c5f7d42df1b8670004659b
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:151ba8290837d1789e85c8f0409fc783ab80ce1480475b46c68232fab884254c
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7431a5f5e7b667e1ad74dfbaf504ae1d5a622ff6eb54e4f385c161883d2319ba
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:e1748c065d82adf475a51dba0ff56fd123a17810f07662718ca6d5704be8f9bd
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.22924812138080597,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.12269938650306748,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 3.128,
       "eval_steps_per_second": 0.784,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.870788720328704e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.21579746901988983,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.16359918200409,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 3.128,
       "eval_steps_per_second": 0.784,
       "step": 150
+    },
+    {
+      "epoch": 0.12351738241308793,
+      "grad_norm": 0.42591241002082825,
+      "learning_rate": 2.6124736842105265e-05,
+      "loss": 0.847,
+      "step": 151
+    },
+    {
+      "epoch": 0.12433537832310838,
+      "grad_norm": 0.37749016284942627,
+      "learning_rate": 2.5591578947368422e-05,
+      "loss": 0.643,
+      "step": 152
+    },
+    {
+      "epoch": 0.12515337423312883,
+      "grad_norm": 7.373504638671875,
+      "learning_rate": 2.5058421052631576e-05,
+      "loss": 1.1166,
+      "step": 153
+    },
+    {
+      "epoch": 0.1259713701431493,
+      "grad_norm": 0.4218708276748657,
+      "learning_rate": 2.4525263157894737e-05,
+      "loss": 0.5926,
+      "step": 154
+    },
+    {
+      "epoch": 0.12678936605316973,
+      "grad_norm": 0.415558397769928,
+      "learning_rate": 2.3992105263157894e-05,
+      "loss": 0.539,
+      "step": 155
+    },
+    {
+      "epoch": 0.1276073619631902,
+      "grad_norm": 0.367865651845932,
+      "learning_rate": 2.345894736842105e-05,
+      "loss": 0.4419,
+      "step": 156
+    },
+    {
+      "epoch": 0.12842535787321063,
+      "grad_norm": 0.47910165786743164,
+      "learning_rate": 2.292578947368421e-05,
+      "loss": 0.72,
+      "step": 157
+    },
+    {
+      "epoch": 0.1292433537832311,
+      "grad_norm": 0.547726035118103,
+      "learning_rate": 2.2392631578947366e-05,
+      "loss": 0.7997,
+      "step": 158
+    },
+    {
+      "epoch": 0.13006134969325153,
+      "grad_norm": 0.49771469831466675,
+      "learning_rate": 2.1859473684210527e-05,
+      "loss": 0.7018,
+      "step": 159
+    },
+    {
+      "epoch": 0.130879345603272,
+      "grad_norm": 0.6116258502006531,
+      "learning_rate": 2.132631578947368e-05,
+      "loss": 0.907,
+      "step": 160
+    },
+    {
+      "epoch": 0.13169734151329243,
+      "grad_norm": 0.5866420865058899,
+      "learning_rate": 2.0793157894736842e-05,
+      "loss": 0.6216,
+      "step": 161
+    },
+    {
+      "epoch": 0.1325153374233129,
+      "grad_norm": 0.6702650785446167,
+      "learning_rate": 2.026e-05,
+      "loss": 0.6452,
+      "step": 162
+    },
+    {
+      "epoch": 0.13333333333333333,
+      "grad_norm": 0.5496453046798706,
+      "learning_rate": 1.9726842105263157e-05,
+      "loss": 0.6332,
+      "step": 163
+    },
+    {
+      "epoch": 0.1341513292433538,
+      "grad_norm": 0.37603631615638733,
+      "learning_rate": 1.9193684210526314e-05,
+      "loss": 0.3876,
+      "step": 164
+    },
+    {
+      "epoch": 0.13496932515337423,
+      "grad_norm": 0.4126095473766327,
+      "learning_rate": 1.866052631578947e-05,
+      "loss": 0.4168,
+      "step": 165
+    },
+    {
+      "epoch": 0.1357873210633947,
+      "grad_norm": 0.6646726131439209,
+      "learning_rate": 1.8127368421052632e-05,
+      "loss": 0.8142,
+      "step": 166
+    },
+    {
+      "epoch": 0.13660531697341513,
+      "grad_norm": 0.3889711797237396,
+      "learning_rate": 1.759421052631579e-05,
+      "loss": 0.4465,
+      "step": 167
+    },
+    {
+      "epoch": 0.1374233128834356,
+      "grad_norm": 0.44813233613967896,
+      "learning_rate": 1.7061052631578947e-05,
+      "loss": 0.4387,
+      "step": 168
+    },
+    {
+      "epoch": 0.13824130879345603,
+      "grad_norm": 0.4522460699081421,
+      "learning_rate": 1.6527894736842104e-05,
+      "loss": 0.4947,
+      "step": 169
+    },
+    {
+      "epoch": 0.1390593047034765,
+      "grad_norm": 0.5248112082481384,
+      "learning_rate": 1.599473684210526e-05,
+      "loss": 0.5836,
+      "step": 170
+    },
+    {
+      "epoch": 0.13987730061349693,
+      "grad_norm": 0.6201620697975159,
+      "learning_rate": 1.546157894736842e-05,
+      "loss": 0.8913,
+      "step": 171
+    },
+    {
+      "epoch": 0.1406952965235174,
+      "grad_norm": 0.565233051776886,
+      "learning_rate": 1.4928421052631576e-05,
+      "loss": 0.5188,
+      "step": 172
+    },
+    {
+      "epoch": 0.14151329243353783,
+      "grad_norm": 0.4735608994960785,
+      "learning_rate": 1.4395263157894735e-05,
+      "loss": 0.3935,
+      "step": 173
+    },
+    {
+      "epoch": 0.1423312883435583,
+      "grad_norm": 0.6751230955123901,
+      "learning_rate": 1.3862105263157895e-05,
+      "loss": 0.7061,
+      "step": 174
+    },
+    {
+      "epoch": 0.14314928425357873,
+      "grad_norm": 0.5896326303482056,
+      "learning_rate": 1.3328947368421052e-05,
+      "loss": 0.6753,
+      "step": 175
+    },
+    {
+      "epoch": 0.1439672801635992,
+      "grad_norm": 0.46921586990356445,
+      "learning_rate": 1.2795789473684211e-05,
+      "loss": 0.4023,
+      "step": 176
+    },
+    {
+      "epoch": 0.14478527607361963,
+      "grad_norm": 0.8884191513061523,
+      "learning_rate": 1.2262631578947368e-05,
+      "loss": 0.5684,
+      "step": 177
+    },
+    {
+      "epoch": 0.1456032719836401,
+      "grad_norm": 0.34956350922584534,
+      "learning_rate": 1.1729473684210526e-05,
+      "loss": 0.1983,
+      "step": 178
+    },
+    {
+      "epoch": 0.14642126789366053,
+      "grad_norm": 0.6150217056274414,
+      "learning_rate": 1.1196315789473683e-05,
+      "loss": 0.6407,
+      "step": 179
+    },
+    {
+      "epoch": 0.147239263803681,
+      "grad_norm": 0.41867491602897644,
+      "learning_rate": 1.066315789473684e-05,
+      "loss": 0.3322,
+      "step": 180
+    },
+    {
+      "epoch": 0.14805725971370143,
+      "grad_norm": 0.6983818411827087,
+      "learning_rate": 1.013e-05,
+      "loss": 0.7427,
+      "step": 181
+    },
+    {
+      "epoch": 0.1488752556237219,
+      "grad_norm": 0.7040908336639404,
+      "learning_rate": 9.596842105263157e-06,
+      "loss": 0.6962,
+      "step": 182
+    },
+    {
+      "epoch": 0.14969325153374233,
+      "grad_norm": 0.6211279630661011,
+      "learning_rate": 9.063684210526316e-06,
+      "loss": 0.5084,
+      "step": 183
+    },
+    {
+      "epoch": 0.1505112474437628,
+      "grad_norm": 0.4377440810203552,
+      "learning_rate": 8.530526315789473e-06,
+      "loss": 0.226,
+      "step": 184
+    },
+    {
+      "epoch": 0.15132924335378323,
+      "grad_norm": 0.1414121389389038,
+      "learning_rate": 7.99736842105263e-06,
+      "loss": 0.052,
+      "step": 185
+    },
+    {
+      "epoch": 0.1521472392638037,
+      "grad_norm": 0.06823990494012833,
+      "learning_rate": 7.464210526315788e-06,
+      "loss": 0.0031,
+      "step": 186
+    },
+    {
+      "epoch": 0.15296523517382413,
+      "grad_norm": 0.013092203065752983,
+      "learning_rate": 6.931052631578947e-06,
+      "loss": 0.0006,
+      "step": 187
+    },
+    {
+      "epoch": 0.1537832310838446,
+      "grad_norm": 0.19664902985095978,
+      "learning_rate": 6.3978947368421055e-06,
+      "loss": 0.0032,
+      "step": 188
+    },
+    {
+      "epoch": 0.15460122699386503,
+      "grad_norm": 0.016594722867012024,
+      "learning_rate": 5.864736842105263e-06,
+      "loss": 0.0006,
+      "step": 189
+    },
+    {
+      "epoch": 0.1554192229038855,
+      "grad_norm": 0.009124848060309887,
+      "learning_rate": 5.33157894736842e-06,
+      "loss": 0.0005,
+      "step": 190
+    },
+    {
+      "epoch": 0.15623721881390593,
+      "grad_norm": 0.10051782429218292,
+      "learning_rate": 4.7984210526315785e-06,
+      "loss": 0.0042,
+      "step": 191
+    },
+    {
+      "epoch": 0.1570552147239264,
+      "grad_norm": 0.0568082332611084,
+      "learning_rate": 4.265263157894737e-06,
+      "loss": 0.0029,
+      "step": 192
+    },
+    {
+      "epoch": 0.15787321063394683,
+      "grad_norm": 0.09655116498470306,
+      "learning_rate": 3.732105263157894e-06,
+      "loss": 0.0036,
+      "step": 193
+    },
+    {
+      "epoch": 0.1586912065439673,
+      "grad_norm": 0.17335928976535797,
+      "learning_rate": 3.1989473684210527e-06,
+      "loss": 0.007,
+      "step": 194
+    },
+    {
+      "epoch": 0.15950920245398773,
+      "grad_norm": 0.008331399410963058,
+      "learning_rate": 2.66578947368421e-06,
+      "loss": 0.0005,
+      "step": 195
+    },
+    {
+      "epoch": 0.1603271983640082,
+      "grad_norm": 0.1623714417219162,
+      "learning_rate": 2.1326315789473684e-06,
+      "loss": 0.0043,
+      "step": 196
+    },
+    {
+      "epoch": 0.16114519427402862,
+      "grad_norm": 0.05319036543369293,
+      "learning_rate": 1.5994736842105264e-06,
+      "loss": 0.0018,
+      "step": 197
+    },
+    {
+      "epoch": 0.1619631901840491,
+      "grad_norm": 0.006065657362341881,
+      "learning_rate": 1.0663157894736842e-06,
+      "loss": 0.0004,
+      "step": 198
+    },
+    {
+      "epoch": 0.16278118609406952,
+      "grad_norm": 0.010582847520709038,
+      "learning_rate": 5.331578947368421e-07,
+      "loss": 0.0005,
+      "step": 199
+    },
+    {
+      "epoch": 0.16359918200409,
+      "grad_norm": 0.026805628091096878,
+      "learning_rate": 0.0,
+      "loss": 0.001,
+      "step": 200
+    },
+    {
+      "epoch": 0.16359918200409,
+      "eval_loss": 0.21579746901988983,
+      "eval_runtime": 165.9203,
+      "eval_samples_per_second": 3.104,
+      "eval_steps_per_second": 0.777,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.484049111154688e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null