Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:587ac73a554a03c7b6389062d77f5b947615516ebf1d6cebefbe9b90a4f2018d
 size 645975704

 version https://git-lfs.github.com/spec/v1
+oid sha256:3edb8bf1d0ccc6d0a6bbd6c5f80b548f2877681c734e9cbf3728233632a89173
 size 645975704

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d0a0e4b357f7fc0f79723ab25de12da4bd4cb42adafedafff471118214737398
 size 328468404

 version https://git-lfs.github.com/spec/v1
+oid sha256:7da7046ae848e05a7db62fc091f53d07130df1c18963dfc0c580c3590db0446a
 size 328468404

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a1e7844d3728f1aed59cc5dad5ec19142b5a2c2541290693d64d759cd9172ce6
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:0f0e14df6959312e181575df4d63c4a7d90bb8b2df0b1b26d2ca6e478b5db01c
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:daaa41fca9fd049986fcf3363fbaab5418032d644dd089607cb2ffe0f0da8945
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:5c186522be8b385dca026dd73d1e109571521fa75822c734f2765fa171800e01
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.26867982745170593,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.15873015873015872,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 9.481,
       "eval_steps_per_second": 2.382,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 5.3320825700352e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.24321098625659943,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.21164021164021163,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 9.481,
       "eval_steps_per_second": 2.382,
       "step": 150
+    },
+    {
+      "epoch": 0.15978835978835979,
+      "grad_norm": 1.0392180681228638,
+      "learning_rate": 2.6150526315789476e-05,
+      "loss": 0.0403,
+      "step": 151
+    },
+    {
+      "epoch": 0.16084656084656085,
+      "grad_norm": 1.738742709159851,
+      "learning_rate": 2.561684210526316e-05,
+      "loss": 0.1876,
+      "step": 152
+    },
+    {
+      "epoch": 0.1619047619047619,
+      "grad_norm": 1.2620779275894165,
+      "learning_rate": 2.508315789473684e-05,
+      "loss": 0.1012,
+      "step": 153
+    },
+    {
+      "epoch": 0.16296296296296298,
+      "grad_norm": 2.1409807205200195,
+      "learning_rate": 2.4549473684210527e-05,
+      "loss": 0.2515,
+      "step": 154
+    },
+    {
+      "epoch": 0.164021164021164,
+      "grad_norm": 0.6598687767982483,
+      "learning_rate": 2.401578947368421e-05,
+      "loss": 0.0336,
+      "step": 155
+    },
+    {
+      "epoch": 0.16507936507936508,
+      "grad_norm": 2.0522665977478027,
+      "learning_rate": 2.3482105263157894e-05,
+      "loss": 0.0748,
+      "step": 156
+    },
+    {
+      "epoch": 0.16613756613756614,
+      "grad_norm": 3.9888670444488525,
+      "learning_rate": 2.294842105263158e-05,
+      "loss": 0.3477,
+      "step": 157
+    },
+    {
+      "epoch": 0.1671957671957672,
+      "grad_norm": 1.104380488395691,
+      "learning_rate": 2.241473684210526e-05,
+      "loss": 0.1015,
+      "step": 158
+    },
+    {
+      "epoch": 0.16825396825396827,
+      "grad_norm": 2.145385503768921,
+      "learning_rate": 2.1881052631578948e-05,
+      "loss": 0.2046,
+      "step": 159
+    },
+    {
+      "epoch": 0.1693121693121693,
+      "grad_norm": 2.4562976360321045,
+      "learning_rate": 2.134736842105263e-05,
+      "loss": 0.2442,
+      "step": 160
+    },
+    {
+      "epoch": 0.17037037037037037,
+      "grad_norm": 2.2245967388153076,
+      "learning_rate": 2.0813684210526315e-05,
+      "loss": 0.4186,
+      "step": 161
+    },
+    {
+      "epoch": 0.17142857142857143,
+      "grad_norm": 2.019033432006836,
+      "learning_rate": 2.0280000000000002e-05,
+      "loss": 0.1792,
+      "step": 162
+    },
+    {
+      "epoch": 0.1724867724867725,
+      "grad_norm": 1.430423617362976,
+      "learning_rate": 1.9746315789473683e-05,
+      "loss": 0.0728,
+      "step": 163
+    },
+    {
+      "epoch": 0.17354497354497356,
+      "grad_norm": 1.957362413406372,
+      "learning_rate": 1.921263157894737e-05,
+      "loss": 0.1585,
+      "step": 164
+    },
+    {
+      "epoch": 0.1746031746031746,
+      "grad_norm": 1.5552687644958496,
+      "learning_rate": 1.867894736842105e-05,
+      "loss": 0.0985,
+      "step": 165
+    },
+    {
+      "epoch": 0.17566137566137566,
+      "grad_norm": 0.8113088607788086,
+      "learning_rate": 1.8145263157894737e-05,
+      "loss": 0.0418,
+      "step": 166
+    },
+    {
+      "epoch": 0.17671957671957672,
+      "grad_norm": 1.317171335220337,
+      "learning_rate": 1.761157894736842e-05,
+      "loss": 0.096,
+      "step": 167
+    },
+    {
+      "epoch": 0.17777777777777778,
+      "grad_norm": 2.197812795639038,
+      "learning_rate": 1.7077894736842104e-05,
+      "loss": 0.1626,
+      "step": 168
+    },
+    {
+      "epoch": 0.17883597883597885,
+      "grad_norm": 2.421410322189331,
+      "learning_rate": 1.654421052631579e-05,
+      "loss": 0.2939,
+      "step": 169
+    },
+    {
+      "epoch": 0.17989417989417988,
+      "grad_norm": 1.9575234651565552,
+      "learning_rate": 1.601052631578947e-05,
+      "loss": 0.1181,
+      "step": 170
+    },
+    {
+      "epoch": 0.18095238095238095,
+      "grad_norm": 2.36352801322937,
+      "learning_rate": 1.5476842105263158e-05,
+      "loss": 0.1494,
+      "step": 171
+    },
+    {
+      "epoch": 0.182010582010582,
+      "grad_norm": 2.2821767330169678,
+      "learning_rate": 1.494315789473684e-05,
+      "loss": 0.2514,
+      "step": 172
+    },
+    {
+      "epoch": 0.18306878306878308,
+      "grad_norm": 1.5309607982635498,
+      "learning_rate": 1.4409473684210525e-05,
+      "loss": 0.1239,
+      "step": 173
+    },
+    {
+      "epoch": 0.18412698412698414,
+      "grad_norm": 1.3841410875320435,
+      "learning_rate": 1.387578947368421e-05,
+      "loss": 0.0923,
+      "step": 174
+    },
+    {
+      "epoch": 0.18518518518518517,
+      "grad_norm": 3.6834170818328857,
+      "learning_rate": 1.3342105263157894e-05,
+      "loss": 0.3616,
+      "step": 175
+    },
+    {
+      "epoch": 0.18624338624338624,
+      "grad_norm": 2.0545542240142822,
+      "learning_rate": 1.280842105263158e-05,
+      "loss": 0.0803,
+      "step": 176
+    },
+    {
+      "epoch": 0.1873015873015873,
+      "grad_norm": 2.6623830795288086,
+      "learning_rate": 1.2274736842105263e-05,
+      "loss": 0.1061,
+      "step": 177
+    },
+    {
+      "epoch": 0.18835978835978837,
+      "grad_norm": 2.9348082542419434,
+      "learning_rate": 1.1741052631578947e-05,
+      "loss": 0.3249,
+      "step": 178
+    },
+    {
+      "epoch": 0.18941798941798943,
+      "grad_norm": 1.3830448389053345,
+      "learning_rate": 1.120736842105263e-05,
+      "loss": 0.0611,
+      "step": 179
+    },
+    {
+      "epoch": 0.19047619047619047,
+      "grad_norm": 3.927304744720459,
+      "learning_rate": 1.0673684210526314e-05,
+      "loss": 0.1971,
+      "step": 180
+    },
+    {
+      "epoch": 0.19153439153439153,
+      "grad_norm": 1.9497954845428467,
+      "learning_rate": 1.0140000000000001e-05,
+      "loss": 0.2122,
+      "step": 181
+    },
+    {
+      "epoch": 0.1925925925925926,
+      "grad_norm": 2.313905954360962,
+      "learning_rate": 9.606315789473685e-06,
+      "loss": 0.2029,
+      "step": 182
+    },
+    {
+      "epoch": 0.19365079365079366,
+      "grad_norm": 3.8197977542877197,
+      "learning_rate": 9.072631578947368e-06,
+      "loss": 0.7685,
+      "step": 183
+    },
+    {
+      "epoch": 0.19470899470899472,
+      "grad_norm": 1.723537802696228,
+      "learning_rate": 8.538947368421052e-06,
+      "loss": 0.4412,
+      "step": 184
+    },
+    {
+      "epoch": 0.19576719576719576,
+      "grad_norm": 3.3449349403381348,
+      "learning_rate": 8.005263157894736e-06,
+      "loss": 0.2243,
+      "step": 185
+    },
+    {
+      "epoch": 0.19682539682539682,
+      "grad_norm": 0.9286437034606934,
+      "learning_rate": 7.47157894736842e-06,
+      "loss": 0.0623,
+      "step": 186
+    },
+    {
+      "epoch": 0.19788359788359788,
+      "grad_norm": 3.832223415374756,
+      "learning_rate": 6.937894736842105e-06,
+      "loss": 0.3214,
+      "step": 187
+    },
+    {
+      "epoch": 0.19894179894179895,
+      "grad_norm": 2.1527602672576904,
+      "learning_rate": 6.40421052631579e-06,
+      "loss": 0.1369,
+      "step": 188
+    },
+    {
+      "epoch": 0.2,
+      "grad_norm": 1.3586947917938232,
+      "learning_rate": 5.8705263157894735e-06,
+      "loss": 0.072,
+      "step": 189
+    },
+    {
+      "epoch": 0.20105820105820105,
+      "grad_norm": 1.1720460653305054,
+      "learning_rate": 5.336842105263157e-06,
+      "loss": 0.0694,
+      "step": 190
+    },
+    {
+      "epoch": 0.2021164021164021,
+      "grad_norm": 1.863162875175476,
+      "learning_rate": 4.803157894736842e-06,
+      "loss": 0.154,
+      "step": 191
+    },
+    {
+      "epoch": 0.20317460317460317,
+      "grad_norm": 3.274844169616699,
+      "learning_rate": 4.269473684210526e-06,
+      "loss": 0.3523,
+      "step": 192
+    },
+    {
+      "epoch": 0.20423280423280424,
+      "grad_norm": 2.0571043491363525,
+      "learning_rate": 3.73578947368421e-06,
+      "loss": 0.1565,
+      "step": 193
+    },
+    {
+      "epoch": 0.2052910052910053,
+      "grad_norm": 2.8300046920776367,
+      "learning_rate": 3.202105263157895e-06,
+      "loss": 0.3163,
+      "step": 194
+    },
+    {
+      "epoch": 0.20634920634920634,
+      "grad_norm": 2.4953830242156982,
+      "learning_rate": 2.6684210526315785e-06,
+      "loss": 0.2559,
+      "step": 195
+    },
+    {
+      "epoch": 0.2074074074074074,
+      "grad_norm": 1.1636903285980225,
+      "learning_rate": 2.134736842105263e-06,
+      "loss": 0.0823,
+      "step": 196
+    },
+    {
+      "epoch": 0.20846560846560847,
+      "grad_norm": 3.529049873352051,
+      "learning_rate": 1.6010526315789475e-06,
+      "loss": 0.3016,
+      "step": 197
+    },
+    {
+      "epoch": 0.20952380952380953,
+      "grad_norm": 3.8077683448791504,
+      "learning_rate": 1.0673684210526315e-06,
+      "loss": 0.3713,
+      "step": 198
+    },
+    {
+      "epoch": 0.2105820105820106,
+      "grad_norm": 4.698509216308594,
+      "learning_rate": 5.336842105263158e-07,
+      "loss": 0.421,
+      "step": 199
+    },
+    {
+      "epoch": 0.21164021164021163,
+      "grad_norm": 5.6042633056640625,
+      "learning_rate": 0.0,
+      "loss": 0.3003,
+      "step": 200
+    },
+    {
+      "epoch": 0.21164021164021163,
+      "eval_loss": 0.24321098625659943,
+      "eval_runtime": 41.9092,
+      "eval_samples_per_second": 9.497,
+      "eval_steps_per_second": 2.386,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 7.1094434267136e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null