Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6b4dfb75e3bb36816a45936a76d91dc0b40216d72775541173f218346b52a381
 size 639691872

 version https://git-lfs.github.com/spec/v1
+oid sha256:2ebcb0ce8bb0a0514c99066cb83bc613e9b58beb61ed47e68d5dc20aa80c2868
 size 639691872

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1dc930c85188c0f6f19bd99d615e1b089090248242b7313b1c97c251aef17759
 size 325339796

 version https://git-lfs.github.com/spec/v1
+oid sha256:d0e1a2cd5b9e69c89bf71445eeda0568d14fe2de12f2d20210f7955e4476e03c
 size 325339796

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d67e4cbb70b875eca287ce528ca272964bab34c5da6a7f1d231b43b73f33b1fe
 size 14180

 version https://git-lfs.github.com/spec/v1
+oid sha256:ff4f31e0fad56e0024715e0b91908795c82b4e10eb8406d808f78bc1e72230ac
 size 14180

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b26348e70deb5f7d6cee52f5eb084c0e6829440001a3c6d0128b93cd074af8c2
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:b60d6f1383abda4776549360effee800fe6cfe2c0604503e9e3fbaa79347f790
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.7497462034225464,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 1.6129032258064515,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 10.644,
       "eval_steps_per_second": 2.712,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.994931420832727e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.7186845541000366,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 2.150537634408602,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 10.644,
       "eval_steps_per_second": 2.712,
       "step": 150
+    },
+    {
+      "epoch": 1.6236559139784945,
+      "grad_norm": 3.5955772399902344,
+      "learning_rate": 1.0872630680850196e-05,
+      "loss": 2.3566,
+      "step": 151
+    },
+    {
+      "epoch": 1.6344086021505375,
+      "grad_norm": 3.7463667392730713,
+      "learning_rate": 1.0456712550462898e-05,
+      "loss": 1.9469,
+      "step": 152
+    },
+    {
+      "epoch": 1.6451612903225805,
+      "grad_norm": 2.806169033050537,
+      "learning_rate": 1.0047504309801104e-05,
+      "loss": 2.1513,
+      "step": 153
+    },
+    {
+      "epoch": 1.6559139784946235,
+      "grad_norm": 3.7219080924987793,
+      "learning_rate": 9.645117832311886e-06,
+      "loss": 2.5836,
+      "step": 154
+    },
+    {
+      "epoch": 1.6666666666666665,
+      "grad_norm": 3.056955337524414,
+      "learning_rate": 9.249663126440394e-06,
+      "loss": 1.6478,
+      "step": 155
+    },
+    {
+      "epoch": 1.6774193548387095,
+      "grad_norm": 3.247894525527954,
+      "learning_rate": 8.861248305554624e-06,
+      "loss": 1.455,
+      "step": 156
+    },
+    {
+      "epoch": 1.6881720430107527,
+      "grad_norm": 3.3513553142547607,
+      "learning_rate": 8.47997955838829e-06,
+      "loss": 2.0281,
+      "step": 157
+    },
+    {
+      "epoch": 1.6989247311827957,
+      "grad_norm": 3.108705759048462,
+      "learning_rate": 8.10596112000994e-06,
+      "loss": 1.5561,
+      "step": 158
+    },
+    {
+      "epoch": 1.7096774193548387,
+      "grad_norm": 3.947622299194336,
+      "learning_rate": 7.739295243326067e-06,
+      "loss": 1.7348,
+      "step": 159
+    },
+    {
+      "epoch": 1.7204301075268817,
+      "grad_norm": 4.6300530433654785,
+      "learning_rate": 7.380082171126228e-06,
+      "loss": 1.9787,
+      "step": 160
+    },
+    {
+      "epoch": 1.7311827956989247,
+      "grad_norm": 4.57441520690918,
+      "learning_rate": 7.028420108677635e-06,
+      "loss": 1.4791,
+      "step": 161
+    },
+    {
+      "epoch": 1.7419354838709677,
+      "grad_norm": 4.058063983917236,
+      "learning_rate": 6.684405196876842e-06,
+      "loss": 0.762,
+      "step": 162
+    },
+    {
+      "epoch": 1.7526881720430108,
+      "grad_norm": 2.3837802410125732,
+      "learning_rate": 6.3481314859657675e-06,
+      "loss": 5.4107,
+      "step": 163
+    },
+    {
+      "epoch": 1.7634408602150538,
+      "grad_norm": 3.205528974533081,
+      "learning_rate": 6.019690909819298e-06,
+      "loss": 5.3617,
+      "step": 164
+    },
+    {
+      "epoch": 1.7741935483870968,
+      "grad_norm": 3.044926881790161,
+      "learning_rate": 5.6991732608115e-06,
+      "loss": 4.1357,
+      "step": 165
+    },
+    {
+      "epoch": 1.7849462365591398,
+      "grad_norm": 3.4720242023468018,
+      "learning_rate": 5.386666165267256e-06,
+      "loss": 4.7002,
+      "step": 166
+    },
+    {
+      "epoch": 1.7956989247311828,
+      "grad_norm": 3.4407799243927,
+      "learning_rate": 5.08225505950613e-06,
+      "loss": 3.6684,
+      "step": 167
+    },
+    {
+      "epoch": 1.8064516129032258,
+      "grad_norm": 3.670388698577881,
+      "learning_rate": 4.786023166484913e-06,
+      "loss": 2.7388,
+      "step": 168
+    },
+    {
+      "epoch": 1.817204301075269,
+      "grad_norm": 3.6209826469421387,
+      "learning_rate": 4.498051473045291e-06,
+      "loss": 3.9281,
+      "step": 169
+    },
+    {
+      "epoch": 1.827956989247312,
+      "grad_norm": 3.7421019077301025,
+      "learning_rate": 4.218418707772886e-06,
+      "loss": 3.1656,
+      "step": 170
+    },
+    {
+      "epoch": 1.838709677419355,
+      "grad_norm": 3.480229377746582,
+      "learning_rate": 3.947201319473587e-06,
+      "loss": 1.5602,
+      "step": 171
+    },
+    {
+      "epoch": 1.849462365591398,
+      "grad_norm": 2.7893009185791016,
+      "learning_rate": 3.684473456273278e-06,
+      "loss": 1.3773,
+      "step": 172
+    },
+    {
+      "epoch": 1.860215053763441,
+      "grad_norm": 3.345553398132324,
+      "learning_rate": 3.4303069453464383e-06,
+      "loss": 1.6992,
+      "step": 173
+    },
+    {
+      "epoch": 1.870967741935484,
+      "grad_norm": 4.188620567321777,
+      "learning_rate": 3.184771273279312e-06,
+      "loss": 2.5594,
+      "step": 174
+    },
+    {
+      "epoch": 1.881720430107527,
+      "grad_norm": 3.4975194931030273,
+      "learning_rate": 2.947933567072987e-06,
+      "loss": 2.225,
+      "step": 175
+    },
+    {
+      "epoch": 1.89247311827957,
+      "grad_norm": 3.273622989654541,
+      "learning_rate": 2.719858575791534e-06,
+      "loss": 1.1841,
+      "step": 176
+    },
+    {
+      "epoch": 1.903225806451613,
+      "grad_norm": 3.025174617767334,
+      "learning_rate": 2.500608652860256e-06,
+      "loss": 1.6974,
+      "step": 177
+    },
+    {
+      "epoch": 1.913978494623656,
+      "grad_norm": 2.607513427734375,
+      "learning_rate": 2.2902437390188737e-06,
+      "loss": 1.1319,
+      "step": 178
+    },
+    {
+      "epoch": 1.924731182795699,
+      "grad_norm": 3.18086576461792,
+      "learning_rate": 2.0888213459343587e-06,
+      "loss": 1.8406,
+      "step": 179
+    },
+    {
+      "epoch": 1.935483870967742,
+      "grad_norm": 3.675136089324951,
+      "learning_rate": 1.8963965404777875e-06,
+      "loss": 2.3657,
+      "step": 180
+    },
+    {
+      "epoch": 1.946236559139785,
+      "grad_norm": 3.96602725982666,
+      "learning_rate": 1.7130219296696263e-06,
+      "loss": 1.5121,
+      "step": 181
+    },
+    {
+      "epoch": 1.956989247311828,
+      "grad_norm": 3.6097054481506348,
+      "learning_rate": 1.5387476462974824e-06,
+      "loss": 1.5113,
+      "step": 182
+    },
+    {
+      "epoch": 1.967741935483871,
+      "grad_norm": 3.757528781890869,
+      "learning_rate": 1.3736213352103147e-06,
+      "loss": 1.5258,
+      "step": 183
+    },
+    {
+      "epoch": 1.978494623655914,
+      "grad_norm": 4.669776439666748,
+      "learning_rate": 1.2176881402928002e-06,
+      "loss": 1.3748,
+      "step": 184
+    },
+    {
+      "epoch": 1.989247311827957,
+      "grad_norm": 3.6529650688171387,
+      "learning_rate": 1.0709906921234367e-06,
+      "loss": 0.7173,
+      "step": 185
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 3.2291080951690674,
+      "learning_rate": 9.33569096319799e-07,
+      "loss": 2.3399,
+      "step": 186
+    },
+    {
+      "epoch": 2.010752688172043,
+      "grad_norm": 2.3444361686706543,
+      "learning_rate": 8.054609225740255e-07,
+      "loss": 5.6927,
+      "step": 187
+    },
+    {
+      "epoch": 2.021505376344086,
+      "grad_norm": 2.7011375427246094,
+      "learning_rate": 6.867011943816724e-07,
+      "loss": 5.3949,
+      "step": 188
+    },
+    {
+      "epoch": 2.032258064516129,
+      "grad_norm": 2.745737314224243,
+      "learning_rate": 5.77322379466617e-07,
+      "loss": 4.3841,
+      "step": 189
+    },
+    {
+      "epoch": 2.043010752688172,
+      "grad_norm": 2.8492581844329834,
+      "learning_rate": 4.773543809047186e-07,
+      "loss": 3.7209,
+      "step": 190
+    },
+    {
+      "epoch": 2.053763440860215,
+      "grad_norm": 3.298788547515869,
+      "learning_rate": 3.868245289486027e-07,
+      "loss": 4.1478,
+      "step": 191
+    },
+    {
+      "epoch": 2.064516129032258,
+      "grad_norm": 2.956620931625366,
+      "learning_rate": 3.0575757355586817e-07,
+      "loss": 2.9301,
+      "step": 192
+    },
+    {
+      "epoch": 2.075268817204301,
+      "grad_norm": 2.8849871158599854,
+      "learning_rate": 2.3417567762266497e-07,
+      "loss": 3.1787,
+      "step": 193
+    },
+    {
+      "epoch": 2.086021505376344,
+      "grad_norm": 3.1618738174438477,
+      "learning_rate": 1.7209841092460043e-07,
+      "loss": 3.161,
+      "step": 194
+    },
+    {
+      "epoch": 2.096774193548387,
+      "grad_norm": 3.032891273498535,
+      "learning_rate": 1.1954274476655534e-07,
+      "loss": 2.5093,
+      "step": 195
+    },
+    {
+      "epoch": 2.10752688172043,
+      "grad_norm": 2.315091133117676,
+      "learning_rate": 7.652304734289127e-08,
+      "loss": 1.0642,
+      "step": 196
+    },
+    {
+      "epoch": 2.118279569892473,
+      "grad_norm": 2.705153226852417,
+      "learning_rate": 4.30510798093342e-08,
+      "loss": 2.0394,
+      "step": 197
+    },
+    {
+      "epoch": 2.129032258064516,
+      "grad_norm": 2.7641355991363525,
+      "learning_rate": 1.9135993067588284e-08,
+      "loss": 1.127,
+      "step": 198
+    },
+    {
+      "epoch": 2.139784946236559,
+      "grad_norm": 3.088827610015869,
+      "learning_rate": 4.784325263584854e-09,
+      "loss": 1.8748,
+      "step": 199
+    },
+    {
+      "epoch": 2.150537634408602,
+      "grad_norm": 2.703360080718994,
+      "learning_rate": 0.0,
+      "loss": 1.2501,
+      "step": 200
+    },
+    {
+      "epoch": 2.150537634408602,
+      "eval_loss": 0.7186845541000366,
+      "eval_runtime": 14.749,
+      "eval_samples_per_second": 10.645,
+      "eval_steps_per_second": 2.712,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.659354182502318e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null