Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a1b15eeb9008c5660904719b6cc164554489c49294e9ecfcb63bcecc62e13d58
 size 295488936

 version https://git-lfs.github.com/spec/v1
+oid sha256:bbe766bef05db7183694d2ed0967e300355b1e04553840f84ca8bb4a584d4710
 size 295488936

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0acd1b9fa201c5bbba696e9b8359bfbbc319c48843f6befcc058c7a95b162f57
 size 150486964

 version https://git-lfs.github.com/spec/v1
+oid sha256:94692e53b356f6fb89fc67ecd1ffcabb911f140238b298941d4b65cfc28232d1
 size 150486964

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e50a795028321794c6ddcc60f95cfd10f06c51e97fdeed5b5513e6731d843390
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:da72e881394c26de7cd931dd3746d44eecbf775abe21e65074891391ab6006b7
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:01c5525f1d8420ca8a81a7fd2ec397a508131d03210dfd36c7ac5758b0e6313b
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:a3e88ef6a2716260516e17223973d6a3b0a4c88bf12c72ed47e80e6f2a6782fd
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 2.0085387229919434,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.01392305193298371,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 21.428,
       "eval_steps_per_second": 5.361,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.098784350142464e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 2.0041327476501465,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.01856406924397828,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 21.428,
       "eval_steps_per_second": 5.361,
       "step": 150
+    },
+    {
+      "epoch": 0.014015872279203602,
+      "grad_norm": 0.4076049029827118,
+      "learning_rate": 2.589263157894737e-05,
+      "loss": 2.2603,
+      "step": 151
+    },
+    {
+      "epoch": 0.014108692625423493,
+      "grad_norm": 0.4122617244720459,
+      "learning_rate": 2.536421052631579e-05,
+      "loss": 2.0841,
+      "step": 152
+    },
+    {
+      "epoch": 0.014201512971643385,
+      "grad_norm": 0.4217035472393036,
+      "learning_rate": 2.483578947368421e-05,
+      "loss": 2.0217,
+      "step": 153
+    },
+    {
+      "epoch": 0.014294333317863275,
+      "grad_norm": 0.4271675646305084,
+      "learning_rate": 2.430736842105263e-05,
+      "loss": 1.7876,
+      "step": 154
+    },
+    {
+      "epoch": 0.014387153664083167,
+      "grad_norm": 0.4109157621860504,
+      "learning_rate": 2.3778947368421052e-05,
+      "loss": 1.8487,
+      "step": 155
+    },
+    {
+      "epoch": 0.014479974010303058,
+      "grad_norm": 0.42467454075813293,
+      "learning_rate": 2.3250526315789473e-05,
+      "loss": 1.8176,
+      "step": 156
+    },
+    {
+      "epoch": 0.01457279435652295,
+      "grad_norm": 0.43306881189346313,
+      "learning_rate": 2.2722105263157894e-05,
+      "loss": 2.0096,
+      "step": 157
+    },
+    {
+      "epoch": 0.014665614702742842,
+      "grad_norm": 0.4301247298717499,
+      "learning_rate": 2.2193684210526316e-05,
+      "loss": 1.7983,
+      "step": 158
+    },
+    {
+      "epoch": 0.014758435048962733,
+      "grad_norm": 0.43399932980537415,
+      "learning_rate": 2.1665263157894737e-05,
+      "loss": 2.138,
+      "step": 159
+    },
+    {
+      "epoch": 0.014851255395182623,
+      "grad_norm": 0.4387844204902649,
+      "learning_rate": 2.1136842105263158e-05,
+      "loss": 1.6287,
+      "step": 160
+    },
+    {
+      "epoch": 0.014944075741402515,
+      "grad_norm": 0.4472537636756897,
+      "learning_rate": 2.060842105263158e-05,
+      "loss": 2.1351,
+      "step": 161
+    },
+    {
+      "epoch": 0.015036896087622407,
+      "grad_norm": 0.45660072565078735,
+      "learning_rate": 2.008e-05,
+      "loss": 2.0469,
+      "step": 162
+    },
+    {
+      "epoch": 0.015129716433842299,
+      "grad_norm": 0.7435435056686401,
+      "learning_rate": 1.9551578947368422e-05,
+      "loss": 1.919,
+      "step": 163
+    },
+    {
+      "epoch": 0.01522253678006219,
+      "grad_norm": 0.4554401934146881,
+      "learning_rate": 1.9023157894736843e-05,
+      "loss": 1.951,
+      "step": 164
+    },
+    {
+      "epoch": 0.01531535712628208,
+      "grad_norm": 0.4483166038990021,
+      "learning_rate": 1.849473684210526e-05,
+      "loss": 2.0071,
+      "step": 165
+    },
+    {
+      "epoch": 0.015408177472501972,
+      "grad_norm": 0.4810929596424103,
+      "learning_rate": 1.7966315789473686e-05,
+      "loss": 2.0367,
+      "step": 166
+    },
+    {
+      "epoch": 0.015500997818721864,
+      "grad_norm": 0.4459546208381653,
+      "learning_rate": 1.7437894736842107e-05,
+      "loss": 1.8893,
+      "step": 167
+    },
+    {
+      "epoch": 0.015593818164941755,
+      "grad_norm": 0.4915485084056854,
+      "learning_rate": 1.6909473684210525e-05,
+      "loss": 1.9784,
+      "step": 168
+    },
+    {
+      "epoch": 0.015686638511161647,
+      "grad_norm": 0.4772583544254303,
+      "learning_rate": 1.638105263157895e-05,
+      "loss": 1.9509,
+      "step": 169
+    },
+    {
+      "epoch": 0.015779458857381537,
+      "grad_norm": 0.4593046307563782,
+      "learning_rate": 1.5852631578947368e-05,
+      "loss": 1.8422,
+      "step": 170
+    },
+    {
+      "epoch": 0.01587227920360143,
+      "grad_norm": 0.4797092080116272,
+      "learning_rate": 1.532421052631579e-05,
+      "loss": 1.8923,
+      "step": 171
+    },
+    {
+      "epoch": 0.01596509954982132,
+      "grad_norm": 0.4819195866584778,
+      "learning_rate": 1.4795789473684209e-05,
+      "loss": 1.9566,
+      "step": 172
+    },
+    {
+      "epoch": 0.016057919896041214,
+      "grad_norm": 0.5063287615776062,
+      "learning_rate": 1.4267368421052632e-05,
+      "loss": 2.1537,
+      "step": 173
+    },
+    {
+      "epoch": 0.016150740242261104,
+      "grad_norm": 0.47720491886138916,
+      "learning_rate": 1.3738947368421053e-05,
+      "loss": 2.0131,
+      "step": 174
+    },
+    {
+      "epoch": 0.016243560588480994,
+      "grad_norm": 0.4763629138469696,
+      "learning_rate": 1.3210526315789473e-05,
+      "loss": 1.9978,
+      "step": 175
+    },
+    {
+      "epoch": 0.016336380934700887,
+      "grad_norm": 0.4971455931663513,
+      "learning_rate": 1.2682105263157896e-05,
+      "loss": 1.9961,
+      "step": 176
+    },
+    {
+      "epoch": 0.016429201280920777,
+      "grad_norm": 0.5000568628311157,
+      "learning_rate": 1.2153684210526315e-05,
+      "loss": 1.9357,
+      "step": 177
+    },
+    {
+      "epoch": 0.01652202162714067,
+      "grad_norm": 0.5184934139251709,
+      "learning_rate": 1.1625263157894737e-05,
+      "loss": 1.9728,
+      "step": 178
+    },
+    {
+      "epoch": 0.01661484197336056,
+      "grad_norm": 0.5139727592468262,
+      "learning_rate": 1.1096842105263158e-05,
+      "loss": 1.9561,
+      "step": 179
+    },
+    {
+      "epoch": 0.01670766231958045,
+      "grad_norm": 0.49820300936698914,
+      "learning_rate": 1.0568421052631579e-05,
+      "loss": 1.9569,
+      "step": 180
+    },
+    {
+      "epoch": 0.016800482665800344,
+      "grad_norm": 0.5008655190467834,
+      "learning_rate": 1.004e-05,
+      "loss": 2.0881,
+      "step": 181
+    },
+    {
+      "epoch": 0.016893303012020234,
+      "grad_norm": 0.5131986737251282,
+      "learning_rate": 9.511578947368422e-06,
+      "loss": 2.066,
+      "step": 182
+    },
+    {
+      "epoch": 0.016986123358240127,
+      "grad_norm": 0.5675308108329773,
+      "learning_rate": 8.983157894736843e-06,
+      "loss": 1.9731,
+      "step": 183
+    },
+    {
+      "epoch": 0.017078943704460017,
+      "grad_norm": 0.5388391017913818,
+      "learning_rate": 8.454736842105263e-06,
+      "loss": 2.0149,
+      "step": 184
+    },
+    {
+      "epoch": 0.01717176405067991,
+      "grad_norm": 0.5240312814712524,
+      "learning_rate": 7.926315789473684e-06,
+      "loss": 1.8937,
+      "step": 185
+    },
+    {
+      "epoch": 0.0172645843968998,
+      "grad_norm": 0.5105009078979492,
+      "learning_rate": 7.397894736842104e-06,
+      "loss": 2.041,
+      "step": 186
+    },
+    {
+      "epoch": 0.01735740474311969,
+      "grad_norm": 0.5370680689811707,
+      "learning_rate": 6.8694736842105265e-06,
+      "loss": 2.0445,
+      "step": 187
+    },
+    {
+      "epoch": 0.017450225089339584,
+      "grad_norm": 0.5596001744270325,
+      "learning_rate": 6.341052631578948e-06,
+      "loss": 2.1666,
+      "step": 188
+    },
+    {
+      "epoch": 0.017543045435559474,
+      "grad_norm": 0.5792972445487976,
+      "learning_rate": 5.812631578947368e-06,
+      "loss": 2.0378,
+      "step": 189
+    },
+    {
+      "epoch": 0.017635865781779368,
+      "grad_norm": 0.5508564114570618,
+      "learning_rate": 5.2842105263157896e-06,
+      "loss": 1.968,
+      "step": 190
+    },
+    {
+      "epoch": 0.017728686127999257,
+      "grad_norm": 0.5679344534873962,
+      "learning_rate": 4.755789473684211e-06,
+      "loss": 2.0277,
+      "step": 191
+    },
+    {
+      "epoch": 0.017821506474219147,
+      "grad_norm": 0.529935359954834,
+      "learning_rate": 4.227368421052631e-06,
+      "loss": 2.0955,
+      "step": 192
+    },
+    {
+      "epoch": 0.01791432682043904,
+      "grad_norm": 0.6319153904914856,
+      "learning_rate": 3.698947368421052e-06,
+      "loss": 2.0734,
+      "step": 193
+    },
+    {
+      "epoch": 0.01800714716665893,
+      "grad_norm": 0.5862286686897278,
+      "learning_rate": 3.170526315789474e-06,
+      "loss": 2.1698,
+      "step": 194
+    },
+    {
+      "epoch": 0.018099967512878824,
+      "grad_norm": 0.5513949990272522,
+      "learning_rate": 2.6421052631578948e-06,
+      "loss": 2.0404,
+      "step": 195
+    },
+    {
+      "epoch": 0.018192787859098714,
+      "grad_norm": 0.5874564051628113,
+      "learning_rate": 2.1136842105263157e-06,
+      "loss": 2.0934,
+      "step": 196
+    },
+    {
+      "epoch": 0.018285608205318604,
+      "grad_norm": 0.6707521677017212,
+      "learning_rate": 1.585263157894737e-06,
+      "loss": 2.2616,
+      "step": 197
+    },
+    {
+      "epoch": 0.018378428551538498,
+      "grad_norm": 0.6378986239433289,
+      "learning_rate": 1.0568421052631578e-06,
+      "loss": 2.0425,
+      "step": 198
+    },
+    {
+      "epoch": 0.018471248897758388,
+      "grad_norm": 0.7162299156188965,
+      "learning_rate": 5.284210526315789e-07,
+      "loss": 2.3415,
+      "step": 199
+    },
+    {
+      "epoch": 0.01856406924397828,
+      "grad_norm": 0.6265064477920532,
+      "learning_rate": 0.0,
+      "loss": 2.1299,
+      "step": 200
+    },
+    {
+      "epoch": 0.01856406924397828,
+      "eval_loss": 2.0041327476501465,
+      "eval_runtime": 212.1368,
+      "eval_samples_per_second": 21.387,
+      "eval_steps_per_second": 5.35,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.472983354834944e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null