Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:038333def0a50807c8209bc5d4e8eabf1686a2aea2feb8f6ad1aef35b07a2e4d
 size 671149168

 version https://git-lfs.github.com/spec/v1
+oid sha256:288dfa4bcc8e21fcd200b848ee8e583a29b8f590df5278ea8cedd43ddf5ef6d4
 size 671149168

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:10870354337bb7537f40367e4ba3bebe9b6e5911b64f00b7023468c59f1e90c8
 size 1342555602

 version https://git-lfs.github.com/spec/v1
+oid sha256:586d70272fa8dd22d6a46265470171868176f157cb70b13c18b406d7d4ffa34e
 size 1342555602

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a598fd8f8f9cde241152abaa0a43c6358d4b5b511c374db9e98b8549549a306b
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:565f4d0f70ff703c757160453fba7ef1084c894222fd5dd7506b3b90c2d681e2
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f4c9c807f0681c8b7e53ada9b6ec3dba530d303de7da0d0a0562a3d8d0bbba08
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.1049232482910156,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 1.5625,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 11.887,
       "eval_steps_per_second": 5.944,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.2825696088398234e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.0826399326324463,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 2.0833333333333335,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 11.887,
       "eval_steps_per_second": 5.944,
       "step": 150
+    },
+    {
+      "epoch": 1.5729166666666665,
+      "grad_norm": 0.6735657453536987,
+      "learning_rate": 1.7197048550474643e-05,
+      "loss": 0.9896,
+      "step": 151
+    },
+    {
+      "epoch": 1.5833333333333335,
+      "grad_norm": 0.7103169560432434,
+      "learning_rate": 1.6543469682057106e-05,
+      "loss": 0.9488,
+      "step": 152
+    },
+    {
+      "epoch": 1.59375,
+      "grad_norm": 0.6837167739868164,
+      "learning_rate": 1.5900081996875083e-05,
+      "loss": 0.9789,
+      "step": 153
+    },
+    {
+      "epoch": 1.6041666666666665,
+      "grad_norm": 0.6514947414398193,
+      "learning_rate": 1.526708147705013e-05,
+      "loss": 1.0009,
+      "step": 154
+    },
+    {
+      "epoch": 1.6145833333333335,
+      "grad_norm": 0.6609664559364319,
+      "learning_rate": 1.4644660940672627e-05,
+      "loss": 0.9559,
+      "step": 155
+    },
+    {
+      "epoch": 1.625,
+      "grad_norm": 0.6664807200431824,
+      "learning_rate": 1.4033009983067452e-05,
+      "loss": 0.9171,
+      "step": 156
+    },
+    {
+      "epoch": 1.6354166666666665,
+      "grad_norm": 0.6664649844169617,
+      "learning_rate": 1.3432314919041478e-05,
+      "loss": 0.8995,
+      "step": 157
+    },
+    {
+      "epoch": 1.6458333333333335,
+      "grad_norm": 0.6916347742080688,
+      "learning_rate": 1.2842758726130283e-05,
+      "loss": 0.8661,
+      "step": 158
+    },
+    {
+      "epoch": 1.65625,
+      "grad_norm": 0.6753320097923279,
+      "learning_rate": 1.22645209888614e-05,
+      "loss": 0.9085,
+      "step": 159
+    },
+    {
+      "epoch": 1.6666666666666665,
+      "grad_norm": 0.6635647416114807,
+      "learning_rate": 1.1697777844051105e-05,
+      "loss": 0.8536,
+      "step": 160
+    },
+    {
+      "epoch": 1.6770833333333335,
+      "grad_norm": 0.7056015729904175,
+      "learning_rate": 1.1142701927151456e-05,
+      "loss": 0.8024,
+      "step": 161
+    },
+    {
+      "epoch": 1.6875,
+      "grad_norm": 0.7135605812072754,
+      "learning_rate": 1.0599462319663905e-05,
+      "loss": 0.8668,
+      "step": 162
+    },
+    {
+      "epoch": 1.6979166666666665,
+      "grad_norm": 0.745603084564209,
+      "learning_rate": 1.006822449763537e-05,
+      "loss": 0.8019,
+      "step": 163
+    },
+    {
+      "epoch": 1.7083333333333335,
+      "grad_norm": 0.7806446552276611,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 1.0347,
+      "step": 164
+    },
+    {
+      "epoch": 1.71875,
+      "grad_norm": 0.9726524949073792,
+      "learning_rate": 9.042397785550405e-06,
+      "loss": 0.9745,
+      "step": 165
+    },
+    {
+      "epoch": 1.7291666666666665,
+      "grad_norm": 0.47184664011001587,
+      "learning_rate": 8.548121372247918e-06,
+      "loss": 1.1018,
+      "step": 166
+    },
+    {
+      "epoch": 1.7395833333333335,
+      "grad_norm": 0.5795819163322449,
+      "learning_rate": 8.066471602728803e-06,
+      "loss": 1.0654,
+      "step": 167
+    },
+    {
+      "epoch": 1.75,
+      "grad_norm": 0.5761855840682983,
+      "learning_rate": 7.597595192178702e-06,
+      "loss": 1.0779,
+      "step": 168
+    },
+    {
+      "epoch": 1.7604166666666665,
+      "grad_norm": 0.5878072381019592,
+      "learning_rate": 7.1416349648943894e-06,
+      "loss": 1.0233,
+      "step": 169
+    },
+    {
+      "epoch": 1.7708333333333335,
+      "grad_norm": 0.5870786905288696,
+      "learning_rate": 6.698729810778065e-06,
+      "loss": 1.0185,
+      "step": 170
+    },
+    {
+      "epoch": 1.78125,
+      "grad_norm": 0.5810210704803467,
+      "learning_rate": 6.269014643030213e-06,
+      "loss": 0.9702,
+      "step": 171
+    },
+    {
+      "epoch": 1.7916666666666665,
+      "grad_norm": 0.6185545325279236,
+      "learning_rate": 5.852620357053651e-06,
+      "loss": 0.9864,
+      "step": 172
+    },
+    {
+      "epoch": 1.8020833333333335,
+      "grad_norm": 0.6088296175003052,
+      "learning_rate": 5.449673790581611e-06,
+      "loss": 0.9794,
+      "step": 173
+    },
+    {
+      "epoch": 1.8125,
+      "grad_norm": 0.6336007714271545,
+      "learning_rate": 5.060297685041659e-06,
+      "loss": 0.9781,
+      "step": 174
+    },
+    {
+      "epoch": 1.8229166666666665,
+      "grad_norm": 0.635280191898346,
+      "learning_rate": 4.684610648167503e-06,
+      "loss": 0.8847,
+      "step": 175
+    },
+    {
+      "epoch": 1.8333333333333335,
+      "grad_norm": 0.6503217220306396,
+      "learning_rate": 4.322727117869951e-06,
+      "loss": 0.8849,
+      "step": 176
+    },
+    {
+      "epoch": 1.84375,
+      "grad_norm": 0.6389548182487488,
+      "learning_rate": 3.974757327377981e-06,
+      "loss": 0.9125,
+      "step": 177
+    },
+    {
+      "epoch": 1.8541666666666665,
+      "grad_norm": 0.6556768417358398,
+      "learning_rate": 3.6408072716606346e-06,
+      "loss": 0.9179,
+      "step": 178
+    },
+    {
+      "epoch": 1.8645833333333335,
+      "grad_norm": 0.6496260762214661,
+      "learning_rate": 3.3209786751399187e-06,
+      "loss": 0.9347,
+      "step": 179
+    },
+    {
+      "epoch": 1.875,
+      "grad_norm": 0.6618404388427734,
+      "learning_rate": 3.0153689607045845e-06,
+      "loss": 0.8607,
+      "step": 180
+    },
+    {
+      "epoch": 1.8854166666666665,
+      "grad_norm": 0.6887024641036987,
+      "learning_rate": 2.724071220034158e-06,
+      "loss": 0.8698,
+      "step": 181
+    },
+    {
+      "epoch": 1.8958333333333335,
+      "grad_norm": 0.6851520538330078,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 0.8412,
+      "step": 182
+    },
+    {
+      "epoch": 1.90625,
+      "grad_norm": 0.7331283092498779,
+      "learning_rate": 2.1847622018482283e-06,
+      "loss": 0.8442,
+      "step": 183
+    },
+    {
+      "epoch": 1.9166666666666665,
+      "grad_norm": 0.7035320997238159,
+      "learning_rate": 1.9369152030840556e-06,
+      "loss": 0.8199,
+      "step": 184
+    },
+    {
+      "epoch": 1.9270833333333335,
+      "grad_norm": 0.7680994272232056,
+      "learning_rate": 1.70370868554659e-06,
+      "loss": 0.8528,
+      "step": 185
+    },
+    {
+      "epoch": 1.9375,
+      "grad_norm": 0.756158709526062,
+      "learning_rate": 1.4852136862001764e-06,
+      "loss": 0.7988,
+      "step": 186
+    },
+    {
+      "epoch": 1.9479166666666665,
+      "grad_norm": 0.8097379207611084,
+      "learning_rate": 1.2814967607382432e-06,
+      "loss": 0.9105,
+      "step": 187
+    },
+    {
+      "epoch": 1.9583333333333335,
+      "grad_norm": 0.8740496039390564,
+      "learning_rate": 1.0926199633097157e-06,
+      "loss": 1.0824,
+      "step": 188
+    },
+    {
+      "epoch": 1.96875,
+      "grad_norm": 0.4817550480365753,
+      "learning_rate": 9.186408276168013e-07,
+      "loss": 1.0318,
+      "step": 189
+    },
+    {
+      "epoch": 1.9791666666666665,
+      "grad_norm": 0.6054410934448242,
+      "learning_rate": 7.596123493895991e-07,
+      "loss": 0.9603,
+      "step": 190
+    },
+    {
+      "epoch": 1.9895833333333335,
+      "grad_norm": 0.6801382303237915,
+      "learning_rate": 6.15582970243117e-07,
+      "loss": 0.8966,
+      "step": 191
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 0.852452278137207,
+      "learning_rate": 4.865965629214819e-07,
+      "loss": 0.8796,
+      "step": 192
+    },
+    {
+      "epoch": 2.0104166666666665,
+      "grad_norm": 0.3836596608161926,
+      "learning_rate": 3.7269241793390085e-07,
+      "loss": 1.0342,
+      "step": 193
+    },
+    {
+      "epoch": 2.0208333333333335,
+      "grad_norm": 0.4494467079639435,
+      "learning_rate": 2.7390523158633554e-07,
+      "loss": 1.0453,
+      "step": 194
+    },
+    {
+      "epoch": 2.03125,
+      "grad_norm": 0.5086709856987,
+      "learning_rate": 1.9026509541272275e-07,
+      "loss": 1.0034,
+      "step": 195
+    },
+    {
+      "epoch": 2.0416666666666665,
+      "grad_norm": 0.505764365196228,
+      "learning_rate": 1.2179748700879012e-07,
+      "loss": 1.0083,
+      "step": 196
+    },
+    {
+      "epoch": 2.0520833333333335,
+      "grad_norm": 0.5370025634765625,
+      "learning_rate": 6.852326227130834e-08,
+      "loss": 0.9276,
+      "step": 197
+    },
+    {
+      "epoch": 2.0625,
+      "grad_norm": 0.5703802108764648,
+      "learning_rate": 3.04586490452119e-08,
+      "loss": 0.961,
+      "step": 198
+    },
+    {
+      "epoch": 2.0729166666666665,
+      "grad_norm": 0.5515142679214478,
+      "learning_rate": 7.615242180436522e-09,
+      "loss": 0.8952,
+      "step": 199
+    },
+    {
+      "epoch": 2.0833333333333335,
+      "grad_norm": 0.6029378175735474,
+      "learning_rate": 0.0,
+      "loss": 0.8896,
+      "step": 200
+    },
+    {
+      "epoch": 2.0833333333333335,
+      "eval_loss": 1.0826399326324463,
+      "eval_runtime": 13.6062,
+      "eval_samples_per_second": 11.906,
+      "eval_steps_per_second": 5.953,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 3.041540503146332e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null