Training in progress, step 193, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +305 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ec6c70d02926ceaad335a0100cf74749c51b9965734af9abb176fc361ef43d16
 size 201892112

 version https://git-lfs.github.com/spec/v1
+oid sha256:38bce14ef22ca30cb7842c91346c893c96f6d4b1229c0c15c5674571ac6f5d4e
 size 201892112

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1a94c48d26b881d79abd3097469e8f5783eefc08a8dea05c6c89d1da2fb16137
 size 102864548

 version https://git-lfs.github.com/spec/v1
+oid sha256:0cd95f68fd8a1a6d11a01fd68d92cebe30891911861682a758bab130fbe57b32
 size 102864548

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f2e43872645f9f101ee403b5a709d668383b6b6bfeb7f368e5762a1069266f6e
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:1d6f2d95fee078ae6685f33ce2ab0558591568bec11bb8996c3ce0c08be35279
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3c9bd7e4942c5b62a922cd992f05c7b79b177cefa5f8b5cc986083011c9c9245
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:e35b64cddddbcb296f3261846a2764caa97bfc895b4a9f0211dd2b8a6e73b2a5
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.8657492399215698,
   "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 2.3346303501945527,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,307 @@
       "eval_samples_per_second": 41.447,
       "eval_steps_per_second": 10.362,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1413,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.829000395187814e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.8657492399215698,
   "best_model_checkpoint": "miner_id_24/checkpoint-150",
+  "epoch": 3.0038910505836576,
   "eval_steps": 50,
+  "global_step": 193,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 41.447,
       "eval_steps_per_second": 10.362,
       "step": 150
+    },
+    {
+      "epoch": 2.350194552529183,
+      "grad_norm": 0.374606192111969,
+      "learning_rate": 1.2443403456474017e-05,
+      "loss": 0.7851,
+      "step": 151
+    },
+    {
+      "epoch": 2.3657587548638133,
+      "grad_norm": 0.3662430942058563,
+      "learning_rate": 1.1882318057580489e-05,
+      "loss": 0.74,
+      "step": 152
+    },
+    {
+      "epoch": 2.3813229571984436,
+      "grad_norm": 0.3952215909957886,
+      "learning_rate": 1.1332466114513512e-05,
+      "loss": 0.8131,
+      "step": 153
+    },
+    {
+      "epoch": 2.396887159533074,
+      "grad_norm": 0.38979917764663696,
+      "learning_rate": 1.0794009671164484e-05,
+      "loss": 0.821,
+      "step": 154
+    },
+    {
+      "epoch": 2.412451361867704,
+      "grad_norm": 0.37041887640953064,
+      "learning_rate": 1.0267107413118742e-05,
+      "loss": 0.7174,
+      "step": 155
+    },
+    {
+      "epoch": 2.4280155642023344,
+      "grad_norm": 0.41075101494789124,
+      "learning_rate": 9.751914620890206e-06,
+      "loss": 0.7549,
+      "step": 156
+    },
+    {
+      "epoch": 2.443579766536965,
+      "grad_norm": 0.4010712206363678,
+      "learning_rate": 9.248583124159438e-06,
+      "loss": 0.7279,
+      "step": 157
+    },
+    {
+      "epoch": 2.4591439688715955,
+      "grad_norm": 0.40612390637397766,
+      "learning_rate": 8.757261257028777e-06,
+      "loss": 0.7278,
+      "step": 158
+    },
+    {
+      "epoch": 2.4747081712062258,
+      "grad_norm": 0.4142809510231018,
+      "learning_rate": 8.278093814307637e-06,
+      "loss": 0.7059,
+      "step": 159
+    },
+    {
+      "epoch": 2.490272373540856,
+      "grad_norm": 0.46315258741378784,
+      "learning_rate": 7.81122200884072e-06,
+      "loss": 0.7573,
+      "step": 160
+    },
+    {
+      "epoch": 2.5058365758754864,
+      "grad_norm": 0.38741129636764526,
+      "learning_rate": 7.356783429892023e-06,
+      "loss": 0.7816,
+      "step": 161
+    },
+    {
+      "epoch": 2.5214007782101167,
+      "grad_norm": 0.31710007786750793,
+      "learning_rate": 6.9149120025965905e-06,
+      "loss": 0.6611,
+      "step": 162
+    },
+    {
+      "epoch": 2.536964980544747,
+      "grad_norm": 0.3693569004535675,
+      "learning_rate": 6.4857379484922375e-06,
+      "loss": 0.8636,
+      "step": 163
+    },
+    {
+      "epoch": 2.5525291828793772,
+      "grad_norm": 0.34923169016838074,
+      "learning_rate": 6.069387747142591e-06,
+      "loss": 0.7413,
+      "step": 164
+    },
+    {
+      "epoch": 2.5680933852140075,
+      "grad_norm": 0.3575231432914734,
+      "learning_rate": 5.665984098862992e-06,
+      "loss": 0.7553,
+      "step": 165
+    },
+    {
+      "epoch": 2.5836575875486383,
+      "grad_norm": 0.3520582318305969,
+      "learning_rate": 5.275645888560232e-06,
+      "loss": 0.7394,
+      "step": 166
+    },
+    {
+      "epoch": 2.5992217898832686,
+      "grad_norm": 0.37276095151901245,
+      "learning_rate": 4.898488150696467e-06,
+      "loss": 0.7774,
+      "step": 167
+    },
+    {
+      "epoch": 2.614785992217899,
+      "grad_norm": 0.3675203323364258,
+      "learning_rate": 4.534622035388214e-06,
+      "loss": 0.7305,
+      "step": 168
+    },
+    {
+      "epoch": 2.630350194552529,
+      "grad_norm": 0.3742014467716217,
+      "learning_rate": 4.184154775649768e-06,
+      "loss": 0.7991,
+      "step": 169
+    },
+    {
+      "epoch": 2.6459143968871595,
+      "grad_norm": 0.38438355922698975,
+      "learning_rate": 3.8471896557912e-06,
+      "loss": 0.7519,
+      "step": 170
+    },
+    {
+      "epoch": 2.6614785992217898,
+      "grad_norm": 0.41580691933631897,
+      "learning_rate": 3.523825980979989e-06,
+      "loss": 0.7663,
+      "step": 171
+    },
+    {
+      "epoch": 2.6770428015564205,
+      "grad_norm": 0.4046590030193329,
+      "learning_rate": 3.2141590479753236e-06,
+      "loss": 0.7113,
+      "step": 172
+    },
+    {
+      "epoch": 2.692607003891051,
+      "grad_norm": 0.4197351038455963,
+      "learning_rate": 2.918280117043709e-06,
+      "loss": 0.8028,
+      "step": 173
+    },
+    {
+      "epoch": 2.708171206225681,
+      "grad_norm": 0.4047519862651825,
+      "learning_rate": 2.636276385064157e-06,
+      "loss": 0.7148,
+      "step": 174
+    },
+    {
+      "epoch": 2.7237354085603114,
+      "grad_norm": 0.42242541909217834,
+      "learning_rate": 2.3682309598308747e-06,
+      "loss": 0.6966,
+      "step": 175
+    },
+    {
+      "epoch": 2.7392996108949417,
+      "grad_norm": 0.4608907103538513,
+      "learning_rate": 2.114222835560986e-06,
+      "loss": 0.7255,
+      "step": 176
+    },
+    {
+      "epoch": 2.754863813229572,
+      "grad_norm": 0.4060788154602051,
+      "learning_rate": 1.8743268696145954e-06,
+      "loss": 0.833,
+      "step": 177
+    },
+    {
+      "epoch": 2.7704280155642023,
+      "grad_norm": 0.33689865469932556,
+      "learning_rate": 1.6486137604339813e-06,
+      "loss": 0.7972,
+      "step": 178
+    },
+    {
+      "epoch": 2.7859922178988326,
+      "grad_norm": 0.3548411726951599,
+      "learning_rate": 1.4371500267084338e-06,
+      "loss": 0.7801,
+      "step": 179
+    },
+    {
+      "epoch": 2.801556420233463,
+      "grad_norm": 0.3427428603172302,
+      "learning_rate": 1.2399979877708745e-06,
+      "loss": 0.7001,
+      "step": 180
+    },
+    {
+      "epoch": 2.817120622568093,
+      "grad_norm": 0.36466020345687866,
+      "learning_rate": 1.0572157452321097e-06,
+      "loss": 0.7898,
+      "step": 181
+    },
+    {
+      "epoch": 2.832684824902724,
+      "grad_norm": 0.39266034960746765,
+      "learning_rate": 8.888571658579703e-07,
+      "loss": 0.7809,
+      "step": 182
+    },
+    {
+      "epoch": 2.848249027237354,
+      "grad_norm": 0.3834577202796936,
+      "learning_rate": 7.349718656945504e-07,
+      "loss": 0.7889,
+      "step": 183
+    },
+    {
+      "epoch": 2.8638132295719845,
+      "grad_norm": 0.3879198133945465,
+      "learning_rate": 5.956051954461472e-07,
+      "loss": 0.8306,
+      "step": 184
+    },
+    {
+      "epoch": 2.8793774319066148,
+      "grad_norm": 0.36868515610694885,
+      "learning_rate": 4.7079822711015296e-07,
+      "loss": 0.7332,
+      "step": 185
+    },
+    {
+      "epoch": 2.894941634241245,
+      "grad_norm": 0.38267624378204346,
+      "learning_rate": 3.605877418729975e-07,
+      "loss": 0.6865,
+      "step": 186
+    },
+    {
+      "epoch": 2.9105058365758754,
+      "grad_norm": 0.39616188406944275,
+      "learning_rate": 2.6500621927054715e-07,
+      "loss": 0.7302,
+      "step": 187
+    },
+    {
+      "epoch": 2.926070038910506,
+      "grad_norm": 0.4041755199432373,
+      "learning_rate": 1.840818276162226e-07,
+      "loss": 0.8152,
+      "step": 188
+    },
+    {
+      "epoch": 2.9416342412451364,
+      "grad_norm": 0.4070306122303009,
+      "learning_rate": 1.1783841569968367e-07,
+      "loss": 0.6876,
+      "step": 189
+    },
+    {
+      "epoch": 2.9571984435797667,
+      "grad_norm": 0.42624837160110474,
+      "learning_rate": 6.629550575847354e-08,
+      "loss": 0.7026,
+      "step": 190
+    },
+    {
+      "epoch": 2.972762645914397,
+      "grad_norm": 0.4421103298664093,
+      "learning_rate": 2.946828772473764e-08,
+      "loss": 0.7546,
+      "step": 191
+    },
+    {
+      "epoch": 2.9883268482490273,
+      "grad_norm": 0.4374568462371826,
+      "learning_rate": 7.36761474865455e-09,
+      "loss": 0.7218,
+      "step": 192
+    },
+    {
+      "epoch": 3.0038910505836576,
+      "grad_norm": 0.8102526068687439,
+      "learning_rate": 0.0,
+      "loss": 1.2001,
+      "step": 193
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 4.879575712530432e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null