Training in progress, step 172, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +158 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b306333575fc7fd7bb28f0bd22ea0b5791041a3a2f01a1616ba2ea08acfd831c
 size 295488936

 version https://git-lfs.github.com/spec/v1
+oid sha256:56f731df1d4febec950be79145e7379e0e79dc54279b65427e78338e460746f7
 size 295488936

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b65dc3a521ad03594ab8f9c63b3e882b6e5cf1b3ad7ed08c81c1d620fc62d6db
 size 150486964

 version https://git-lfs.github.com/spec/v1
+oid sha256:78e34273ac0caf4a64ed8e6d3efc5804780af801d215059967d19b398187a979
 size 150486964

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c4e2483b8926df6c53297e391517b8cd7f5ba3ae446c4a91d29ee2b4c539272c
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:fb46c97376cc29ab421ace0b527386279777c610aff138cdf5af0397d77787b0
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7bc2ced7b135c293cfea73ba311aa2c673462445da1a975d4c5be5ca782caaa5
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:7d5dcbabca39db933080949f3d050deaca52ed2481401ecb28d097a787c7bad9
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.07863566279411316,
   "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 2.6200873362445414,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,160 @@
       "eval_samples_per_second": 33.272,
       "eval_steps_per_second": 8.575,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1266,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 4.128095383584768e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.07863566279411316,
   "best_model_checkpoint": "miner_id_24/checkpoint-150",
+  "epoch": 3.004366812227074,
   "eval_steps": 50,
+  "global_step": 172,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 33.272,
       "eval_steps_per_second": 8.575,
       "step": 150
+    },
+    {
+      "epoch": 2.6375545851528384,
+      "grad_norm": 0.7440508604049683,
+      "learning_rate": 4.089194655986306e-06,
+      "loss": 0.0413,
+      "step": 151
+    },
+    {
+      "epoch": 2.6550218340611353,
+      "grad_norm": 0.4938758313655853,
+      "learning_rate": 3.7138015365554833e-06,
+      "loss": 0.0321,
+      "step": 152
+    },
+    {
+      "epoch": 2.6724890829694323,
+      "grad_norm": 0.47660332918167114,
+      "learning_rate": 3.3558147633999728e-06,
+      "loss": 0.0272,
+      "step": 153
+    },
+    {
+      "epoch": 2.6899563318777293,
+      "grad_norm": 0.7508668899536133,
+      "learning_rate": 3.0153689607045845e-06,
+      "loss": 0.0389,
+      "step": 154
+    },
+    {
+      "epoch": 2.7074235807860263,
+      "grad_norm": 2.5183489322662354,
+      "learning_rate": 2.692592156212487e-06,
+      "loss": 0.0222,
+      "step": 155
+    },
+    {
+      "epoch": 2.7248908296943233,
+      "grad_norm": 0.7234876751899719,
+      "learning_rate": 2.3876057330792346e-06,
+      "loss": 0.0377,
+      "step": 156
+    },
+    {
+      "epoch": 2.74235807860262,
+      "grad_norm": 0.44617539644241333,
+      "learning_rate": 2.100524384225555e-06,
+      "loss": 0.0596,
+      "step": 157
+    },
+    {
+      "epoch": 2.7598253275109172,
+      "grad_norm": 0.5034026503562927,
+      "learning_rate": 1.8314560692059835e-06,
+      "loss": 0.0542,
+      "step": 158
+    },
+    {
+      "epoch": 2.777292576419214,
+      "grad_norm": 0.45781925320625305,
+      "learning_rate": 1.5805019736097104e-06,
+      "loss": 0.0488,
+      "step": 159
+    },
+    {
+      "epoch": 2.7947598253275108,
+      "grad_norm": 0.39495936036109924,
+      "learning_rate": 1.3477564710088098e-06,
+      "loss": 0.0281,
+      "step": 160
+    },
+    {
+      "epoch": 2.8122270742358078,
+      "grad_norm": 0.6215051412582397,
+      "learning_rate": 1.1333070874682216e-06,
+      "loss": 0.0502,
+      "step": 161
+    },
+    {
+      "epoch": 2.8296943231441047,
+      "grad_norm": 0.27189934253692627,
+      "learning_rate": 9.372344686307655e-07,
+      "loss": 0.0157,
+      "step": 162
+    },
+    {
+      "epoch": 2.8471615720524017,
+      "grad_norm": 0.4099377691745758,
+      "learning_rate": 7.596123493895991e-07,
+      "loss": 0.0285,
+      "step": 163
+    },
+    {
+      "epoch": 2.8646288209606987,
+      "grad_norm": 0.4258248209953308,
+      "learning_rate": 6.005075261595494e-07,
+      "loss": 0.0281,
+      "step": 164
+    },
+    {
+      "epoch": 2.8820960698689957,
+      "grad_norm": 0.3853057026863098,
+      "learning_rate": 4.5997983175773417e-07,
+      "loss": 0.0248,
+      "step": 165
+    },
+    {
+      "epoch": 2.8995633187772927,
+      "grad_norm": 0.503648042678833,
+      "learning_rate": 3.380821129028489e-07,
+      "loss": 0.0272,
+      "step": 166
+    },
+    {
+      "epoch": 2.9170305676855897,
+      "grad_norm": 4.854583740234375,
+      "learning_rate": 2.3486021034170857e-07,
+      "loss": 0.0287,
+      "step": 167
+    },
+    {
+      "epoch": 2.934497816593886,
+      "grad_norm": 0.6928201913833618,
+      "learning_rate": 1.503529416103988e-07,
+      "loss": 0.0327,
+      "step": 168
+    },
+    {
+      "epoch": 2.9519650655021836,
+      "grad_norm": 0.25627920031547546,
+      "learning_rate": 8.459208643659122e-08,
+      "loss": 0.011,
+      "step": 169
+    },
+    {
+      "epoch": 2.96943231441048,
+      "grad_norm": 0.8357424736022949,
+      "learning_rate": 3.760237478849793e-08,
+      "loss": 0.0533,
+      "step": 170
+    },
+    {
+      "epoch": 2.986899563318777,
+      "grad_norm": 2.073831081390381,
+      "learning_rate": 9.401477574932926e-09,
+      "loss": 0.0415,
+      "step": 171
+    },
+    {
+      "epoch": 3.004366812227074,
+      "grad_norm": 0.7584478259086609,
+      "learning_rate": 0.0,
+      "loss": 0.0659,
+      "step": 172
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 4.729365147942912e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null