Training in progress, step 114, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +102 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:82d67cc171e7cef6437374d3890cd52fa82c29c2f8dc74627b09f129bb934dcb
 size 671149168

 version https://git-lfs.github.com/spec/v1
+oid sha256:6d1d7f7a7fa25cc05666325cc38bf37fb2d4071bf5962f9a6829452a81194d16
 size 671149168

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:04c5cbf95495eff51819f747c1308edcb572b9f9ca9dfc663ea03d9c1d86c57e
 size 341314196

 version https://git-lfs.github.com/spec/v1
+oid sha256:345fbfe4ce9d0ba2b9385682a1e993acb3dceb99fae2cf0641e6538f6fbf117f
 size 341314196

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ddf2d95f55605664fc73443e4d56e2c5a5e0fe63fdd9ec66adb81e46fa7f6d80
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2065d66796103fae2f54ca58f9ec7a05aa56d2f1117a1081e676ca9be8ce846
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ca7e3cdff407f71f983ac7ffef25e6d551bbdff8be7e11f26e01dcebe54f37b0
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:c1d5246f77ee07700ec5a7b53a9e537af6a636eb3ad53e20a4a10e8c7238521f
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.8274506330490112,
   "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 2.6315789473684212,
   "eval_steps": 50,
-  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -731,6 +731,104 @@
       "eval_samples_per_second": 13.385,
       "eval_steps_per_second": 3.346,
       "step": 100
     }
   ],
   "logging_steps": 1,
@@ -754,12 +852,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.4247389962646323e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.8274506330490112,
   "best_model_checkpoint": "miner_id_24/checkpoint-50",
+  "epoch": 3.0,
   "eval_steps": 50,
+  "global_step": 114,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 13.385,
       "eval_steps_per_second": 3.346,
       "step": 100
+    },
+    {
+      "epoch": 2.6578947368421053,
+      "grad_norm": 7.54060697555542,
+      "learning_rate": 3.8060233744356633e-06,
+      "loss": 0.2268,
+      "step": 101
+    },
+    {
+      "epoch": 2.6842105263157894,
+      "grad_norm": 9.745018005371094,
+      "learning_rate": 3.249187865729264e-06,
+      "loss": 0.5083,
+      "step": 102
+    },
+    {
+      "epoch": 2.7105263157894735,
+      "grad_norm": 6.40946102142334,
+      "learning_rate": 2.7350092482679836e-06,
+      "loss": 0.202,
+      "step": 103
+    },
+    {
+      "epoch": 2.736842105263158,
+      "grad_norm": 7.4240217208862305,
+      "learning_rate": 2.2639566745727205e-06,
+      "loss": 0.9122,
+      "step": 104
+    },
+    {
+      "epoch": 2.763157894736842,
+      "grad_norm": 6.273193836212158,
+      "learning_rate": 1.8364599476241862e-06,
+      "loss": 0.4703,
+      "step": 105
+    },
+    {
+      "epoch": 2.7894736842105265,
+      "grad_norm": 7.9733967781066895,
+      "learning_rate": 1.4529091286973995e-06,
+      "loss": 0.4487,
+      "step": 106
+    },
+    {
+      "epoch": 2.8157894736842106,
+      "grad_norm": 7.2762579917907715,
+      "learning_rate": 1.1136541814576573e-06,
+      "loss": 0.3321,
+      "step": 107
+    },
+    {
+      "epoch": 2.8421052631578947,
+      "grad_norm": 6.559560298919678,
+      "learning_rate": 8.190046526428242e-07,
+      "loss": 0.3086,
+      "step": 108
+    },
+    {
+      "epoch": 2.8684210526315788,
+      "grad_norm": 7.2038397789001465,
+      "learning_rate": 5.692293896232936e-07,
+      "loss": 0.2334,
+      "step": 109
+    },
+    {
+      "epoch": 2.8947368421052633,
+      "grad_norm": 8.095062255859375,
+      "learning_rate": 3.6455629509730136e-07,
+      "loss": 0.3355,
+      "step": 110
+    },
+    {
+      "epoch": 2.9210526315789473,
+      "grad_norm": 8.475146293640137,
+      "learning_rate": 2.0517211914545254e-07,
+      "loss": 0.2103,
+      "step": 111
+    },
+    {
+      "epoch": 2.9473684210526314,
+      "grad_norm": 8.619668960571289,
+      "learning_rate": 9.12222888341252e-08,
+      "loss": 0.2427,
+      "step": 112
+    },
+    {
+      "epoch": 2.973684210526316,
+      "grad_norm": 5.392899036407471,
+      "learning_rate": 2.2810775523329773e-08,
+      "loss": 0.3473,
+      "step": 113
+    },
+    {
+      "epoch": 3.0,
+      "grad_norm": 12.812440872192383,
+      "learning_rate": 0.0,
+      "loss": 0.3174,
+      "step": 114
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.6219486313408102e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null