Training in progress, step 150, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +48 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1fa695feddce03c21355299741548ef01414a73e84113e5eeaf6585af14973b4
 size 100697728

 version https://git-lfs.github.com/spec/v1
+oid sha256:b7f918c26d69f33331d37b75b868e141ca18d1cdd93f49fde57032a9c8ee4c15
 size 100697728

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:60ebdd50680977b4a41233c3a9447e96277631cd19f43cc43d573d351750c516
 size 51418196

 version https://git-lfs.github.com/spec/v1
+oid sha256:44afee9e69378b5db64658d6fbab28fed6f648f9009b03a36a1c8afab726f091
 size 51418196

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:518e3d3c2c08ef3f14d6f9dca0c125a6a693fcc7f4de3079ce531745772767d3
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:92841b692d803cc88356b7ad5ae13429ba7f74f4dcb983a5058e2275b1e31d52
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0051c53bcb92b7c913136d782f625b409707ede35cdcc9bbc83a63d788098e04
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:596785cc644037bdf9b1374ba5340995054de5f4bde563878d8bc4f03a7aa10e
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.8446639180183411,
-  "best_model_checkpoint": "miner_id_24/checkpoint-100",
-  "epoch": 0.11768167107972934,
   "eval_steps": 50,
-  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -101,6 +101,49 @@
       "eval_samples_per_second": 22.121,
       "eval_steps_per_second": 5.53,
       "step": 100
     }
   ],
   "logging_steps": 10,
@@ -129,7 +172,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 6.723634005540864e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.8176446557044983,
+  "best_model_checkpoint": "miner_id_24/checkpoint-150",
+  "epoch": 0.176522506619594,
   "eval_steps": 50,
+  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 22.121,
       "eval_steps_per_second": 5.53,
       "step": 100
+    },
+    {
+      "epoch": 0.12944983818770225,
+      "grad_norm": 0.867832362651825,
+      "learning_rate": 0.0001861554081393806,
+      "loss": 6.0238,
+      "step": 110
+    },
+    {
+      "epoch": 0.1412180052956752,
+      "grad_norm": 0.8156207203865051,
+      "learning_rate": 0.0001833313919082515,
+      "loss": 4.7902,
+      "step": 120
+    },
+    {
+      "epoch": 0.15298617240364812,
+      "grad_norm": 0.802191436290741,
+      "learning_rate": 0.00018027116379309638,
+      "loss": 2.8416,
+      "step": 130
+    },
+    {
+      "epoch": 0.16475433951162108,
+      "grad_norm": 1.274532675743103,
+      "learning_rate": 0.00017698339834299061,
+      "loss": 1.3511,
+      "step": 140
+    },
+    {
+      "epoch": 0.176522506619594,
+      "grad_norm": 1.8214877843856812,
+      "learning_rate": 0.00017347741508630672,
+      "loss": 1.4454,
+      "step": 150
+    },
+    {
+      "epoch": 0.176522506619594,
+      "eval_loss": 0.8176446557044983,
+      "eval_runtime": 64.9157,
+      "eval_samples_per_second": 22.059,
+      "eval_steps_per_second": 5.515,
+      "step": 150
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 1.010387192339497e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null