Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +48 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:09e18b61ae27ec68b9b88d44e61be3008df85974a508e09b2d24bb905c81ded2
 size 1163996488

 version https://git-lfs.github.com/spec/v1
+oid sha256:ed0112a1fa1878bd35573c0fa0ddb8d84e6fd9901e238a34db7659a8624eff09
 size 1163996488

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3b130348ae3941f2079204df2d240458fcf468f254d8ee8d8b3fc84f5c89f90a
 size 325339796

 version https://git-lfs.github.com/spec/v1
+oid sha256:adf820a480dedb677a37abe0a7ec050b11be6d025757b8e7d13324c3e22f5c99
 size 325339796

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:75e72a99565ae8bb95213b93c1292dc017d22b82a9c2b29ae6424067697a68ab
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:4b57655dfbf233206ef3fe7fe48418b92685c119aada804482a248b58c86d31f
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d4ffe1cc6011db9bcd26d2a5ee9a2f60bf90f43b3c6aed165ee32997fe344b31
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:c41568ec8d15c28a2145796b65c5977bed142a18699c02f8db4b41d7a5440c4c
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 2.153864622116089,
   "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.036210018105009054,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -144,6 +144,49 @@
       "eval_samples_per_second": 17.302,
       "eval_steps_per_second": 4.333,
       "step": 150
     }
   ],
   "logging_steps": 10,
@@ -158,7 +201,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 2
       }
     },
     "TrainerControl": {
@@ -167,12 +210,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.49468108668928e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 2.153864622116089,
   "best_model_checkpoint": "miner_id_24/checkpoint-50",
+  "epoch": 0.04828002414001207,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 17.302,
       "eval_steps_per_second": 4.333,
       "step": 150
+    },
+    {
+      "epoch": 0.03862401931200966,
+      "grad_norm": 4.070739269256592,
+      "learning_rate": 0.00018740803823691298,
+      "loss": 2.1066,
+      "step": 160
+    },
+    {
+      "epoch": 0.04103802051901026,
+      "grad_norm": 4.031443119049072,
+      "learning_rate": 0.00018193523609311556,
+      "loss": 2.1219,
+      "step": 170
+    },
+    {
+      "epoch": 0.04345202172601086,
+      "grad_norm": 5.493770599365234,
+      "learning_rate": 0.00017610710081049675,
+      "loss": 1.9506,
+      "step": 180
+    },
+    {
+      "epoch": 0.045866022933011466,
+      "grad_norm": 4.385707378387451,
+      "learning_rate": 0.00016995202647831142,
+      "loss": 2.0755,
+      "step": 190
+    },
+    {
+      "epoch": 0.04828002414001207,
+      "grad_norm": 8.468510627746582,
+      "learning_rate": 0.00016350000000000002,
+      "loss": 2.2808,
+      "step": 200
+    },
+    {
+      "epoch": 0.04828002414001207,
+      "eval_loss": 2.17218017578125,
+      "eval_runtime": 100.7675,
+      "eval_samples_per_second": 17.317,
+      "eval_steps_per_second": 4.337,
+      "step": 200
     }
   ],
   "logging_steps": 10,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 3
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 3.32624144891904e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null