Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +48 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:770c5ef10e164b978041217efa00995725baee1a96e1bcab08e7c0f08a1a1142
 size 671149168

 version https://git-lfs.github.com/spec/v1
+oid sha256:75f59464bfc44a795ce67c13121418dcb7761ee089cb4bd4498344162bfbd975
 size 671149168

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:15b57d57833f094609f80c37c5bd1f6d794f57eed1b2568d8bd0fe6960af82d5
 size 341314196

 version https://git-lfs.github.com/spec/v1
+oid sha256:5c0b8266476e98e91064e6e8cb6cb63d19610892851ea71d739ed34351148dfa
 size 341314196

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e6cb22d889af96bc5968098482fe54159da994b406807fec9cba907cd974afc0
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:987bfc5266f94f9eaf4eb2e472c93f1ab68943e41dbe5bc6bc4a25adabfaef8c
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:59836f5c73dcbc1cd9e839a79f50d96177860a3d03c4212fba8b4249bf77b097
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:5bd3e6d855545c1432814c389e729ea8251b6467b50d9c34c2b5ab61d032f043
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.6451599597930908,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.45112781954887216,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -144,6 +144,49 @@
       "eval_samples_per_second": 16.363,
       "eval_steps_per_second": 4.091,
       "step": 150
     }
   ],
   "logging_steps": 10,
@@ -172,7 +215,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.82846296014848e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.3894352912902832,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.6015037593984962,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 16.363,
       "eval_steps_per_second": 4.091,
       "step": 150
+    },
+    {
+      "epoch": 0.48120300751879697,
+      "grad_norm": 6.32410192489624,
+      "learning_rate": 0.00014634335741888678,
+      "loss": 1.5461,
+      "step": 160
+    },
+    {
+      "epoch": 0.5112781954887218,
+      "grad_norm": 3.9985594749450684,
+      "learning_rate": 0.00013476898507990882,
+      "loss": 1.1999,
+      "step": 170
+    },
+    {
+      "epoch": 0.5413533834586466,
+      "grad_norm": 5.48227071762085,
+      "learning_rate": 0.00012287737989477975,
+      "loss": 1.088,
+      "step": 180
+    },
+    {
+      "epoch": 0.5714285714285714,
+      "grad_norm": 6.1915602684021,
+      "learning_rate": 0.0001108149352020996,
+      "loss": 1.3135,
+      "step": 190
+    },
+    {
+      "epoch": 0.6015037593984962,
+      "grad_norm": 8.36176586151123,
+      "learning_rate": 9.873014748512275e-05,
+      "loss": 1.4151,
+      "step": 200
+    },
+    {
+      "epoch": 0.6015037593984962,
+      "eval_loss": 1.3894352912902832,
+      "eval_runtime": 8.5422,
+      "eval_samples_per_second": 16.389,
+      "eval_steps_per_second": 4.097,
+      "step": 200
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 3.77128394686464e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null