Training in progress, step 99, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:67a0881e45d8104a5bf4f9ec3058a71b86346de1a5d2783a96d2102976c0bce6
 size 167832240

 version https://git-lfs.github.com/spec/v1
+oid sha256:2890b22b8d16b839d6a4f0baabe69535d30efc754d82431c76d1e5391daa8f13
 size 167832240

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:34ef536bfbd191c1be5e59ff5b7fd6eb51628cfa7fe1277387bb487a1b44ecff
 size 85723284

 version https://git-lfs.github.com/spec/v1
+oid sha256:e90280d950db50e3fbd0d35bc1ccd287212b1696debbb91484f78d9c90d3ceb8
 size 85723284

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2d60c52c89ae1b8dea8b0e631db813fb01835161912d95e4b3c74f23bd6bb801
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:f669c4ad6b2bd61dcd9030272aae3f63c838ba4d3048aebbb119d24f228c7544
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0569f9f06b6a114946825bdc699040cc55958cfef46aba8eb5fb625896d49025
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:5b64833e2721bfb57dae2a544bde34ace02492c2f80bfbff48034c9946dc6b4a
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.017922039129785433,
   "eval_steps": 9,
-  "global_step": 90,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -305,6 +305,35 @@
       "eval_samples_per_second": 13.48,
       "eval_steps_per_second": 1.686,
       "step": 90
     }
   ],
   "logging_steps": 3,
@@ -324,7 +353,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.3330164053390131e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.019714243042763976,
   "eval_steps": 9,
+  "global_step": 99,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 13.48,
       "eval_steps_per_second": 1.686,
       "step": 90
+    },
+    {
+      "epoch": 0.018519440434111613,
+      "grad_norm": 4.4008331298828125,
+      "learning_rate": 7.426068431000882e-07,
+      "loss": 6.3269,
+      "step": 93
+    },
+    {
+      "epoch": 0.019116841738437796,
+      "grad_norm": 3.7891712188720703,
+      "learning_rate": 2.4329828146074095e-07,
+      "loss": 6.312,
+      "step": 96
+    },
+    {
+      "epoch": 0.019714243042763976,
+      "grad_norm": 4.460823059082031,
+      "learning_rate": 1.522932452260595e-08,
+      "loss": 6.0766,
+      "step": 99
+    },
+    {
+      "epoch": 0.019714243042763976,
+      "eval_loss": 1.5946401357650757,
+      "eval_runtime": 626.8725,
+      "eval_samples_per_second": 13.492,
+      "eval_steps_per_second": 1.688,
+      "step": 99
     }
   ],
   "logging_steps": 3,
       "attributes": {}
     }
   },
+  "total_flos": 1.4666697652410778e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null