Training in progress, step 136, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +53 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:246bf8076c518304bac7d2e4a017f4c3112377a1aa2dc942314ee72e66aad1c1
 size 34793120

 version https://git-lfs.github.com/spec/v1
+oid sha256:e476837c264c187d86e0fad575c9ce12c9e61fb9e5ad9867160b79e2ffd22dd1
 size 34793120

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d5b03ad2888d0d966800d8a482ae7057a420d358a09e6227fbf94fc5b3dc03c3
 size 18132116

 version https://git-lfs.github.com/spec/v1
+oid sha256:fbcd527c4a25607f123a9175463e52fecf4cbf19113043db3c3d264cf07577b9
 size 18132116

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:51b20048a8c1ca3b5378d455de4604d2f5e3f3bf98e06ac306e6b0abcffe9c8c
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:691c06ffc8e47879d3e6ee441edbd180275d54de37278fc931ad6385e3470814
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c91934808157be4b4581cbac88c1dcb8ab73e7092f7b8aa05c4fbac8ab77615f
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:ffd0ef2a827b219b75915f5a88a30c53ebe86f536eec93a6252baab983329eb7
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.6565517241379311,
   "eval_steps": 17,
-  "global_step": 119,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -344,6 +344,56 @@
       "eval_samples_per_second": 68.518,
       "eval_steps_per_second": 8.761,
       "step": 119
     }
   ],
   "logging_steps": 3,
@@ -363,7 +413,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 7564478581309440.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.7503448275862069,
   "eval_steps": 17,
+  "global_step": 136,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 68.518,
       "eval_steps_per_second": 8.761,
       "step": 119
+    },
+    {
+      "epoch": 0.6620689655172414,
+      "grad_norm": 0.5721688270568848,
+      "learning_rate": 3.772572564296005e-05,
+      "loss": 2.0894,
+      "step": 120
+    },
+    {
+      "epoch": 0.6786206896551724,
+      "grad_norm": 0.5939153432846069,
+      "learning_rate": 3.533749813077677e-05,
+      "loss": 2.0367,
+      "step": 123
+    },
+    {
+      "epoch": 0.6951724137931035,
+      "grad_norm": 0.5539590716362,
+      "learning_rate": 3.298534127791785e-05,
+      "loss": 2.1166,
+      "step": 126
+    },
+    {
+      "epoch": 0.7117241379310345,
+      "grad_norm": 0.5420939326286316,
+      "learning_rate": 3.0675041535377405e-05,
+      "loss": 1.9757,
+      "step": 129
+    },
+    {
+      "epoch": 0.7282758620689656,
+      "grad_norm": 0.5419110059738159,
+      "learning_rate": 2.8412282383075363e-05,
+      "loss": 1.9603,
+      "step": 132
+    },
+    {
+      "epoch": 0.7448275862068966,
+      "grad_norm": 0.5101600289344788,
+      "learning_rate": 2.6202630348146324e-05,
+      "loss": 1.9635,
+      "step": 135
+    },
+    {
+      "epoch": 0.7503448275862069,
+      "eval_loss": 2.1131086349487305,
+      "eval_runtime": 4.4579,
+      "eval_samples_per_second": 68.417,
+      "eval_steps_per_second": 8.748,
+      "step": 136
     }
   ],
   "logging_steps": 3,
       "attributes": {}
     }
   },
+  "total_flos": 8645118378639360.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null