Training in progress, step 72, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:569475ef979edab3c418976d9c91404412b404952406423eb2e9ccff805dc5cb
 size 159967880

 version https://git-lfs.github.com/spec/v1
+oid sha256:c78c4039c0ef5d48c84b09fee69de715cdf087602671509298654b1e25b665ad
 size 159967880

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ce08756aa8570024caa604b64eed8a0ee274be2e8e1985fbb30c916f38666ade
 size 81730196

 version https://git-lfs.github.com/spec/v1
+oid sha256:64cfff51fa60393ab818a80b234d34156b8ff4bb4da412e6a5ef4af60f457bbf
 size 81730196

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:37a754b69282774388bd86935d90c82a6607dc2b521ab62662f3cd5555009319
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:8a765c29edce8e690169b13766c8687b0524f8bf018b715481afc8aa10e7fa5e
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:922634a168fad3088c2a461ec82359f2941891b1472f492b835996e27c3cba9d
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d899e046e5eb8fee9b81979db8db7b88d6dc92f30a13d049b906f3a3be1dfc0f
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.04428044280442804,
   "eval_steps": 9,
-  "global_step": 63,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -218,6 +218,35 @@
       "eval_samples_per_second": 7.112,
       "eval_steps_per_second": 0.89,
       "step": 63
     }
   ],
   "logging_steps": 3,
@@ -237,7 +266,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.6659571745488896e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.050606220347917764,
   "eval_steps": 9,
+  "global_step": 72,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 7.112,
       "eval_steps_per_second": 0.89,
       "step": 63
+    },
+    {
+      "epoch": 0.04638903531892462,
+      "grad_norm": 0.22084610164165497,
+      "learning_rate": 1.56348351646022e-05,
+      "loss": 0.7496,
+      "step": 66
+    },
+    {
+      "epoch": 0.048497627833421195,
+      "grad_norm": 0.18980462849140167,
+      "learning_rate": 1.3263210930352737e-05,
+      "loss": 0.7441,
+      "step": 69
+    },
+    {
+      "epoch": 0.050606220347917764,
+      "grad_norm": 0.18127837777137756,
+      "learning_rate": 1.1020177413231334e-05,
+      "loss": 0.6734,
+      "step": 72
+    },
+    {
+      "epoch": 0.050606220347917764,
+      "eval_loss": 0.7157524824142456,
+      "eval_runtime": 337.0656,
+      "eval_samples_per_second": 7.111,
+      "eval_steps_per_second": 0.89,
+      "step": 72
     }
   ],
   "logging_steps": 3,
       "attributes": {}
     }
   },
+  "total_flos": 1.9039510566273024e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null