Training in progress, step 187, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +53 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:173d604cc61bfdc03a6439d4bd9bca0e6844c93775e18fab1195e363362ee3c3
 size 159967880

 version https://git-lfs.github.com/spec/v1
+oid sha256:5ab8bd92a5457e6a83088d6ee86aea30cc7d3d313ed238f6a9556ae9af5d9523
 size 159967880

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8797cf582d42f22551802ecf9adfcff97688f7c5c3eaf86242092dd821c8bf5c
 size 81730196

 version https://git-lfs.github.com/spec/v1
+oid sha256:b5327897190b510245142ef3102eb541973d4181966402c0b68263bae8e7c2d1
 size 81730196

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:450a0bca7a68bb7a683174c2bc1009464e30a352c58b6a2d436b2e9c90d821ae
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:b788255f43ed346a2f05cbdfc10299905ae58c13da185ea457f87ede82c55897
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fab1f30942a5e2bb9a9e5cc3477ef48cdcd39a7b78f8a45a46db0926bdbf2b4f
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:43427779b8e6e83ef9780afcdea7472d2419de40e6f85cfedb90b60f89a2ff0f
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.2972972972972974,
   "eval_steps": 17,
-  "global_step": 170,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -487,6 +487,56 @@
       "eval_samples_per_second": 7.249,
       "eval_steps_per_second": 0.928,
       "step": 170
     }
   ],
   "logging_steps": 3,
@@ -506,7 +556,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.471231801888604e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 2.527027027027027,
   "eval_steps": 17,
+  "global_step": 187,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 7.249,
       "eval_steps_per_second": 0.928,
       "step": 170
+    },
+    {
+      "epoch": 2.310810810810811,
+      "grad_norm": 0.5467652082443237,
+      "learning_rate": 5.6388590278194096e-06,
+      "loss": 0.8334,
+      "step": 171
+    },
+    {
+      "epoch": 2.3513513513513513,
+      "grad_norm": 0.534943699836731,
+      "learning_rate": 4.549673247541875e-06,
+      "loss": 0.8773,
+      "step": 174
+    },
+    {
+      "epoch": 2.391891891891892,
+      "grad_norm": 0.5133833885192871,
+      "learning_rate": 3.5722980755146517e-06,
+      "loss": 0.8081,
+      "step": 177
+    },
+    {
+      "epoch": 2.4324324324324325,
+      "grad_norm": 0.5191574096679688,
+      "learning_rate": 2.7091379149682685e-06,
+      "loss": 0.8595,
+      "step": 180
+    },
+    {
+      "epoch": 2.472972972972973,
+      "grad_norm": 0.5596672892570496,
+      "learning_rate": 1.962316193157593e-06,
+      "loss": 0.8525,
+      "step": 183
+    },
+    {
+      "epoch": 2.5135135135135136,
+      "grad_norm": 0.5567631125450134,
+      "learning_rate": 1.333670137599713e-06,
+      "loss": 0.8269,
+      "step": 186
+    },
+    {
+      "epoch": 2.527027027027027,
+      "eval_loss": 0.9086253643035889,
+      "eval_runtime": 17.2348,
+      "eval_samples_per_second": 7.253,
+      "eval_steps_per_second": 0.928,
+      "step": 187
     }
   ],
   "logging_steps": 3,
       "attributes": {}
     }
   },
+  "total_flos": 4.919508635693875e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null