Training in progress, step 72, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +66 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f652211dac610e80872598da1df98e590795842f441ad646c668780471aa84c1
 size 319876032

 version https://git-lfs.github.com/spec/v1
+oid sha256:f9e3f00b92269c5bdae63492c182a839f8e5e904876efb0a0a617caf4cd2237d
 size 319876032

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b61ce972ea96e2306fbf26a1d40d3335709da2c916fff3ac1116fa3c2f0f5737
 size 640009682

 version https://git-lfs.github.com/spec/v1
+oid sha256:5b617c63d1a2a5b2653a62ea756af8d2d2118bd4e37e6ba99d0c7b3fab15a255
 size 640009682

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c1098628841f4bcb07d5546445f4de27d56441cc7dc92917c1ddcac8b9e18a99
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:a39a051c4c7e5dc853790ab7eaf069e615c99cbb9b4d6bc98eaf5292d23ad463
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d85eac2daddfe8e3f78a5d6ef1e9ba13c04651694635a9ed76369a20726389db
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:8d43bd90ad476e419738deb9472ad85fd5991005a147e1627aa99867bdfc5655
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.3557498318762609,
   "eval_steps": 50,
-  "global_step": 63,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -464,6 +464,69 @@
       "learning_rate": 6.618469519066217e-05,
       "loss": 25.726,
       "step": 63
     }
   ],
   "logging_steps": 1,
@@ -483,7 +546,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.3132684787266355e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.5494283792871553,
   "eval_steps": 50,
+  "global_step": 72,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 6.618469519066217e-05,
       "loss": 25.726,
       "step": 63
+    },
+    {
+      "epoch": 1.3772696704774714,
+      "grad_norm": 17.17249870300293,
+      "learning_rate": 6.326741512198266e-05,
+      "loss": 26.7663,
+      "step": 64
+    },
+    {
+      "epoch": 1.398789509078682,
+      "grad_norm": 21.07649803161621,
+      "learning_rate": 6.036822584879038e-05,
+      "loss": 17.8787,
+      "step": 65
+    },
+    {
+      "epoch": 1.4203093476798925,
+      "grad_norm": 18.028573989868164,
+      "learning_rate": 5.7491597710807114e-05,
+      "loss": 16.4428,
+      "step": 66
+    },
+    {
+      "epoch": 1.4418291862811028,
+      "grad_norm": 20.420089721679688,
+      "learning_rate": 5.464196626011943e-05,
+      "loss": 21.7565,
+      "step": 67
+    },
+    {
+      "epoch": 1.4633490248823133,
+      "grad_norm": 16.814950942993164,
+      "learning_rate": 5.182372542187895e-05,
+      "loss": 21.8306,
+      "step": 68
+    },
+    {
+      "epoch": 1.4848688634835239,
+      "grad_norm": 18.8593692779541,
+      "learning_rate": 4.904122071918801e-05,
+      "loss": 21.4311,
+      "step": 69
+    },
+    {
+      "epoch": 1.5063887020847344,
+      "grad_norm": 21.228858947753906,
+      "learning_rate": 4.6298742572618266e-05,
+      "loss": 24.4591,
+      "step": 70
+    },
+    {
+      "epoch": 1.527908540685945,
+      "grad_norm": 22.029273986816406,
+      "learning_rate": 4.360051968469291e-05,
+      "loss": 27.6417,
+      "step": 71
+    },
+    {
+      "epoch": 1.5494283792871553,
+      "grad_norm": 22.491724014282227,
+      "learning_rate": 4.095071251953399e-05,
+      "loss": 28.1538,
+      "step": 72
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 3.786592547116155e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null