Training in progress, step 50, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +81 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a8e48345b6f49cde9a04076d158f14096ad439b59ab1ce9fe81aac7b73f1083e
 size 80013120

 version https://git-lfs.github.com/spec/v1
+oid sha256:d5fd24818ed1bd35dcd9daac2476d80b2c4cf9879bf2f505c1f36c6f7e260c4e
 size 80013120

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dd2f27261211fb576f520499eefd608b6b6315a9fd1fe80853a5d98e349308f8
 size 41119636

 version https://git-lfs.github.com/spec/v1
+oid sha256:92f1dee8546faa93128c54691b6817ff51c1d991d4a87a8d887301777719d0ff
 size 41119636

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ed392f1e42c4e7f65661970b1ec941968361613202f48a3141f9aacaa4003064
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:b0ba5d87c7444a833ee26f2d1c68d635206254e2a48012a055a0b86cb260dd27
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:28418a35cb7e15ebbce37743b08fd366c25ee320167b307a3e449a74781d02de
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:439e51f57871ee9c2bc8b35458a0c03f9b948af7a0d15ffe5e1cf9789955c6c8
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.007752708478282477,
   "eval_steps": 13,
-  "global_step": 39,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -312,6 +312,83 @@
       "eval_samples_per_second": 27.902,
       "eval_steps_per_second": 13.957,
       "step": 39
     }
   ],
   "logging_steps": 1,
@@ -326,12 +403,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 6392777558458368.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.009939369843951893,
   "eval_steps": 13,
+  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 27.902,
       "eval_steps_per_second": 13.957,
       "step": 39
+    },
+    {
+      "epoch": 0.007951495875161516,
+      "grad_norm": 3.137924909591675,
+      "learning_rate": 7.3223304703363135e-06,
+      "loss": 12.3101,
+      "step": 40
+    },
+    {
+      "epoch": 0.008150283272040553,
+      "grad_norm": 2.641319751739502,
+      "learning_rate": 5.989850859999227e-06,
+      "loss": 11.4512,
+      "step": 41
+    },
+    {
+      "epoch": 0.00834907066891959,
+      "grad_norm": 3.6729891300201416,
+      "learning_rate": 4.7745751406263165e-06,
+      "loss": 11.6769,
+      "step": 42
+    },
+    {
+      "epoch": 0.008547858065798629,
+      "grad_norm": 3.528848648071289,
+      "learning_rate": 3.6839958911476957e-06,
+      "loss": 11.1653,
+      "step": 43
+    },
+    {
+      "epoch": 0.008746645462677666,
+      "grad_norm": 3.0228517055511475,
+      "learning_rate": 2.7248368952908053e-06,
+      "loss": 10.7299,
+      "step": 44
+    },
+    {
+      "epoch": 0.008945432859556703,
+      "grad_norm": 3.2013304233551025,
+      "learning_rate": 1.9030116872178316e-06,
+      "loss": 12.7148,
+      "step": 45
+    },
+    {
+      "epoch": 0.009144220256435742,
+      "grad_norm": 2.7772860527038574,
+      "learning_rate": 1.2235870926211619e-06,
+      "loss": 11.8285,
+      "step": 46
+    },
+    {
+      "epoch": 0.00934300765331478,
+      "grad_norm": 3.5433526039123535,
+      "learning_rate": 6.907519900580861e-07,
+      "loss": 11.6592,
+      "step": 47
+    },
+    {
+      "epoch": 0.009541795050193818,
+      "grad_norm": 3.839385509490967,
+      "learning_rate": 3.077914851215585e-07,
+      "loss": 11.4824,
+      "step": 48
+    },
+    {
+      "epoch": 0.009740582447072856,
+      "grad_norm": 2.9668209552764893,
+      "learning_rate": 7.706665667180091e-08,
+      "loss": 11.6165,
+      "step": 49
+    },
+    {
+      "epoch": 0.009939369843951893,
+      "grad_norm": 3.1155428886413574,
+      "learning_rate": 0.0,
+      "loss": 12.5005,
+      "step": 50
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 8184384007962624.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null