Training in progress, step 50, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +187 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1ae8ace300e12d30f6390a2cc072fdb9f1e0a53fcc779ce5238ef148ff276a31
 size 319876032

 version https://git-lfs.github.com/spec/v1
+oid sha256:2c7440a946adeccaa1f9a32e13d99cc424d2332aa23836e46a7c83ce0cd5f1f1
 size 319876032

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1d1d0fa5ca01c2727c53be24cd18ac445a32f10ad07f47c05a7ad42396defcf1
 size 640009682

 version https://git-lfs.github.com/spec/v1
+oid sha256:fa4f056aa3b92eb92f523c56049d5267a5740650caefd455091507520565fba7
 size 640009682

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:578acde78f6a447fc0517aac23d36ee69192b3fc3b75bdf58c30ec072bad5ed1
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:c0ab97c302f39989fc6757542b79e173aa6fa52c9a999a25cf9e5df7887bc07a
 size 15024

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:235a0975b4079872ea2050c463fa1daf74c9576c2b61f34bbefa1ec24e4b7039
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:dc20fc93eb5807d5c8e1e305f27f8fcc425c469a673f2b881a2757fdda0fc540
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9f19bff52b62c696a56800de8e38cf1898306741a00c6a31a09bfef0ba022a50
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:2adee19f518f2ca0ee66e38799b19a260ce8216ca8128b9b2d2811fc62080a0e
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f7954b71caea69a964dbdb05f04f665a35c2ca6347f24d6b9956fb7d657ecc84
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:f8fb9fa190f3ef9ee464f7f1ab0ce9585c4244e207f577f243ae2b4753311a3d
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d271cdb95f63cd655315f063ca2e25c78dc5ae4275523c5d4f80f367586b3351
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:5607f6de446164d9d9adb8b91c44cec55b14aa391e24ba5637c08b834eedda2a
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.03438346155499205,
   "eval_steps": 25,
-  "global_step": 25,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -198,6 +198,189 @@
       "eval_samples_per_second": 17.109,
       "eval_steps_per_second": 4.448,
       "step": 25
     }
   ],
   "logging_steps": 1,
@@ -212,12 +395,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.629552387915776e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.0687669231099841,
   "eval_steps": 25,
+  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 17.109,
       "eval_steps_per_second": 4.448,
       "step": 25
+    },
+    {
+      "epoch": 0.03575880001719173,
+      "grad_norm": 36.2202262878418,
+      "learning_rate": 5.500000000000001e-05,
+      "loss": 142.4944,
+      "step": 26
+    },
+    {
+      "epoch": 0.03713413847939141,
+      "grad_norm": 93.85054779052734,
+      "learning_rate": 5.205685918464356e-05,
+      "loss": 144.9992,
+      "step": 27
+    },
+    {
+      "epoch": 0.0385094769415911,
+      "grad_norm": 148.26992797851562,
+      "learning_rate": 4.912632135009769e-05,
+      "loss": 149.2477,
+      "step": 28
+    },
+    {
+      "epoch": 0.039884815403790776,
+      "grad_norm": 100.91616821289062,
+      "learning_rate": 4.6220935509274235e-05,
+      "loss": 148.9117,
+      "step": 29
+    },
+    {
+      "epoch": 0.04126015386599046,
+      "grad_norm": 26.545833587646484,
+      "learning_rate": 4.3353142970386564e-05,
+      "loss": 148.5461,
+      "step": 30
+    },
+    {
+      "epoch": 0.04263549232819014,
+      "grad_norm": 77.59082794189453,
+      "learning_rate": 4.053522406135775e-05,
+      "loss": 151.5493,
+      "step": 31
+    },
+    {
+      "epoch": 0.044010830790389824,
+      "grad_norm": 120.279541015625,
+      "learning_rate": 3.777924554357096e-05,
+      "loss": 149.7922,
+      "step": 32
+    },
+    {
+      "epoch": 0.0453861692525895,
+      "grad_norm": 99.76937103271484,
+      "learning_rate": 3.509700894014496e-05,
+      "loss": 154.2675,
+      "step": 33
+    },
+    {
+      "epoch": 0.04676150771478919,
+      "grad_norm": 52.80602264404297,
+      "learning_rate": 3.250000000000001e-05,
+      "loss": 154.6349,
+      "step": 34
+    },
+    {
+      "epoch": 0.048136846176988865,
+      "grad_norm": 15.08095645904541,
+      "learning_rate": 2.9999339514117912e-05,
+      "loss": 155.4922,
+      "step": 35
+    },
+    {
+      "epoch": 0.04951218463918855,
+      "grad_norm": 63.00039291381836,
+      "learning_rate": 2.760573569460757e-05,
+      "loss": 156.5996,
+      "step": 36
+    },
+    {
+      "epoch": 0.050887523101388235,
+      "grad_norm": 71.60152435302734,
+      "learning_rate": 2.53294383204969e-05,
+      "loss": 157.0395,
+      "step": 37
+    },
+    {
+      "epoch": 0.05226286156358791,
+      "grad_norm": 13.717570304870605,
+      "learning_rate": 2.3180194846605367e-05,
+      "loss": 153.0983,
+      "step": 38
+    },
+    {
+      "epoch": 0.0536382000257876,
+      "grad_norm": 52.51483154296875,
+      "learning_rate": 2.1167208663446025e-05,
+      "loss": 143.1787,
+      "step": 39
+    },
+    {
+      "epoch": 0.055013538487987276,
+      "grad_norm": 65.76811218261719,
+      "learning_rate": 1.9299099686894423e-05,
+      "loss": 146.494,
+      "step": 40
+    },
+    {
+      "epoch": 0.05638887695018696,
+      "grad_norm": 48.262054443359375,
+      "learning_rate": 1.758386744638546e-05,
+      "loss": 146.9782,
+      "step": 41
+    },
+    {
+      "epoch": 0.05776421541238664,
+      "grad_norm": 10.546296119689941,
+      "learning_rate": 1.602885682970026e-05,
+      "loss": 148.3874,
+      "step": 42
+    },
+    {
+      "epoch": 0.059139553874586324,
+      "grad_norm": 28.65929412841797,
+      "learning_rate": 1.464072663102903e-05,
+      "loss": 148.8202,
+      "step": 43
+    },
+    {
+      "epoch": 0.060514892336786,
+      "grad_norm": 40.519371032714844,
+      "learning_rate": 1.3425421036992098e-05,
+      "loss": 149.3516,
+      "step": 44
+    },
+    {
+      "epoch": 0.06189023079898569,
+      "grad_norm": 42.98031997680664,
+      "learning_rate": 1.2388144172720251e-05,
+      "loss": 149.592,
+      "step": 45
+    },
+    {
+      "epoch": 0.06326556926118537,
+      "grad_norm": 33.01335144042969,
+      "learning_rate": 1.1533337816991932e-05,
+      "loss": 150.4775,
+      "step": 46
+    },
+    {
+      "epoch": 0.06464090772338506,
+      "grad_norm": 12.483397483825684,
+      "learning_rate": 1.0864662381854632e-05,
+      "loss": 154.7161,
+      "step": 47
+    },
+    {
+      "epoch": 0.06601624618558473,
+      "grad_norm": 10.724721908569336,
+      "learning_rate": 1.0384981238178534e-05,
+      "loss": 155.87,
+      "step": 48
+    },
+    {
+      "epoch": 0.06739158464778441,
+      "grad_norm": 38.91442108154297,
+      "learning_rate": 1.0096348454262845e-05,
+      "loss": 159.1637,
+      "step": 49
+    },
+    {
+      "epoch": 0.0687669231099841,
+      "grad_norm": 18.77695083618164,
+      "learning_rate": 1e-05,
+      "loss": 163.0386,
+      "step": 50
+    },
+    {
+      "epoch": 0.0687669231099841,
+      "eval_loss": 4.621382236480713,
+      "eval_runtime": 2.9364,
+      "eval_samples_per_second": 17.027,
+      "eval_steps_per_second": 4.427,
+      "step": 50
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 5.259104775831552e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null