Training in progress, step 65, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +94 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f1a1f7edba342dcae21bc1bcc931a8d18e8fcd5b7452808965c7181e3f8fb2f1
 size 500770656

 version https://git-lfs.github.com/spec/v1
+oid sha256:39e874375ad72eebe8ef41dcd2cc77942456f640cbe3bb03a574424588cd8a3a
 size 500770656

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c31134d8c644da3fe4e7fe5335cea6c99d373c2c3f4bfc42b0a9f8a83ee3881e
 size 1001863522

 version https://git-lfs.github.com/spec/v1
+oid sha256:e0e4bf9d77e80c4122da30cc787f051493de20046abbcd72f3dbf4f61742208d
 size 1001863522

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8e4f84759c0d0345e3d0ec8c8f48cc65de394faca774211b25c2f484b7ca4dbc
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:f8fe965a183dbb4db01355f9eea55b73f3c2653c1ab23e6c3218c43988c19f31
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a0083f46b3c06809c7ba2e6389d4ff484bc53254e300d739b3ff127153f94c8d
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:3d73a9cffc5823f9b954d9d65f5e7cf29f10d6f2c164580cdcb5194694db8d74
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.7282275711159737,
   "eval_steps": 50,
-  "global_step": 52,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -387,6 +387,97 @@
       "learning_rate": 0.00012866992205580908,
       "loss": 2.6251,
       "step": 52
     }
   ],
   "logging_steps": 1,
@@ -406,7 +497,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.3069890977792e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.9102844638949672,
   "eval_steps": 50,
+  "global_step": 65,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.00012866992205580908,
       "loss": 2.6251,
       "step": 52
+    },
+    {
+      "epoch": 0.7422319474835887,
+      "grad_norm": 8.420281410217285,
+      "learning_rate": 0.00012738835239367027,
+      "loss": 1.8761,
+      "step": 53
+    },
+    {
+      "epoch": 0.7562363238512035,
+      "grad_norm": 5.919778347015381,
+      "learning_rate": 0.00012607618940900977,
+      "loss": 1.8202,
+      "step": 54
+    },
+    {
+      "epoch": 0.7702407002188184,
+      "grad_norm": 7.590326309204102,
+      "learning_rate": 0.00012473419936805962,
+      "loss": 2.0368,
+      "step": 55
+    },
+    {
+      "epoch": 0.7842450765864333,
+      "grad_norm": 5.551327228546143,
+      "learning_rate": 0.0001233631659552128,
+      "loss": 1.5056,
+      "step": 56
+    },
+    {
+      "epoch": 0.7982494529540481,
+      "grad_norm": 4.756547451019287,
+      "learning_rate": 0.00012196388981537352,
+      "loss": 1.9188,
+      "step": 57
+    },
+    {
+      "epoch": 0.812253829321663,
+      "grad_norm": 5.172512054443359,
+      "learning_rate": 0.00012053718808640333,
+      "loss": 1.4091,
+      "step": 58
+    },
+    {
+      "epoch": 0.8262582056892779,
+      "grad_norm": 8.058588981628418,
+      "learning_rate": 0.00011908389392193547,
+      "loss": 2.8933,
+      "step": 59
+    },
+    {
+      "epoch": 0.8402625820568927,
+      "grad_norm": 5.907708168029785,
+      "learning_rate": 0.00011760485600483667,
+      "loss": 1.4843,
+      "step": 60
+    },
+    {
+      "epoch": 0.8542669584245076,
+      "grad_norm": 6.655636310577393,
+      "learning_rate": 0.00011610093805160025,
+      "loss": 2.0464,
+      "step": 61
+    },
+    {
+      "epoch": 0.8682713347921225,
+      "grad_norm": 7.76405668258667,
+      "learning_rate": 0.00011457301830795994,
+      "loss": 2.042,
+      "step": 62
+    },
+    {
+      "epoch": 0.8822757111597375,
+      "grad_norm": 8.136941909790039,
+      "learning_rate": 0.00011302198903601928,
+      "loss": 1.8616,
+      "step": 63
+    },
+    {
+      "epoch": 0.8962800875273523,
+      "grad_norm": 4.971902370452881,
+      "learning_rate": 0.00011144875599319543,
+      "loss": 1.8062,
+      "step": 64
+    },
+    {
+      "epoch": 0.9102844638949672,
+      "grad_norm": 4.203851222991943,
+      "learning_rate": 0.00010985423790328263,
+      "loss": 1.1645,
+      "step": 65
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 6.633736372224e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null