Training in progress, step 119, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +122 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:75f8f94bb8b1b985d11e8f332c6c0710694f4b31be603b16301078e94b374122
 size 97728

 version https://git-lfs.github.com/spec/v1
+oid sha256:1b16313c79bd3f248bf4d438764be663bf73293a82f845bfc82bbdc1c96faa18
 size 97728

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:61ba349b29aac7c82f3a71d2d800915fc6c4987485e29e9dd6c73171fb5a3b0b
 size 212298

 version https://git-lfs.github.com/spec/v1
+oid sha256:8820d9b006cab186b61503299fdb9b87f1c8bf0451bfe4bbedc210c4bc63254a
 size 212298

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d57bfe85897bedf916779c7c1c13ce627cb70100032ab6a274d22be811e19abc
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:0126cf7d989a7263b97f1fe2ca3d6bc2827ac39dc2b4674586229158dba72ea3
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:70670c442607259270e13afbef3aac28e38a58ddad6998414f76ed43ab7f41d4
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d4a2e0d38e4aad78961b54bb99f0a18c11d847ea1d3d3bc12cf223a3f862cf9e
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.3455796717840127,
   "eval_steps": 50,
-  "global_step": 102,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -745,6 +745,125 @@
       "learning_rate": 8.54379825720049e-05,
       "loss": 10.346,
       "step": 102
     }
   ],
   "logging_steps": 1,
@@ -764,7 +883,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 43690245881856.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.40317628374801484,
   "eval_steps": 50,
+  "global_step": 119,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 8.54379825720049e-05,
       "loss": 10.346,
       "step": 102
+    },
+    {
+      "epoch": 0.3489677077818952,
+      "grad_norm": 0.02064266800880432,
+      "learning_rate": 8.414020075538605e-05,
+      "loss": 10.3461,
+      "step": 103
+    },
+    {
+      "epoch": 0.3523557437797777,
+      "grad_norm": 0.023022258654236794,
+      "learning_rate": 8.2839634745074e-05,
+      "loss": 10.3463,
+      "step": 104
+    },
+    {
+      "epoch": 0.35574377977766014,
+      "grad_norm": 0.017432276159524918,
+      "learning_rate": 8.153668070607437e-05,
+      "loss": 10.3465,
+      "step": 105
+    },
+    {
+      "epoch": 0.3591318157755426,
+      "grad_norm": 0.01911778748035431,
+      "learning_rate": 8.023173553080938e-05,
+      "loss": 10.3459,
+      "step": 106
+    },
+    {
+      "epoch": 0.3625198517734251,
+      "grad_norm": 0.01831880584359169,
+      "learning_rate": 7.89251967182208e-05,
+      "loss": 10.3455,
+      "step": 107
+    },
+    {
+      "epoch": 0.36590788777130756,
+      "grad_norm": 0.018690194934606552,
+      "learning_rate": 7.761746225268758e-05,
+      "loss": 10.3471,
+      "step": 108
+    },
+    {
+      "epoch": 0.36929592376919007,
+      "grad_norm": 0.021015219390392303,
+      "learning_rate": 7.630893048279627e-05,
+      "loss": 10.3453,
+      "step": 109
+    },
+    {
+      "epoch": 0.3726839597670725,
+      "grad_norm": 0.01983080990612507,
+      "learning_rate": 7.5e-05,
+      "loss": 10.3462,
+      "step": 110
+    },
+    {
+      "epoch": 0.376071995764955,
+      "grad_norm": 0.021141625940799713,
+      "learning_rate": 7.369106951720373e-05,
+      "loss": 10.3457,
+      "step": 111
+    },
+    {
+      "epoch": 0.3794600317628375,
+      "grad_norm": 0.021793803200125694,
+      "learning_rate": 7.238253774731244e-05,
+      "loss": 10.3457,
+      "step": 112
+    },
+    {
+      "epoch": 0.38284806776071995,
+      "grad_norm": 0.019963612779974937,
+      "learning_rate": 7.10748032817792e-05,
+      "loss": 10.3463,
+      "step": 113
+    },
+    {
+      "epoch": 0.38623610375860246,
+      "grad_norm": 0.020090965554118156,
+      "learning_rate": 6.976826446919059e-05,
+      "loss": 10.3458,
+      "step": 114
+    },
+    {
+      "epoch": 0.3896241397564849,
+      "grad_norm": 0.02083776332437992,
+      "learning_rate": 6.846331929392562e-05,
+      "loss": 10.3469,
+      "step": 115
+    },
+    {
+      "epoch": 0.39301217575436737,
+      "grad_norm": 0.01785002276301384,
+      "learning_rate": 6.7160365254926e-05,
+      "loss": 10.3465,
+      "step": 116
+    },
+    {
+      "epoch": 0.3964002117522499,
+      "grad_norm": 0.02254386991262436,
+      "learning_rate": 6.585979924461394e-05,
+      "loss": 10.3452,
+      "step": 117
+    },
+    {
+      "epoch": 0.39978824775013233,
+      "grad_norm": 0.023428700864315033,
+      "learning_rate": 6.45620174279951e-05,
+      "loss": 10.3475,
+      "step": 118
+    },
+    {
+      "epoch": 0.40317628374801484,
+      "grad_norm": 0.01672559231519699,
+      "learning_rate": 6.326741512198266e-05,
+      "loss": 10.346,
+      "step": 119
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 50987546050560.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null