Training in progress, step 119, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +122 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:919cf08a7d7abf9880c22fb4dfe4b831d65927f7ae57ae5667378e28e06cd849
 size 335604696

 version https://git-lfs.github.com/spec/v1
+oid sha256:320fd95f92c74b30e44eea33ed497b690bc39a047f9687bb0de584f2694e20c8
 size 335604696

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4138bcc19e199dc967bcd7384f55e319c96ad62e01d12d688ab9eccaa9ae5991
 size 671466706

 version https://git-lfs.github.com/spec/v1
+oid sha256:fbe1bbd4c0b2029d5651d4bfed85f41431ed9ee8d5961c5f019eed1914818a8c
 size 671466706

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:df7161f83e2c1b5421911bfc287c46c4380aa9d1a9390b7f6bdd147d920abb38
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:7a76ce3b7ea5491c369f9a36901d24e79782cda5360c68d2970c580ab23ec498
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:70670c442607259270e13afbef3aac28e38a58ddad6998414f76ed43ab7f41d4
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d4a2e0d38e4aad78961b54bb99f0a18c11d847ea1d3d3bc12cf223a3f862cf9e
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.15017253278122844,
   "eval_steps": 50,
-  "global_step": 102,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -745,6 +745,125 @@
       "learning_rate": 8.54379825720049e-05,
       "loss": 0.0527,
       "step": 102
     }
   ],
   "logging_steps": 1,
@@ -764,7 +883,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.0779851033778586e+18,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.1752012882447665,
   "eval_steps": 50,
+  "global_step": 119,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 8.54379825720049e-05,
       "loss": 0.0527,
       "step": 102
+    },
+    {
+      "epoch": 0.15164481251437772,
+      "grad_norm": 0.10324705392122269,
+      "learning_rate": 8.414020075538605e-05,
+      "loss": 0.0217,
+      "step": 103
+    },
+    {
+      "epoch": 0.15311709224752704,
+      "grad_norm": 0.11965189129114151,
+      "learning_rate": 8.2839634745074e-05,
+      "loss": 0.0518,
+      "step": 104
+    },
+    {
+      "epoch": 0.15458937198067632,
+      "grad_norm": 0.14544668793678284,
+      "learning_rate": 8.153668070607437e-05,
+      "loss": 0.0936,
+      "step": 105
+    },
+    {
+      "epoch": 0.15606165171382563,
+      "grad_norm": 0.11407126486301422,
+      "learning_rate": 8.023173553080938e-05,
+      "loss": 0.0279,
+      "step": 106
+    },
+    {
+      "epoch": 0.15753393144697492,
+      "grad_norm": 0.11697705090045929,
+      "learning_rate": 7.89251967182208e-05,
+      "loss": 0.044,
+      "step": 107
+    },
+    {
+      "epoch": 0.15900621118012423,
+      "grad_norm": 0.18119023740291595,
+      "learning_rate": 7.761746225268758e-05,
+      "loss": 0.1192,
+      "step": 108
+    },
+    {
+      "epoch": 0.1604784909132735,
+      "grad_norm": 0.19359427690505981,
+      "learning_rate": 7.630893048279627e-05,
+      "loss": 0.1415,
+      "step": 109
+    },
+    {
+      "epoch": 0.16195077064642283,
+      "grad_norm": 0.11051613837480545,
+      "learning_rate": 7.5e-05,
+      "loss": 0.0614,
+      "step": 110
+    },
+    {
+      "epoch": 0.1634230503795721,
+      "grad_norm": 0.11084026098251343,
+      "learning_rate": 7.369106951720373e-05,
+      "loss": 0.0506,
+      "step": 111
+    },
+    {
+      "epoch": 0.16489533011272142,
+      "grad_norm": 0.08058993518352509,
+      "learning_rate": 7.238253774731244e-05,
+      "loss": 0.0232,
+      "step": 112
+    },
+    {
+      "epoch": 0.1663676098458707,
+      "grad_norm": 0.11398748308420181,
+      "learning_rate": 7.10748032817792e-05,
+      "loss": 0.077,
+      "step": 113
+    },
+    {
+      "epoch": 0.16783988957902002,
+      "grad_norm": 0.10695286840200424,
+      "learning_rate": 6.976826446919059e-05,
+      "loss": 0.0586,
+      "step": 114
+    },
+    {
+      "epoch": 0.1693121693121693,
+      "grad_norm": 0.06323719769716263,
+      "learning_rate": 6.846331929392562e-05,
+      "loss": 0.0096,
+      "step": 115
+    },
+    {
+      "epoch": 0.17078444904531861,
+      "grad_norm": 0.09416350722312927,
+      "learning_rate": 6.7160365254926e-05,
+      "loss": 0.0552,
+      "step": 116
+    },
+    {
+      "epoch": 0.1722567287784679,
+      "grad_norm": 0.10175611078739166,
+      "learning_rate": 6.585979924461394e-05,
+      "loss": 0.0447,
+      "step": 117
+    },
+    {
+      "epoch": 0.1737290085116172,
+      "grad_norm": 0.0726943388581276,
+      "learning_rate": 6.45620174279951e-05,
+      "loss": 0.0183,
+      "step": 118
+    },
+    {
+      "epoch": 0.1752012882447665,
+      "grad_norm": 0.12314460426568985,
+      "learning_rate": 6.326741512198266e-05,
+      "loss": 0.0912,
+      "step": 119
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 1.260198156319064e+18,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null