Training in progress, step 117, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +94 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7c317579f0ce6943c96efcd29641b0da777b663afd35615d7139e5030c2ea7fa
 size 319876032

 version https://git-lfs.github.com/spec/v1
+oid sha256:a5b2a4ce332b6d3f2d7b5027963d2bac8c8a90a8166a714fcfa99acf1139b970
 size 319876032

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2f840bb68a6dbf9aeb6a06b461912087949474294f107d721ad10f08734a94f2
 size 640009682

 version https://git-lfs.github.com/spec/v1
+oid sha256:f008f744c603be58af8a9cf079728bb5cbea9265abdcb5dd15ed0a5ff49df40a
 size 640009682

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:42ff49e4fdfd413d94e203d9be9412a3f3634f923d746d4404b2715d238c396e
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:a85180cb5b242ee948b2af64053ea3c4e0cc8b032b0a9568c39646d04dd2d77e
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b57ac6e48796727694162b0b2b513045a14687c5b4988558b227383b3e2c1d53
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:72ced90745bf11dd0913ccb678fa84f97d9d9d2dcc6e8de79651c15430da9657
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.13840715325431482,
   "eval_steps": 50,
-  "global_step": 104,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -759,6 +759,97 @@
       "learning_rate": 4.176063408005691e-05,
       "loss": 47.4479,
       "step": 104
     }
   ],
   "logging_steps": 1,
@@ -778,7 +869,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.852454887274578e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.15570804741110417,
   "eval_steps": 50,
+  "global_step": 117,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 4.176063408005691e-05,
       "loss": 47.4479,
       "step": 104
+    },
+    {
+      "epoch": 0.13973799126637554,
+      "grad_norm": 5.846158504486084,
+      "learning_rate": 4.014576209671735e-05,
+      "loss": 46.6026,
+      "step": 105
+    },
+    {
+      "epoch": 0.14106882927843625,
+      "grad_norm": 6.544958591461182,
+      "learning_rate": 3.855124400680454e-05,
+      "loss": 47.0602,
+      "step": 106
+    },
+    {
+      "epoch": 0.142399667290497,
+      "grad_norm": 4.549857139587402,
+      "learning_rate": 3.697801096398074e-05,
+      "loss": 44.632,
+      "step": 107
+    },
+    {
+      "epoch": 0.1437305053025577,
+      "grad_norm": 4.476944446563721,
+      "learning_rate": 3.542698169204003e-05,
+      "loss": 44.4824,
+      "step": 108
+    },
+    {
+      "epoch": 0.14506134331461842,
+      "grad_norm": 5.5804314613342285,
+      "learning_rate": 3.389906194839976e-05,
+      "loss": 45.0248,
+      "step": 109
+    },
+    {
+      "epoch": 0.14639218132667914,
+      "grad_norm": 5.525055408477783,
+      "learning_rate": 3.239514399516332e-05,
+      "loss": 44.9731,
+      "step": 110
+    },
+    {
+      "epoch": 0.14772301933873985,
+      "grad_norm": 4.815814018249512,
+      "learning_rate": 3.091610607806452e-05,
+      "loss": 43.9473,
+      "step": 111
+    },
+    {
+      "epoch": 0.1490538573508006,
+      "grad_norm": 4.596780300140381,
+      "learning_rate": 2.946281191359666e-05,
+      "loss": 43.4745,
+      "step": 112
+    },
+    {
+      "epoch": 0.1503846953628613,
+      "grad_norm": 4.904426097869873,
+      "learning_rate": 2.803611018462647e-05,
+      "loss": 46.6434,
+      "step": 113
+    },
+    {
+      "epoch": 0.15171553337492202,
+      "grad_norm": 5.079036712646484,
+      "learning_rate": 2.663683404478722e-05,
+      "loss": 45.0433,
+      "step": 114
+    },
+    {
+      "epoch": 0.15304637138698274,
+      "grad_norm": 4.7992072105407715,
+      "learning_rate": 2.5265800631940373e-05,
+      "loss": 45.1488,
+      "step": 115
+    },
+    {
+      "epoch": 0.15437720939904345,
+      "grad_norm": 8.740615844726562,
+      "learning_rate": 2.3923810590990202e-05,
+      "loss": 46.0447,
+      "step": 116
+    },
+    {
+      "epoch": 0.15570804741110417,
+      "grad_norm": 4.366965293884277,
+      "learning_rate": 2.2611647606329732e-05,
+      "loss": 43.9194,
+      "step": 117
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 6.6051716349218e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null