Training in progress, step 63, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +66 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a2a3673ac6ecfcf9fbd4e8a74b81927275b26bd0684e838caa39b7caf422cf88
 size 319876032

 version https://git-lfs.github.com/spec/v1
+oid sha256:f652211dac610e80872598da1df98e590795842f441ad646c668780471aa84c1
 size 319876032

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a6817fbbcd33480f164784483673185908595dff99afaaa869a566aa99376d9a
 size 640009682

 version https://git-lfs.github.com/spec/v1
+oid sha256:b61ce972ea96e2306fbf26a1d40d3335709da2c916fff3ac1116fa3c2f0f5737
 size 640009682

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9f07b4f2bcc3f569f306e9b6554b0fe04785f073abde4e0109bc3d3d5eb765c2
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:c1098628841f4bcb07d5546445f4de27d56441cc7dc92917c1ddcac8b9e18a99
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6f68b4a4ba4603d307a8be653dd70e693a9f3506ea1af5f24d2d241dc54cfcb1
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d85eac2daddfe8e3f78a5d6ef1e9ba13c04651694635a9ed76369a20726389db
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.1620712844653664,
   "eval_steps": 50,
-  "global_step": 54,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -401,6 +401,69 @@
       "learning_rate": 9.25084022891929e-05,
       "loss": 26.5253,
       "step": 54
     }
   ],
   "logging_steps": 1,
@@ -420,7 +483,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.839944410337116e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.3557498318762609,
   "eval_steps": 50,
+  "global_step": 63,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 9.25084022891929e-05,
       "loss": 26.5253,
       "step": 54
+    },
+    {
+      "epoch": 1.183591123066577,
+      "grad_norm": 25.87575912475586,
+      "learning_rate": 8.963177415120962e-05,
+      "loss": 22.2069,
+      "step": 55
+    },
+    {
+      "epoch": 1.2051109616677875,
+      "grad_norm": 21.698827743530273,
+      "learning_rate": 8.673258487801731e-05,
+      "loss": 25.7023,
+      "step": 56
+    },
+    {
+      "epoch": 1.226630800268998,
+      "grad_norm": 22.29081916809082,
+      "learning_rate": 8.381530480933783e-05,
+      "loss": 23.0083,
+      "step": 57
+    },
+    {
+      "epoch": 1.2481506388702084,
+      "grad_norm": 19.079532623291016,
+      "learning_rate": 8.088443217958837e-05,
+      "loss": 25.0357,
+      "step": 58
+    },
+    {
+      "epoch": 1.269670477471419,
+      "grad_norm": 19.22806167602539,
+      "learning_rate": 7.794448618193015e-05,
+      "loss": 27.0424,
+      "step": 59
+    },
+    {
+      "epoch": 1.2911903160726295,
+      "grad_norm": 20.588699340820312,
+      "learning_rate": 7.5e-05,
+      "loss": 23.868,
+      "step": 60
+    },
+    {
+      "epoch": 1.31271015467384,
+      "grad_norm": 21.745031356811523,
+      "learning_rate": 7.205551381806987e-05,
+      "loss": 23.2284,
+      "step": 61
+    },
+    {
+      "epoch": 1.3342299932750503,
+      "grad_norm": 19.947467803955078,
+      "learning_rate": 6.911556782041163e-05,
+      "loss": 26.8844,
+      "step": 62
+    },
+    {
+      "epoch": 1.3557498318762609,
+      "grad_norm": 23.738483428955078,
+      "learning_rate": 6.618469519066217e-05,
+      "loss": 25.726,
+      "step": 63
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 3.3132684787266355e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null