Training in progress, step 350, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +48 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a568b508e9355a47048e3a8a97449028695d12fa2733dc3f0b827ab748d1f60f
 size 671149168

 version https://git-lfs.github.com/spec/v1
+oid sha256:e1168e5b8b1c52b89556c033ff67fd9831b3ca5251d9c2a2e62b2df28843d988
 size 671149168

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6f8af1dce93ad0109737df42af8c775242a4d902e6b217d23390637c67f88c77
 size 341314644

 version https://git-lfs.github.com/spec/v1
+oid sha256:4a50c4599ed8d3f96a778732216801f188ce39983e30b56ba7f5aeb7742c90ac
 size 341314644

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:31b90a2658a91fd2095b78f4edf43108110add2d1bb7198edca067b5374f45b9
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:33e70537bf9ca12ee7c7d03210f07cbbed909aa7328895abf3cc4ad034759531
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4e3407292f65425c0593f095883f5f3c050baccb28fc15b3c1ad88090d6c23ef
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:9aefa5c0989fb1076f44dee5ec81119730caab480902e567590e4c3defecd81d
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 2.1419291496276855,
-  "best_model_checkpoint": "miner_id_24/checkpoint-300",
-  "epoch": 0.10824463287028685,
   "eval_steps": 50,
-  "global_step": 300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -273,6 +273,49 @@
       "eval_samples_per_second": 15.895,
       "eval_steps_per_second": 3.977,
       "step": 300
     }
   ],
   "logging_steps": 10,
@@ -301,7 +344,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.845490117640192e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 2.0811328887939453,
+  "best_model_checkpoint": "miner_id_24/checkpoint-350",
+  "epoch": 0.12628540501533467,
   "eval_steps": 50,
+  "global_step": 350,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 15.895,
       "eval_steps_per_second": 3.977,
       "step": 300
+    },
+    {
+      "epoch": 0.11185278729929642,
+      "grad_norm": 1.4185280799865723,
+      "learning_rate": 8.263051337963623e-05,
+      "loss": 2.2137,
+      "step": 310
+    },
+    {
+      "epoch": 0.11546094172830597,
+      "grad_norm": 1.4617102146148682,
+      "learning_rate": 7.531714761313074e-05,
+      "loss": 2.1878,
+      "step": 320
+    },
+    {
+      "epoch": 0.11906909615731553,
+      "grad_norm": 1.7664028406143188,
+      "learning_rate": 6.816788131766559e-05,
+      "loss": 2.0554,
+      "step": 330
+    },
+    {
+      "epoch": 0.1226772505863251,
+      "grad_norm": 2.4227097034454346,
+      "learning_rate": 6.121754499999055e-05,
+      "loss": 2.0241,
+      "step": 340
+    },
+    {
+      "epoch": 0.12628540501533467,
+      "grad_norm": 4.657759189605713,
+      "learning_rate": 5.450000000000003e-05,
+      "loss": 2.0051,
+      "step": 350
+    },
+    {
+      "epoch": 0.12628540501533467,
+      "eval_loss": 2.0811328887939453,
+      "eval_runtime": 73.35,
+      "eval_samples_per_second": 15.91,
+      "eval_steps_per_second": 3.981,
+      "step": 350
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 6.826023943824998e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null