Training in progress, step 300, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +2 -2
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +48 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b727edaef991a3a15cc32eab3c7047ead733ece64ad7db72666300fc6b70eae4
 size 639691872

 version https://git-lfs.github.com/spec/v1
+oid sha256:7fe7d2c5f5f9cf61ead847bfb5ab31887e6f1c74166fa3b445b55707dfe89d5f
 size 639691872

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9d8e7b35bc70805daae2984e8c89e70106ef48b8780f2b0f5c51579210ff1a63
-size 325339796

 version https://git-lfs.github.com/spec/v1
+oid sha256:b46c3d6e6f49bc2e58d21dbe8b4bdbc282fab73fb1bb2a82a04c8dce5689b158
+size 325340244

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3ea2bf0a34221e8bebf26496bafc8f3fe6a6ef5d7f7e43883d9c09d47debb643
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:788e13c48f45af667448858903d0b64d7c243d58216820af65c93716a092e847
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0b764a7feea336c9409f04ca3df4d8b4349bea019384446d21739ad3565001bd
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:dd9465ef056e04d3fe99197d208af63980ee5111d270db49fe259e43c05f827d
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.2357275485992432,
-  "best_model_checkpoint": "miner_id_24/checkpoint-250",
-  "epoch": 0.004209391995420182,
   "eval_steps": 50,
-  "global_step": 250,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -230,6 +230,49 @@
       "eval_samples_per_second": 14.828,
       "eval_steps_per_second": 3.707,
       "step": 250
     }
   ],
   "logging_steps": 10,
@@ -258,7 +301,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.116529964220416e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.223321795463562,
+  "best_model_checkpoint": "miner_id_24/checkpoint-300",
+  "epoch": 0.0050512703945042176,
   "eval_steps": 50,
+  "global_step": 300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 14.828,
       "eval_steps_per_second": 3.707,
       "step": 250
+    },
+    {
+      "epoch": 0.004377767675236989,
+      "grad_norm": 1.4801981449127197,
+      "learning_rate": 0.00011818454556963892,
+      "loss": 1.0041,
+      "step": 260
+    },
+    {
+      "epoch": 0.004546143355053796,
+      "grad_norm": 1.3664003610610962,
+      "learning_rate": 0.00011073424614716762,
+      "loss": 1.0114,
+      "step": 270
+    },
+    {
+      "epoch": 0.004714519034870603,
+      "grad_norm": 1.5936243534088135,
+      "learning_rate": 0.00010326575385283242,
+      "loss": 1.1728,
+      "step": 280
+    },
+    {
+      "epoch": 0.00488289471468741,
+      "grad_norm": 1.537528157234192,
+      "learning_rate": 9.58154544303611e-05,
+      "loss": 1.3018,
+      "step": 290
+    },
+    {
+      "epoch": 0.0050512703945042176,
+      "grad_norm": 2.965649127960205,
+      "learning_rate": 8.841964498963846e-05,
+      "loss": 1.5497,
+      "step": 300
+    },
+    {
+      "epoch": 0.0050512703945042176,
+      "eval_loss": 1.223321795463562,
+      "eval_runtime": 1686.5734,
+      "eval_samples_per_second": 14.827,
+      "eval_steps_per_second": 3.707,
+      "step": 300
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 6.123060448985088e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null