Training in progress, step 300, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +2 -2
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +48 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:34bd11160aec9fe2bd858aad8d63001600dd28dbfe0f9393afbc194a4bd2fe33
 size 1006723888

 version https://git-lfs.github.com/spec/v1
+oid sha256:825ecd2a9b415c9b1862546edd0f3945a56a381c68920f130700de62462f1f2e
 size 1006723888

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0814d4e09fd596d42c4126e7f9af7a0d9627a886bb5ab4e2a995a620103f3614
-size 511971028

 version https://git-lfs.github.com/spec/v1
+oid sha256:064ecfb5d89792208a73611c0ec367e4fd173f534715a3b65af1db393e805133
+size 511971668

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8a84bdfc985f9f71e6bcb152a1eb9fd0ac393101acf6977b32e80460062d0456
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:130b330e493d984dcf2540998fdf543192159617f46ed883e63a7374d38984b7
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5245962b37abe5debb29d3c0771306ab720257c32ca897672f6c78751ec6642f
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:4e3407292f65425c0593f095883f5f3c050baccb28fc15b3c1ad88090d6c23ef
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.5013260841369629,
-  "best_model_checkpoint": "miner_id_24/checkpoint-250",
-  "epoch": 0.051991265467401473,
   "eval_steps": 50,
-  "global_step": 250,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -230,6 +230,49 @@
       "eval_samples_per_second": 6.282,
       "eval_steps_per_second": 1.574,
       "step": 250
     }
   ],
   "logging_steps": 10,
@@ -258,7 +301,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.360440978296013e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.4650544822216034,
+  "best_model_checkpoint": "miner_id_24/checkpoint-300",
+  "epoch": 0.06238951856088177,
   "eval_steps": 50,
+  "global_step": 300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 6.282,
       "eval_steps_per_second": 1.574,
       "step": 250
+    },
+    {
+      "epoch": 0.054070916086097535,
+      "grad_norm": 1.2806155681610107,
+      "learning_rate": 0.00012039360249617425,
+      "loss": 0.4848,
+      "step": 260
+    },
+    {
+      "epoch": 0.0561505667047936,
+      "grad_norm": 0.8744197487831116,
+      "learning_rate": 0.00011280404514057264,
+      "loss": 0.4888,
+      "step": 270
+    },
+    {
+      "epoch": 0.05823021732348965,
+      "grad_norm": 1.0379194021224976,
+      "learning_rate": 0.00010519595485942743,
+      "loss": 0.4923,
+      "step": 280
+    },
+    {
+      "epoch": 0.060309867942185715,
+      "grad_norm": 1.0062249898910522,
+      "learning_rate": 9.76063975038258e-05,
+      "loss": 0.4953,
+      "step": 290
+    },
+    {
+      "epoch": 0.06238951856088177,
+      "grad_norm": 1.2005985975265503,
+      "learning_rate": 9.00723486343046e-05,
+      "loss": 0.4437,
+      "step": 300
+    },
+    {
+      "epoch": 0.06238951856088177,
+      "eval_loss": 0.4650544822216034,
+      "eval_runtime": 80.7039,
+      "eval_samples_per_second": 6.282,
+      "eval_steps_per_second": 1.574,
+      "step": 300
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 4.027195140656333e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null