Training in progress, step 550, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +48 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:14fad88eaff3f9e0c2b07260912cdc45c644bf5e4d95a34cb03df39e87f900f4
 size 100697728

 version https://git-lfs.github.com/spec/v1
+oid sha256:f3fd992e113586f812afc77a0702f2871849c3b36106c0c2de720d263ffaa124
 size 100697728

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6210a6ec10ebc2458aa9ffa49429f2af3dd7b19351ec74512b739af0e8579f01
 size 51418452

 version https://git-lfs.github.com/spec/v1
+oid sha256:3105b22c5fc7869e6ed63a58ea962520392f460fbb1c31c1abc4d139211b21cf
 size 51418452

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8f28831b96af77035c2ded7be2a068154d942395539af092e0feeb6dcde701bc
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:6207d4c3c45167cc370ab64a1372acf1cee42bfee65685d0672373dc45c12efd
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5b782a524e5b57eb023365370accae538ac5e68454bafa53a6dd8b2c51cead56
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:6bb9c0d62d6b3cf0976c16f73e9bd814b298ebffa1786831bc2a68d8e48809b9
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.7421861290931702,
-  "best_model_checkpoint": "miner_id_24/checkpoint-500",
-  "epoch": 0.5884083553986467,
   "eval_steps": 50,
-  "global_step": 500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -445,6 +445,49 @@
       "eval_samples_per_second": 22.047,
       "eval_steps_per_second": 5.512,
       "step": 500
     }
   ],
   "logging_steps": 10,
@@ -473,7 +516,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.3691853688039014e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.7403023838996887,
+  "best_model_checkpoint": "miner_id_24/checkpoint-550",
+  "epoch": 0.6472491909385113,
   "eval_steps": 50,
+  "global_step": 550,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 22.047,
       "eval_steps_per_second": 5.512,
       "step": 500
+    },
+    {
+      "epoch": 0.6001765225066196,
+      "grad_norm": 0.7555685043334961,
+      "learning_rate": 1.1264792494342857e-05,
+      "loss": 5.5477,
+      "step": 510
+    },
+    {
+      "epoch": 0.6119446896145925,
+      "grad_norm": 0.7698599696159363,
+      "learning_rate": 8.936522714508678e-06,
+      "loss": 4.6595,
+      "step": 520
+    },
+    {
+      "epoch": 0.6237128567225655,
+      "grad_norm": 0.8012061715126038,
+      "learning_rate": 6.866382254766157e-06,
+      "loss": 2.9295,
+      "step": 530
+    },
+    {
+      "epoch": 0.6354810238305384,
+      "grad_norm": 0.8370329737663269,
+      "learning_rate": 5.060239153161872e-06,
+      "loss": 1.2254,
+      "step": 540
+    },
+    {
+      "epoch": 0.6472491909385113,
+      "grad_norm": 1.4551666975021362,
+      "learning_rate": 3.5232131185484076e-06,
+      "loss": 1.2469,
+      "step": 550
+    },
+    {
+      "epoch": 0.6472491909385113,
+      "eval_loss": 0.7403023838996887,
+      "eval_runtime": 65.1388,
+      "eval_samples_per_second": 21.984,
+      "eval_steps_per_second": 5.496,
+      "step": 550
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 3.706288114835128e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null