Training in progress, step 350, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +48 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:787f2551b6ac8583af6eea1cb71af383da2ce2104f41d2230e2a07da90e5d21c
 size 72396376

 version https://git-lfs.github.com/spec/v1
+oid sha256:000da61d1797457b2c95bcb27a7b2b110d3afb6a606a87ecd4e2f320b4fad9d7
 size 72396376

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1f7a6512e6cf7cc15a8a708ef1e3cc6ec2b4bf938b14afef26cc69ace80971c5
 size 37134740

 version https://git-lfs.github.com/spec/v1
+oid sha256:105ac16b5d228dd7aa94d2fb6725026dc57df3bf84599096844a4c1dedfd4e77
 size 37134740

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:19e2e3b1a3fb431d188c6e72e3cee4c7684c54ae7e071aa6381813d0d35eaabc
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:205bd1398626770ca707f06808286a5c34cea314101953ce4ecf1df4984e8133
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a8d9346c4fcc90fb1ec8546736583b76a4fae6bc25cb93181337c187d15da94a
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:77c9b643e87499abdfb160399a4a6e4965274897c037cf91e4cd5d5d65b2b404
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.2161424160003662,
-  "best_model_checkpoint": "miner_id_24/checkpoint-300",
-  "epoch": 0.1958863858961802,
   "eval_steps": 50,
-  "global_step": 300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -273,6 +273,49 @@
       "eval_samples_per_second": 60.921,
       "eval_steps_per_second": 15.236,
       "step": 300
     }
   ],
   "logging_steps": 10,
@@ -301,7 +344,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.875582621037363e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.2151703834533691,
+  "best_model_checkpoint": "miner_id_24/checkpoint-350",
+  "epoch": 0.22853411687887693,
   "eval_steps": 50,
+  "global_step": 350,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 60.921,
       "eval_steps_per_second": 15.236,
       "step": 300
+    },
+    {
+      "epoch": 0.20241593209271955,
+      "grad_norm": 0.058099668473005295,
+      "learning_rate": 9.733794785622253e-05,
+      "loss": 1.1525,
+      "step": 310
+    },
+    {
+      "epoch": 0.20894547828925888,
+      "grad_norm": 0.09674125164747238,
+      "learning_rate": 9.202138944469168e-05,
+      "loss": 1.2296,
+      "step": 320
+    },
+    {
+      "epoch": 0.21547502448579825,
+      "grad_norm": 0.15291635692119598,
+      "learning_rate": 8.672744727162781e-05,
+      "loss": 1.2739,
+      "step": 330
+    },
+    {
+      "epoch": 0.2220045706823376,
+      "grad_norm": 0.2810160219669342,
+      "learning_rate": 8.147112759128859e-05,
+      "loss": 1.3062,
+      "step": 340
+    },
+    {
+      "epoch": 0.22853411687887693,
+      "grad_norm": 0.978479266166687,
+      "learning_rate": 7.626733001288851e-05,
+      "loss": 1.3263,
+      "step": 350
+    },
+    {
+      "epoch": 0.22853411687887693,
+      "eval_loss": 1.2151703834533691,
+      "eval_runtime": 41.9719,
+      "eval_samples_per_second": 61.446,
+      "eval_steps_per_second": 15.367,
+      "step": 350
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 5.692182665940173e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null