Training in progress, step 110, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +74 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a2a70e450bc009b9c5087623706af2ed489db51cab00a987ff052710ecceff28
 size 159967880

 version https://git-lfs.github.com/spec/v1
+oid sha256:e3cdbe374c1e1f1a76c11e4f1288fe314d5a37aa7dd40932d29ff0520c4aacfb
 size 159967880

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7d94a41d2844d8bd37cb0fa7cbcf19940d93eb3cf9e6e2b29d80b12046923528
 size 320194002

 version https://git-lfs.github.com/spec/v1
+oid sha256:b5c31e5f659f95dde62e00e80508aa1cca1ee8d3e0e7248595ab9cc3737ee5aa
 size 320194002

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:acca656d17f6717113dc9db830ccab89811067361025e443ec9c355f4f43f913
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:10ad139536e36a5c5994194df11493228ee012519c23cc0621e37e04c948cdad
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:df88ad9d29a5b994fc668c3ab662b1d4e6baa321c3f5068caf8ff1c21c6e351d
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:eda1b3688163acb4c0de1a0c7c611576b5a46451ca11dac78f3f571adee24be0
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.12525896728038788,
   "best_model_checkpoint": "miner_id_24/checkpoint-100",
-  "epoch": 1.8310502283105023,
   "eval_steps": 25,
-  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -747,6 +747,76 @@
       "eval_samples_per_second": 14.646,
       "eval_steps_per_second": 2.05,
       "step": 100
     }
   ],
   "logging_steps": 1,
@@ -770,12 +840,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.388598679044096e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.12525896728038788,
   "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 2.018264840182648,
   "eval_steps": 25,
+  "global_step": 110,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 14.646,
       "eval_steps_per_second": 2.05,
       "step": 100
+    },
+    {
+      "epoch": 1.8493150684931505,
+      "grad_norm": 0.7789422273635864,
+      "learning_rate": 5.95594714845854e-06,
+      "loss": 0.4551,
+      "step": 101
+    },
+    {
+      "epoch": 1.8675799086757991,
+      "grad_norm": 0.9181280732154846,
+      "learning_rate": 4.712525830705338e-06,
+      "loss": 0.4778,
+      "step": 102
+    },
+    {
+      "epoch": 1.8858447488584473,
+      "grad_norm": 0.6837835311889648,
+      "learning_rate": 3.6124857091878845e-06,
+      "loss": 0.4939,
+      "step": 103
+    },
+    {
+      "epoch": 1.904109589041096,
+      "grad_norm": 0.522508978843689,
+      "learning_rate": 2.656912390696708e-06,
+      "loss": 0.5255,
+      "step": 104
+    },
+    {
+      "epoch": 1.9223744292237441,
+      "grad_norm": 1.1073154211044312,
+      "learning_rate": 1.8467489107293509e-06,
+      "loss": 0.6029,
+      "step": 105
+    },
+    {
+      "epoch": 1.9406392694063928,
+      "grad_norm": 1.4917099475860596,
+      "learning_rate": 1.1827948028283352e-06,
+      "loss": 0.6545,
+      "step": 106
+    },
+    {
+      "epoch": 1.958904109589041,
+      "grad_norm": 0.9225929379463196,
+      "learning_rate": 6.657053095380005e-07,
+      "loss": 0.5178,
+      "step": 107
+    },
+    {
+      "epoch": 1.9771689497716896,
+      "grad_norm": 0.7463716864585876,
+      "learning_rate": 2.959907357592661e-07,
+      "loss": 0.4391,
+      "step": 108
+    },
+    {
+      "epoch": 1.9954337899543377,
+      "grad_norm": 0.6585250496864319,
+      "learning_rate": 7.401594514025999e-08,
+      "loss": 0.5468,
+      "step": 109
+    },
+    {
+      "epoch": 2.018264840182648,
+      "grad_norm": 0.6586726307868958,
+      "learning_rate": 0.0,
+      "loss": 0.3368,
+      "step": 110
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.5274585469485056e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null