Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +363 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:513d1d001e44abbb11bad16b9d54f3736bb27c4130e0788ca51fd9c10438e975
 size 389074464

 version https://git-lfs.github.com/spec/v1
+oid sha256:2d8d84d6799f2b8600857503facf9efc9d707b12ab9d349858785190ec83c7d3
 size 389074464

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dc7521782a5874a8c21ae024658e0c02f59d66f7eb9a3acad546da3fddd692a3
 size 198011252

 version https://git-lfs.github.com/spec/v1
+oid sha256:dc1863110faab83375fcee208fa1d72ba911899663ebb042e555ee2c5af572fe
 size 198011252

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cd02c6bfecd03b67f7114f8aadb9346cf17a57d8ec1cb14f3f019d44fd417cd5
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:fcc4528e89b8c13286b23ebd93ace19b89a25d694fb02d1cd006cd914385e472
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6f4002ee03f4202a6a410bde3375cd186d152ce129e8a177eb112bee1f18b1e2
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:4ac116b8169c53ab649a7f15f2f32735f2c71ec2f803f70de8c655a513ee9cfc
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.1833840608596802,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.2037351443123939,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 16.336,
       "eval_steps_per_second": 4.084,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1117,7 +1475,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.2025166377569485e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.169433355331421,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.27164685908319186,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 16.336,
       "eval_steps_per_second": 4.084,
       "step": 150
+    },
+    {
+      "epoch": 0.20509337860780985,
+      "grad_norm": 0.2699250876903534,
+      "learning_rate": 7.107005538862646e-05,
+      "loss": 1.2426,
+      "step": 151
+    },
+    {
+      "epoch": 0.2064516129032258,
+      "grad_norm": 0.29538774490356445,
+      "learning_rate": 7.07041155014006e-05,
+      "loss": 1.2144,
+      "step": 152
+    },
+    {
+      "epoch": 0.20780984719864176,
+      "grad_norm": 0.2714727222919464,
+      "learning_rate": 7.033683215379002e-05,
+      "loss": 1.2891,
+      "step": 153
+    },
+    {
+      "epoch": 0.20916808149405772,
+      "grad_norm": 0.26055291295051575,
+      "learning_rate": 6.996822917828477e-05,
+      "loss": 1.1651,
+      "step": 154
+    },
+    {
+      "epoch": 0.21052631578947367,
+      "grad_norm": 0.2369328737258911,
+      "learning_rate": 6.959833049300377e-05,
+      "loss": 1.1572,
+      "step": 155
+    },
+    {
+      "epoch": 0.21188455008488966,
+      "grad_norm": 0.215769425034523,
+      "learning_rate": 6.922716010014255e-05,
+      "loss": 1.1521,
+      "step": 156
+    },
+    {
+      "epoch": 0.2132427843803056,
+      "grad_norm": 0.20645937323570251,
+      "learning_rate": 6.885474208441603e-05,
+      "loss": 1.1744,
+      "step": 157
+    },
+    {
+      "epoch": 0.21460101867572157,
+      "grad_norm": 0.21098513901233673,
+      "learning_rate": 6.848110061149556e-05,
+      "loss": 1.1768,
+      "step": 158
+    },
+    {
+      "epoch": 0.21595925297113752,
+      "grad_norm": 0.20801562070846558,
+      "learning_rate": 6.810625992644085e-05,
+      "loss": 1.1577,
+      "step": 159
+    },
+    {
+      "epoch": 0.21731748726655348,
+      "grad_norm": 0.22393718361854553,
+      "learning_rate": 6.773024435212678e-05,
+      "loss": 1.2199,
+      "step": 160
+    },
+    {
+      "epoch": 0.21867572156196943,
+      "grad_norm": 0.20644541084766388,
+      "learning_rate": 6.735307828766515e-05,
+      "loss": 1.1711,
+      "step": 161
+    },
+    {
+      "epoch": 0.2200339558573854,
+      "grad_norm": 0.21023766696453094,
+      "learning_rate": 6.697478620682137e-05,
+      "loss": 1.1736,
+      "step": 162
+    },
+    {
+      "epoch": 0.22139219015280137,
+      "grad_norm": 0.21561093628406525,
+      "learning_rate": 6.659539265642643e-05,
+      "loss": 1.1502,
+      "step": 163
+    },
+    {
+      "epoch": 0.22275042444821733,
+      "grad_norm": 0.2196631133556366,
+      "learning_rate": 6.621492225478414e-05,
+      "loss": 1.1813,
+      "step": 164
+    },
+    {
+      "epoch": 0.22410865874363328,
+      "grad_norm": 0.2159351408481598,
+      "learning_rate": 6.583339969007363e-05,
+      "loss": 1.1997,
+      "step": 165
+    },
+    {
+      "epoch": 0.22546689303904924,
+      "grad_norm": 0.215266153216362,
+      "learning_rate": 6.545084971874738e-05,
+      "loss": 1.0927,
+      "step": 166
+    },
+    {
+      "epoch": 0.2268251273344652,
+      "grad_norm": 0.22314059734344482,
+      "learning_rate": 6.506729716392481e-05,
+      "loss": 1.2044,
+      "step": 167
+    },
+    {
+      "epoch": 0.22818336162988115,
+      "grad_norm": 0.22275614738464355,
+      "learning_rate": 6.468276691378155e-05,
+      "loss": 1.2058,
+      "step": 168
+    },
+    {
+      "epoch": 0.2295415959252971,
+      "grad_norm": 0.22310510277748108,
+      "learning_rate": 6.429728391993446e-05,
+      "loss": 1.1506,
+      "step": 169
+    },
+    {
+      "epoch": 0.23089983022071306,
+      "grad_norm": 0.21441923081874847,
+      "learning_rate": 6.391087319582264e-05,
+      "loss": 1.1346,
+      "step": 170
+    },
+    {
+      "epoch": 0.23225806451612904,
+      "grad_norm": 0.2179337441921234,
+      "learning_rate": 6.35235598150842e-05,
+      "loss": 1.1355,
+      "step": 171
+    },
+    {
+      "epoch": 0.233616298811545,
+      "grad_norm": 0.2206660360097885,
+      "learning_rate": 6.313536890992935e-05,
+      "loss": 1.1346,
+      "step": 172
+    },
+    {
+      "epoch": 0.23497453310696095,
+      "grad_norm": 0.21749481558799744,
+      "learning_rate": 6.274632566950967e-05,
+      "loss": 1.1424,
+      "step": 173
+    },
+    {
+      "epoch": 0.2363327674023769,
+      "grad_norm": 0.21882691979408264,
+      "learning_rate": 6.235645533828349e-05,
+      "loss": 1.2118,
+      "step": 174
+    },
+    {
+      "epoch": 0.23769100169779286,
+      "grad_norm": 0.22239045798778534,
+      "learning_rate": 6.19657832143779e-05,
+      "loss": 1.1532,
+      "step": 175
+    },
+    {
+      "epoch": 0.23904923599320882,
+      "grad_norm": 0.22943776845932007,
+      "learning_rate": 6.157433464794716e-05,
+      "loss": 1.1658,
+      "step": 176
+    },
+    {
+      "epoch": 0.24040747028862477,
+      "grad_norm": 0.22224801778793335,
+      "learning_rate": 6.118213503952779e-05,
+      "loss": 1.2036,
+      "step": 177
+    },
+    {
+      "epoch": 0.24176570458404076,
+      "grad_norm": 0.23055648803710938,
+      "learning_rate": 6.078920983839031e-05,
+      "loss": 1.1496,
+      "step": 178
+    },
+    {
+      "epoch": 0.2431239388794567,
+      "grad_norm": 0.22937384247779846,
+      "learning_rate": 6.0395584540887963e-05,
+      "loss": 1.1748,
+      "step": 179
+    },
+    {
+      "epoch": 0.24448217317487267,
+      "grad_norm": 0.22974710166454315,
+      "learning_rate": 6.0001284688802226e-05,
+      "loss": 1.1668,
+      "step": 180
+    },
+    {
+      "epoch": 0.24584040747028862,
+      "grad_norm": 0.2364954799413681,
+      "learning_rate": 5.960633586768543e-05,
+      "loss": 1.1893,
+      "step": 181
+    },
+    {
+      "epoch": 0.24719864176570458,
+      "grad_norm": 0.2343706637620926,
+      "learning_rate": 5.921076370520058e-05,
+      "loss": 1.1655,
+      "step": 182
+    },
+    {
+      "epoch": 0.24855687606112054,
+      "grad_norm": 0.23308633267879486,
+      "learning_rate": 5.8814593869458455e-05,
+      "loss": 1.1332,
+      "step": 183
+    },
+    {
+      "epoch": 0.2499151103565365,
+      "grad_norm": 0.2331320345401764,
+      "learning_rate": 5.841785206735192e-05,
+      "loss": 1.088,
+      "step": 184
+    },
+    {
+      "epoch": 0.25127334465195245,
+      "grad_norm": 0.23977631330490112,
+      "learning_rate": 5.8020564042888015e-05,
+      "loss": 1.2078,
+      "step": 185
+    },
+    {
+      "epoch": 0.25263157894736843,
+      "grad_norm": 0.23800607025623322,
+      "learning_rate": 5.762275557551727e-05,
+      "loss": 1.1134,
+      "step": 186
+    },
+    {
+      "epoch": 0.25398981324278436,
+      "grad_norm": 0.24236267805099487,
+      "learning_rate": 5.7224452478461064e-05,
+      "loss": 1.2159,
+      "step": 187
+    },
+    {
+      "epoch": 0.25534804753820034,
+      "grad_norm": 0.24531729519367218,
+      "learning_rate": 5.682568059703659e-05,
+      "loss": 1.1084,
+      "step": 188
+    },
+    {
+      "epoch": 0.2567062818336163,
+      "grad_norm": 0.2564159333705902,
+      "learning_rate": 5.642646580697973e-05,
+      "loss": 1.0714,
+      "step": 189
+    },
+    {
+      "epoch": 0.25806451612903225,
+      "grad_norm": 0.2536662817001343,
+      "learning_rate": 5.602683401276615e-05,
+      "loss": 1.1332,
+      "step": 190
+    },
+    {
+      "epoch": 0.25942275042444823,
+      "grad_norm": 0.2573738098144531,
+      "learning_rate": 5.562681114593028e-05,
+      "loss": 1.1732,
+      "step": 191
+    },
+    {
+      "epoch": 0.26078098471986416,
+      "grad_norm": 0.28553032875061035,
+      "learning_rate": 5.522642316338268e-05,
+      "loss": 1.1813,
+      "step": 192
+    },
+    {
+      "epoch": 0.26213921901528014,
+      "grad_norm": 0.2642425000667572,
+      "learning_rate": 5.482569604572576e-05,
+      "loss": 1.1169,
+      "step": 193
+    },
+    {
+      "epoch": 0.2634974533106961,
+      "grad_norm": 0.27143600583076477,
+      "learning_rate": 5.442465579556793e-05,
+      "loss": 1.17,
+      "step": 194
+    },
+    {
+      "epoch": 0.26485568760611206,
+      "grad_norm": 0.27368876338005066,
+      "learning_rate": 5.402332843583631e-05,
+      "loss": 1.1743,
+      "step": 195
+    },
+    {
+      "epoch": 0.26621392190152804,
+      "grad_norm": 0.28669026494026184,
+      "learning_rate": 5.3621740008088126e-05,
+      "loss": 1.1476,
+      "step": 196
+    },
+    {
+      "epoch": 0.26757215619694397,
+      "grad_norm": 0.3066455125808716,
+      "learning_rate": 5.321991657082097e-05,
+      "loss": 1.2093,
+      "step": 197
+    },
+    {
+      "epoch": 0.26893039049235995,
+      "grad_norm": 0.3169887661933899,
+      "learning_rate": 5.281788419778187e-05,
+      "loss": 1.1844,
+      "step": 198
+    },
+    {
+      "epoch": 0.2702886247877759,
+      "grad_norm": 0.34708085656166077,
+      "learning_rate": 5.2415668976275355e-05,
+      "loss": 1.154,
+      "step": 199
+    },
+    {
+      "epoch": 0.27164685908319186,
+      "grad_norm": 0.39115723967552185,
+      "learning_rate": 5.201329700547076e-05,
+      "loss": 1.1696,
+      "step": 200
+    },
+    {
+      "epoch": 0.27164685908319186,
+      "eval_loss": 1.169433355331421,
+      "eval_runtime": 75.7878,
+      "eval_samples_per_second": 16.361,
+      "eval_steps_per_second": 4.09,
+      "step": 200
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 1.6109996434312397e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null