Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +363 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cf29443debbaeb8b5f6be635ecf3f3047541778f494fcf8578b48c5673875b5f
 size 671149168

 version https://git-lfs.github.com/spec/v1
+oid sha256:67ec32acdd200b3d6c0914fb315a322f2177453b9059dbe4be465b88716294c8
 size 671149168

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:313686e53d2b25a7cd48a6ca83b9283964f660070b9d2873bb008d29585307f1
 size 341314196

 version https://git-lfs.github.com/spec/v1
+oid sha256:b95f258ffc5f1e8caab2f20eea0b07f7d27d3b5b3f57772bc4db62e71b528790
 size 341314196

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b2d254e573e38a2c7feb17e84a4cc124ee608a87daa1ea24234af541b177d203
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:8155c8a248323f073e1f0baac921acee2f24fe33546a62224d3e6bbbb50ae56b
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b26348e70deb5f7d6cee52f5eb084c0e6829440001a3c6d0128b93cd074af8c2
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:b60d6f1383abda4776549360effee800fe6cfe2c0604503e9e3fbaa79347f790
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.00024003432190511376,
   "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 1.2958963282937366,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 9.388,
       "eval_steps_per_second": 2.359,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1103,7 +1461,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 0
       }
     },
     "TrainerControl": {
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.0316408827858125e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.00024003432190511376,
   "best_model_checkpoint": "miner_id_24/checkpoint-150",
+  "epoch": 1.7278617710583153,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 9.388,
       "eval_steps_per_second": 2.359,
       "step": 150
+    },
+    {
+      "epoch": 1.3045356371490282,
+      "grad_norm": 0.0014002382522448897,
+      "learning_rate": 1.0872630680850196e-05,
+      "loss": 0.0001,
+      "step": 151
+    },
+    {
+      "epoch": 1.3131749460043196,
+      "grad_norm": 0.0010238329414278269,
+      "learning_rate": 1.0456712550462898e-05,
+      "loss": 0.0002,
+      "step": 152
+    },
+    {
+      "epoch": 1.3218142548596112,
+      "grad_norm": 0.0012431687209755182,
+      "learning_rate": 1.0047504309801104e-05,
+      "loss": 0.0001,
+      "step": 153
+    },
+    {
+      "epoch": 1.3304535637149029,
+      "grad_norm": 0.0026770096737891436,
+      "learning_rate": 9.645117832311886e-06,
+      "loss": 0.0001,
+      "step": 154
+    },
+    {
+      "epoch": 1.3390928725701943,
+      "grad_norm": 0.01820327155292034,
+      "learning_rate": 9.249663126440394e-06,
+      "loss": 0.0002,
+      "step": 155
+    },
+    {
+      "epoch": 1.347732181425486,
+      "grad_norm": 0.01555480808019638,
+      "learning_rate": 8.861248305554624e-06,
+      "loss": 0.0002,
+      "step": 156
+    },
+    {
+      "epoch": 1.3563714902807775,
+      "grad_norm": 0.0015801583649590611,
+      "learning_rate": 8.47997955838829e-06,
+      "loss": 0.0002,
+      "step": 157
+    },
+    {
+      "epoch": 1.3650107991360692,
+      "grad_norm": 0.10153518617153168,
+      "learning_rate": 8.10596112000994e-06,
+      "loss": 0.0004,
+      "step": 158
+    },
+    {
+      "epoch": 1.3736501079913608,
+      "grad_norm": 0.0009876766707748175,
+      "learning_rate": 7.739295243326067e-06,
+      "loss": 0.0001,
+      "step": 159
+    },
+    {
+      "epoch": 1.3822894168466522,
+      "grad_norm": 0.0026555354706943035,
+      "learning_rate": 7.380082171126228e-06,
+      "loss": 0.0002,
+      "step": 160
+    },
+    {
+      "epoch": 1.3909287257019438,
+      "grad_norm": 0.0006382952560670674,
+      "learning_rate": 7.028420108677635e-06,
+      "loss": 0.0001,
+      "step": 161
+    },
+    {
+      "epoch": 1.3995680345572354,
+      "grad_norm": 0.0014154494274407625,
+      "learning_rate": 6.684405196876842e-06,
+      "loss": 0.0001,
+      "step": 162
+    },
+    {
+      "epoch": 1.408207343412527,
+      "grad_norm": 0.0011231210082769394,
+      "learning_rate": 6.3481314859657675e-06,
+      "loss": 0.0001,
+      "step": 163
+    },
+    {
+      "epoch": 1.4168466522678185,
+      "grad_norm": 0.0014514840440824628,
+      "learning_rate": 6.019690909819298e-06,
+      "loss": 0.0001,
+      "step": 164
+    },
+    {
+      "epoch": 1.42548596112311,
+      "grad_norm": 0.0013826994691044092,
+      "learning_rate": 5.6991732608115e-06,
+      "loss": 0.0001,
+      "step": 165
+    },
+    {
+      "epoch": 1.4341252699784017,
+      "grad_norm": 0.0013565586414188147,
+      "learning_rate": 5.386666165267256e-06,
+      "loss": 0.0001,
+      "step": 166
+    },
+    {
+      "epoch": 1.4427645788336934,
+      "grad_norm": 0.007900253869593143,
+      "learning_rate": 5.08225505950613e-06,
+      "loss": 0.0001,
+      "step": 167
+    },
+    {
+      "epoch": 1.451403887688985,
+      "grad_norm": 0.0012855289969593287,
+      "learning_rate": 4.786023166484913e-06,
+      "loss": 0.0001,
+      "step": 168
+    },
+    {
+      "epoch": 1.4600431965442764,
+      "grad_norm": 0.01582699827849865,
+      "learning_rate": 4.498051473045291e-06,
+      "loss": 0.0002,
+      "step": 169
+    },
+    {
+      "epoch": 1.468682505399568,
+      "grad_norm": 0.0007394661079160869,
+      "learning_rate": 4.218418707772886e-06,
+      "loss": 0.0001,
+      "step": 170
+    },
+    {
+      "epoch": 1.4773218142548596,
+      "grad_norm": 0.0008164668688550591,
+      "learning_rate": 3.947201319473587e-06,
+      "loss": 0.0001,
+      "step": 171
+    },
+    {
+      "epoch": 1.485961123110151,
+      "grad_norm": 0.002124256454408169,
+      "learning_rate": 3.684473456273278e-06,
+      "loss": 0.0002,
+      "step": 172
+    },
+    {
+      "epoch": 1.4946004319654427,
+      "grad_norm": 0.00903933122754097,
+      "learning_rate": 3.4303069453464383e-06,
+      "loss": 0.0003,
+      "step": 173
+    },
+    {
+      "epoch": 1.5032397408207343,
+      "grad_norm": 0.017047259956598282,
+      "learning_rate": 3.184771273279312e-06,
+      "loss": 0.0006,
+      "step": 174
+    },
+    {
+      "epoch": 1.511879049676026,
+      "grad_norm": 0.01836921088397503,
+      "learning_rate": 2.947933567072987e-06,
+      "loss": 0.0006,
+      "step": 175
+    },
+    {
+      "epoch": 1.5205183585313176,
+      "grad_norm": 0.0054769194684922695,
+      "learning_rate": 2.719858575791534e-06,
+      "loss": 0.0003,
+      "step": 176
+    },
+    {
+      "epoch": 1.5291576673866092,
+      "grad_norm": 0.05894150957465172,
+      "learning_rate": 2.500608652860256e-06,
+      "loss": 0.0009,
+      "step": 177
+    },
+    {
+      "epoch": 1.5377969762419006,
+      "grad_norm": 0.0017646638443693519,
+      "learning_rate": 2.2902437390188737e-06,
+      "loss": 0.0002,
+      "step": 178
+    },
+    {
+      "epoch": 1.5464362850971922,
+      "grad_norm": 0.0016004899516701698,
+      "learning_rate": 2.0888213459343587e-06,
+      "loss": 0.0002,
+      "step": 179
+    },
+    {
+      "epoch": 1.5550755939524838,
+      "grad_norm": 0.0009360113763250411,
+      "learning_rate": 1.8963965404777875e-06,
+      "loss": 0.0001,
+      "step": 180
+    },
+    {
+      "epoch": 1.5637149028077753,
+      "grad_norm": 0.0009685845579952002,
+      "learning_rate": 1.7130219296696263e-06,
+      "loss": 0.0001,
+      "step": 181
+    },
+    {
+      "epoch": 1.5723542116630669,
+      "grad_norm": 0.0022252460476011038,
+      "learning_rate": 1.5387476462974824e-06,
+      "loss": 0.0003,
+      "step": 182
+    },
+    {
+      "epoch": 1.5809935205183585,
+      "grad_norm": 0.0034834735561162233,
+      "learning_rate": 1.3736213352103147e-06,
+      "loss": 0.0001,
+      "step": 183
+    },
+    {
+      "epoch": 1.5896328293736501,
+      "grad_norm": 0.0007790013332851231,
+      "learning_rate": 1.2176881402928002e-06,
+      "loss": 0.0001,
+      "step": 184
+    },
+    {
+      "epoch": 1.5982721382289418,
+      "grad_norm": 0.0013246826129034162,
+      "learning_rate": 1.0709906921234367e-06,
+      "loss": 0.0001,
+      "step": 185
+    },
+    {
+      "epoch": 1.6069114470842334,
+      "grad_norm": 0.0007768021896481514,
+      "learning_rate": 9.33569096319799e-07,
+      "loss": 0.0001,
+      "step": 186
+    },
+    {
+      "epoch": 1.6155507559395248,
+      "grad_norm": 0.0009022291051223874,
+      "learning_rate": 8.054609225740255e-07,
+      "loss": 0.0001,
+      "step": 187
+    },
+    {
+      "epoch": 1.6241900647948164,
+      "grad_norm": 0.0007956126355566084,
+      "learning_rate": 6.867011943816724e-07,
+      "loss": 0.0001,
+      "step": 188
+    },
+    {
+      "epoch": 1.6328293736501078,
+      "grad_norm": 0.0012011040234938264,
+      "learning_rate": 5.77322379466617e-07,
+      "loss": 0.0001,
+      "step": 189
+    },
+    {
+      "epoch": 1.6414686825053995,
+      "grad_norm": 0.0011474161874502897,
+      "learning_rate": 4.773543809047186e-07,
+      "loss": 0.0001,
+      "step": 190
+    },
+    {
+      "epoch": 1.650107991360691,
+      "grad_norm": 0.0006911220261827111,
+      "learning_rate": 3.868245289486027e-07,
+      "loss": 0.0001,
+      "step": 191
+    },
+    {
+      "epoch": 1.6587473002159827,
+      "grad_norm": 0.0015587140806019306,
+      "learning_rate": 3.0575757355586817e-07,
+      "loss": 0.0001,
+      "step": 192
+    },
+    {
+      "epoch": 1.6673866090712743,
+      "grad_norm": 0.0011123515432700515,
+      "learning_rate": 2.3417567762266497e-07,
+      "loss": 0.0001,
+      "step": 193
+    },
+    {
+      "epoch": 1.676025917926566,
+      "grad_norm": 0.002342136111110449,
+      "learning_rate": 1.7209841092460043e-07,
+      "loss": 0.0001,
+      "step": 194
+    },
+    {
+      "epoch": 1.6846652267818576,
+      "grad_norm": 0.0016477032331749797,
+      "learning_rate": 1.1954274476655534e-07,
+      "loss": 0.0001,
+      "step": 195
+    },
+    {
+      "epoch": 1.693304535637149,
+      "grad_norm": 0.0014787918189540505,
+      "learning_rate": 7.652304734289127e-08,
+      "loss": 0.0001,
+      "step": 196
+    },
+    {
+      "epoch": 1.7019438444924406,
+      "grad_norm": 0.00296723167411983,
+      "learning_rate": 4.30510798093342e-08,
+      "loss": 0.0001,
+      "step": 197
+    },
+    {
+      "epoch": 1.710583153347732,
+      "grad_norm": 0.0015325212152674794,
+      "learning_rate": 1.9135993067588284e-08,
+      "loss": 0.0001,
+      "step": 198
+    },
+    {
+      "epoch": 1.7192224622030237,
+      "grad_norm": 0.00978434830904007,
+      "learning_rate": 4.784325263584854e-09,
+      "loss": 0.0002,
+      "step": 199
+    },
+    {
+      "epoch": 1.7278617710583153,
+      "grad_norm": 0.0022683811839669943,
+      "learning_rate": 0.0,
+      "loss": 0.0002,
+      "step": 200
+    },
+    {
+      "epoch": 1.7278617710583153,
+      "eval_loss": 0.00024209167168010026,
+      "eval_runtime": 20.7713,
+      "eval_samples_per_second": 9.388,
+      "eval_steps_per_second": 2.359,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 3.970690585555108e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null