Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1017f957dcded6000e12603fb76f2553ed487d24bcc3527967bb89acb47530de
 size 891333320

 version https://git-lfs.github.com/spec/v1
+oid sha256:8b767ba5e8ce751880f3f4ebf83a1266322e85b92bc8b348e2d66e8c93c680fd
 size 891333320

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a97ba2db2f918f2d15bcc6390b2cc7806f327b00b544a9c57b0fb0b0ad36a885
 size 452984596

 version https://git-lfs.github.com/spec/v1
+oid sha256:93efffe424054f7a894a471b76063c80482a406f38b102e5bac40d23b264e731
 size 452984596

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:57645f2c6130b0019a3d70e4f7a375692f5e849e44c5f7d42df1b8670004659b
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:151ba8290837d1789e85c8f0409fc783ab80ce1480475b46c68232fab884254c
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:993ec630ffd55a49125eacfd3aa86aa61c305caeb20b41b21cdd9a15eb377712
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:8fb6e84b61e1d7eca31b5c24862f17cdfbe140bbf377a76353176857bcb8fef1
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.2351786196231842,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.12269938650306748,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 3.115,
       "eval_steps_per_second": 0.78,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.870788720328704e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.21266202628612518,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.16359918200409,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 3.115,
       "eval_steps_per_second": 0.78,
       "step": 150
+    },
+    {
+      "epoch": 0.12351738241308793,
+      "grad_norm": 0.430698424577713,
+      "learning_rate": 2.6098947368421053e-05,
+      "loss": 0.838,
+      "step": 151
+    },
+    {
+      "epoch": 0.12433537832310838,
+      "grad_norm": 0.4114360213279724,
+      "learning_rate": 2.5566315789473684e-05,
+      "loss": 0.6347,
+      "step": 152
+    },
+    {
+      "epoch": 0.12515337423312883,
+      "grad_norm": 5.368963241577148,
+      "learning_rate": 2.5033684210526312e-05,
+      "loss": 1.7455,
+      "step": 153
+    },
+    {
+      "epoch": 0.1259713701431493,
+      "grad_norm": 0.36379197239875793,
+      "learning_rate": 2.4501052631578947e-05,
+      "loss": 0.5644,
+      "step": 154
+    },
+    {
+      "epoch": 0.12678936605316973,
+      "grad_norm": 0.3544858694076538,
+      "learning_rate": 2.3968421052631575e-05,
+      "loss": 0.5105,
+      "step": 155
+    },
+    {
+      "epoch": 0.1276073619631902,
+      "grad_norm": 0.3365378975868225,
+      "learning_rate": 2.343578947368421e-05,
+      "loss": 0.4261,
+      "step": 156
+    },
+    {
+      "epoch": 0.12842535787321063,
+      "grad_norm": 0.4293052852153778,
+      "learning_rate": 2.290315789473684e-05,
+      "loss": 0.695,
+      "step": 157
+    },
+    {
+      "epoch": 0.1292433537832311,
+      "grad_norm": 0.5024716854095459,
+      "learning_rate": 2.237052631578947e-05,
+      "loss": 0.7874,
+      "step": 158
+    },
+    {
+      "epoch": 0.13006134969325153,
+      "grad_norm": 0.4503779113292694,
+      "learning_rate": 2.1837894736842106e-05,
+      "loss": 0.6787,
+      "step": 159
+    },
+    {
+      "epoch": 0.130879345603272,
+      "grad_norm": 0.5354055166244507,
+      "learning_rate": 2.1305263157894734e-05,
+      "loss": 0.8901,
+      "step": 160
+    },
+    {
+      "epoch": 0.13169734151329243,
+      "grad_norm": 0.6013686656951904,
+      "learning_rate": 2.0772631578947368e-05,
+      "loss": 0.6101,
+      "step": 161
+    },
+    {
+      "epoch": 0.1325153374233129,
+      "grad_norm": 0.5253039002418518,
+      "learning_rate": 2.024e-05,
+      "loss": 0.6207,
+      "step": 162
+    },
+    {
+      "epoch": 0.13333333333333333,
+      "grad_norm": 0.5484157800674438,
+      "learning_rate": 1.970736842105263e-05,
+      "loss": 0.63,
+      "step": 163
+    },
+    {
+      "epoch": 0.1341513292433538,
+      "grad_norm": 0.376302570104599,
+      "learning_rate": 1.917473684210526e-05,
+      "loss": 0.3893,
+      "step": 164
+    },
+    {
+      "epoch": 0.13496932515337423,
+      "grad_norm": 0.41201335191726685,
+      "learning_rate": 1.8642105263157893e-05,
+      "loss": 0.4192,
+      "step": 165
+    },
+    {
+      "epoch": 0.1357873210633947,
+      "grad_norm": 0.69189453125,
+      "learning_rate": 1.8109473684210524e-05,
+      "loss": 0.8196,
+      "step": 166
+    },
+    {
+      "epoch": 0.13660531697341513,
+      "grad_norm": 0.3967001140117645,
+      "learning_rate": 1.757684210526316e-05,
+      "loss": 0.4478,
+      "step": 167
+    },
+    {
+      "epoch": 0.1374233128834356,
+      "grad_norm": 0.40037596225738525,
+      "learning_rate": 1.704421052631579e-05,
+      "loss": 0.437,
+      "step": 168
+    },
+    {
+      "epoch": 0.13824130879345603,
+      "grad_norm": 0.4589173197746277,
+      "learning_rate": 1.651157894736842e-05,
+      "loss": 0.5041,
+      "step": 169
+    },
+    {
+      "epoch": 0.1390593047034765,
+      "grad_norm": 0.5317126512527466,
+      "learning_rate": 1.5978947368421052e-05,
+      "loss": 0.5844,
+      "step": 170
+    },
+    {
+      "epoch": 0.13987730061349693,
+      "grad_norm": 0.6097099184989929,
+      "learning_rate": 1.5446315789473683e-05,
+      "loss": 0.8981,
+      "step": 171
+    },
+    {
+      "epoch": 0.1406952965235174,
+      "grad_norm": 0.5526396632194519,
+      "learning_rate": 1.4913684210526314e-05,
+      "loss": 0.5214,
+      "step": 172
+    },
+    {
+      "epoch": 0.14151329243353783,
+      "grad_norm": 0.49050372838974,
+      "learning_rate": 1.4381052631578945e-05,
+      "loss": 0.3992,
+      "step": 173
+    },
+    {
+      "epoch": 0.1423312883435583,
+      "grad_norm": 0.6631816029548645,
+      "learning_rate": 1.3848421052631578e-05,
+      "loss": 0.7089,
+      "step": 174
+    },
+    {
+      "epoch": 0.14314928425357873,
+      "grad_norm": 0.6019887328147888,
+      "learning_rate": 1.331578947368421e-05,
+      "loss": 0.6868,
+      "step": 175
+    },
+    {
+      "epoch": 0.1439672801635992,
+      "grad_norm": 0.46158432960510254,
+      "learning_rate": 1.2783157894736842e-05,
+      "loss": 0.4053,
+      "step": 176
+    },
+    {
+      "epoch": 0.14478527607361963,
+      "grad_norm": 3.7597968578338623,
+      "learning_rate": 1.2250526315789473e-05,
+      "loss": 0.6116,
+      "step": 177
+    },
+    {
+      "epoch": 0.1456032719836401,
+      "grad_norm": 0.3607610762119293,
+      "learning_rate": 1.1717894736842105e-05,
+      "loss": 0.2041,
+      "step": 178
+    },
+    {
+      "epoch": 0.14642126789366053,
+      "grad_norm": 0.6213468313217163,
+      "learning_rate": 1.1185263157894736e-05,
+      "loss": 0.6546,
+      "step": 179
+    },
+    {
+      "epoch": 0.147239263803681,
+      "grad_norm": 0.4181584119796753,
+      "learning_rate": 1.0652631578947367e-05,
+      "loss": 0.3367,
+      "step": 180
+    },
+    {
+      "epoch": 0.14805725971370143,
+      "grad_norm": 0.7935755848884583,
+      "learning_rate": 1.012e-05,
+      "loss": 0.7382,
+      "step": 181
+    },
+    {
+      "epoch": 0.1488752556237219,
+      "grad_norm": 0.7149289846420288,
+      "learning_rate": 9.58736842105263e-06,
+      "loss": 0.6995,
+      "step": 182
+    },
+    {
+      "epoch": 0.14969325153374233,
+      "grad_norm": 0.6175360083580017,
+      "learning_rate": 9.054736842105262e-06,
+      "loss": 0.515,
+      "step": 183
+    },
+    {
+      "epoch": 0.1505112474437628,
+      "grad_norm": 0.432099848985672,
+      "learning_rate": 8.522105263157895e-06,
+      "loss": 0.222,
+      "step": 184
+    },
+    {
+      "epoch": 0.15132924335378323,
+      "grad_norm": 0.14052408933639526,
+      "learning_rate": 7.989473684210526e-06,
+      "loss": 0.0522,
+      "step": 185
+    },
+    {
+      "epoch": 0.1521472392638037,
+      "grad_norm": 0.06722941249608994,
+      "learning_rate": 7.456842105263157e-06,
+      "loss": 0.0043,
+      "step": 186
+    },
+    {
+      "epoch": 0.15296523517382413,
+      "grad_norm": 0.015035667456686497,
+      "learning_rate": 6.924210526315789e-06,
+      "loss": 0.0008,
+      "step": 187
+    },
+    {
+      "epoch": 0.1537832310838446,
+      "grad_norm": 0.15017950534820557,
+      "learning_rate": 6.391578947368421e-06,
+      "loss": 0.003,
+      "step": 188
+    },
+    {
+      "epoch": 0.15460122699386503,
+      "grad_norm": 0.018131496384739876,
+      "learning_rate": 5.858947368421052e-06,
+      "loss": 0.001,
+      "step": 189
+    },
+    {
+      "epoch": 0.1554192229038855,
+      "grad_norm": 0.015157670713961124,
+      "learning_rate": 5.326315789473683e-06,
+      "loss": 0.0008,
+      "step": 190
+    },
+    {
+      "epoch": 0.15623721881390593,
+      "grad_norm": 0.11725586652755737,
+      "learning_rate": 4.793684210526315e-06,
+      "loss": 0.0067,
+      "step": 191
+    },
+    {
+      "epoch": 0.1570552147239264,
+      "grad_norm": 0.043377045542001724,
+      "learning_rate": 4.261052631578947e-06,
+      "loss": 0.0024,
+      "step": 192
+    },
+    {
+      "epoch": 0.15787321063394683,
+      "grad_norm": 0.10283850133419037,
+      "learning_rate": 3.7284210526315786e-06,
+      "loss": 0.004,
+      "step": 193
+    },
+    {
+      "epoch": 0.1586912065439673,
+      "grad_norm": 0.1786062866449356,
+      "learning_rate": 3.1957894736842106e-06,
+      "loss": 0.0071,
+      "step": 194
+    },
+    {
+      "epoch": 0.15950920245398773,
+      "grad_norm": 0.13341274857521057,
+      "learning_rate": 2.6631578947368417e-06,
+      "loss": 0.0007,
+      "step": 195
+    },
+    {
+      "epoch": 0.1603271983640082,
+      "grad_norm": 0.1136331856250763,
+      "learning_rate": 2.1305263157894737e-06,
+      "loss": 0.0033,
+      "step": 196
+    },
+    {
+      "epoch": 0.16114519427402862,
+      "grad_norm": 0.052339375019073486,
+      "learning_rate": 1.5978947368421053e-06,
+      "loss": 0.002,
+      "step": 197
+    },
+    {
+      "epoch": 0.1619631901840491,
+      "grad_norm": 0.01893027499318123,
+      "learning_rate": 1.0652631578947369e-06,
+      "loss": 0.0008,
+      "step": 198
+    },
+    {
+      "epoch": 0.16278118609406952,
+      "grad_norm": 0.01076345145702362,
+      "learning_rate": 5.326315789473684e-07,
+      "loss": 0.0006,
+      "step": 199
+    },
+    {
+      "epoch": 0.16359918200409,
+      "grad_norm": 0.029393598437309265,
+      "learning_rate": 0.0,
+      "loss": 0.0015,
+      "step": 200
+    },
+    {
+      "epoch": 0.16359918200409,
+      "eval_loss": 0.21266202628612518,
+      "eval_runtime": 165.9021,
+      "eval_samples_per_second": 3.104,
+      "eval_steps_per_second": 0.778,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.484049111154688e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null