Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:453c1d5cf47fb4e3921e12014c1829ebb1eb709f2b6e1d36a97378a35d8f12b6
 size 639691872

 version https://git-lfs.github.com/spec/v1
+oid sha256:c0d69654c85da1ceb3fb9c1383635174ef9d431ff72d0d37f5d006adabab8a8d
 size 639691872

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f3f280d6c7741c2b9b8aecfd959bb1e36d0f2e1f9256824b2ea2305104151e16
 size 1279641042

 version https://git-lfs.github.com/spec/v1
+oid sha256:6c18e364229a196511c72883765ce49a6d50d266e888498ac6a841d9d727f824
 size 1279641042

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7e56946399aabee448fd3c422a118df88cf2bc9a4a8936d73afb8a779bd8edd2
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:e8ecd75598e972c810cf285aaa1e527eb3cd79801e76d98021a5e8cc302bbf63
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dfd59dca009004df561617f8f6994512d029a952a68609cac24b36df5a0757ce
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.6301707029342651,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.10542962572482868,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 7.242,
       "eval_steps_per_second": 3.622,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.63693695041536e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.6194185614585876,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.14057283429977158,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 7.242,
       "eval_steps_per_second": 3.622,
       "step": 150
+    },
+    {
+      "epoch": 0.10613248989632754,
+      "grad_norm": 0.16006144881248474,
+      "learning_rate": 1.9136088935510362e-05,
+      "loss": 0.269,
+      "step": 151
+    },
+    {
+      "epoch": 0.10683535406782639,
+      "grad_norm": 0.16216064989566803,
+      "learning_rate": 1.8414449687337464e-05,
+      "loss": 0.25,
+      "step": 152
+    },
+    {
+      "epoch": 0.10753821823932525,
+      "grad_norm": 0.13053874671459198,
+      "learning_rate": 1.7703596875660645e-05,
+      "loss": 0.2243,
+      "step": 153
+    },
+    {
+      "epoch": 0.10824108241082411,
+      "grad_norm": 0.15271152555942535,
+      "learning_rate": 1.700377325606388e-05,
+      "loss": 0.2549,
+      "step": 154
+    },
+    {
+      "epoch": 0.10894394658232297,
+      "grad_norm": 0.14837241172790527,
+      "learning_rate": 1.631521781767214e-05,
+      "loss": 0.2801,
+      "step": 155
+    },
+    {
+      "epoch": 0.10964681075382182,
+      "grad_norm": 0.18986637890338898,
+      "learning_rate": 1.5638165701536868e-05,
+      "loss": 0.3478,
+      "step": 156
+    },
+    {
+      "epoch": 0.11034967492532068,
+      "grad_norm": 0.1956043392419815,
+      "learning_rate": 1.4972848120335453e-05,
+      "loss": 0.3824,
+      "step": 157
+    },
+    {
+      "epoch": 0.11105253909681954,
+      "grad_norm": 0.19598978757858276,
+      "learning_rate": 1.4319492279412388e-05,
+      "loss": 0.469,
+      "step": 158
+    },
+    {
+      "epoch": 0.1117554032683184,
+      "grad_norm": 0.2101970613002777,
+      "learning_rate": 1.3678321299188801e-05,
+      "loss": 0.5021,
+      "step": 159
+    },
+    {
+      "epoch": 0.11245826743981725,
+      "grad_norm": 0.18322844803333282,
+      "learning_rate": 1.3049554138967051e-05,
+      "loss": 0.4139,
+      "step": 160
+    },
+    {
+      "epoch": 0.11316113161131611,
+      "grad_norm": 0.2107023447751999,
+      "learning_rate": 1.2433405522156332e-05,
+      "loss": 0.5289,
+      "step": 161
+    },
+    {
+      "epoch": 0.11386399578281498,
+      "grad_norm": 0.22936807572841644,
+      "learning_rate": 1.183008586294485e-05,
+      "loss": 0.597,
+      "step": 162
+    },
+    {
+      "epoch": 0.11456685995431383,
+      "grad_norm": 0.23180989921092987,
+      "learning_rate": 1.1239801194443506e-05,
+      "loss": 0.6878,
+      "step": 163
+    },
+    {
+      "epoch": 0.11526972412581268,
+      "grad_norm": 0.21876811981201172,
+      "learning_rate": 1.066275309832584e-05,
+      "loss": 0.5481,
+      "step": 164
+    },
+    {
+      "epoch": 0.11597258829731154,
+      "grad_norm": 0.23612289130687714,
+      "learning_rate": 1.0099138635988026e-05,
+      "loss": 0.6402,
+      "step": 165
+    },
+    {
+      "epoch": 0.1166754524688104,
+      "grad_norm": 0.22611941397190094,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 0.6465,
+      "step": 166
+    },
+    {
+      "epoch": 0.11737831664030926,
+      "grad_norm": 0.245264932513237,
+      "learning_rate": 9.012975854638949e-06,
+      "loss": 0.6396,
+      "step": 167
+    },
+    {
+      "epoch": 0.11808118081180811,
+      "grad_norm": 0.23365627229213715,
+      "learning_rate": 8.490798459222476e-06,
+      "loss": 0.5333,
+      "step": 168
+    },
+    {
+      "epoch": 0.11878404498330698,
+      "grad_norm": 0.23980385065078735,
+      "learning_rate": 7.982796418105371e-06,
+      "loss": 0.6043,
+      "step": 169
+    },
+    {
+      "epoch": 0.11948690915480584,
+      "grad_norm": 0.24617138504981995,
+      "learning_rate": 7.489143213519301e-06,
+      "loss": 0.6639,
+      "step": 170
+    },
+    {
+      "epoch": 0.12018977332630469,
+      "grad_norm": 0.23338913917541504,
+      "learning_rate": 7.010007427581378e-06,
+      "loss": 0.5935,
+      "step": 171
+    },
+    {
+      "epoch": 0.12089263749780355,
+      "grad_norm": 0.25520893931388855,
+      "learning_rate": 6.5455526847235825e-06,
+      "loss": 0.6685,
+      "step": 172
+    },
+    {
+      "epoch": 0.12159550166930241,
+      "grad_norm": 0.2546359896659851,
+      "learning_rate": 6.0959375958151045e-06,
+      "loss": 0.6254,
+      "step": 173
+    },
+    {
+      "epoch": 0.12229836584080127,
+      "grad_norm": 0.25202396512031555,
+      "learning_rate": 5.6613157039969055e-06,
+      "loss": 0.7532,
+      "step": 174
+    },
+    {
+      "epoch": 0.12300123001230012,
+      "grad_norm": 0.2818569540977478,
+      "learning_rate": 5.241835432246889e-06,
+      "loss": 0.7886,
+      "step": 175
+    },
+    {
+      "epoch": 0.12370409418379898,
+      "grad_norm": 0.2797452211380005,
+      "learning_rate": 4.837640032693558e-06,
+      "loss": 0.7124,
+      "step": 176
+    },
+    {
+      "epoch": 0.12440695835529784,
+      "grad_norm": 0.27972176671028137,
+      "learning_rate": 4.448867537695578e-06,
+      "loss": 0.8054,
+      "step": 177
+    },
+    {
+      "epoch": 0.1251098225267967,
+      "grad_norm": 0.27351635694503784,
+      "learning_rate": 4.075650712703849e-06,
+      "loss": 0.7271,
+      "step": 178
+    },
+    {
+      "epoch": 0.12581268669829557,
+      "grad_norm": 0.2771807312965393,
+      "learning_rate": 3.71811701092219e-06,
+      "loss": 0.6967,
+      "step": 179
+    },
+    {
+      "epoch": 0.1265155508697944,
+      "grad_norm": 0.28817304968833923,
+      "learning_rate": 3.376388529782215e-06,
+      "loss": 0.762,
+      "step": 180
+    },
+    {
+      "epoch": 0.12721841504129328,
+      "grad_norm": 0.3513469099998474,
+      "learning_rate": 3.0505819692471792e-06,
+      "loss": 0.7149,
+      "step": 181
+    },
+    {
+      "epoch": 0.12792127921279212,
+      "grad_norm": 0.30512431263923645,
+      "learning_rate": 2.7408085919590264e-06,
+      "loss": 0.6649,
+      "step": 182
+    },
+    {
+      "epoch": 0.12862414338429098,
+      "grad_norm": 0.38616737723350525,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 0.7642,
+      "step": 183
+    },
+    {
+      "epoch": 0.12932700755578985,
+      "grad_norm": 0.4033832550048828,
+      "learning_rate": 2.1697790249779636e-06,
+      "loss": 0.6318,
+      "step": 184
+    },
+    {
+      "epoch": 0.1300298717272887,
+      "grad_norm": 0.3767646253108978,
+      "learning_rate": 1.908717841359048e-06,
+      "loss": 0.6769,
+      "step": 185
+    },
+    {
+      "epoch": 0.13073273589878756,
+      "grad_norm": 0.432635098695755,
+      "learning_rate": 1.6640797865406288e-06,
+      "loss": 0.7082,
+      "step": 186
+    },
+    {
+      "epoch": 0.13143560007028643,
+      "grad_norm": 0.36078161001205444,
+      "learning_rate": 1.4359484041943038e-06,
+      "loss": 0.8317,
+      "step": 187
+    },
+    {
+      "epoch": 0.13213846424178527,
+      "grad_norm": 0.3384253978729248,
+      "learning_rate": 1.2244016009781701e-06,
+      "loss": 0.8636,
+      "step": 188
+    },
+    {
+      "epoch": 0.13284132841328414,
+      "grad_norm": 0.30498048663139343,
+      "learning_rate": 1.0295116199317057e-06,
+      "loss": 0.777,
+      "step": 189
+    },
+    {
+      "epoch": 0.13354419258478298,
+      "grad_norm": 0.28438106179237366,
+      "learning_rate": 8.513450158049108e-07,
+      "loss": 0.8401,
+      "step": 190
+    },
+    {
+      "epoch": 0.13424705675628185,
+      "grad_norm": 0.2901749312877655,
+      "learning_rate": 6.899626323298713e-07,
+      "loss": 0.8142,
+      "step": 191
+    },
+    {
+      "epoch": 0.13494992092778071,
+      "grad_norm": 0.2804357707500458,
+      "learning_rate": 5.454195814427021e-07,
+      "loss": 0.777,
+      "step": 192
+    },
+    {
+      "epoch": 0.13565278509927955,
+      "grad_norm": 0.30183541774749756,
+      "learning_rate": 4.177652244628627e-07,
+      "loss": 0.868,
+      "step": 193
+    },
+    {
+      "epoch": 0.13635564927077842,
+      "grad_norm": 0.3358307480812073,
+      "learning_rate": 3.0704315523631953e-07,
+      "loss": 0.8041,
+      "step": 194
+    },
+    {
+      "epoch": 0.1370585134422773,
+      "grad_norm": 0.3486460745334625,
+      "learning_rate": 2.1329118524827662e-07,
+      "loss": 0.807,
+      "step": 195
+    },
+    {
+      "epoch": 0.13776137761377613,
+      "grad_norm": 0.341403603553772,
+      "learning_rate": 1.3654133071059893e-07,
+      "loss": 0.8237,
+      "step": 196
+    },
+    {
+      "epoch": 0.138464241785275,
+      "grad_norm": 0.3632611632347107,
+      "learning_rate": 7.681980162830282e-08,
+      "loss": 0.8123,
+      "step": 197
+    },
+    {
+      "epoch": 0.13916710595677384,
+      "grad_norm": 0.38071852922439575,
+      "learning_rate": 3.4146992848854695e-08,
+      "loss": 0.8128,
+      "step": 198
+    },
+    {
+      "epoch": 0.1398699701282727,
+      "grad_norm": 0.40551653504371643,
+      "learning_rate": 8.537477097364522e-09,
+      "loss": 0.876,
+      "step": 199
+    },
+    {
+      "epoch": 0.14057283429977158,
+      "grad_norm": 0.4792748689651489,
+      "learning_rate": 0.0,
+      "loss": 0.8343,
+      "step": 200
+    },
+    {
+      "epoch": 0.14057283429977158,
+      "eval_loss": 0.6194185614585876,
+      "eval_runtime": 331.5493,
+      "eval_samples_per_second": 7.23,
+      "eval_steps_per_second": 3.616,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 4.83803453718528e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null