Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:327bb09f07fadbb90419314f06fdf5a08855d88e3be0f9bb0713dc59e442d0e5
 size 639691872

 version https://git-lfs.github.com/spec/v1
+oid sha256:b3bf0784eff6ef207dcf81247f4b13dbc3535c71445a56dcd0d9aa6b86f15bb0
 size 639691872

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ea2ba130d7a8741b336dbe6dd615ad8beca8a93406d12b4568d879860dec51a7
 size 325339796

 version https://git-lfs.github.com/spec/v1
+oid sha256:084d9551da2d31c36e832b6718ed99fc5eb7c8158f32594366161a5af4e6b490
 size 325339796

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6105b16f046d46648a1f26f3589a752c7b16b448db198e4695cc445aa7c59d08
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:cbaa49b9aac57396fb70fa3deaa5fece67e5e8bebb3857e5382d74ed81e912c3
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8685a85e01d0081c4ee6b3d27083bc45de61653fc346f2b531f3e09e6eff0d83
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:4f96196bd3544de2c28f6af356470f327df948539b0e3259c46b8a6786b633fd
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.8605657815933228,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.4032258064516129,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 7.835,
       "eval_steps_per_second": 1.996,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 4.98940747186176e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.7936509251594543,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.5376344086021505,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 7.835,
       "eval_steps_per_second": 1.996,
       "step": 150
+    },
+    {
+      "epoch": 0.40591397849462363,
+      "grad_norm": 2.3321237564086914,
+      "learning_rate": 2.599578947368421e-05,
+      "loss": 3.6574,
+      "step": 151
+    },
+    {
+      "epoch": 0.40860215053763443,
+      "grad_norm": 2.897630214691162,
+      "learning_rate": 2.5465263157894738e-05,
+      "loss": 2.9582,
+      "step": 152
+    },
+    {
+      "epoch": 0.4112903225806452,
+      "grad_norm": 2.3918817043304443,
+      "learning_rate": 2.493473684210526e-05,
+      "loss": 2.7901,
+      "step": 153
+    },
+    {
+      "epoch": 0.41397849462365593,
+      "grad_norm": 2.5373759269714355,
+      "learning_rate": 2.440421052631579e-05,
+      "loss": 3.1311,
+      "step": 154
+    },
+    {
+      "epoch": 0.4166666666666667,
+      "grad_norm": 2.5285072326660156,
+      "learning_rate": 2.3873684210526313e-05,
+      "loss": 2.5189,
+      "step": 155
+    },
+    {
+      "epoch": 0.41935483870967744,
+      "grad_norm": 2.504533052444458,
+      "learning_rate": 2.3343157894736843e-05,
+      "loss": 3.2569,
+      "step": 156
+    },
+    {
+      "epoch": 0.4220430107526882,
+      "grad_norm": 2.3029768466949463,
+      "learning_rate": 2.281263157894737e-05,
+      "loss": 2.6988,
+      "step": 157
+    },
+    {
+      "epoch": 0.42473118279569894,
+      "grad_norm": 2.2616817951202393,
+      "learning_rate": 2.2282105263157892e-05,
+      "loss": 2.3272,
+      "step": 158
+    },
+    {
+      "epoch": 0.4274193548387097,
+      "grad_norm": 2.2895076274871826,
+      "learning_rate": 2.175157894736842e-05,
+      "loss": 2.3862,
+      "step": 159
+    },
+    {
+      "epoch": 0.43010752688172044,
+      "grad_norm": 1.9649949073791504,
+      "learning_rate": 2.1221052631578944e-05,
+      "loss": 2.1478,
+      "step": 160
+    },
+    {
+      "epoch": 0.4327956989247312,
+      "grad_norm": 2.0496561527252197,
+      "learning_rate": 2.0690526315789474e-05,
+      "loss": 1.3665,
+      "step": 161
+    },
+    {
+      "epoch": 0.43548387096774194,
+      "grad_norm": 2.4721364974975586,
+      "learning_rate": 2.016e-05,
+      "loss": 3.1437,
+      "step": 162
+    },
+    {
+      "epoch": 0.4381720430107527,
+      "grad_norm": 2.5533034801483154,
+      "learning_rate": 1.9629473684210526e-05,
+      "loss": 2.2656,
+      "step": 163
+    },
+    {
+      "epoch": 0.44086021505376344,
+      "grad_norm": 1.7975753545761108,
+      "learning_rate": 1.9098947368421053e-05,
+      "loss": 1.4366,
+      "step": 164
+    },
+    {
+      "epoch": 0.4435483870967742,
+      "grad_norm": 2.068775177001953,
+      "learning_rate": 1.856842105263158e-05,
+      "loss": 1.3049,
+      "step": 165
+    },
+    {
+      "epoch": 0.44623655913978494,
+      "grad_norm": 2.0522570610046387,
+      "learning_rate": 1.8037894736842105e-05,
+      "loss": 1.8766,
+      "step": 166
+    },
+    {
+      "epoch": 0.4489247311827957,
+      "grad_norm": 1.8050427436828613,
+      "learning_rate": 1.750736842105263e-05,
+      "loss": 1.501,
+      "step": 167
+    },
+    {
+      "epoch": 0.45161290322580644,
+      "grad_norm": 2.184504508972168,
+      "learning_rate": 1.6976842105263157e-05,
+      "loss": 1.7882,
+      "step": 168
+    },
+    {
+      "epoch": 0.4543010752688172,
+      "grad_norm": 1.4424117803573608,
+      "learning_rate": 1.6446315789473684e-05,
+      "loss": 0.6933,
+      "step": 169
+    },
+    {
+      "epoch": 0.45698924731182794,
+      "grad_norm": 2.0629289150238037,
+      "learning_rate": 1.591578947368421e-05,
+      "loss": 1.1983,
+      "step": 170
+    },
+    {
+      "epoch": 0.4596774193548387,
+      "grad_norm": 2.0509307384490967,
+      "learning_rate": 1.5385263157894736e-05,
+      "loss": 1.709,
+      "step": 171
+    },
+    {
+      "epoch": 0.46236559139784944,
+      "grad_norm": 1.423912525177002,
+      "learning_rate": 1.485473684210526e-05,
+      "loss": 0.6537,
+      "step": 172
+    },
+    {
+      "epoch": 0.4650537634408602,
+      "grad_norm": 2.0477561950683594,
+      "learning_rate": 1.4324210526315789e-05,
+      "loss": 0.791,
+      "step": 173
+    },
+    {
+      "epoch": 0.46774193548387094,
+      "grad_norm": 2.9739744663238525,
+      "learning_rate": 1.3793684210526316e-05,
+      "loss": 1.7767,
+      "step": 174
+    },
+    {
+      "epoch": 0.47043010752688175,
+      "grad_norm": 2.3965930938720703,
+      "learning_rate": 1.3263157894736841e-05,
+      "loss": 1.8458,
+      "step": 175
+    },
+    {
+      "epoch": 0.4731182795698925,
+      "grad_norm": 2.0706305503845215,
+      "learning_rate": 1.2732631578947369e-05,
+      "loss": 0.8644,
+      "step": 176
+    },
+    {
+      "epoch": 0.47580645161290325,
+      "grad_norm": 1.389144778251648,
+      "learning_rate": 1.2202105263157895e-05,
+      "loss": 0.5712,
+      "step": 177
+    },
+    {
+      "epoch": 0.478494623655914,
+      "grad_norm": 2.4105353355407715,
+      "learning_rate": 1.1671578947368421e-05,
+      "loss": 1.7319,
+      "step": 178
+    },
+    {
+      "epoch": 0.48118279569892475,
+      "grad_norm": 1.4054794311523438,
+      "learning_rate": 1.1141052631578946e-05,
+      "loss": 0.594,
+      "step": 179
+    },
+    {
+      "epoch": 0.4838709677419355,
+      "grad_norm": 1.2603883743286133,
+      "learning_rate": 1.0610526315789472e-05,
+      "loss": 0.521,
+      "step": 180
+    },
+    {
+      "epoch": 0.48655913978494625,
+      "grad_norm": 2.3499302864074707,
+      "learning_rate": 1.008e-05,
+      "loss": 1.2467,
+      "step": 181
+    },
+    {
+      "epoch": 0.489247311827957,
+      "grad_norm": 1.8965387344360352,
+      "learning_rate": 9.549473684210526e-06,
+      "loss": 1.0413,
+      "step": 182
+    },
+    {
+      "epoch": 0.49193548387096775,
+      "grad_norm": 1.9322240352630615,
+      "learning_rate": 9.018947368421052e-06,
+      "loss": 1.1569,
+      "step": 183
+    },
+    {
+      "epoch": 0.4946236559139785,
+      "grad_norm": 2.103156089782715,
+      "learning_rate": 8.488421052631579e-06,
+      "loss": 1.1426,
+      "step": 184
+    },
+    {
+      "epoch": 0.49731182795698925,
+      "grad_norm": 1.795997142791748,
+      "learning_rate": 7.957894736842105e-06,
+      "loss": 1.1144,
+      "step": 185
+    },
+    {
+      "epoch": 0.5,
+      "grad_norm": 2.546483278274536,
+      "learning_rate": 7.42736842105263e-06,
+      "loss": 1.2147,
+      "step": 186
+    },
+    {
+      "epoch": 0.5026881720430108,
+      "grad_norm": 2.0869460105895996,
+      "learning_rate": 6.896842105263158e-06,
+      "loss": 1.4491,
+      "step": 187
+    },
+    {
+      "epoch": 0.5053763440860215,
+      "grad_norm": 2.946209192276001,
+      "learning_rate": 6.3663157894736845e-06,
+      "loss": 1.5029,
+      "step": 188
+    },
+    {
+      "epoch": 0.5080645161290323,
+      "grad_norm": 1.2845457792282104,
+      "learning_rate": 5.835789473684211e-06,
+      "loss": 0.5538,
+      "step": 189
+    },
+    {
+      "epoch": 0.510752688172043,
+      "grad_norm": 1.6108278036117554,
+      "learning_rate": 5.305263157894736e-06,
+      "loss": 0.5395,
+      "step": 190
+    },
+    {
+      "epoch": 0.5134408602150538,
+      "grad_norm": 2.4547786712646484,
+      "learning_rate": 4.774736842105263e-06,
+      "loss": 0.7314,
+      "step": 191
+    },
+    {
+      "epoch": 0.5161290322580645,
+      "grad_norm": 4.319262981414795,
+      "learning_rate": 4.244210526315789e-06,
+      "loss": 2.1648,
+      "step": 192
+    },
+    {
+      "epoch": 0.5188172043010753,
+      "grad_norm": 1.8788155317306519,
+      "learning_rate": 3.713684210526315e-06,
+      "loss": 0.9098,
+      "step": 193
+    },
+    {
+      "epoch": 0.521505376344086,
+      "grad_norm": 2.349867820739746,
+      "learning_rate": 3.1831578947368422e-06,
+      "loss": 1.2202,
+      "step": 194
+    },
+    {
+      "epoch": 0.5241935483870968,
+      "grad_norm": 2.8968505859375,
+      "learning_rate": 2.652631578947368e-06,
+      "loss": 1.8995,
+      "step": 195
+    },
+    {
+      "epoch": 0.5268817204301075,
+      "grad_norm": 4.167923927307129,
+      "learning_rate": 2.1221052631578947e-06,
+      "loss": 2.6088,
+      "step": 196
+    },
+    {
+      "epoch": 0.5295698924731183,
+      "grad_norm": 4.212091445922852,
+      "learning_rate": 1.5915789473684211e-06,
+      "loss": 1.8908,
+      "step": 197
+    },
+    {
+      "epoch": 0.532258064516129,
+      "grad_norm": 1.670013666152954,
+      "learning_rate": 1.0610526315789473e-06,
+      "loss": 0.529,
+      "step": 198
+    },
+    {
+      "epoch": 0.5349462365591398,
+      "grad_norm": 2.4547181129455566,
+      "learning_rate": 5.305263157894737e-07,
+      "loss": 0.7957,
+      "step": 199
+    },
+    {
+      "epoch": 0.5376344086021505,
+      "grad_norm": 2.100339651107788,
+      "learning_rate": 0.0,
+      "loss": 0.3624,
+      "step": 200
+    },
+    {
+      "epoch": 0.5376344086021505,
+      "eval_loss": 0.7936509251594543,
+      "eval_runtime": 20.0443,
+      "eval_samples_per_second": 7.833,
+      "eval_steps_per_second": 1.996,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 6.65254329581568e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null