Training in progress, step 100, checkpoint

Browse files

Files changed (12) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/rng_state_4.pth +1 -1
last-checkpoint/rng_state_5.pth +1 -1
last-checkpoint/rng_state_6.pth +1 -1
last-checkpoint/rng_state_7.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +363 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a8a49a091a872ef62704ea49f3a58664e3abf27c03c8c8b0d49836256b20c0bd
 size 639691872

 version https://git-lfs.github.com/spec/v1
+oid sha256:3dacb94f4c3db1f3f8846866302243300c9b7cfe6a8af70e0e73d5f187b91d90
 size 639691872

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7be956b3d748691f14c008d0ef297a481a7325c76ce8fb7a7ffe144b5540dee7
 size 325339796

 version https://git-lfs.github.com/spec/v1
+oid sha256:5d7da7d08b84866b01f2d25f3bc946756b3796b16a8c75bd017c2f3a6cdd316c
 size 325339796

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0c8cbf9d903bf0816b05cc0952fee067cf09b9bdb153d5cc77076cf5f303247d
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:3356da821dae79a784a6750ab4501d0f5a7c8755fe80b2c59455f49121810a79
 size 15984

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:22437fcb03e9d37c80f2e168fe330e3500981c5fc3338e25bd25138708710d87
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:31c31bc5b4ce122946d5e6d935d6f0e7b59a5c3f8527767b37fab6599541eafa
 size 15984

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:86daa9f6f7d1a01135f22e35016a3da71ff4996b60a4a113309932034093c642
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:674f48eb96aac82ac7c27099dab2ce7b2ca02397e65e74ab34902206f61d1c00
 size 15984

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c2d0046f5e281489bd6ad0cd82d925d5c4ba8cbd8536559975badd7948bc271b
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:7081d2dbf902e430a2d98e5f49a4ebbd91afb951f26f658ae17a540d3adb3dd7
 size 15984

last-checkpoint/rng_state_4.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2c560dfff7c273995faa3f7e48b302f3de4a66340fa1364e523e4d736d1f7fb1
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:f9cf0b7703139e63d7207fd09059c7c9b0cef01bdf844e8b8474e7b1fa1172b5
 size 15984

last-checkpoint/rng_state_5.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1261a309c8ce47aa73685603c0e027addf62ad8dae6ec918dafd325149511546
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:23ef933c3efa4411ffa43b27049a7a8137012cb0c23240ff6ae1c68f27726849
 size 15984

last-checkpoint/rng_state_6.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:74cd71a129165caf155be810d4ec61ee104d8c48ccd09dd50f9640f2f4ed1619
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:f6a5c29bf8829c9bb2e6b7bc9722dceaa322ce778a5bfff62135baa7cbf57e41
 size 15984

last-checkpoint/rng_state_7.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:93317d3f1822a246edbaa1f880bff9fbd4f1d67942cc6634afd4d8835dd6786d
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:146109f2f784eb7fcee232dc81869b691fa0991c3c8b95abdd635b8ce609398a
 size 15984

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4b7e7d044a338e6b420016be2e48e692869df520a6f768a0f5ba8de63e9bb378
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:f6b2f615f21faa4fde4442b48613d92f84c55cd5ef4fb4d04d8e3f819305ce14
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.6940747499465942,
-  "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.31347962382445144,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -373,6 +373,364 @@
       "eval_samples_per_second": 95.481,
       "eval_steps_per_second": 3.021,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -401,7 +759,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.3985389026738176e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.6180227994918823,
+  "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 0.6269592476489029,
   "eval_steps": 50,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 95.481,
       "eval_steps_per_second": 3.021,
       "step": 50
+    },
+    {
+      "epoch": 0.31974921630094044,
+      "grad_norm": 0.11734400689601898,
+      "learning_rate": 7.920526315789474e-05,
+      "loss": 1.4185,
+      "step": 51
+    },
+    {
+      "epoch": 0.32601880877742945,
+      "grad_norm": 0.24825617671012878,
+      "learning_rate": 7.867368421052631e-05,
+      "loss": 1.6477,
+      "step": 52
+    },
+    {
+      "epoch": 0.3322884012539185,
+      "grad_norm": 0.2786071002483368,
+      "learning_rate": 7.814210526315789e-05,
+      "loss": 1.6508,
+      "step": 53
+    },
+    {
+      "epoch": 0.3385579937304075,
+      "grad_norm": 0.22523944079875946,
+      "learning_rate": 7.761052631578946e-05,
+      "loss": 1.7142,
+      "step": 54
+    },
+    {
+      "epoch": 0.3448275862068966,
+      "grad_norm": 0.20191609859466553,
+      "learning_rate": 7.707894736842105e-05,
+      "loss": 1.7316,
+      "step": 55
+    },
+    {
+      "epoch": 0.3510971786833856,
+      "grad_norm": 0.2791290283203125,
+      "learning_rate": 7.654736842105264e-05,
+      "loss": 1.754,
+      "step": 56
+    },
+    {
+      "epoch": 0.3573667711598746,
+      "grad_norm": 0.10366496443748474,
+      "learning_rate": 7.601578947368422e-05,
+      "loss": 1.2027,
+      "step": 57
+    },
+    {
+      "epoch": 0.36363636363636365,
+      "grad_norm": 0.15324437618255615,
+      "learning_rate": 7.548421052631579e-05,
+      "loss": 1.6548,
+      "step": 58
+    },
+    {
+      "epoch": 0.36990595611285265,
+      "grad_norm": 0.1608223021030426,
+      "learning_rate": 7.495263157894737e-05,
+      "loss": 1.6525,
+      "step": 59
+    },
+    {
+      "epoch": 0.3761755485893417,
+      "grad_norm": 0.20208290219306946,
+      "learning_rate": 7.442105263157894e-05,
+      "loss": 1.6865,
+      "step": 60
+    },
+    {
+      "epoch": 0.3824451410658307,
+      "grad_norm": 0.23002368211746216,
+      "learning_rate": 7.388947368421053e-05,
+      "loss": 1.7319,
+      "step": 61
+    },
+    {
+      "epoch": 0.3887147335423197,
+      "grad_norm": 0.25719794631004333,
+      "learning_rate": 7.335789473684211e-05,
+      "loss": 1.7345,
+      "step": 62
+    },
+    {
+      "epoch": 0.3949843260188088,
+      "grad_norm": 0.11874490976333618,
+      "learning_rate": 7.282631578947368e-05,
+      "loss": 1.1791,
+      "step": 63
+    },
+    {
+      "epoch": 0.4012539184952978,
+      "grad_norm": 0.13194750249385834,
+      "learning_rate": 7.229473684210527e-05,
+      "loss": 1.5006,
+      "step": 64
+    },
+    {
+      "epoch": 0.40752351097178685,
+      "grad_norm": 0.16368307173252106,
+      "learning_rate": 7.176315789473685e-05,
+      "loss": 1.653,
+      "step": 65
+    },
+    {
+      "epoch": 0.41379310344827586,
+      "grad_norm": 0.18413026630878448,
+      "learning_rate": 7.123157894736842e-05,
+      "loss": 1.6872,
+      "step": 66
+    },
+    {
+      "epoch": 0.4200626959247649,
+      "grad_norm": 0.23217302560806274,
+      "learning_rate": 7.07e-05,
+      "loss": 1.7738,
+      "step": 67
+    },
+    {
+      "epoch": 0.4263322884012539,
+      "grad_norm": 0.21129940450191498,
+      "learning_rate": 7.016842105263159e-05,
+      "loss": 1.6499,
+      "step": 68
+    },
+    {
+      "epoch": 0.43260188087774293,
+      "grad_norm": 0.14536724984645844,
+      "learning_rate": 6.963684210526316e-05,
+      "loss": 1.0403,
+      "step": 69
+    },
+    {
+      "epoch": 0.438871473354232,
+      "grad_norm": 0.12869617342948914,
+      "learning_rate": 6.910526315789474e-05,
+      "loss": 1.4696,
+      "step": 70
+    },
+    {
+      "epoch": 0.445141065830721,
+      "grad_norm": 0.1599850058555603,
+      "learning_rate": 6.857368421052631e-05,
+      "loss": 1.6434,
+      "step": 71
+    },
+    {
+      "epoch": 0.45141065830721006,
+      "grad_norm": 0.17733348906040192,
+      "learning_rate": 6.80421052631579e-05,
+      "loss": 1.6627,
+      "step": 72
+    },
+    {
+      "epoch": 0.45768025078369906,
+      "grad_norm": 0.1968277245759964,
+      "learning_rate": 6.751052631578948e-05,
+      "loss": 1.641,
+      "step": 73
+    },
+    {
+      "epoch": 0.46394984326018807,
+      "grad_norm": 0.2661347985267639,
+      "learning_rate": 6.697894736842105e-05,
+      "loss": 1.6584,
+      "step": 74
+    },
+    {
+      "epoch": 0.4702194357366771,
+      "grad_norm": 0.574530303478241,
+      "learning_rate": 6.644736842105264e-05,
+      "loss": 1.8574,
+      "step": 75
+    },
+    {
+      "epoch": 0.47648902821316613,
+      "grad_norm": 0.11632688343524933,
+      "learning_rate": 6.591578947368422e-05,
+      "loss": 1.4023,
+      "step": 76
+    },
+    {
+      "epoch": 0.4827586206896552,
+      "grad_norm": 0.2113131582736969,
+      "learning_rate": 6.538421052631579e-05,
+      "loss": 1.6341,
+      "step": 77
+    },
+    {
+      "epoch": 0.4890282131661442,
+      "grad_norm": 0.2565387189388275,
+      "learning_rate": 6.485263157894737e-05,
+      "loss": 1.6345,
+      "step": 78
+    },
+    {
+      "epoch": 0.4952978056426332,
+      "grad_norm": 0.2454068958759308,
+      "learning_rate": 6.432105263157894e-05,
+      "loss": 1.6885,
+      "step": 79
+    },
+    {
+      "epoch": 0.5015673981191222,
+      "grad_norm": 0.25979533791542053,
+      "learning_rate": 6.378947368421053e-05,
+      "loss": 1.7208,
+      "step": 80
+    },
+    {
+      "epoch": 0.5078369905956113,
+      "grad_norm": 0.30401724576950073,
+      "learning_rate": 6.32578947368421e-05,
+      "loss": 1.7026,
+      "step": 81
+    },
+    {
+      "epoch": 0.5141065830721003,
+      "grad_norm": 0.11303433030843735,
+      "learning_rate": 6.27263157894737e-05,
+      "loss": 1.1272,
+      "step": 82
+    },
+    {
+      "epoch": 0.5203761755485894,
+      "grad_norm": 0.21448008716106415,
+      "learning_rate": 6.219473684210527e-05,
+      "loss": 1.6056,
+      "step": 83
+    },
+    {
+      "epoch": 0.5266457680250783,
+      "grad_norm": 0.18290068209171295,
+      "learning_rate": 6.166315789473685e-05,
+      "loss": 1.6274,
+      "step": 84
+    },
+    {
+      "epoch": 0.5329153605015674,
+      "grad_norm": 0.21106426417827606,
+      "learning_rate": 6.113157894736842e-05,
+      "loss": 1.6398,
+      "step": 85
+    },
+    {
+      "epoch": 0.5391849529780565,
+      "grad_norm": 0.24094125628471375,
+      "learning_rate": 6.0599999999999996e-05,
+      "loss": 1.7705,
+      "step": 86
+    },
+    {
+      "epoch": 0.5454545454545454,
+      "grad_norm": 0.2784807085990906,
+      "learning_rate": 6.006842105263158e-05,
+      "loss": 1.6553,
+      "step": 87
+    },
+    {
+      "epoch": 0.5517241379310345,
+      "grad_norm": 0.135740265250206,
+      "learning_rate": 5.953684210526315e-05,
+      "loss": 1.0888,
+      "step": 88
+    },
+    {
+      "epoch": 0.5579937304075235,
+      "grad_norm": 0.13163161277770996,
+      "learning_rate": 5.900526315789474e-05,
+      "loss": 1.5075,
+      "step": 89
+    },
+    {
+      "epoch": 0.5642633228840125,
+      "grad_norm": 0.1819342076778412,
+      "learning_rate": 5.847368421052632e-05,
+      "loss": 1.6406,
+      "step": 90
+    },
+    {
+      "epoch": 0.5705329153605015,
+      "grad_norm": 0.20158928632736206,
+      "learning_rate": 5.79421052631579e-05,
+      "loss": 1.6991,
+      "step": 91
+    },
+    {
+      "epoch": 0.5768025078369906,
+      "grad_norm": 0.218618705868721,
+      "learning_rate": 5.7410526315789475e-05,
+      "loss": 1.6872,
+      "step": 92
+    },
+    {
+      "epoch": 0.5830721003134797,
+      "grad_norm": 0.2431667596101761,
+      "learning_rate": 5.687894736842105e-05,
+      "loss": 1.6567,
+      "step": 93
+    },
+    {
+      "epoch": 0.5893416927899686,
+      "grad_norm": 0.15539388358592987,
+      "learning_rate": 5.6347368421052625e-05,
+      "loss": 1.1379,
+      "step": 94
+    },
+    {
+      "epoch": 0.5956112852664577,
+      "grad_norm": 0.14918771386146545,
+      "learning_rate": 5.5815789473684214e-05,
+      "loss": 1.4123,
+      "step": 95
+    },
+    {
+      "epoch": 0.6018808777429467,
+      "grad_norm": 0.19607971608638763,
+      "learning_rate": 5.5284210526315796e-05,
+      "loss": 1.6222,
+      "step": 96
+    },
+    {
+      "epoch": 0.6081504702194357,
+      "grad_norm": 0.21519030630588531,
+      "learning_rate": 5.475263157894737e-05,
+      "loss": 1.6053,
+      "step": 97
+    },
+    {
+      "epoch": 0.6144200626959248,
+      "grad_norm": 0.2148403525352478,
+      "learning_rate": 5.422105263157895e-05,
+      "loss": 1.7118,
+      "step": 98
+    },
+    {
+      "epoch": 0.6206896551724138,
+      "grad_norm": 0.24617306888103485,
+      "learning_rate": 5.368947368421053e-05,
+      "loss": 1.6654,
+      "step": 99
+    },
+    {
+      "epoch": 0.6269592476489029,
+      "grad_norm": 0.5618337392807007,
+      "learning_rate": 5.3157894736842104e-05,
+      "loss": 1.7828,
+      "step": 100
+    },
+    {
+      "epoch": 0.6269592476489029,
+      "eval_loss": 1.6180227994918823,
+      "eval_runtime": 21.2838,
+      "eval_samples_per_second": 100.969,
+      "eval_steps_per_second": 3.195,
+      "step": 100
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 1.0800404085719695e+18,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null