Training in progress, step 150, checkpoint

Browse files

Files changed (12) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/rng_state_4.pth +1 -1
last-checkpoint/rng_state_5.pth +1 -1
last-checkpoint/rng_state_6.pth +1 -1
last-checkpoint/rng_state_7.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +363 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3dacb94f4c3db1f3f8846866302243300c9b7cfe6a8af70e0e73d5f187b91d90
 size 639691872

 version https://git-lfs.github.com/spec/v1
+oid sha256:9ffef11321aeb40c36498a7b0243b13c9ad66a1ed4042e5d2cff5f22aa66c152
 size 639691872

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5d7da7d08b84866b01f2d25f3bc946756b3796b16a8c75bd017c2f3a6cdd316c
 size 325339796

 version https://git-lfs.github.com/spec/v1
+oid sha256:026b4e4a3e304f00ef68b648ee773062cfc432c6ebbdb19dfa9f21385d9c478c
 size 325339796

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3356da821dae79a784a6750ab4501d0f5a7c8755fe80b2c59455f49121810a79
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:6fbbf97b3a5982769328e6c17bbd721d4db29f3a90af2964997b842ff0b7c627
 size 15984

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:31c31bc5b4ce122946d5e6d935d6f0e7b59a5c3f8527767b37fab6599541eafa
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:55dae8430b90d80adef1c03a4f3a616fcfecb52c15727db0573d3903a58c7433
 size 15984

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:674f48eb96aac82ac7c27099dab2ce7b2ca02397e65e74ab34902206f61d1c00
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:e2a53637177b916508a2faa86ffb825e8aec1da6f32f04559dcc4f209969d4ad
 size 15984

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7081d2dbf902e430a2d98e5f49a4ebbd91afb951f26f658ae17a540d3adb3dd7
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:7624a009fbd55b74eabae40d34cae21bdea5444998c9b54d71b3dce9371f4b09
 size 15984

last-checkpoint/rng_state_4.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f9cf0b7703139e63d7207fd09059c7c9b0cef01bdf844e8b8474e7b1fa1172b5
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:cf945fd2a3d32a022238b4fa2cccd83f027cee749881f4a3cd330deb0fbba67d
 size 15984

last-checkpoint/rng_state_5.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:23ef933c3efa4411ffa43b27049a7a8137012cb0c23240ff6ae1c68f27726849
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:b39cc4938391c9cb93ced3ff4ba63e53d75aec591735c4893f30bd34c1bf2cdb
 size 15984

last-checkpoint/rng_state_6.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f6a5c29bf8829c9bb2e6b7bc9722dceaa322ce778a5bfff62135baa7cbf57e41
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:fca866cd199952053cad89fe8ee47fd054f317b68d9a842774a6ed014c7755b5
 size 15984

last-checkpoint/rng_state_7.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:146109f2f784eb7fcee232dc81869b691fa0991c3c8b95abdd635b8ce609398a
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:a6006c96ab3ed3fd9a76884eb4947385722e38fb97c03a068a95364ce3f2f031
 size 15984

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f6b2f615f21faa4fde4442b48613d92f84c55cd5ef4fb4d04d8e3f819305ce14
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:7d606eeb1aa97b417de3c30d0a970be83ac979e2c7cc0fa41135c63d459909e5
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.6180227994918823,
-  "best_model_checkpoint": "miner_id_24/checkpoint-100",
-  "epoch": 0.6269592476489029,
   "eval_steps": 50,
-  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -731,6 +731,364 @@
       "eval_samples_per_second": 100.969,
       "eval_steps_per_second": 3.195,
       "step": 100
     }
   ],
   "logging_steps": 1,
@@ -759,7 +1117,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.0800404085719695e+18,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.5667701959609985,
+  "best_model_checkpoint": "miner_id_24/checkpoint-150",
+  "epoch": 0.9404388714733543,
   "eval_steps": 50,
+  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 100.969,
       "eval_steps_per_second": 3.195,
       "step": 100
+    },
+    {
+      "epoch": 0.6332288401253918,
+      "grad_norm": 0.12550345063209534,
+      "learning_rate": 5.262631578947368e-05,
+      "loss": 1.1544,
+      "step": 101
+    },
+    {
+      "epoch": 0.6394984326018809,
+      "grad_norm": 0.20995737612247467,
+      "learning_rate": 5.209473684210527e-05,
+      "loss": 1.5736,
+      "step": 102
+    },
+    {
+      "epoch": 0.64576802507837,
+      "grad_norm": 0.25661516189575195,
+      "learning_rate": 5.1563157894736844e-05,
+      "loss": 1.6052,
+      "step": 103
+    },
+    {
+      "epoch": 0.6520376175548589,
+      "grad_norm": 0.2354477196931839,
+      "learning_rate": 5.1031578947368426e-05,
+      "loss": 1.6452,
+      "step": 104
+    },
+    {
+      "epoch": 0.658307210031348,
+      "grad_norm": 0.2458198219537735,
+      "learning_rate": 5.05e-05,
+      "loss": 1.7015,
+      "step": 105
+    },
+    {
+      "epoch": 0.664576802507837,
+      "grad_norm": 0.33329248428344727,
+      "learning_rate": 4.9968421052631576e-05,
+      "loss": 1.5996,
+      "step": 106
+    },
+    {
+      "epoch": 0.670846394984326,
+      "grad_norm": 0.1337110549211502,
+      "learning_rate": 4.943684210526316e-05,
+      "loss": 1.3261,
+      "step": 107
+    },
+    {
+      "epoch": 0.677115987460815,
+      "grad_norm": 0.17378275096416473,
+      "learning_rate": 4.890526315789474e-05,
+      "loss": 1.5064,
+      "step": 108
+    },
+    {
+      "epoch": 0.6833855799373041,
+      "grad_norm": 0.20934435725212097,
+      "learning_rate": 4.8373684210526316e-05,
+      "loss": 1.6143,
+      "step": 109
+    },
+    {
+      "epoch": 0.6896551724137931,
+      "grad_norm": 0.24600297212600708,
+      "learning_rate": 4.784210526315789e-05,
+      "loss": 1.6473,
+      "step": 110
+    },
+    {
+      "epoch": 0.6959247648902821,
+      "grad_norm": 0.2352122813463211,
+      "learning_rate": 4.731052631578947e-05,
+      "loss": 1.59,
+      "step": 111
+    },
+    {
+      "epoch": 0.7021943573667712,
+      "grad_norm": 0.3226903975009918,
+      "learning_rate": 4.6778947368421055e-05,
+      "loss": 1.5846,
+      "step": 112
+    },
+    {
+      "epoch": 0.7084639498432602,
+      "grad_norm": 0.14687702059745789,
+      "learning_rate": 4.624736842105263e-05,
+      "loss": 1.1129,
+      "step": 113
+    },
+    {
+      "epoch": 0.7147335423197492,
+      "grad_norm": 0.15256668627262115,
+      "learning_rate": 4.571578947368421e-05,
+      "loss": 1.4691,
+      "step": 114
+    },
+    {
+      "epoch": 0.7210031347962382,
+      "grad_norm": 0.20408159494400024,
+      "learning_rate": 4.518421052631579e-05,
+      "loss": 1.6611,
+      "step": 115
+    },
+    {
+      "epoch": 0.7272727272727273,
+      "grad_norm": 0.22884103655815125,
+      "learning_rate": 4.465263157894737e-05,
+      "loss": 1.6256,
+      "step": 116
+    },
+    {
+      "epoch": 0.7335423197492164,
+      "grad_norm": 0.23959754407405853,
+      "learning_rate": 4.412105263157895e-05,
+      "loss": 1.6332,
+      "step": 117
+    },
+    {
+      "epoch": 0.7398119122257053,
+      "grad_norm": 0.2993139624595642,
+      "learning_rate": 4.358947368421053e-05,
+      "loss": 1.6246,
+      "step": 118
+    },
+    {
+      "epoch": 0.7460815047021944,
+      "grad_norm": 0.19114616513252258,
+      "learning_rate": 4.30578947368421e-05,
+      "loss": 1.0553,
+      "step": 119
+    },
+    {
+      "epoch": 0.7523510971786834,
+      "grad_norm": 0.1459835320711136,
+      "learning_rate": 4.2526315789473685e-05,
+      "loss": 1.432,
+      "step": 120
+    },
+    {
+      "epoch": 0.7586206896551724,
+      "grad_norm": 0.19810743629932404,
+      "learning_rate": 4.199473684210527e-05,
+      "loss": 1.5597,
+      "step": 121
+    },
+    {
+      "epoch": 0.7648902821316614,
+      "grad_norm": 0.22155524790287018,
+      "learning_rate": 4.146315789473684e-05,
+      "loss": 1.5973,
+      "step": 122
+    },
+    {
+      "epoch": 0.7711598746081505,
+      "grad_norm": 0.252210795879364,
+      "learning_rate": 4.093157894736842e-05,
+      "loss": 1.7093,
+      "step": 123
+    },
+    {
+      "epoch": 0.7774294670846394,
+      "grad_norm": 0.2699624300003052,
+      "learning_rate": 4.0400000000000006e-05,
+      "loss": 1.6218,
+      "step": 124
+    },
+    {
+      "epoch": 0.7836990595611285,
+      "grad_norm": 0.5296167731285095,
+      "learning_rate": 3.986842105263158e-05,
+      "loss": 1.6447,
+      "step": 125
+    },
+    {
+      "epoch": 0.7899686520376176,
+      "grad_norm": 0.1305689811706543,
+      "learning_rate": 3.933684210526316e-05,
+      "loss": 1.2415,
+      "step": 126
+    },
+    {
+      "epoch": 0.7962382445141066,
+      "grad_norm": 0.19951651990413666,
+      "learning_rate": 3.880526315789473e-05,
+      "loss": 1.5846,
+      "step": 127
+    },
+    {
+      "epoch": 0.8025078369905956,
+      "grad_norm": 0.24844390153884888,
+      "learning_rate": 3.827368421052632e-05,
+      "loss": 1.5013,
+      "step": 128
+    },
+    {
+      "epoch": 0.8087774294670846,
+      "grad_norm": 0.26770254969596863,
+      "learning_rate": 3.7742105263157896e-05,
+      "loss": 1.6404,
+      "step": 129
+    },
+    {
+      "epoch": 0.8150470219435737,
+      "grad_norm": 0.268388956785202,
+      "learning_rate": 3.721052631578947e-05,
+      "loss": 1.6831,
+      "step": 130
+    },
+    {
+      "epoch": 0.8213166144200627,
+      "grad_norm": 0.3877102732658386,
+      "learning_rate": 3.6678947368421054e-05,
+      "loss": 1.5713,
+      "step": 131
+    },
+    {
+      "epoch": 0.8275862068965517,
+      "grad_norm": 0.14182406663894653,
+      "learning_rate": 3.6147368421052636e-05,
+      "loss": 1.1147,
+      "step": 132
+    },
+    {
+      "epoch": 0.8338557993730408,
+      "grad_norm": 0.1862500160932541,
+      "learning_rate": 3.561578947368421e-05,
+      "loss": 1.4998,
+      "step": 133
+    },
+    {
+      "epoch": 0.8401253918495298,
+      "grad_norm": 0.22701646387577057,
+      "learning_rate": 3.508421052631579e-05,
+      "loss": 1.5137,
+      "step": 134
+    },
+    {
+      "epoch": 0.8463949843260188,
+      "grad_norm": 0.2560668885707855,
+      "learning_rate": 3.455263157894737e-05,
+      "loss": 1.6129,
+      "step": 135
+    },
+    {
+      "epoch": 0.8526645768025078,
+      "grad_norm": 0.2551365792751312,
+      "learning_rate": 3.402105263157895e-05,
+      "loss": 1.6454,
+      "step": 136
+    },
+    {
+      "epoch": 0.8589341692789969,
+      "grad_norm": 0.331463485956192,
+      "learning_rate": 3.3489473684210526e-05,
+      "loss": 1.5863,
+      "step": 137
+    },
+    {
+      "epoch": 0.8652037617554859,
+      "grad_norm": 0.17028988897800446,
+      "learning_rate": 3.295789473684211e-05,
+      "loss": 1.1377,
+      "step": 138
+    },
+    {
+      "epoch": 0.8714733542319749,
+      "grad_norm": 0.1787910908460617,
+      "learning_rate": 3.242631578947368e-05,
+      "loss": 1.4605,
+      "step": 139
+    },
+    {
+      "epoch": 0.877742946708464,
+      "grad_norm": 0.21535782516002655,
+      "learning_rate": 3.1894736842105265e-05,
+      "loss": 1.5651,
+      "step": 140
+    },
+    {
+      "epoch": 0.8840125391849529,
+      "grad_norm": 0.2516005337238312,
+      "learning_rate": 3.136315789473685e-05,
+      "loss": 1.6457,
+      "step": 141
+    },
+    {
+      "epoch": 0.890282131661442,
+      "grad_norm": 0.26242345571517944,
+      "learning_rate": 3.083157894736842e-05,
+      "loss": 1.6603,
+      "step": 142
+    },
+    {
+      "epoch": 0.896551724137931,
+      "grad_norm": 0.31754815578460693,
+      "learning_rate": 3.0299999999999998e-05,
+      "loss": 1.6406,
+      "step": 143
+    },
+    {
+      "epoch": 0.9028213166144201,
+      "grad_norm": 0.19499145448207855,
+      "learning_rate": 2.9768421052631577e-05,
+      "loss": 1.0787,
+      "step": 144
+    },
+    {
+      "epoch": 0.9090909090909091,
+      "grad_norm": 0.16461507976055145,
+      "learning_rate": 2.923684210526316e-05,
+      "loss": 1.3539,
+      "step": 145
+    },
+    {
+      "epoch": 0.9153605015673981,
+      "grad_norm": 0.231450155377388,
+      "learning_rate": 2.8705263157894737e-05,
+      "loss": 1.6046,
+      "step": 146
+    },
+    {
+      "epoch": 0.9216300940438872,
+      "grad_norm": 0.25709661841392517,
+      "learning_rate": 2.8173684210526313e-05,
+      "loss": 1.5514,
+      "step": 147
+    },
+    {
+      "epoch": 0.9278996865203761,
+      "grad_norm": 0.26337385177612305,
+      "learning_rate": 2.7642105263157898e-05,
+      "loss": 1.6737,
+      "step": 148
+    },
+    {
+      "epoch": 0.9341692789968652,
+      "grad_norm": 0.29333168268203735,
+      "learning_rate": 2.7110526315789473e-05,
+      "loss": 1.5403,
+      "step": 149
+    },
+    {
+      "epoch": 0.9404388714733543,
+      "grad_norm": 0.6478084325790405,
+      "learning_rate": 2.6578947368421052e-05,
+      "loss": 1.7032,
+      "step": 150
+    },
+    {
+      "epoch": 0.9404388714733543,
+      "eval_loss": 1.5667701959609985,
+      "eval_runtime": 22.4342,
+      "eval_samples_per_second": 95.791,
+      "eval_steps_per_second": 3.031,
+      "step": 150
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 1.6208921818772275e+18,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null