Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +363 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4ce8d910db8726d31d85496ee25671bd423f80ca1887b224fa9c0ba36dc40049
 size 201892112

 version https://git-lfs.github.com/spec/v1
+oid sha256:68a258ec929d9218770e1f86fd3f1de785918e5b52a2e2c30529f74998eaa2aa
 size 201892112

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8e550d99da701cdcb6e6a333be295ff8f2d8c8e3642f1b88205d5396d20e49e0
 size 102864548

 version https://git-lfs.github.com/spec/v1
+oid sha256:a2e1ef4ae464844442d88115251951165c07376033477b3523521d128587207a
 size 102864548

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f06d6a4c9bed84c828edb6fda8551d33e80fe0e20455e2ae0cde3a44b0063947
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:212dcba99a7b1ae77770a31ef6779f7d223c41005e87ffc275e7c8bccb2d4cfd
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4213e97511ad1675d9fb39d8ad82aecd284027b2573bbfd2023f4cf1b821b35c
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:87acae65691e01ffbaf4c13ce5265904843d2731d4c47c0eed57fce5257b0710
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.8939120173454285,
-  "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.7782101167315175,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -373,6 +373,364 @@
       "eval_samples_per_second": 41.512,
       "eval_steps_per_second": 10.378,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -401,7 +759,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.3012201900081152e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.8714919686317444,
+  "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 1.556420233463035,
   "eval_steps": 50,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 41.512,
       "eval_steps_per_second": 10.378,
       "step": 50
+    },
+    {
+      "epoch": 0.7937743190661478,
+      "grad_norm": 0.3049919009208679,
+      "learning_rate": 8.811768194241952e-05,
+      "loss": 0.8823,
+      "step": 51
+    },
+    {
+      "epoch": 0.8093385214007782,
+      "grad_norm": 0.3248423635959625,
+      "learning_rate": 8.755659654352599e-05,
+      "loss": 0.9283,
+      "step": 52
+    },
+    {
+      "epoch": 0.8249027237354085,
+      "grad_norm": 0.3247882127761841,
+      "learning_rate": 8.698444304324835e-05,
+      "loss": 0.9166,
+      "step": 53
+    },
+    {
+      "epoch": 0.8404669260700389,
+      "grad_norm": 0.30053266882896423,
+      "learning_rate": 8.640139005784924e-05,
+      "loss": 0.858,
+      "step": 54
+    },
+    {
+      "epoch": 0.8560311284046692,
+      "grad_norm": 0.33963730931282043,
+      "learning_rate": 8.580760941571967e-05,
+      "loss": 0.8852,
+      "step": 55
+    },
+    {
+      "epoch": 0.8715953307392996,
+      "grad_norm": 0.31807518005371094,
+      "learning_rate": 8.520327610674029e-05,
+      "loss": 0.9407,
+      "step": 56
+    },
+    {
+      "epoch": 0.8871595330739299,
+      "grad_norm": 0.3238030970096588,
+      "learning_rate": 8.458856823071111e-05,
+      "loss": 0.8734,
+      "step": 57
+    },
+    {
+      "epoch": 0.9027237354085603,
+      "grad_norm": 0.3245297372341156,
+      "learning_rate": 8.396366694486466e-05,
+      "loss": 0.8825,
+      "step": 58
+    },
+    {
+      "epoch": 0.9182879377431906,
+      "grad_norm": 0.3227308988571167,
+      "learning_rate": 8.332875641047817e-05,
+      "loss": 0.8962,
+      "step": 59
+    },
+    {
+      "epoch": 0.933852140077821,
+      "grad_norm": 0.3529612123966217,
+      "learning_rate": 8.26840237386003e-05,
+      "loss": 0.9568,
+      "step": 60
+    },
+    {
+      "epoch": 0.9494163424124513,
+      "grad_norm": 0.36405235528945923,
+      "learning_rate": 8.202965893490878e-05,
+      "loss": 0.897,
+      "step": 61
+    },
+    {
+      "epoch": 0.9649805447470817,
+      "grad_norm": 0.3392234444618225,
+      "learning_rate": 8.13658548437147e-05,
+      "loss": 0.8032,
+      "step": 62
+    },
+    {
+      "epoch": 0.980544747081712,
+      "grad_norm": 0.3424031436443329,
+      "learning_rate": 8.06928070911306e-05,
+      "loss": 0.8227,
+      "step": 63
+    },
+    {
+      "epoch": 0.9961089494163424,
+      "grad_norm": 0.38254794478416443,
+      "learning_rate": 8.001071402741842e-05,
+      "loss": 0.9149,
+      "step": 64
+    },
+    {
+      "epoch": 1.0116731517509727,
+      "grad_norm": 0.9797287583351135,
+      "learning_rate": 7.931977666853479e-05,
+      "loss": 1.5691,
+      "step": 65
+    },
+    {
+      "epoch": 1.027237354085603,
+      "grad_norm": 0.2772282361984253,
+      "learning_rate": 7.862019863689074e-05,
+      "loss": 0.8003,
+      "step": 66
+    },
+    {
+      "epoch": 1.0428015564202335,
+      "grad_norm": 0.285792738199234,
+      "learning_rate": 7.791218610134323e-05,
+      "loss": 0.8685,
+      "step": 67
+    },
+    {
+      "epoch": 1.0583657587548638,
+      "grad_norm": 0.32011640071868896,
+      "learning_rate": 7.719594771643623e-05,
+      "loss": 0.9241,
+      "step": 68
+    },
+    {
+      "epoch": 1.0739299610894941,
+      "grad_norm": 0.30448997020721436,
+      "learning_rate": 7.647169456090925e-05,
+      "loss": 0.8244,
+      "step": 69
+    },
+    {
+      "epoch": 1.0894941634241244,
+      "grad_norm": 0.3131871819496155,
+      "learning_rate": 7.573964007549155e-05,
+      "loss": 0.8418,
+      "step": 70
+    },
+    {
+      "epoch": 1.105058365758755,
+      "grad_norm": 0.3209502398967743,
+      "learning_rate": 7.500000000000001e-05,
+      "loss": 0.7923,
+      "step": 71
+    },
+    {
+      "epoch": 1.1206225680933852,
+      "grad_norm": 0.3268880844116211,
+      "learning_rate": 7.425299230975981e-05,
+      "loss": 0.8179,
+      "step": 72
+    },
+    {
+      "epoch": 1.1361867704280155,
+      "grad_norm": 0.3202899396419525,
+      "learning_rate": 7.3498837151366e-05,
+      "loss": 0.8361,
+      "step": 73
+    },
+    {
+      "epoch": 1.1517509727626458,
+      "grad_norm": 0.33722591400146484,
+      "learning_rate": 7.27377567778053e-05,
+      "loss": 0.8352,
+      "step": 74
+    },
+    {
+      "epoch": 1.1673151750972763,
+      "grad_norm": 0.3361928164958954,
+      "learning_rate": 7.196997548295708e-05,
+      "loss": 0.8771,
+      "step": 75
+    },
+    {
+      "epoch": 1.1828793774319066,
+      "grad_norm": 0.3383100628852844,
+      "learning_rate": 7.119571953549305e-05,
+      "loss": 0.8353,
+      "step": 76
+    },
+    {
+      "epoch": 1.198443579766537,
+      "grad_norm": 0.342599481344223,
+      "learning_rate": 7.041521711219467e-05,
+      "loss": 0.7851,
+      "step": 77
+    },
+    {
+      "epoch": 1.2140077821011672,
+      "grad_norm": 0.3692112863063812,
+      "learning_rate": 6.96286982307086e-05,
+      "loss": 0.7754,
+      "step": 78
+    },
+    {
+      "epoch": 1.2295719844357977,
+      "grad_norm": 0.3618566691875458,
+      "learning_rate": 6.883639468175927e-05,
+      "loss": 0.7592,
+      "step": 79
+    },
+    {
+      "epoch": 1.245136186770428,
+      "grad_norm": 0.3885677456855774,
+      "learning_rate": 6.803853996083917e-05,
+      "loss": 0.8669,
+      "step": 80
+    },
+    {
+      "epoch": 1.2607003891050583,
+      "grad_norm": 0.3042435646057129,
+      "learning_rate": 6.72353691993967e-05,
+      "loss": 0.8014,
+      "step": 81
+    },
+    {
+      "epoch": 1.2762645914396886,
+      "grad_norm": 0.2926236093044281,
+      "learning_rate": 6.642711909554174e-05,
+      "loss": 0.757,
+      "step": 82
+    },
+    {
+      "epoch": 1.2918287937743191,
+      "grad_norm": 0.315763384103775,
+      "learning_rate": 6.561402784428974e-05,
+      "loss": 0.8411,
+      "step": 83
+    },
+    {
+      "epoch": 1.3073929961089494,
+      "grad_norm": 0.3296045958995819,
+      "learning_rate": 6.479633506736446e-05,
+      "loss": 0.8441,
+      "step": 84
+    },
+    {
+      "epoch": 1.3229571984435797,
+      "grad_norm": 0.3326404392719269,
+      "learning_rate": 6.397428174258047e-05,
+      "loss": 0.8187,
+      "step": 85
+    },
+    {
+      "epoch": 1.3385214007782102,
+      "grad_norm": 0.32586634159088135,
+      "learning_rate": 6.314811013282574e-05,
+      "loss": 0.8349,
+      "step": 86
+    },
+    {
+      "epoch": 1.3540856031128405,
+      "grad_norm": 0.33808082342147827,
+      "learning_rate": 6.231806371466574e-05,
+      "loss": 0.848,
+      "step": 87
+    },
+    {
+      "epoch": 1.3696498054474708,
+      "grad_norm": 0.3542570173740387,
+      "learning_rate": 6.14843871065898e-05,
+      "loss": 0.8243,
+      "step": 88
+    },
+    {
+      "epoch": 1.3852140077821011,
+      "grad_norm": 0.356636643409729,
+      "learning_rate": 6.064732599692079e-05,
+      "loss": 0.8619,
+      "step": 89
+    },
+    {
+      "epoch": 1.4007782101167314,
+      "grad_norm": 0.3576579689979553,
+      "learning_rate": 5.980712707140985e-05,
+      "loss": 0.7986,
+      "step": 90
+    },
+    {
+      "epoch": 1.416342412451362,
+      "grad_norm": 0.36819812655448914,
+      "learning_rate": 5.896403794053679e-05,
+      "loss": 0.8189,
+      "step": 91
+    },
+    {
+      "epoch": 1.4319066147859922,
+      "grad_norm": 0.3678414821624756,
+      "learning_rate": 5.8118307066538193e-05,
+      "loss": 0.7622,
+      "step": 92
+    },
+    {
+      "epoch": 1.4474708171206225,
+      "grad_norm": 0.38239309191703796,
+      "learning_rate": 5.7270183690184495e-05,
+      "loss": 0.8324,
+      "step": 93
+    },
+    {
+      "epoch": 1.463035019455253,
+      "grad_norm": 0.36592909693717957,
+      "learning_rate": 5.6419917757327555e-05,
+      "loss": 0.7254,
+      "step": 94
+    },
+    {
+      "epoch": 1.4785992217898833,
+      "grad_norm": 0.3889016807079315,
+      "learning_rate": 5.5567759845240444e-05,
+      "loss": 0.7894,
+      "step": 95
+    },
+    {
+      "epoch": 1.4941634241245136,
+      "grad_norm": 0.4369194507598877,
+      "learning_rate": 5.4713961088771226e-05,
+      "loss": 0.8497,
+      "step": 96
+    },
+    {
+      "epoch": 1.509727626459144,
+      "grad_norm": 0.3298012614250183,
+      "learning_rate": 5.385877310633233e-05,
+      "loss": 0.7555,
+      "step": 97
+    },
+    {
+      "epoch": 1.5252918287937742,
+      "grad_norm": 0.3367776572704315,
+      "learning_rate": 5.300244792574742e-05,
+      "loss": 0.8044,
+      "step": 98
+    },
+    {
+      "epoch": 1.5408560311284045,
+      "grad_norm": 0.3444443941116333,
+      "learning_rate": 5.214523790997773e-05,
+      "loss": 0.8537,
+      "step": 99
+    },
+    {
+      "epoch": 1.556420233463035,
+      "grad_norm": 0.33900946378707886,
+      "learning_rate": 5.128739568274944e-05,
+      "loss": 0.7601,
+      "step": 100
+    },
+    {
+      "epoch": 1.556420233463035,
+      "eval_loss": 0.8714919686317444,
+      "eval_runtime": 2.6058,
+      "eval_samples_per_second": 41.446,
+      "eval_steps_per_second": 10.362,
+      "step": 100
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 2.581108901491507e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null