Training in progress, step 300, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +2 -2
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +363 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:83bc9d32c440bb09b0e306b49ef66d95c54c199e5232e817229ddba46e25a087
 size 578859568

 version https://git-lfs.github.com/spec/v1
+oid sha256:6bbcf69b8eeb5cf9fd068e0ac17a52f2df71e8f11a6be6a22c8168f23eef03d9
 size 578859568

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:307bcb4f2a38b249970fda89b4d96744d49eb9460ddb7a85287c3abef80e335e
-size 294324372

 version https://git-lfs.github.com/spec/v1
+oid sha256:1d6eff029a189244991acb874c7a5a97397aaf3b50735559ebb4a955f4b98f47
+size 294324692

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1c82ab9f2071d51a262a67414c9e0763e55a3b2ac7fdbba4135fff650dc8fe49
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:25ebec897dc147aad62d4d9cba9ec057687aa9ed3e810cfa1436fa460b983424
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7d1f3538c1e3a5459eb655b11eb8bcebff4cdf068b69eab5cfd18947d9810e7a
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:698b407b737eae28d312187b0cfb0990e1352176888e7a2fdb41197b9abbc630
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.7649185657501221,
-  "best_model_checkpoint": "miner_id_24/checkpoint-250",
-  "epoch": 0.006949222034593227,
   "eval_steps": 50,
-  "global_step": 250,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1805,6 +1805,364 @@
       "eval_samples_per_second": 11.517,
       "eval_steps_per_second": 5.769,
       "step": 250
     }
   ],
   "logging_steps": 1,
@@ -1833,7 +2191,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.612399276720128e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.7490311861038208,
+  "best_model_checkpoint": "miner_id_24/checkpoint-300",
+  "epoch": 0.008339066441511873,
   "eval_steps": 50,
+  "global_step": 300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 11.517,
       "eval_steps_per_second": 5.769,
       "step": 250
+    },
+    {
+      "epoch": 0.0069770189227316,
+      "grad_norm": 0.14372354745864868,
+      "learning_rate": 0.0010256428432180956,
+      "loss": 1.0122,
+      "step": 251
+    },
+    {
+      "epoch": 0.007004815810869973,
+      "grad_norm": 0.11382071673870087,
+      "learning_rate": 0.0010192330547876872,
+      "loss": 0.8098,
+      "step": 252
+    },
+    {
+      "epoch": 0.007032612699008346,
+      "grad_norm": 0.10247013717889786,
+      "learning_rate": 0.0010128224757617274,
+      "loss": 0.9305,
+      "step": 253
+    },
+    {
+      "epoch": 0.0070604095871467185,
+      "grad_norm": 0.09587737172842026,
+      "learning_rate": 0.0010064113696540112,
+      "loss": 0.9049,
+      "step": 254
+    },
+    {
+      "epoch": 0.007088206475285092,
+      "grad_norm": 0.0981241911649704,
+      "learning_rate": 0.001,
+      "loss": 0.7943,
+      "step": 255
+    },
+    {
+      "epoch": 0.007116003363423465,
+      "grad_norm": 0.10372012108564377,
+      "learning_rate": 0.0009935886303459888,
+      "loss": 0.8079,
+      "step": 256
+    },
+    {
+      "epoch": 0.007143800251561838,
+      "grad_norm": 0.0910145714879036,
+      "learning_rate": 0.0009871775242382727,
+      "loss": 0.7809,
+      "step": 257
+    },
+    {
+      "epoch": 0.007171597139700211,
+      "grad_norm": 0.09489095211029053,
+      "learning_rate": 0.0009807669452123128,
+      "loss": 0.7955,
+      "step": 258
+    },
+    {
+      "epoch": 0.0071993940278385835,
+      "grad_norm": 0.09354311227798462,
+      "learning_rate": 0.0009743571567819046,
+      "loss": 0.8035,
+      "step": 259
+    },
+    {
+      "epoch": 0.007227190915976956,
+      "grad_norm": 0.09070082753896713,
+      "learning_rate": 0.0009679484224283449,
+      "loss": 0.7908,
+      "step": 260
+    },
+    {
+      "epoch": 0.007254987804115329,
+      "grad_norm": 0.1037682518362999,
+      "learning_rate": 0.0009615410055896016,
+      "loss": 0.7739,
+      "step": 261
+    },
+    {
+      "epoch": 0.007282784692253702,
+      "grad_norm": 0.09658119082450867,
+      "learning_rate": 0.0009551351696494854,
+      "loss": 0.8614,
+      "step": 262
+    },
+    {
+      "epoch": 0.007310581580392075,
+      "grad_norm": 0.10361933708190918,
+      "learning_rate": 0.0009487311779268209,
+      "loss": 0.9021,
+      "step": 263
+    },
+    {
+      "epoch": 0.007338378468530448,
+      "grad_norm": 0.08979956805706024,
+      "learning_rate": 0.0009423292936646257,
+      "loss": 0.7559,
+      "step": 264
+    },
+    {
+      "epoch": 0.007366175356668821,
+      "grad_norm": 0.09771803766489029,
+      "learning_rate": 0.0009359297800192872,
+      "loss": 0.7717,
+      "step": 265
+    },
+    {
+      "epoch": 0.007393972244807194,
+      "grad_norm": 0.10628974437713623,
+      "learning_rate": 0.0009295329000497459,
+      "loss": 0.8083,
+      "step": 266
+    },
+    {
+      "epoch": 0.007421769132945567,
+      "grad_norm": 0.10804276913404465,
+      "learning_rate": 0.0009231389167066836,
+      "loss": 0.8077,
+      "step": 267
+    },
+    {
+      "epoch": 0.00744956602108394,
+      "grad_norm": 0.13234536349773407,
+      "learning_rate": 0.0009167480928217108,
+      "loss": 0.8623,
+      "step": 268
+    },
+    {
+      "epoch": 0.0074773629092223124,
+      "grad_norm": 0.10681891441345215,
+      "learning_rate": 0.0009103606910965666,
+      "loss": 0.7624,
+      "step": 269
+    },
+    {
+      "epoch": 0.007505159797360685,
+      "grad_norm": 0.11654768884181976,
+      "learning_rate": 0.0009039769740923182,
+      "loss": 0.8082,
+      "step": 270
+    },
+    {
+      "epoch": 0.007532956685499058,
+      "grad_norm": 0.1005023717880249,
+      "learning_rate": 0.0008975972042185687,
+      "loss": 0.7448,
+      "step": 271
+    },
+    {
+      "epoch": 0.007560753573637431,
+      "grad_norm": 0.10492710024118423,
+      "learning_rate": 0.0008912216437226692,
+      "loss": 0.8734,
+      "step": 272
+    },
+    {
+      "epoch": 0.0075885504617758046,
+      "grad_norm": 0.10636113584041595,
+      "learning_rate": 0.0008848505546789408,
+      "loss": 0.7397,
+      "step": 273
+    },
+    {
+      "epoch": 0.007616347349914177,
+      "grad_norm": 0.0918545350432396,
+      "learning_rate": 0.0008784841989778997,
+      "loss": 0.6861,
+      "step": 274
+    },
+    {
+      "epoch": 0.00764414423805255,
+      "grad_norm": 0.10185957700014114,
+      "learning_rate": 0.0008721228383154939,
+      "loss": 0.7906,
+      "step": 275
+    },
+    {
+      "epoch": 0.007671941126190923,
+      "grad_norm": 0.09865249693393707,
+      "learning_rate": 0.0008657667341823448,
+      "loss": 0.6877,
+      "step": 276
+    },
+    {
+      "epoch": 0.007699738014329296,
+      "grad_norm": 0.10265160351991653,
+      "learning_rate": 0.0008594161478529974,
+      "loss": 0.7361,
+      "step": 277
+    },
+    {
+      "epoch": 0.007727534902467669,
+      "grad_norm": 0.10760695487260818,
+      "learning_rate": 0.0008530713403751821,
+      "loss": 0.6973,
+      "step": 278
+    },
+    {
+      "epoch": 0.007755331790606041,
+      "grad_norm": 0.1106325089931488,
+      "learning_rate": 0.000846732572559084,
+      "loss": 0.7736,
+      "step": 279
+    },
+    {
+      "epoch": 0.007783128678744414,
+      "grad_norm": 0.11260738223791122,
+      "learning_rate": 0.000840400104966621,
+      "loss": 0.7567,
+      "step": 280
+    },
+    {
+      "epoch": 0.007810925566882787,
+      "grad_norm": 0.10231205821037292,
+      "learning_rate": 0.0008340741979007324,
+      "loss": 0.7018,
+      "step": 281
+    },
+    {
+      "epoch": 0.00783872245502116,
+      "grad_norm": 0.10939161479473114,
+      "learning_rate": 0.0008277551113946811,
+      "loss": 0.5913,
+      "step": 282
+    },
+    {
+      "epoch": 0.007866519343159533,
+      "grad_norm": 0.10841213166713715,
+      "learning_rate": 0.0008214431052013634,
+      "loss": 0.7421,
+      "step": 283
+    },
+    {
+      "epoch": 0.007894316231297906,
+      "grad_norm": 0.11439742892980576,
+      "learning_rate": 0.0008151384387826313,
+      "loss": 0.7312,
+      "step": 284
+    },
+    {
+      "epoch": 0.007922113119436278,
+      "grad_norm": 0.11929941177368164,
+      "learning_rate": 0.0008088413712986279,
+      "loss": 0.7529,
+      "step": 285
+    },
+    {
+      "epoch": 0.007949910007574652,
+      "grad_norm": 0.11869197338819504,
+      "learning_rate": 0.0008025521615971329,
+      "loss": 0.6848,
+      "step": 286
+    },
+    {
+      "epoch": 0.007977706895713026,
+      "grad_norm": 0.10041101276874542,
+      "learning_rate": 0.0007962710682029245,
+      "loss": 0.5243,
+      "step": 287
+    },
+    {
+      "epoch": 0.008005503783851398,
+      "grad_norm": 0.11425133794546127,
+      "learning_rate": 0.0007899983493071507,
+      "loss": 0.6441,
+      "step": 288
+    },
+    {
+      "epoch": 0.008033300671989771,
+      "grad_norm": 0.10552135854959488,
+      "learning_rate": 0.0007837342627567166,
+      "loss": 0.5675,
+      "step": 289
+    },
+    {
+      "epoch": 0.008061097560128143,
+      "grad_norm": 0.12019749730825424,
+      "learning_rate": 0.0007774790660436857,
+      "loss": 0.7536,
+      "step": 290
+    },
+    {
+      "epoch": 0.008088894448266517,
+      "grad_norm": 0.11838424205780029,
+      "learning_rate": 0.0007712330162946947,
+      "loss": 0.6558,
+      "step": 291
+    },
+    {
+      "epoch": 0.008116691336404889,
+      "grad_norm": 0.11879291385412216,
+      "learning_rate": 0.0007649963702603848,
+      "loss": 0.6071,
+      "step": 292
+    },
+    {
+      "epoch": 0.008144488224543263,
+      "grad_norm": 0.1303434818983078,
+      "learning_rate": 0.0007587693843048475,
+      "loss": 0.5558,
+      "step": 293
+    },
+    {
+      "epoch": 0.008172285112681634,
+      "grad_norm": 0.1721266806125641,
+      "learning_rate": 0.0007525523143950859,
+      "loss": 0.6311,
+      "step": 294
+    },
+    {
+      "epoch": 0.008200082000820008,
+      "grad_norm": 0.13676951825618744,
+      "learning_rate": 0.0007463454160904928,
+      "loss": 0.5886,
+      "step": 295
+    },
+    {
+      "epoch": 0.008227878888958382,
+      "grad_norm": 0.16192977130413055,
+      "learning_rate": 0.0007401489445323472,
+      "loss": 0.746,
+      "step": 296
+    },
+    {
+      "epoch": 0.008255675777096754,
+      "grad_norm": 0.12178271263837814,
+      "learning_rate": 0.000733963154433325,
+      "loss": 0.544,
+      "step": 297
+    },
+    {
+      "epoch": 0.008283472665235127,
+      "grad_norm": 0.1465931534767151,
+      "learning_rate": 0.0007277883000670289,
+      "loss": 0.4949,
+      "step": 298
+    },
+    {
+      "epoch": 0.0083112695533735,
+      "grad_norm": 0.15438374876976013,
+      "learning_rate": 0.0007216246352575369,
+      "loss": 0.6408,
+      "step": 299
+    },
+    {
+      "epoch": 0.008339066441511873,
+      "grad_norm": 0.2604023814201355,
+      "learning_rate": 0.0007154724133689676,
+      "loss": 0.7431,
+      "step": 300
+    },
+    {
+      "epoch": 0.008339066441511873,
+      "eval_loss": 0.7490311861038208,
+      "eval_runtime": 50.0501,
+      "eval_samples_per_second": 11.528,
+      "eval_steps_per_second": 5.774,
+      "step": 300
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 1.9262509085097984e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null