Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +363 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:58d0f964826eb2834161e2b52ced263c1cc933066f4b17e5727ee9143623bc3a
 size 191968

 version https://git-lfs.github.com/spec/v1
+oid sha256:67abb41e56e78a66688842b966abefe2ae07faf077979342ac8168501b27748f
 size 191968

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aca57125df711506589dfb428f3d2a85fc6d3fe9f0cdb9c4bf415e2b65261016
 size 253144

 version https://git-lfs.github.com/spec/v1
+oid sha256:34c2ef1b33596a23cca0f72b37e834875988cc57936319c329e4850667c3bdd0
 size 253144

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:36d302e97ce3d6d13c6d12c84c4cf16a520c1f7a1aada20bb31f08429d9c29e8
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:16001d5f1e5442ee9e29a6310ea7c7892e9fb70d07faaccf84234f112d26d143
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ed340328d62f63f2d9d00f6904395875dd09851f050e03ae91b4b798d852ce41
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:99ca67f9a35cd1c50df3d93bedf5a6642db2c7847c021a0a8d3f44c1bf4993d3
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 11.74016284942627,
-  "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.00821658929378415,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -373,6 +373,364 @@
       "eval_samples_per_second": 210.911,
       "eval_steps_per_second": 52.743,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -401,7 +759,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 20717292748800.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 11.734024047851562,
+  "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 0.0164331785875683,
   "eval_steps": 50,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 210.911,
       "eval_steps_per_second": 52.743,
       "step": 50
+    },
+    {
+      "epoch": 0.008380921079659834,
+      "grad_norm": 0.10825006663799286,
+      "learning_rate": 9.729774092143627e-05,
+      "loss": 11.7462,
+      "step": 51
+    },
+    {
+      "epoch": 0.008545252865535516,
+      "grad_norm": 0.09035345911979675,
+      "learning_rate": 9.716559066288715e-05,
+      "loss": 11.7452,
+      "step": 52
+    },
+    {
+      "epoch": 0.008709584651411199,
+      "grad_norm": 0.109395332634449,
+      "learning_rate": 9.703037989675087e-05,
+      "loss": 11.7461,
+      "step": 53
+    },
+    {
+      "epoch": 0.008873916437286883,
+      "grad_norm": 0.09128301590681076,
+      "learning_rate": 9.689211739666023e-05,
+      "loss": 11.7422,
+      "step": 54
+    },
+    {
+      "epoch": 0.009038248223162565,
+      "grad_norm": 0.09604194760322571,
+      "learning_rate": 9.675081213427076e-05,
+      "loss": 11.741,
+      "step": 55
+    },
+    {
+      "epoch": 0.009202580009038248,
+      "grad_norm": 0.09056773781776428,
+      "learning_rate": 9.66064732786784e-05,
+      "loss": 11.7421,
+      "step": 56
+    },
+    {
+      "epoch": 0.009366911794913932,
+      "grad_norm": 0.08328656107187271,
+      "learning_rate": 9.645911019582467e-05,
+      "loss": 11.7412,
+      "step": 57
+    },
+    {
+      "epoch": 0.009531243580789614,
+      "grad_norm": 0.09039179235696793,
+      "learning_rate": 9.630873244788883e-05,
+      "loss": 11.7383,
+      "step": 58
+    },
+    {
+      "epoch": 0.009695575366665297,
+      "grad_norm": 0.07337184250354767,
+      "learning_rate": 9.615534979266745e-05,
+      "loss": 11.7419,
+      "step": 59
+    },
+    {
+      "epoch": 0.009859907152540981,
+      "grad_norm": 0.07708930224180222,
+      "learning_rate": 9.599897218294122e-05,
+      "loss": 11.7387,
+      "step": 60
+    },
+    {
+      "epoch": 0.010024238938416663,
+      "grad_norm": 0.08723892271518707,
+      "learning_rate": 9.583960976582913e-05,
+      "loss": 11.7393,
+      "step": 61
+    },
+    {
+      "epoch": 0.010188570724292346,
+      "grad_norm": 0.08119846135377884,
+      "learning_rate": 9.567727288213005e-05,
+      "loss": 11.7412,
+      "step": 62
+    },
+    {
+      "epoch": 0.01035290251016803,
+      "grad_norm": 0.0847805067896843,
+      "learning_rate": 9.551197206565173e-05,
+      "loss": 11.743,
+      "step": 63
+    },
+    {
+      "epoch": 0.010517234296043712,
+      "grad_norm": 0.07967144250869751,
+      "learning_rate": 9.534371804252728e-05,
+      "loss": 11.7405,
+      "step": 64
+    },
+    {
+      "epoch": 0.010681566081919395,
+      "grad_norm": 0.046992260962724686,
+      "learning_rate": 9.517252173051911e-05,
+      "loss": 11.7404,
+      "step": 65
+    },
+    {
+      "epoch": 0.010845897867795079,
+      "grad_norm": 0.054778508841991425,
+      "learning_rate": 9.49983942383106e-05,
+      "loss": 11.7398,
+      "step": 66
+    },
+    {
+      "epoch": 0.011010229653670761,
+      "grad_norm": 0.05465163663029671,
+      "learning_rate": 9.482134686478519e-05,
+      "loss": 11.7414,
+      "step": 67
+    },
+    {
+      "epoch": 0.011174561439546444,
+      "grad_norm": 0.06919052451848984,
+      "learning_rate": 9.464139109829321e-05,
+      "loss": 11.7432,
+      "step": 68
+    },
+    {
+      "epoch": 0.011338893225422128,
+      "grad_norm": 0.04978770390152931,
+      "learning_rate": 9.445853861590647e-05,
+      "loss": 11.7406,
+      "step": 69
+    },
+    {
+      "epoch": 0.01150322501129781,
+      "grad_norm": 0.05154619738459587,
+      "learning_rate": 9.42728012826605e-05,
+      "loss": 11.7389,
+      "step": 70
+    },
+    {
+      "epoch": 0.011667556797173493,
+      "grad_norm": 0.058466438204050064,
+      "learning_rate": 9.408419115078471e-05,
+      "loss": 11.7376,
+      "step": 71
+    },
+    {
+      "epoch": 0.011831888583049177,
+      "grad_norm": 0.07136223465204239,
+      "learning_rate": 9.389272045892024e-05,
+      "loss": 11.738,
+      "step": 72
+    },
+    {
+      "epoch": 0.01199622036892486,
+      "grad_norm": 0.0563649944961071,
+      "learning_rate": 9.36984016313259e-05,
+      "loss": 11.738,
+      "step": 73
+    },
+    {
+      "epoch": 0.012160552154800542,
+      "grad_norm": 0.062396273016929626,
+      "learning_rate": 9.350124727707197e-05,
+      "loss": 11.7354,
+      "step": 74
+    },
+    {
+      "epoch": 0.012324883940676224,
+      "grad_norm": 0.05039061978459358,
+      "learning_rate": 9.330127018922194e-05,
+      "loss": 11.7388,
+      "step": 75
+    },
+    {
+      "epoch": 0.012489215726551909,
+      "grad_norm": 0.058604415506124496,
+      "learning_rate": 9.309848334400246e-05,
+      "loss": 11.7386,
+      "step": 76
+    },
+    {
+      "epoch": 0.012653547512427591,
+      "grad_norm": 0.08999053388834,
+      "learning_rate": 9.289289989996133e-05,
+      "loss": 11.7338,
+      "step": 77
+    },
+    {
+      "epoch": 0.012817879298303273,
+      "grad_norm": 0.05026502162218094,
+      "learning_rate": 9.268453319711363e-05,
+      "loss": 11.7371,
+      "step": 78
+    },
+    {
+      "epoch": 0.012982211084178958,
+      "grad_norm": 0.06558049470186234,
+      "learning_rate": 9.247339675607605e-05,
+      "loss": 11.7358,
+      "step": 79
+    },
+    {
+      "epoch": 0.01314654287005464,
+      "grad_norm": 0.06828798353672028,
+      "learning_rate": 9.225950427718975e-05,
+      "loss": 11.7333,
+      "step": 80
+    },
+    {
+      "epoch": 0.013310874655930323,
+      "grad_norm": 0.05039716139435768,
+      "learning_rate": 9.204286963963111e-05,
+      "loss": 11.7367,
+      "step": 81
+    },
+    {
+      "epoch": 0.013475206441806007,
+      "grad_norm": 0.09033340215682983,
+      "learning_rate": 9.182350690051133e-05,
+      "loss": 11.737,
+      "step": 82
+    },
+    {
+      "epoch": 0.01363953822768169,
+      "grad_norm": 0.04329407215118408,
+      "learning_rate": 9.160143029396422e-05,
+      "loss": 11.7365,
+      "step": 83
+    },
+    {
+      "epoch": 0.013803870013557372,
+      "grad_norm": 0.05856756493449211,
+      "learning_rate": 9.13766542302225e-05,
+      "loss": 11.7351,
+      "step": 84
+    },
+    {
+      "epoch": 0.013968201799433056,
+      "grad_norm": 0.06545337289571762,
+      "learning_rate": 9.114919329468282e-05,
+      "loss": 11.7321,
+      "step": 85
+    },
+    {
+      "epoch": 0.014132533585308738,
+      "grad_norm": 0.06385994702577591,
+      "learning_rate": 9.091906224695935e-05,
+      "loss": 11.734,
+      "step": 86
+    },
+    {
+      "epoch": 0.01429686537118442,
+      "grad_norm": 0.0672338604927063,
+      "learning_rate": 9.068627601992598e-05,
+      "loss": 11.7357,
+      "step": 87
+    },
+    {
+      "epoch": 0.014461197157060105,
+      "grad_norm": 0.06891307979822159,
+      "learning_rate": 9.045084971874738e-05,
+      "loss": 11.7329,
+      "step": 88
+    },
+    {
+      "epoch": 0.014625528942935787,
+      "grad_norm": 0.05319250375032425,
+      "learning_rate": 9.021279861989885e-05,
+      "loss": 11.7367,
+      "step": 89
+    },
+    {
+      "epoch": 0.01478986072881147,
+      "grad_norm": 0.07806988060474396,
+      "learning_rate": 8.997213817017507e-05,
+      "loss": 11.7364,
+      "step": 90
+    },
+    {
+      "epoch": 0.014954192514687154,
+      "grad_norm": 0.07189088314771652,
+      "learning_rate": 8.972888398568772e-05,
+      "loss": 11.7356,
+      "step": 91
+    },
+    {
+      "epoch": 0.015118524300562836,
+      "grad_norm": 0.07356931269168854,
+      "learning_rate": 8.948305185085225e-05,
+      "loss": 11.7331,
+      "step": 92
+    },
+    {
+      "epoch": 0.015282856086438519,
+      "grad_norm": 0.07599375396966934,
+      "learning_rate": 8.92346577173636e-05,
+      "loss": 11.7298,
+      "step": 93
+    },
+    {
+      "epoch": 0.015447187872314203,
+      "grad_norm": 0.08366679400205612,
+      "learning_rate": 8.898371770316111e-05,
+      "loss": 11.728,
+      "step": 94
+    },
+    {
+      "epoch": 0.015611519658189885,
+      "grad_norm": 0.08361729979515076,
+      "learning_rate": 8.873024809138272e-05,
+      "loss": 11.7304,
+      "step": 95
+    },
+    {
+      "epoch": 0.01577585144406557,
+      "grad_norm": 0.07593885809183121,
+      "learning_rate": 8.847426532930831e-05,
+      "loss": 11.7272,
+      "step": 96
+    },
+    {
+      "epoch": 0.01594018322994125,
+      "grad_norm": 0.09948297590017319,
+      "learning_rate": 8.821578602729242e-05,
+      "loss": 11.7321,
+      "step": 97
+    },
+    {
+      "epoch": 0.016104515015816934,
+      "grad_norm": 0.13416649401187897,
+      "learning_rate": 8.795482695768658e-05,
+      "loss": 11.7273,
+      "step": 98
+    },
+    {
+      "epoch": 0.01626884680169262,
+      "grad_norm": 0.12499107420444489,
+      "learning_rate": 8.769140505375085e-05,
+      "loss": 11.7205,
+      "step": 99
+    },
+    {
+      "epoch": 0.0164331785875683,
+      "grad_norm": 0.2499997466802597,
+      "learning_rate": 8.742553740855506e-05,
+      "loss": 11.7202,
+      "step": 100
+    },
+    {
+      "epoch": 0.0164331785875683,
+      "eval_loss": 11.734024047851562,
+      "eval_runtime": 48.7146,
+      "eval_samples_per_second": 210.389,
+      "eval_steps_per_second": 52.613,
+      "step": 100
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 41434585497600.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null