Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:975fbc6dd8160c36ac9eaa551fbbbb305d330fbe9ef19381f4688fdbf082facb
 size 671149168

 version https://git-lfs.github.com/spec/v1
+oid sha256:ca031d868afff33032a3db8f3ecb68364655d0b6b5b97f2e4d6e843e42a99637
 size 671149168

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0dce4aeb4d2d8fe26fc5fc408bf45773288abad54fbbe46f82565271a69b993b
 size 1342555602

 version https://git-lfs.github.com/spec/v1
+oid sha256:43e39cb73144273fe1eb07a0d2e8df72f2b82976df0967ef80ddfadbcd2e34ff
 size 1342555602

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1a7b2652c9f4098c00d7790134303c7a9255f5bb57ea0c1de0a9416873ef76e4
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:3422ddedebaed31b4269f3161d4d4afa4d0841cc50b095d82ac969a633b096af
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dfd59dca009004df561617f8f6994512d029a952a68609cac24b36df5a0757ce
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.5897608995437622,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.14781966001478197,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 10.654,
       "eval_steps_per_second": 5.33,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.5531592320273613e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.584774374961853,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.1970928800197093,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 10.654,
       "eval_steps_per_second": 5.33,
       "step": 150
+    },
+    {
+      "epoch": 0.1488051244148805,
+      "grad_norm": 0.04582216218113899,
+      "learning_rate": 1.9136088935510362e-05,
+      "loss": 0.0314,
+      "step": 151
+    },
+    {
+      "epoch": 0.14979058881497906,
+      "grad_norm": 0.09543165564537048,
+      "learning_rate": 1.8414449687337464e-05,
+      "loss": 0.1812,
+      "step": 152
+    },
+    {
+      "epoch": 0.15077605321507762,
+      "grad_norm": 0.10564053803682327,
+      "learning_rate": 1.7703596875660645e-05,
+      "loss": 0.259,
+      "step": 153
+    },
+    {
+      "epoch": 0.15176151761517614,
+      "grad_norm": 0.1349058598279953,
+      "learning_rate": 1.700377325606388e-05,
+      "loss": 0.3295,
+      "step": 154
+    },
+    {
+      "epoch": 0.1527469820152747,
+      "grad_norm": 0.16049978137016296,
+      "learning_rate": 1.631521781767214e-05,
+      "loss": 0.4605,
+      "step": 155
+    },
+    {
+      "epoch": 0.15373244641537326,
+      "grad_norm": 0.19529177248477936,
+      "learning_rate": 1.5638165701536868e-05,
+      "loss": 0.6925,
+      "step": 156
+    },
+    {
+      "epoch": 0.1547179108154718,
+      "grad_norm": 0.14760489761829376,
+      "learning_rate": 1.4972848120335453e-05,
+      "loss": 0.4273,
+      "step": 157
+    },
+    {
+      "epoch": 0.15570337521557034,
+      "grad_norm": 0.1731565296649933,
+      "learning_rate": 1.4319492279412388e-05,
+      "loss": 0.4982,
+      "step": 158
+    },
+    {
+      "epoch": 0.15668883961566887,
+      "grad_norm": 0.19718369841575623,
+      "learning_rate": 1.3678321299188801e-05,
+      "loss": 0.5252,
+      "step": 159
+    },
+    {
+      "epoch": 0.15767430401576743,
+      "grad_norm": 0.19219151139259338,
+      "learning_rate": 1.3049554138967051e-05,
+      "loss": 0.6744,
+      "step": 160
+    },
+    {
+      "epoch": 0.158659768415866,
+      "grad_norm": 0.19782665371894836,
+      "learning_rate": 1.2433405522156332e-05,
+      "loss": 0.5857,
+      "step": 161
+    },
+    {
+      "epoch": 0.15964523281596452,
+      "grad_norm": 0.19759586453437805,
+      "learning_rate": 1.183008586294485e-05,
+      "loss": 0.6169,
+      "step": 162
+    },
+    {
+      "epoch": 0.16063069721606307,
+      "grad_norm": 0.2008185088634491,
+      "learning_rate": 1.1239801194443506e-05,
+      "loss": 0.5558,
+      "step": 163
+    },
+    {
+      "epoch": 0.16161616161616163,
+      "grad_norm": 0.20355170965194702,
+      "learning_rate": 1.066275309832584e-05,
+      "loss": 0.6305,
+      "step": 164
+    },
+    {
+      "epoch": 0.16260162601626016,
+      "grad_norm": 0.2057691514492035,
+      "learning_rate": 1.0099138635988026e-05,
+      "loss": 0.629,
+      "step": 165
+    },
+    {
+      "epoch": 0.16358709041635872,
+      "grad_norm": 0.21220219135284424,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 0.5687,
+      "step": 166
+    },
+    {
+      "epoch": 0.16457255481645724,
+      "grad_norm": 0.22451376914978027,
+      "learning_rate": 9.012975854638949e-06,
+      "loss": 0.7258,
+      "step": 167
+    },
+    {
+      "epoch": 0.1655580192165558,
+      "grad_norm": 0.24449403584003448,
+      "learning_rate": 8.490798459222476e-06,
+      "loss": 0.6929,
+      "step": 168
+    },
+    {
+      "epoch": 0.16654348361665436,
+      "grad_norm": 0.2417096346616745,
+      "learning_rate": 7.982796418105371e-06,
+      "loss": 0.7392,
+      "step": 169
+    },
+    {
+      "epoch": 0.1675289480167529,
+      "grad_norm": 0.27315232157707214,
+      "learning_rate": 7.489143213519301e-06,
+      "loss": 0.7316,
+      "step": 170
+    },
+    {
+      "epoch": 0.16851441241685144,
+      "grad_norm": 0.25194498896598816,
+      "learning_rate": 7.010007427581378e-06,
+      "loss": 0.5979,
+      "step": 171
+    },
+    {
+      "epoch": 0.16949987681695,
+      "grad_norm": 0.24112963676452637,
+      "learning_rate": 6.5455526847235825e-06,
+      "loss": 0.69,
+      "step": 172
+    },
+    {
+      "epoch": 0.17048534121704853,
+      "grad_norm": 0.2511669397354126,
+      "learning_rate": 6.0959375958151045e-06,
+      "loss": 0.5824,
+      "step": 173
+    },
+    {
+      "epoch": 0.1714708056171471,
+      "grad_norm": 0.24215401709079742,
+      "learning_rate": 5.6613157039969055e-06,
+      "loss": 0.6823,
+      "step": 174
+    },
+    {
+      "epoch": 0.17245627001724562,
+      "grad_norm": 0.28520438075065613,
+      "learning_rate": 5.241835432246889e-06,
+      "loss": 0.7416,
+      "step": 175
+    },
+    {
+      "epoch": 0.17344173441734417,
+      "grad_norm": 0.2623746395111084,
+      "learning_rate": 4.837640032693558e-06,
+      "loss": 0.6514,
+      "step": 176
+    },
+    {
+      "epoch": 0.17442719881744273,
+      "grad_norm": 0.2783282995223999,
+      "learning_rate": 4.448867537695578e-06,
+      "loss": 0.7159,
+      "step": 177
+    },
+    {
+      "epoch": 0.17541266321754126,
+      "grad_norm": 0.27611035108566284,
+      "learning_rate": 4.075650712703849e-06,
+      "loss": 0.7525,
+      "step": 178
+    },
+    {
+      "epoch": 0.17639812761763982,
+      "grad_norm": 0.270900696516037,
+      "learning_rate": 3.71811701092219e-06,
+      "loss": 0.7557,
+      "step": 179
+    },
+    {
+      "epoch": 0.17738359201773837,
+      "grad_norm": 0.2633064389228821,
+      "learning_rate": 3.376388529782215e-06,
+      "loss": 0.5719,
+      "step": 180
+    },
+    {
+      "epoch": 0.1783690564178369,
+      "grad_norm": 0.2590899169445038,
+      "learning_rate": 3.0505819692471792e-06,
+      "loss": 0.5344,
+      "step": 181
+    },
+    {
+      "epoch": 0.17935452081793546,
+      "grad_norm": 0.28156155347824097,
+      "learning_rate": 2.7408085919590264e-06,
+      "loss": 0.6333,
+      "step": 182
+    },
+    {
+      "epoch": 0.180339985218034,
+      "grad_norm": 0.2902732491493225,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 0.5511,
+      "step": 183
+    },
+    {
+      "epoch": 0.18132544961813254,
+      "grad_norm": 0.3187778890132904,
+      "learning_rate": 2.1697790249779636e-06,
+      "loss": 0.7326,
+      "step": 184
+    },
+    {
+      "epoch": 0.1823109140182311,
+      "grad_norm": 0.2755086421966553,
+      "learning_rate": 1.908717841359048e-06,
+      "loss": 0.4491,
+      "step": 185
+    },
+    {
+      "epoch": 0.18329637841832963,
+      "grad_norm": 0.33460649847984314,
+      "learning_rate": 1.6640797865406288e-06,
+      "loss": 0.6483,
+      "step": 186
+    },
+    {
+      "epoch": 0.1842818428184282,
+      "grad_norm": 0.3185296356678009,
+      "learning_rate": 1.4359484041943038e-06,
+      "loss": 0.6061,
+      "step": 187
+    },
+    {
+      "epoch": 0.18526730721852674,
+      "grad_norm": 0.343330055475235,
+      "learning_rate": 1.2244016009781701e-06,
+      "loss": 0.7053,
+      "step": 188
+    },
+    {
+      "epoch": 0.18625277161862527,
+      "grad_norm": 0.35610195994377136,
+      "learning_rate": 1.0295116199317057e-06,
+      "loss": 0.6565,
+      "step": 189
+    },
+    {
+      "epoch": 0.18723823601872383,
+      "grad_norm": 0.3516658842563629,
+      "learning_rate": 8.513450158049108e-07,
+      "loss": 0.7232,
+      "step": 190
+    },
+    {
+      "epoch": 0.18822370041882236,
+      "grad_norm": 0.37256768345832825,
+      "learning_rate": 6.899626323298713e-07,
+      "loss": 0.5819,
+      "step": 191
+    },
+    {
+      "epoch": 0.18920916481892092,
+      "grad_norm": 0.4146597385406494,
+      "learning_rate": 5.454195814427021e-07,
+      "loss": 0.653,
+      "step": 192
+    },
+    {
+      "epoch": 0.19019462921901947,
+      "grad_norm": 0.3835974335670471,
+      "learning_rate": 4.177652244628627e-07,
+      "loss": 0.6009,
+      "step": 193
+    },
+    {
+      "epoch": 0.191180093619118,
+      "grad_norm": 0.5377586483955383,
+      "learning_rate": 3.0704315523631953e-07,
+      "loss": 0.7371,
+      "step": 194
+    },
+    {
+      "epoch": 0.19216555801921656,
+      "grad_norm": 0.4629654884338379,
+      "learning_rate": 2.1329118524827662e-07,
+      "loss": 0.7908,
+      "step": 195
+    },
+    {
+      "epoch": 0.19315102241931512,
+      "grad_norm": 0.4720406234264374,
+      "learning_rate": 1.3654133071059893e-07,
+      "loss": 0.7832,
+      "step": 196
+    },
+    {
+      "epoch": 0.19413648681941365,
+      "grad_norm": 0.5157343745231628,
+      "learning_rate": 7.681980162830282e-08,
+      "loss": 0.8257,
+      "step": 197
+    },
+    {
+      "epoch": 0.1951219512195122,
+      "grad_norm": 0.5014312267303467,
+      "learning_rate": 3.4146992848854695e-08,
+      "loss": 0.7237,
+      "step": 198
+    },
+    {
+      "epoch": 0.19610741561961073,
+      "grad_norm": 0.5394123792648315,
+      "learning_rate": 8.537477097364522e-09,
+      "loss": 0.6931,
+      "step": 199
+    },
+    {
+      "epoch": 0.1970928800197093,
+      "grad_norm": 0.6996267437934875,
+      "learning_rate": 0.0,
+      "loss": 0.7056,
+      "step": 200
+    },
+    {
+      "epoch": 0.1970928800197093,
+      "eval_loss": 0.584774374961853,
+      "eval_runtime": 160.1549,
+      "eval_samples_per_second": 10.671,
+      "eval_steps_per_second": 5.339,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 3.401698120071905e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null