Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2b2a8d6b1b9cc916dcd927218d956fd2f73425668bf1534a4faca8ff87a1364e
 size 295488936

 version https://git-lfs.github.com/spec/v1
+oid sha256:7b56d7ce4cc6a16eb26ebcd0c69c98e3c457d30e2714e23bbe0850b9fd2b3838
 size 295488936

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c41b0a8a25dbc28151cbdcae3476dba2168f3beaf5a506cf2a90d7830cc0f868
 size 150486964

 version https://git-lfs.github.com/spec/v1
+oid sha256:83baa310e1dba0a7550a118d027b2dbf738abf2a6554055de0fff4757e9f69c8
 size 150486964

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e1b2449c9d3d4f80fb993481ef43903a102d1cde52f163fd752bda3c49c9b03f
 size 14180

 version https://git-lfs.github.com/spec/v1
+oid sha256:2f71936b5c4cff1cb03b7ea1f012a7d6570bffc03f561a681fb5e0ed80a7a28e
 size 14180

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6405f9fabb3d2e45adcc13758e2849ef278b423e1ee939354d45ffa4e9327204
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:a032dcbe590e69725a7fa9cedb5934521429cb4c18983c09c5035cbcaaeebd93
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.6553597450256348,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.25510204081632654,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 33.659,
       "eval_steps_per_second": 8.432,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 4.102581815083008e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.6537404656410217,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.3401360544217687,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 33.659,
       "eval_steps_per_second": 8.432,
       "step": 150
+    },
+    {
+      "epoch": 0.2568027210884354,
+      "grad_norm": 0.20743505656719208,
+      "learning_rate": 1.242586363525737e-05,
+      "loss": 0.5836,
+      "step": 151
+    },
+    {
+      "epoch": 0.2585034013605442,
+      "grad_norm": 0.23101773858070374,
+      "learning_rate": 1.1950528629100457e-05,
+      "loss": 0.6812,
+      "step": 152
+    },
+    {
+      "epoch": 0.2602040816326531,
+      "grad_norm": 0.22349244356155396,
+      "learning_rate": 1.1482862068344121e-05,
+      "loss": 0.6376,
+      "step": 153
+    },
+    {
+      "epoch": 0.2619047619047619,
+      "grad_norm": 0.23622667789459229,
+      "learning_rate": 1.1022991808356442e-05,
+      "loss": 0.6703,
+      "step": 154
+    },
+    {
+      "epoch": 0.26360544217687076,
+      "grad_norm": 0.2300838977098465,
+      "learning_rate": 1.0571043573074737e-05,
+      "loss": 0.6579,
+      "step": 155
+    },
+    {
+      "epoch": 0.2653061224489796,
+      "grad_norm": 0.23446151614189148,
+      "learning_rate": 1.0127140920633857e-05,
+      "loss": 0.6648,
+      "step": 156
+    },
+    {
+      "epoch": 0.26700680272108845,
+      "grad_norm": 0.23773601651191711,
+      "learning_rate": 9.69140520958662e-06,
+      "loss": 0.6624,
+      "step": 157
+    },
+    {
+      "epoch": 0.2687074829931973,
+      "grad_norm": 0.23287071287631989,
+      "learning_rate": 9.263955565725648e-06,
+      "loss": 0.6676,
+      "step": 158
+    },
+    {
+      "epoch": 0.27040816326530615,
+      "grad_norm": 0.22927455604076385,
+      "learning_rate": 8.844908849515509e-06,
+      "loss": 0.6513,
+      "step": 159
+    },
+    {
+      "epoch": 0.272108843537415,
+      "grad_norm": 0.23780395090579987,
+      "learning_rate": 8.434379624144261e-06,
+      "loss": 0.6377,
+      "step": 160
+    },
+    {
+      "epoch": 0.27380952380952384,
+      "grad_norm": 0.23513314127922058,
+      "learning_rate": 8.032480124203013e-06,
+      "loss": 0.6464,
+      "step": 161
+    },
+    {
+      "epoch": 0.2755102040816326,
+      "grad_norm": 0.23223648965358734,
+      "learning_rate": 7.639320225002106e-06,
+      "loss": 0.6287,
+      "step": 162
+    },
+    {
+      "epoch": 0.27721088435374147,
+      "grad_norm": 0.2369530349969864,
+      "learning_rate": 7.255007412532307e-06,
+      "loss": 0.6652,
+      "step": 163
+    },
+    {
+      "epoch": 0.2789115646258503,
+      "grad_norm": 0.2352808564901352,
+      "learning_rate": 6.8796467540791986e-06,
+      "loss": 0.6554,
+      "step": 164
+    },
+    {
+      "epoch": 0.28061224489795916,
+      "grad_norm": 0.23590978980064392,
+      "learning_rate": 6.513340869498859e-06,
+      "loss": 0.6387,
+      "step": 165
+    },
+    {
+      "epoch": 0.282312925170068,
+      "grad_norm": 0.24238228797912598,
+      "learning_rate": 6.1561899031625794e-06,
+      "loss": 0.6717,
+      "step": 166
+    },
+    {
+      "epoch": 0.28401360544217685,
+      "grad_norm": 0.23621121048927307,
+      "learning_rate": 5.808291496578435e-06,
+      "loss": 0.647,
+      "step": 167
+    },
+    {
+      "epoch": 0.2857142857142857,
+      "grad_norm": 0.2383495420217514,
+      "learning_rate": 5.469740761697044e-06,
+      "loss": 0.6615,
+      "step": 168
+    },
+    {
+      "epoch": 0.28741496598639454,
+      "grad_norm": 0.23851458728313446,
+      "learning_rate": 5.140630254908905e-06,
+      "loss": 0.6346,
+      "step": 169
+    },
+    {
+      "epoch": 0.2891156462585034,
+      "grad_norm": 0.23707586526870728,
+      "learning_rate": 4.821049951740442e-06,
+      "loss": 0.6646,
+      "step": 170
+    },
+    {
+      "epoch": 0.29081632653061223,
+      "grad_norm": 0.24620512127876282,
+      "learning_rate": 4.511087222255528e-06,
+      "loss": 0.6786,
+      "step": 171
+    },
+    {
+      "epoch": 0.2925170068027211,
+      "grad_norm": 0.23762977123260498,
+      "learning_rate": 4.2108268071694616e-06,
+      "loss": 0.6565,
+      "step": 172
+    },
+    {
+      "epoch": 0.2942176870748299,
+      "grad_norm": 0.24971109628677368,
+      "learning_rate": 3.9203507946816445e-06,
+      "loss": 0.7011,
+      "step": 173
+    },
+    {
+      "epoch": 0.29591836734693877,
+      "grad_norm": 0.24630703032016754,
+      "learning_rate": 3.6397385980335e-06,
+      "loss": 0.6313,
+      "step": 174
+    },
+    {
+      "epoch": 0.2976190476190476,
+      "grad_norm": 0.248443141579628,
+      "learning_rate": 3.3690669337977e-06,
+      "loss": 0.6703,
+      "step": 175
+    },
+    {
+      "epoch": 0.29931972789115646,
+      "grad_norm": 0.2575363516807556,
+      "learning_rate": 3.1084098009046106e-06,
+      "loss": 0.6883,
+      "step": 176
+    },
+    {
+      "epoch": 0.3010204081632653,
+      "grad_norm": 0.2538343369960785,
+      "learning_rate": 2.8578384604117217e-06,
+      "loss": 0.6952,
+      "step": 177
+    },
+    {
+      "epoch": 0.30272108843537415,
+      "grad_norm": 0.25473159551620483,
+      "learning_rate": 2.6174214160215704e-06,
+      "loss": 0.6877,
+      "step": 178
+    },
+    {
+      "epoch": 0.304421768707483,
+      "grad_norm": 0.2566973865032196,
+      "learning_rate": 2.3872243953535535e-06,
+      "loss": 0.6795,
+      "step": 179
+    },
+    {
+      "epoch": 0.30612244897959184,
+      "grad_norm": 0.24843615293502808,
+      "learning_rate": 2.1673103319746146e-06,
+      "loss": 0.6552,
+      "step": 180
+    },
+    {
+      "epoch": 0.3078231292517007,
+      "grad_norm": 0.255484014749527,
+      "learning_rate": 1.957739348193859e-06,
+      "loss": 0.6854,
+      "step": 181
+    },
+    {
+      "epoch": 0.30952380952380953,
+      "grad_norm": 0.24806556105613708,
+      "learning_rate": 1.7585687386256944e-06,
+      "loss": 0.6547,
+      "step": 182
+    },
+    {
+      "epoch": 0.3112244897959184,
+      "grad_norm": 0.24836336076259613,
+      "learning_rate": 1.5698529545260744e-06,
+      "loss": 0.6485,
+      "step": 183
+    },
+    {
+      "epoch": 0.3129251700680272,
+      "grad_norm": 0.25505492091178894,
+      "learning_rate": 1.3916435889060575e-06,
+      "loss": 0.6795,
+      "step": 184
+    },
+    {
+      "epoch": 0.31462585034013607,
+      "grad_norm": 0.26232901215553284,
+      "learning_rate": 1.2239893624267852e-06,
+      "loss": 0.6813,
+      "step": 185
+    },
+    {
+      "epoch": 0.3163265306122449,
+      "grad_norm": 0.26474517583847046,
+      "learning_rate": 1.0669361100797704e-06,
+      "loss": 0.6814,
+      "step": 186
+    },
+    {
+      "epoch": 0.31802721088435376,
+      "grad_norm": 0.26200371980667114,
+      "learning_rate": 9.205267686560293e-07,
+      "loss": 0.6425,
+      "step": 187
+    },
+    {
+      "epoch": 0.3197278911564626,
+      "grad_norm": 0.2582118511199951,
+      "learning_rate": 7.848013650076258e-07,
+      "loss": 0.6708,
+      "step": 188
+    },
+    {
+      "epoch": 0.32142857142857145,
+      "grad_norm": 0.25343313813209534,
+      "learning_rate": 6.597970051047053e-07,
+      "loss": 0.632,
+      "step": 189
+    },
+    {
+      "epoch": 0.3231292517006803,
+      "grad_norm": 0.2599485516548157,
+      "learning_rate": 5.455478638911071e-07,
+      "loss": 0.6482,
+      "step": 190
+    },
+    {
+      "epoch": 0.32482993197278914,
+      "grad_norm": 0.25874727964401245,
+      "learning_rate": 4.420851759412603e-07,
+      "loss": 0.6601,
+      "step": 191
+    },
+    {
+      "epoch": 0.32653061224489793,
+      "grad_norm": 0.2675853669643402,
+      "learning_rate": 3.4943722692099224e-07,
+      "loss": 0.6596,
+      "step": 192
+    },
+    {
+      "epoch": 0.3282312925170068,
+      "grad_norm": 0.27086105942726135,
+      "learning_rate": 2.676293458544743e-07,
+      "loss": 0.6787,
+      "step": 193
+    },
+    {
+      "epoch": 0.3299319727891156,
+      "grad_norm": 0.27428770065307617,
+      "learning_rate": 1.9668389819954338e-07,
+      "loss": 0.6979,
+      "step": 194
+    },
+    {
+      "epoch": 0.33163265306122447,
+      "grad_norm": 0.276022344827652,
+      "learning_rate": 1.3662027973320614e-07,
+      "loss": 0.6733,
+      "step": 195
+    },
+    {
+      "epoch": 0.3333333333333333,
+      "grad_norm": 0.28401777148246765,
+      "learning_rate": 8.745491124901861e-08,
+      "loss": 0.6435,
+      "step": 196
+    },
+    {
+      "epoch": 0.33503401360544216,
+      "grad_norm": 0.28814497590065,
+      "learning_rate": 4.920123406781052e-08,
+      "loss": 0.6764,
+      "step": 197
+    },
+    {
+      "epoch": 0.336734693877551,
+      "grad_norm": 0.3017368018627167,
+      "learning_rate": 2.1869706362958044e-08,
+      "loss": 0.6491,
+      "step": 198
+    },
+    {
+      "epoch": 0.33843537414965985,
+      "grad_norm": 0.3238164484500885,
+      "learning_rate": 5.467800301239834e-09,
+      "loss": 0.6186,
+      "step": 199
+    },
+    {
+      "epoch": 0.3401360544217687,
+      "grad_norm": 0.6410134434700012,
+      "learning_rate": 0.0,
+      "loss": 0.6382,
+      "step": 200
+    },
+    {
+      "epoch": 0.3401360544217687,
+      "eval_loss": 0.6537404656410217,
+      "eval_runtime": 29.3996,
+      "eval_samples_per_second": 33.674,
+      "eval_steps_per_second": 8.435,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 5.463305468510208e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null