Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b063d903c804c01f64bca9126671bbb625e3046d5b6e17bcee2f8191400f1998
 size 671149168

 version https://git-lfs.github.com/spec/v1
+oid sha256:19cd8af9a43a188d48d4ee78e4caf76e6e8aeb62d4ebbbbe0053dfdc8661e68b
 size 671149168

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1f9e02ce501a745ffccad238bd51cf347543942bfe0201f9c287a591c77c9bbb
 size 341314196

 version https://git-lfs.github.com/spec/v1
+oid sha256:d81e69137a5da2eb4b892d7be23fe5f270dd20b9f0f29e6b3021ea9febc5685e
 size 341314196

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:36badd60a03c2fb3ef43bb07f50ebcd87e9d61cb55565df5d8cafbe0f9fb4b8b
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:b2c48f73842e891eb6c68489ba8e167639def86b833e4d9b7d57d381d0ecd7f2
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6405f9fabb3d2e45adcc13758e2849ef278b423e1ee939354d45ffa4e9327204
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:a032dcbe590e69725a7fa9cedb5934521429cb4c18983c09c5035cbcaaeebd93
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.1231499910354614,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.016735001255125094,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 12.765,
       "eval_steps_per_second": 3.192,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.315568343374889e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.1155003309249878,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.022313335006833458,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 12.765,
       "eval_steps_per_second": 3.192,
       "step": 150
+    },
+    {
+      "epoch": 0.016846567930159262,
+      "grad_norm": 1.347996711730957,
+      "learning_rate": 1.242586363525737e-05,
+      "loss": 1.6669,
+      "step": 151
+    },
+    {
+      "epoch": 0.01695813460519343,
+      "grad_norm": 0.7223047018051147,
+      "learning_rate": 1.1950528629100457e-05,
+      "loss": 1.4154,
+      "step": 152
+    },
+    {
+      "epoch": 0.017069701280227598,
+      "grad_norm": 0.7790156006813049,
+      "learning_rate": 1.1482862068344121e-05,
+      "loss": 1.3367,
+      "step": 153
+    },
+    {
+      "epoch": 0.017181267955261762,
+      "grad_norm": 0.7638139128684998,
+      "learning_rate": 1.1022991808356442e-05,
+      "loss": 1.0699,
+      "step": 154
+    },
+    {
+      "epoch": 0.01729283463029593,
+      "grad_norm": 0.8289293646812439,
+      "learning_rate": 1.0571043573074737e-05,
+      "loss": 1.2266,
+      "step": 155
+    },
+    {
+      "epoch": 0.017404401305330098,
+      "grad_norm": 0.8301885724067688,
+      "learning_rate": 1.0127140920633857e-05,
+      "loss": 1.3556,
+      "step": 156
+    },
+    {
+      "epoch": 0.017515967980364266,
+      "grad_norm": 0.7769188284873962,
+      "learning_rate": 9.69140520958662e-06,
+      "loss": 1.3362,
+      "step": 157
+    },
+    {
+      "epoch": 0.017627534655398434,
+      "grad_norm": 0.8720978498458862,
+      "learning_rate": 9.263955565725648e-06,
+      "loss": 1.0811,
+      "step": 158
+    },
+    {
+      "epoch": 0.017739101330432598,
+      "grad_norm": 0.7598316073417664,
+      "learning_rate": 8.844908849515509e-06,
+      "loss": 1.0352,
+      "step": 159
+    },
+    {
+      "epoch": 0.017850668005466766,
+      "grad_norm": 0.7994357347488403,
+      "learning_rate": 8.434379624144261e-06,
+      "loss": 1.1984,
+      "step": 160
+    },
+    {
+      "epoch": 0.017962234680500934,
+      "grad_norm": 0.841153621673584,
+      "learning_rate": 8.032480124203013e-06,
+      "loss": 1.2358,
+      "step": 161
+    },
+    {
+      "epoch": 0.018073801355535102,
+      "grad_norm": 0.8118249773979187,
+      "learning_rate": 7.639320225002106e-06,
+      "loss": 1.0032,
+      "step": 162
+    },
+    {
+      "epoch": 0.01818536803056927,
+      "grad_norm": 0.753839373588562,
+      "learning_rate": 7.255007412532307e-06,
+      "loss": 1.1015,
+      "step": 163
+    },
+    {
+      "epoch": 0.018296934705603438,
+      "grad_norm": 0.7360984086990356,
+      "learning_rate": 6.8796467540791986e-06,
+      "loss": 1.1351,
+      "step": 164
+    },
+    {
+      "epoch": 0.018408501380637602,
+      "grad_norm": 0.8430283069610596,
+      "learning_rate": 6.513340869498859e-06,
+      "loss": 1.1797,
+      "step": 165
+    },
+    {
+      "epoch": 0.01852006805567177,
+      "grad_norm": 0.7918187975883484,
+      "learning_rate": 6.1561899031625794e-06,
+      "loss": 1.2193,
+      "step": 166
+    },
+    {
+      "epoch": 0.018631634730705938,
+      "grad_norm": 0.7992611527442932,
+      "learning_rate": 5.808291496578435e-06,
+      "loss": 1.268,
+      "step": 167
+    },
+    {
+      "epoch": 0.018743201405740106,
+      "grad_norm": 0.7855931520462036,
+      "learning_rate": 5.469740761697044e-06,
+      "loss": 1.178,
+      "step": 168
+    },
+    {
+      "epoch": 0.018854768080774274,
+      "grad_norm": 0.8650913834571838,
+      "learning_rate": 5.140630254908905e-06,
+      "loss": 1.2404,
+      "step": 169
+    },
+    {
+      "epoch": 0.01896633475580844,
+      "grad_norm": 0.8121924996376038,
+      "learning_rate": 4.821049951740442e-06,
+      "loss": 1.1776,
+      "step": 170
+    },
+    {
+      "epoch": 0.019077901430842606,
+      "grad_norm": 0.7831167578697205,
+      "learning_rate": 4.511087222255528e-06,
+      "loss": 1.2594,
+      "step": 171
+    },
+    {
+      "epoch": 0.019189468105876774,
+      "grad_norm": 0.8442049026489258,
+      "learning_rate": 4.2108268071694616e-06,
+      "loss": 0.9845,
+      "step": 172
+    },
+    {
+      "epoch": 0.01930103478091094,
+      "grad_norm": 0.9648184180259705,
+      "learning_rate": 3.9203507946816445e-06,
+      "loss": 1.1247,
+      "step": 173
+    },
+    {
+      "epoch": 0.01941260145594511,
+      "grad_norm": 0.814885139465332,
+      "learning_rate": 3.6397385980335e-06,
+      "loss": 1.3107,
+      "step": 174
+    },
+    {
+      "epoch": 0.019524168130979278,
+      "grad_norm": 0.8654019236564636,
+      "learning_rate": 3.3690669337977e-06,
+      "loss": 1.2594,
+      "step": 175
+    },
+    {
+      "epoch": 0.019635734806013445,
+      "grad_norm": 0.9481486678123474,
+      "learning_rate": 3.1084098009046106e-06,
+      "loss": 1.1462,
+      "step": 176
+    },
+    {
+      "epoch": 0.01974730148104761,
+      "grad_norm": 0.7980362772941589,
+      "learning_rate": 2.8578384604117217e-06,
+      "loss": 1.117,
+      "step": 177
+    },
+    {
+      "epoch": 0.019858868156081778,
+      "grad_norm": 0.8693552613258362,
+      "learning_rate": 2.6174214160215704e-06,
+      "loss": 1.1145,
+      "step": 178
+    },
+    {
+      "epoch": 0.019970434831115946,
+      "grad_norm": 0.8843334317207336,
+      "learning_rate": 2.3872243953535535e-06,
+      "loss": 1.0491,
+      "step": 179
+    },
+    {
+      "epoch": 0.020082001506150114,
+      "grad_norm": 0.8390738368034363,
+      "learning_rate": 2.1673103319746146e-06,
+      "loss": 0.9996,
+      "step": 180
+    },
+    {
+      "epoch": 0.02019356818118428,
+      "grad_norm": 0.9163740873336792,
+      "learning_rate": 1.957739348193859e-06,
+      "loss": 1.198,
+      "step": 181
+    },
+    {
+      "epoch": 0.020305134856218446,
+      "grad_norm": 0.8822169899940491,
+      "learning_rate": 1.7585687386256944e-06,
+      "loss": 1.0289,
+      "step": 182
+    },
+    {
+      "epoch": 0.020416701531252614,
+      "grad_norm": 0.8542013168334961,
+      "learning_rate": 1.5698529545260744e-06,
+      "loss": 0.9726,
+      "step": 183
+    },
+    {
+      "epoch": 0.02052826820628678,
+      "grad_norm": 0.9263859987258911,
+      "learning_rate": 1.3916435889060575e-06,
+      "loss": 1.1067,
+      "step": 184
+    },
+    {
+      "epoch": 0.02063983488132095,
+      "grad_norm": 0.9441400170326233,
+      "learning_rate": 1.2239893624267852e-06,
+      "loss": 1.1066,
+      "step": 185
+    },
+    {
+      "epoch": 0.020751401556355117,
+      "grad_norm": 0.9362796545028687,
+      "learning_rate": 1.0669361100797704e-06,
+      "loss": 1.0002,
+      "step": 186
+    },
+    {
+      "epoch": 0.020862968231389285,
+      "grad_norm": 0.9713947772979736,
+      "learning_rate": 9.205267686560293e-07,
+      "loss": 1.1045,
+      "step": 187
+    },
+    {
+      "epoch": 0.02097453490642345,
+      "grad_norm": 0.9421945214271545,
+      "learning_rate": 7.848013650076258e-07,
+      "loss": 1.094,
+      "step": 188
+    },
+    {
+      "epoch": 0.021086101581457618,
+      "grad_norm": 0.9486850500106812,
+      "learning_rate": 6.597970051047053e-07,
+      "loss": 1.0785,
+      "step": 189
+    },
+    {
+      "epoch": 0.021197668256491786,
+      "grad_norm": 1.1238409280776978,
+      "learning_rate": 5.455478638911071e-07,
+      "loss": 1.1079,
+      "step": 190
+    },
+    {
+      "epoch": 0.021309234931525953,
+      "grad_norm": 1.0330231189727783,
+      "learning_rate": 4.420851759412603e-07,
+      "loss": 0.9687,
+      "step": 191
+    },
+    {
+      "epoch": 0.02142080160656012,
+      "grad_norm": 1.064351201057434,
+      "learning_rate": 3.4943722692099224e-07,
+      "loss": 1.0296,
+      "step": 192
+    },
+    {
+      "epoch": 0.02153236828159429,
+      "grad_norm": 1.0056447982788086,
+      "learning_rate": 2.676293458544743e-07,
+      "loss": 0.9839,
+      "step": 193
+    },
+    {
+      "epoch": 0.021643934956628454,
+      "grad_norm": 1.1110754013061523,
+      "learning_rate": 1.9668389819954338e-07,
+      "loss": 0.9976,
+      "step": 194
+    },
+    {
+      "epoch": 0.02175550163166262,
+      "grad_norm": 1.2064679861068726,
+      "learning_rate": 1.3662027973320614e-07,
+      "loss": 1.1326,
+      "step": 195
+    },
+    {
+      "epoch": 0.02186706830669679,
+      "grad_norm": 1.1165443658828735,
+      "learning_rate": 8.745491124901861e-08,
+      "loss": 0.9374,
+      "step": 196
+    },
+    {
+      "epoch": 0.021978634981730957,
+      "grad_norm": 1.0634437799453735,
+      "learning_rate": 4.920123406781052e-08,
+      "loss": 0.979,
+      "step": 197
+    },
+    {
+      "epoch": 0.022090201656765125,
+      "grad_norm": 1.4238533973693848,
+      "learning_rate": 2.1869706362958044e-08,
+      "loss": 1.0523,
+      "step": 198
+    },
+    {
+      "epoch": 0.022201768331799293,
+      "grad_norm": 1.357772707939148,
+      "learning_rate": 5.467800301239834e-09,
+      "loss": 1.0345,
+      "step": 199
+    },
+    {
+      "epoch": 0.022313335006833458,
+      "grad_norm": 1.6248823404312134,
+      "learning_rate": 0.0,
+      "loss": 0.9803,
+      "step": 200
+    },
+    {
+      "epoch": 0.022313335006833458,
+      "eval_loss": 1.1155003309249878,
+      "eval_runtime": 1183.9514,
+      "eval_samples_per_second": 12.751,
+      "eval_steps_per_second": 3.188,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 3.08868155248214e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null