Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +363 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7fa0eb25e6e0746a93d7853a1a924b845f391a8218e9b1354676b971d5e2135e
 size 34456

 version https://git-lfs.github.com/spec/v1
+oid sha256:0bdaf8e7bebe4344211acbfe1c1ac858c8e2772334f4e2966d1cfb2b84f26fc9
 size 34456

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f99d706449bd6e3df1821553f2d772e9683d212448be99886f2ee6b133197c7c
 size 73222

 version https://git-lfs.github.com/spec/v1
+oid sha256:3cd8e8ab0be7f10f689add62252f17948b3692544594af3a46aebba3f769a2c1
 size 73222

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a85cfc33bd570d8ceb9245680529984d7b7b8936f15ed9a8752e734196c52b93
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:3939a83b79bcf6e9fbd8d5a72464e06e7302232845ed1ade497114ae09fbe6d8
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6405f9fabb3d2e45adcc13758e2849ef278b423e1ee939354d45ffa4e9327204
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:a032dcbe590e69725a7fa9cedb5934521429cb4c18983c09c5035cbcaaeebd93
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 11.5,
   "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.10124873439082012,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 225.291,
       "eval_steps_per_second": 56.345,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1103,7 +1461,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 2
       }
     },
     "TrainerControl": {
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 24118847078400.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 11.5,
   "best_model_checkpoint": "miner_id_24/checkpoint-50",
+  "epoch": 0.13499831252109348,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 225.291,
       "eval_steps_per_second": 56.345,
       "step": 150
+    },
+    {
+      "epoch": 0.10192372595342558,
+      "grad_norm": 0.00019876357691828161,
+      "learning_rate": 1.242586363525737e-05,
+      "loss": 46.0,
+      "step": 151
+    },
+    {
+      "epoch": 0.10259871751603104,
+      "grad_norm": 0.0002352362935198471,
+      "learning_rate": 1.1950528629100457e-05,
+      "loss": 46.0,
+      "step": 152
+    },
+    {
+      "epoch": 0.10327370907863652,
+      "grad_norm": 0.00019797831191681325,
+      "learning_rate": 1.1482862068344121e-05,
+      "loss": 46.0,
+      "step": 153
+    },
+    {
+      "epoch": 0.10394870064124198,
+      "grad_norm": 0.0002398581855231896,
+      "learning_rate": 1.1022991808356442e-05,
+      "loss": 46.0,
+      "step": 154
+    },
+    {
+      "epoch": 0.10462369220384746,
+      "grad_norm": 0.00019246955343987793,
+      "learning_rate": 1.0571043573074737e-05,
+      "loss": 46.0,
+      "step": 155
+    },
+    {
+      "epoch": 0.10529868376645292,
+      "grad_norm": 0.00017457427748013288,
+      "learning_rate": 1.0127140920633857e-05,
+      "loss": 46.0,
+      "step": 156
+    },
+    {
+      "epoch": 0.10597367532905838,
+      "grad_norm": 0.00029743704362772405,
+      "learning_rate": 9.69140520958662e-06,
+      "loss": 46.0,
+      "step": 157
+    },
+    {
+      "epoch": 0.10664866689166386,
+      "grad_norm": 0.00017338975158054382,
+      "learning_rate": 9.263955565725648e-06,
+      "loss": 46.0,
+      "step": 158
+    },
+    {
+      "epoch": 0.10732365845426932,
+      "grad_norm": 0.00029915120103396475,
+      "learning_rate": 8.844908849515509e-06,
+      "loss": 46.0,
+      "step": 159
+    },
+    {
+      "epoch": 0.1079986500168748,
+      "grad_norm": 0.00023515670909546316,
+      "learning_rate": 8.434379624144261e-06,
+      "loss": 46.0,
+      "step": 160
+    },
+    {
+      "epoch": 0.10867364157948026,
+      "grad_norm": 0.00025672902120277286,
+      "learning_rate": 8.032480124203013e-06,
+      "loss": 46.0,
+      "step": 161
+    },
+    {
+      "epoch": 0.10934863314208572,
+      "grad_norm": 0.00027126428904011846,
+      "learning_rate": 7.639320225002106e-06,
+      "loss": 46.0,
+      "step": 162
+    },
+    {
+      "epoch": 0.1100236247046912,
+      "grad_norm": 0.0002725074300542474,
+      "learning_rate": 7.255007412532307e-06,
+      "loss": 46.0,
+      "step": 163
+    },
+    {
+      "epoch": 0.11069861626729666,
+      "grad_norm": 0.0002880211395677179,
+      "learning_rate": 6.8796467540791986e-06,
+      "loss": 46.0,
+      "step": 164
+    },
+    {
+      "epoch": 0.11137360782990213,
+      "grad_norm": 0.00027329163276590407,
+      "learning_rate": 6.513340869498859e-06,
+      "loss": 46.0,
+      "step": 165
+    },
+    {
+      "epoch": 0.1120485993925076,
+      "grad_norm": 0.00027789725572802126,
+      "learning_rate": 6.1561899031625794e-06,
+      "loss": 46.0,
+      "step": 166
+    },
+    {
+      "epoch": 0.11272359095511306,
+      "grad_norm": 0.0002505785960238427,
+      "learning_rate": 5.808291496578435e-06,
+      "loss": 46.0,
+      "step": 167
+    },
+    {
+      "epoch": 0.11339858251771853,
+      "grad_norm": 0.00036295506288297474,
+      "learning_rate": 5.469740761697044e-06,
+      "loss": 46.0,
+      "step": 168
+    },
+    {
+      "epoch": 0.114073574080324,
+      "grad_norm": 0.000231773461564444,
+      "learning_rate": 5.140630254908905e-06,
+      "loss": 46.0,
+      "step": 169
+    },
+    {
+      "epoch": 0.11474856564292946,
+      "grad_norm": 0.00036393426125869155,
+      "learning_rate": 4.821049951740442e-06,
+      "loss": 46.0,
+      "step": 170
+    },
+    {
+      "epoch": 0.11542355720553493,
+      "grad_norm": 0.0003143355716019869,
+      "learning_rate": 4.511087222255528e-06,
+      "loss": 46.0,
+      "step": 171
+    },
+    {
+      "epoch": 0.1160985487681404,
+      "grad_norm": 0.00033845927100628614,
+      "learning_rate": 4.2108268071694616e-06,
+      "loss": 46.0,
+      "step": 172
+    },
+    {
+      "epoch": 0.11677354033074587,
+      "grad_norm": 0.0003002511220984161,
+      "learning_rate": 3.9203507946816445e-06,
+      "loss": 46.0,
+      "step": 173
+    },
+    {
+      "epoch": 0.11744853189335133,
+      "grad_norm": 0.00032554613426327705,
+      "learning_rate": 3.6397385980335e-06,
+      "loss": 46.0,
+      "step": 174
+    },
+    {
+      "epoch": 0.1181235234559568,
+      "grad_norm": 0.0002870889729820192,
+      "learning_rate": 3.3690669337977e-06,
+      "loss": 46.0,
+      "step": 175
+    },
+    {
+      "epoch": 0.11879851501856227,
+      "grad_norm": 0.0003786945599131286,
+      "learning_rate": 3.1084098009046106e-06,
+      "loss": 46.0,
+      "step": 176
+    },
+    {
+      "epoch": 0.11947350658116773,
+      "grad_norm": 0.0003410293720662594,
+      "learning_rate": 2.8578384604117217e-06,
+      "loss": 46.0,
+      "step": 177
+    },
+    {
+      "epoch": 0.12014849814377321,
+      "grad_norm": 0.0002813494356814772,
+      "learning_rate": 2.6174214160215704e-06,
+      "loss": 46.0,
+      "step": 178
+    },
+    {
+      "epoch": 0.12082348970637867,
+      "grad_norm": 0.00023148763284552842,
+      "learning_rate": 2.3872243953535535e-06,
+      "loss": 46.0,
+      "step": 179
+    },
+    {
+      "epoch": 0.12149848126898413,
+      "grad_norm": 0.00044364703353494406,
+      "learning_rate": 2.1673103319746146e-06,
+      "loss": 46.0,
+      "step": 180
+    },
+    {
+      "epoch": 0.12217347283158961,
+      "grad_norm": 0.00044808979146182537,
+      "learning_rate": 1.957739348193859e-06,
+      "loss": 46.0,
+      "step": 181
+    },
+    {
+      "epoch": 0.12284846439419507,
+      "grad_norm": 0.0001999161613639444,
+      "learning_rate": 1.7585687386256944e-06,
+      "loss": 46.0,
+      "step": 182
+    },
+    {
+      "epoch": 0.12352345595680055,
+      "grad_norm": 0.00022155142505653203,
+      "learning_rate": 1.5698529545260744e-06,
+      "loss": 46.0,
+      "step": 183
+    },
+    {
+      "epoch": 0.12419844751940601,
+      "grad_norm": 0.0003148863324895501,
+      "learning_rate": 1.3916435889060575e-06,
+      "loss": 46.0,
+      "step": 184
+    },
+    {
+      "epoch": 0.12487343908201147,
+      "grad_norm": 0.0004106830747332424,
+      "learning_rate": 1.2239893624267852e-06,
+      "loss": 46.0,
+      "step": 185
+    },
+    {
+      "epoch": 0.12554843064461693,
+      "grad_norm": 0.00034603834501467645,
+      "learning_rate": 1.0669361100797704e-06,
+      "loss": 46.0,
+      "step": 186
+    },
+    {
+      "epoch": 0.1262234222072224,
+      "grad_norm": 0.0004213521024212241,
+      "learning_rate": 9.205267686560293e-07,
+      "loss": 46.0,
+      "step": 187
+    },
+    {
+      "epoch": 0.12689841376982788,
+      "grad_norm": 0.0002621794701553881,
+      "learning_rate": 7.848013650076258e-07,
+      "loss": 46.0,
+      "step": 188
+    },
+    {
+      "epoch": 0.12757340533243333,
+      "grad_norm": 0.000253551232162863,
+      "learning_rate": 6.597970051047053e-07,
+      "loss": 46.0,
+      "step": 189
+    },
+    {
+      "epoch": 0.1282483968950388,
+      "grad_norm": 0.00030335516203194857,
+      "learning_rate": 5.455478638911071e-07,
+      "loss": 46.0,
+      "step": 190
+    },
+    {
+      "epoch": 0.12892338845764428,
+      "grad_norm": 0.00027419746038503945,
+      "learning_rate": 4.420851759412603e-07,
+      "loss": 46.0,
+      "step": 191
+    },
+    {
+      "epoch": 0.12959838002024976,
+      "grad_norm": 0.0003773905918933451,
+      "learning_rate": 3.4943722692099224e-07,
+      "loss": 46.0,
+      "step": 192
+    },
+    {
+      "epoch": 0.1302733715828552,
+      "grad_norm": 0.00046274266787804663,
+      "learning_rate": 2.676293458544743e-07,
+      "loss": 46.0,
+      "step": 193
+    },
+    {
+      "epoch": 0.13094836314546068,
+      "grad_norm": 0.00036215598811395466,
+      "learning_rate": 1.9668389819954338e-07,
+      "loss": 46.0,
+      "step": 194
+    },
+    {
+      "epoch": 0.13162335470806616,
+      "grad_norm": 0.00038238154957070947,
+      "learning_rate": 1.3662027973320614e-07,
+      "loss": 46.0,
+      "step": 195
+    },
+    {
+      "epoch": 0.1322983462706716,
+      "grad_norm": 0.0004448361578397453,
+      "learning_rate": 8.745491124901861e-08,
+      "loss": 46.0,
+      "step": 196
+    },
+    {
+      "epoch": 0.13297333783327708,
+      "grad_norm": 0.0004605569993145764,
+      "learning_rate": 4.920123406781052e-08,
+      "loss": 46.0,
+      "step": 197
+    },
+    {
+      "epoch": 0.13364832939588256,
+      "grad_norm": 0.0005355703178793192,
+      "learning_rate": 2.1869706362958044e-08,
+      "loss": 46.0,
+      "step": 198
+    },
+    {
+      "epoch": 0.134323320958488,
+      "grad_norm": 0.00035198143450543284,
+      "learning_rate": 5.467800301239834e-09,
+      "loss": 46.0,
+      "step": 199
+    },
+    {
+      "epoch": 0.13499831252109348,
+      "grad_norm": 0.0006307726143859327,
+      "learning_rate": 0.0,
+      "loss": 46.0,
+      "step": 200
+    },
+    {
+      "epoch": 0.13499831252109348,
+      "eval_loss": 11.5,
+      "eval_runtime": 11.0674,
+      "eval_samples_per_second": 225.436,
+      "eval_steps_per_second": 56.382,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 3
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 32158462771200.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null