bane5631 commited on
Commit
ad8e488
·
verified ·
1 Parent(s): d62d1cd

Training in progress, step 314, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3aaf1ffc8743b661c24ffddf27f33c30d0bf873fa43be1a656ab249520615e8d
3
  size 295488936
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0f39368f135c9559d4295ae28ed2bbd25c473808fb5a2e6393a51fd4c627c8c
3
  size 295488936
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:13906c1d2d1874c052a2dffaa33cc5553f8bc05ff44ccdbf8cbb8eda90e2875f
3
  size 150487412
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd509530684de3d7fb8f946689241b94c7faad18cf8c5938ef5f4cdd2ab85016
3
  size 150487412
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fc8c82c87fe80ee509f889329bd2c1f33412a2618800882fa3ef31712bbae899
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90a0fe62b5ce4fa26deccbcf8596f226f5ec4cccd3f13880f8be34b81ff0861e
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e56605411c75d4f5d10e2119513fb3cbc4f1b7f0d9a95ce7a12185ef30e8d070
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5304cf14ef62a028101f5aad80a675689064bce29890c1269eb16b7d46d866d9
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 1.6946364641189575,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
4
- "epoch": 2.8708133971291865,
5
  "eval_steps": 50,
6
- "global_step": 300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2163,6 +2163,104 @@
2163
  "eval_samples_per_second": 30.901,
2164
  "eval_steps_per_second": 7.856,
2165
  "step": 300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2166
  }
2167
  ],
2168
  "logging_steps": 1,
@@ -2186,12 +2284,12 @@
2186
  "should_evaluate": false,
2187
  "should_log": false,
2188
  "should_save": true,
2189
- "should_training_stop": false
2190
  },
2191
  "attributes": {}
2192
  }
2193
  },
2194
- "total_flos": 8.361646850310144e+16,
2195
  "train_batch_size": 8,
2196
  "trial_name": null,
2197
  "trial_params": null
 
1
  {
2
  "best_metric": 1.6946364641189575,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
4
+ "epoch": 3.0047846889952154,
5
  "eval_steps": 50,
6
+ "global_step": 314,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2163
  "eval_samples_per_second": 30.901,
2164
  "eval_steps_per_second": 7.856,
2165
  "step": 300
2166
+ },
2167
+ {
2168
+ "epoch": 2.8803827751196174,
2169
+ "grad_norm": 0.6223769783973694,
2170
+ "learning_rate": 4.505323691412711e-07,
2171
+ "loss": 1.3203,
2172
+ "step": 301
2173
+ },
2174
+ {
2175
+ "epoch": 2.889952153110048,
2176
+ "grad_norm": 0.6258677244186401,
2177
+ "learning_rate": 3.839710131477492e-07,
2178
+ "loss": 1.1759,
2179
+ "step": 302
2180
+ },
2181
+ {
2182
+ "epoch": 2.8995215311004783,
2183
+ "grad_norm": 0.6979731321334839,
2184
+ "learning_rate": 3.2270837404318464e-07,
2185
+ "loss": 1.541,
2186
+ "step": 303
2187
+ },
2188
+ {
2189
+ "epoch": 2.909090909090909,
2190
+ "grad_norm": 0.7176746129989624,
2191
+ "learning_rate": 2.667509943378721e-07,
2192
+ "loss": 1.3922,
2193
+ "step": 304
2194
+ },
2195
+ {
2196
+ "epoch": 2.9186602870813396,
2197
+ "grad_norm": 0.7628390789031982,
2198
+ "learning_rate": 2.161048499698115e-07,
2199
+ "loss": 1.3605,
2200
+ "step": 305
2201
+ },
2202
+ {
2203
+ "epoch": 2.92822966507177,
2204
+ "grad_norm": 0.785723865032196,
2205
+ "learning_rate": 1.7077534966650766e-07,
2206
+ "loss": 1.3788,
2207
+ "step": 306
2208
+ },
2209
+ {
2210
+ "epoch": 2.937799043062201,
2211
+ "grad_norm": 0.8244166374206543,
2212
+ "learning_rate": 1.3076733436734322e-07,
2213
+ "loss": 1.323,
2214
+ "step": 307
2215
+ },
2216
+ {
2217
+ "epoch": 2.9473684210526314,
2218
+ "grad_norm": 0.9065255522727966,
2219
+ "learning_rate": 9.60850767065924e-08,
2220
+ "loss": 1.2804,
2221
+ "step": 308
2222
+ },
2223
+ {
2224
+ "epoch": 2.9569377990430623,
2225
+ "grad_norm": 1.011901617050171,
2226
+ "learning_rate": 6.673228055715241e-08,
2227
+ "loss": 1.2738,
2228
+ "step": 309
2229
+ },
2230
+ {
2231
+ "epoch": 2.9665071770334928,
2232
+ "grad_norm": 1.1093995571136475,
2233
+ "learning_rate": 4.2712080634949024e-08,
2234
+ "loss": 1.1685,
2235
+ "step": 310
2236
+ },
2237
+ {
2238
+ "epoch": 2.9760765550239237,
2239
+ "grad_norm": 1.3170857429504395,
2240
+ "learning_rate": 2.4027042164198598e-08,
2241
+ "loss": 0.9393,
2242
+ "step": 311
2243
+ },
2244
+ {
2245
+ "epoch": 2.985645933014354,
2246
+ "grad_norm": 1.4136571884155273,
2247
+ "learning_rate": 1.0679160603449534e-08,
2248
+ "loss": 0.8915,
2249
+ "step": 312
2250
+ },
2251
+ {
2252
+ "epoch": 2.9952153110047846,
2253
+ "grad_norm": 2.444126844406128,
2254
+ "learning_rate": 2.669861432463838e-09,
2255
+ "loss": 0.636,
2256
+ "step": 313
2257
+ },
2258
+ {
2259
+ "epoch": 3.0047846889952154,
2260
+ "grad_norm": 0.7383264303207397,
2261
+ "learning_rate": 0.0,
2262
+ "loss": 1.9637,
2263
+ "step": 314
2264
  }
2265
  ],
2266
  "logging_steps": 1,
 
2284
  "should_evaluate": false,
2285
  "should_log": false,
2286
  "should_save": true,
2287
+ "should_training_stop": true
2288
  },
2289
  "attributes": {}
2290
  }
2291
  },
2292
+ "total_flos": 8.763060328071168e+16,
2293
  "train_batch_size": 8,
2294
  "trial_name": null,
2295
  "trial_params": null