lesso18 commited on
Commit
4b1f8e2
·
verified ·
1 Parent(s): 174285e

Training in progress, step 300, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:34bd11160aec9fe2bd858aad8d63001600dd28dbfe0f9393afbc194a4bd2fe33
3
  size 1006723888
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:825ecd2a9b415c9b1862546edd0f3945a56a381c68920f130700de62462f1f2e
3
  size 1006723888
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0814d4e09fd596d42c4126e7f9af7a0d9627a886bb5ab4e2a995a620103f3614
3
- size 511971028
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:064ecfb5d89792208a73611c0ec367e4fd173f534715a3b65af1db393e805133
3
+ size 511971668
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a84bdfc985f9f71e6bcb152a1eb9fd0ac393101acf6977b32e80460062d0456
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:130b330e493d984dcf2540998fdf543192159617f46ed883e63a7374d38984b7
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5245962b37abe5debb29d3c0771306ab720257c32ca897672f6c78751ec6642f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e3407292f65425c0593f095883f5f3c050baccb28fc15b3c1ad88090d6c23ef
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.5013260841369629,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-250",
4
- "epoch": 0.051991265467401473,
5
  "eval_steps": 50,
6
- "global_step": 250,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -230,6 +230,49 @@
230
  "eval_samples_per_second": 6.282,
231
  "eval_steps_per_second": 1.574,
232
  "step": 250
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
233
  }
234
  ],
235
  "logging_steps": 10,
@@ -258,7 +301,7 @@
258
  "attributes": {}
259
  }
260
  },
261
- "total_flos": 3.360440978296013e+16,
262
  "train_batch_size": 1,
263
  "trial_name": null,
264
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.4650544822216034,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-300",
4
+ "epoch": 0.06238951856088177,
5
  "eval_steps": 50,
6
+ "global_step": 300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
230
  "eval_samples_per_second": 6.282,
231
  "eval_steps_per_second": 1.574,
232
  "step": 250
233
+ },
234
+ {
235
+ "epoch": 0.054070916086097535,
236
+ "grad_norm": 1.2806155681610107,
237
+ "learning_rate": 0.00012039360249617425,
238
+ "loss": 0.4848,
239
+ "step": 260
240
+ },
241
+ {
242
+ "epoch": 0.0561505667047936,
243
+ "grad_norm": 0.8744197487831116,
244
+ "learning_rate": 0.00011280404514057264,
245
+ "loss": 0.4888,
246
+ "step": 270
247
+ },
248
+ {
249
+ "epoch": 0.05823021732348965,
250
+ "grad_norm": 1.0379194021224976,
251
+ "learning_rate": 0.00010519595485942743,
252
+ "loss": 0.4923,
253
+ "step": 280
254
+ },
255
+ {
256
+ "epoch": 0.060309867942185715,
257
+ "grad_norm": 1.0062249898910522,
258
+ "learning_rate": 9.76063975038258e-05,
259
+ "loss": 0.4953,
260
+ "step": 290
261
+ },
262
+ {
263
+ "epoch": 0.06238951856088177,
264
+ "grad_norm": 1.2005985975265503,
265
+ "learning_rate": 9.00723486343046e-05,
266
+ "loss": 0.4437,
267
+ "step": 300
268
+ },
269
+ {
270
+ "epoch": 0.06238951856088177,
271
+ "eval_loss": 0.4650544822216034,
272
+ "eval_runtime": 80.7039,
273
+ "eval_samples_per_second": 6.282,
274
+ "eval_steps_per_second": 1.574,
275
+ "step": 300
276
  }
277
  ],
278
  "logging_steps": 10,
 
301
  "attributes": {}
302
  }
303
  },
304
+ "total_flos": 4.027195140656333e+16,
305
  "train_batch_size": 1,
306
  "trial_name": null,
307
  "trial_params": null