oldiday committed on
Commit
26a8460
·
verified ·
1 Parent(s): f0e3fcd

Training in progress, step 350, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:787f2551b6ac8583af6eea1cb71af383da2ce2104f41d2230e2a07da90e5d21c
3
  size 72396376
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:000da61d1797457b2c95bcb27a7b2b110d3afb6a606a87ecd4e2f320b4fad9d7
3
  size 72396376
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f7a6512e6cf7cc15a8a708ef1e3cc6ec2b4bf938b14afef26cc69ace80971c5
3
  size 37134740
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:105ac16b5d228dd7aa94d2fb6725026dc57df3bf84599096844a4c1dedfd4e77
3
  size 37134740
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:19e2e3b1a3fb431d188c6e72e3cee4c7684c54ae7e071aa6381813d0d35eaabc
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:205bd1398626770ca707f06808286a5c34cea314101953ce4ecf1df4984e8133
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a8d9346c4fcc90fb1ec8546736583b76a4fae6bc25cb93181337c187d15da94a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77c9b643e87499abdfb160399a4a6e4965274897c037cf91e4cd5d5d65b2b404
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 1.2161424160003662,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-300",
4
- "epoch": 0.1958863858961802,
5
  "eval_steps": 50,
6
- "global_step": 300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -273,6 +273,49 @@
273
  "eval_samples_per_second": 60.921,
274
  "eval_steps_per_second": 15.236,
275
  "step": 300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
276
  }
277
  ],
278
  "logging_steps": 10,
@@ -301,7 +344,7 @@
301
  "attributes": {}
302
  }
303
  },
304
- "total_flos": 4.875582621037363e+16,
305
  "train_batch_size": 8,
306
  "trial_name": null,
307
  "trial_params": null
 
1
  {
2
+ "best_metric": 1.2151703834533691,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-350",
4
+ "epoch": 0.22853411687887693,
5
  "eval_steps": 50,
6
+ "global_step": 350,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
273
  "eval_samples_per_second": 60.921,
274
  "eval_steps_per_second": 15.236,
275
  "step": 300
276
+ },
277
+ {
278
+ "epoch": 0.20241593209271955,
279
+ "grad_norm": 0.058099668473005295,
280
+ "learning_rate": 9.733794785622253e-05,
281
+ "loss": 1.1525,
282
+ "step": 310
283
+ },
284
+ {
285
+ "epoch": 0.20894547828925888,
286
+ "grad_norm": 0.09674125164747238,
287
+ "learning_rate": 9.202138944469168e-05,
288
+ "loss": 1.2296,
289
+ "step": 320
290
+ },
291
+ {
292
+ "epoch": 0.21547502448579825,
293
+ "grad_norm": 0.15291635692119598,
294
+ "learning_rate": 8.672744727162781e-05,
295
+ "loss": 1.2739,
296
+ "step": 330
297
+ },
298
+ {
299
+ "epoch": 0.2220045706823376,
300
+ "grad_norm": 0.2810160219669342,
301
+ "learning_rate": 8.147112759128859e-05,
302
+ "loss": 1.3062,
303
+ "step": 340
304
+ },
305
+ {
306
+ "epoch": 0.22853411687887693,
307
+ "grad_norm": 0.978479266166687,
308
+ "learning_rate": 7.626733001288851e-05,
309
+ "loss": 1.3263,
310
+ "step": 350
311
+ },
312
+ {
313
+ "epoch": 0.22853411687887693,
314
+ "eval_loss": 1.2151703834533691,
315
+ "eval_runtime": 41.9719,
316
+ "eval_samples_per_second": 61.446,
317
+ "eval_steps_per_second": 15.367,
318
+ "step": 350
319
  }
320
  ],
321
  "logging_steps": 10,
 
344
  "attributes": {}
345
  }
346
  },
347
+ "total_flos": 5.692182665940173e+16,
348
  "train_batch_size": 8,
349
  "trial_name": null,
350
  "trial_params": null