oldiday commited on
Commit
f324c1a
·
verified ·
1 Parent(s): 5f24a9d

Training in progress, step 550, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1169929913bb5c932000d9767b768f52232cc25b87794b8470c01fa18b6febe3
3
  size 167832240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6af08d76a9fceca32bff48f799195d010dca79f33f8c81d319400ad2402ccb7
3
  size 167832240
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec6eb1e6db84c334570c122af91c407546f0a6028677bbf77e984e91c6f2d2a0
3
  size 85723732
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ea6c15f3b7a29a14d859e550c46d3a37ab660f7fac91aa2b67f82f5dfb3e735
3
  size 85723732
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:46bad0fc23adfd09dbc19f3f7be3435ddfe15414ebed26616a6aecc380fbdf3f
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24234b9f02099e93a544d84dde2a4254f3926f51e08f716f06999d6cb929bbeb
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5b782a524e5b57eb023365370accae538ac5e68454bafa53a6dd8b2c51cead56
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6bb9c0d62d6b3cf0976c16f73e9bd814b298ebffa1786831bc2a68d8e48809b9
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 1.8650081157684326,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-500",
4
- "epoch": 0.13652809065465218,
5
  "eval_steps": 50,
6
- "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -445,6 +445,49 @@
445
  "eval_samples_per_second": 25.164,
446
  "eval_steps_per_second": 6.291,
447
  "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
448
  }
449
  ],
450
  "logging_steps": 10,
@@ -473,7 +516,7 @@
473
  "attributes": {}
474
  }
475
  },
476
- "total_flos": 3.51564749340672e+17,
477
  "train_batch_size": 8,
478
  "trial_name": null,
479
  "trial_params": null
 
1
  {
2
+ "best_metric": 1.862091302871704,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-550",
4
+ "epoch": 0.15018089972011742,
5
  "eval_steps": 50,
6
+ "global_step": 550,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
445
  "eval_samples_per_second": 25.164,
446
  "eval_steps_per_second": 6.291,
447
  "step": 500
448
+ },
449
+ {
450
+ "epoch": 0.13925865246774524,
451
+ "grad_norm": 10.133707046508789,
452
+ "learning_rate": 1.1264792494342857e-05,
453
+ "loss": 7.0925,
454
+ "step": 510
455
+ },
456
+ {
457
+ "epoch": 0.14198921428083827,
458
+ "grad_norm": 10.4485445022583,
459
+ "learning_rate": 8.936522714508678e-06,
460
+ "loss": 7.8088,
461
+ "step": 520
462
+ },
463
+ {
464
+ "epoch": 0.14471977609393133,
465
+ "grad_norm": 13.265645980834961,
466
+ "learning_rate": 6.866382254766157e-06,
467
+ "loss": 7.6878,
468
+ "step": 530
469
+ },
470
+ {
471
+ "epoch": 0.14745033790702436,
472
+ "grad_norm": 12.639242172241211,
473
+ "learning_rate": 5.060239153161872e-06,
474
+ "loss": 7.4659,
475
+ "step": 540
476
+ },
477
+ {
478
+ "epoch": 0.15018089972011742,
479
+ "grad_norm": 26.739566802978516,
480
+ "learning_rate": 3.5232131185484076e-06,
481
+ "loss": 7.7885,
482
+ "step": 550
483
+ },
484
+ {
485
+ "epoch": 0.15018089972011742,
486
+ "eval_loss": 1.862091302871704,
487
+ "eval_runtime": 245.1426,
488
+ "eval_samples_per_second": 25.161,
489
+ "eval_steps_per_second": 6.29,
490
+ "step": 550
491
  }
492
  ],
493
  "logging_steps": 10,
 
516
  "attributes": {}
517
  }
518
  },
519
+ "total_flos": 3.867212242747392e+17,
520
  "train_batch_size": 8,
521
  "trial_name": null,
522
  "trial_params": null