oldiday committed on
Commit
dab353b
·
verified ·
1 Parent(s): 313ff78

Training in progress, step 550, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b7931c8629b10bcb3c46f312d6c443a54a2a48235580af5d7456c2ae18bf0ca2
3
  size 72396376
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddc849f67df44c0f94d94e50d3138d44e678691377fbca6c6a6eb5a5ada8845e
3
  size 72396376
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:98c509aa7ad8a62165ac20ffbdd603d04e7f0f343031939c9db41d61e938203d
3
  size 37134740
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58ea048ccc35d066e768949acd5b2b5723823827ea6db1d1ee2200d23cdf55d5
3
  size 37134740
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:155b07930e359433d6f58b15ccf7fec7f70457bb2abf5bdbdb44970b5f4ce523
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b61b7f19e5d89fcee16059d0b521fd7346a03cdf1b23e4aaf0a3720db2476c1
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5b782a524e5b57eb023365370accae538ac5e68454bafa53a6dd8b2c51cead56
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6bb9c0d62d6b3cf0976c16f73e9bd814b298ebffa1786831bc2a68d8e48809b9
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 1.2039023637771606,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-500",
4
- "epoch": 0.32647730982696704,
5
  "eval_steps": 50,
6
- "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -445,6 +445,49 @@
445
  "eval_samples_per_second": 60.529,
446
  "eval_steps_per_second": 15.138,
447
  "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
448
  }
449
  ],
450
  "logging_steps": 10,
@@ -473,7 +516,7 @@
473
  "attributes": {}
474
  }
475
  },
476
- "total_flos": 8.133976917855437e+16,
477
  "train_batch_size": 8,
478
  "trial_name": null,
479
  "trial_params": null
 
1
  {
2
+ "best_metric": 1.2006784677505493,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-550",
4
+ "epoch": 0.35912504080966373,
5
  "eval_steps": 50,
6
+ "global_step": 550,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
445
  "eval_samples_per_second": 60.529,
446
  "eval_steps_per_second": 15.138,
447
  "step": 500
448
+ },
449
+ {
450
+ "epoch": 0.3330068560235064,
451
+ "grad_norm": 0.05650899186730385,
452
+ "learning_rate": 1.1264792494342857e-05,
453
+ "loss": 1.1749,
454
+ "step": 510
455
+ },
456
+ {
457
+ "epoch": 0.3395364022200457,
458
+ "grad_norm": 0.0797816812992096,
459
+ "learning_rate": 8.936522714508678e-06,
460
+ "loss": 1.2085,
461
+ "step": 520
462
+ },
463
+ {
464
+ "epoch": 0.34606594841658506,
465
+ "grad_norm": 0.14462202787399292,
466
+ "learning_rate": 6.866382254766157e-06,
467
+ "loss": 1.2441,
468
+ "step": 530
469
+ },
470
+ {
471
+ "epoch": 0.3525954946131244,
472
+ "grad_norm": 0.30583634972572327,
473
+ "learning_rate": 5.060239153161872e-06,
474
+ "loss": 1.2586,
475
+ "step": 540
476
+ },
477
+ {
478
+ "epoch": 0.35912504080966373,
479
+ "grad_norm": 0.7339666485786438,
480
+ "learning_rate": 3.5232131185484076e-06,
481
+ "loss": 1.1969,
482
+ "step": 550
483
+ },
484
+ {
485
+ "epoch": 0.35912504080966373,
486
+ "eval_loss": 1.2006784677505493,
487
+ "eval_runtime": 41.9627,
488
+ "eval_samples_per_second": 61.459,
489
+ "eval_steps_per_second": 15.371,
490
+ "step": 550
491
  }
492
  ],
493
  "logging_steps": 10,
 
516
  "attributes": {}
517
  }
518
  },
519
+ "total_flos": 8.942571079965082e+16,
520
  "train_batch_size": 8,
521
  "trial_name": null,
522
  "trial_params": null