Training in progress, step 100, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1579384
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:16785a61aa31cea9df62cbd18f8208d660df29185f09b9dd0e9aca0d2438e96a
|
3 |
size 1579384
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 857274
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9518499bb798c1ce79529bad3ff555316b6ee700915fcec92bf1504941a05032
|
3 |
size 857274
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cace2bb20bc8ba875286724acbfadddec3e5175c4ce467dea9a6adf2fcb4cb03
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2036ec7b8a4c0dbbd07ada2b2af7c3be05d304eb60a4492cb7e057daf83ea234
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 20,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -607,6 +607,154 @@
|
|
607 |
"eval_samples_per_second": 74.747,
|
608 |
"eval_steps_per_second": 74.747,
|
609 |
"step": 80
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
610 |
}
|
611 |
],
|
612 |
"logging_steps": 1,
|
@@ -626,7 +774,7 @@
|
|
626 |
"attributes": {}
|
627 |
}
|
628 |
},
|
629 |
-
"total_flos":
|
630 |
"train_batch_size": 1,
|
631 |
"trial_name": null,
|
632 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.06603111716396352,
|
5 |
"eval_steps": 20,
|
6 |
+
"global_step": 100,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
607 |
"eval_samples_per_second": 74.747,
|
608 |
"eval_steps_per_second": 74.747,
|
609 |
"step": 80
|
610 |
+
},
|
611 |
+
{
|
612 |
+
"epoch": 0.05348520490281045,
|
613 |
+
"grad_norm": 259394.6875,
|
614 |
+
"learning_rate": 0.00025777547044259435,
|
615 |
+
"loss": 200.2073,
|
616 |
+
"step": 81
|
617 |
+
},
|
618 |
+
{
|
619 |
+
"epoch": 0.05414551607445008,
|
620 |
+
"grad_norm": 1378326.625,
|
621 |
+
"learning_rate": 0.0002566389691674106,
|
622 |
+
"loss": 100.5475,
|
623 |
+
"step": 82
|
624 |
+
},
|
625 |
+
{
|
626 |
+
"epoch": 0.05480582724608972,
|
627 |
+
"grad_norm": 63462.6875,
|
628 |
+
"learning_rate": 0.00025548995333638197,
|
629 |
+
"loss": 189.407,
|
630 |
+
"step": 83
|
631 |
+
},
|
632 |
+
{
|
633 |
+
"epoch": 0.055466138417729356,
|
634 |
+
"grad_norm": 149989.21875,
|
635 |
+
"learning_rate": 0.00025432855779161076,
|
636 |
+
"loss": 655.0445,
|
637 |
+
"step": 84
|
638 |
+
},
|
639 |
+
{
|
640 |
+
"epoch": 0.05612644958936899,
|
641 |
+
"grad_norm": 161908.9375,
|
642 |
+
"learning_rate": 0.00025315491882801347,
|
643 |
+
"loss": 542.2335,
|
644 |
+
"step": 85
|
645 |
+
},
|
646 |
+
{
|
647 |
+
"epoch": 0.05678676076100862,
|
648 |
+
"grad_norm": 140391.09375,
|
649 |
+
"learning_rate": 0.00025196917417732615,
|
650 |
+
"loss": 178.0071,
|
651 |
+
"step": 86
|
652 |
+
},
|
653 |
+
{
|
654 |
+
"epoch": 0.05744707193264826,
|
655 |
+
"grad_norm": 45774.61328125,
|
656 |
+
"learning_rate": 0.0002507714629919409,
|
657 |
+
"loss": 145.9398,
|
658 |
+
"step": 87
|
659 |
+
},
|
660 |
+
{
|
661 |
+
"epoch": 0.058107383104287896,
|
662 |
+
"grad_norm": 74355.359375,
|
663 |
+
"learning_rate": 0.0002495619258285757,
|
664 |
+
"loss": 162.5158,
|
665 |
+
"step": 88
|
666 |
+
},
|
667 |
+
{
|
668 |
+
"epoch": 0.05876769427592753,
|
669 |
+
"grad_norm": 112329.7265625,
|
670 |
+
"learning_rate": 0.0002483407046317794,
|
671 |
+
"loss": 223.498,
|
672 |
+
"step": 89
|
673 |
+
},
|
674 |
+
{
|
675 |
+
"epoch": 0.05942800544756716,
|
676 |
+
"grad_norm": 488449.875,
|
677 |
+
"learning_rate": 0.00024710794271727413,
|
678 |
+
"loss": 223.1561,
|
679 |
+
"step": 90
|
680 |
+
},
|
681 |
+
{
|
682 |
+
"epoch": 0.0600883166192068,
|
683 |
+
"grad_norm": 146916.296875,
|
684 |
+
"learning_rate": 0.0002458637847551364,
|
685 |
+
"loss": 252.3947,
|
686 |
+
"step": 91
|
687 |
+
},
|
688 |
+
{
|
689 |
+
"epoch": 0.060748627790846436,
|
690 |
+
"grad_norm": 115853.0703125,
|
691 |
+
"learning_rate": 0.00024460837675281926,
|
692 |
+
"loss": 265.611,
|
693 |
+
"step": 92
|
694 |
+
},
|
695 |
+
{
|
696 |
+
"epoch": 0.06140893896248607,
|
697 |
+
"grad_norm": 95760.921875,
|
698 |
+
"learning_rate": 0.00024334186603801807,
|
699 |
+
"loss": 195.9439,
|
700 |
+
"step": 93
|
701 |
+
},
|
702 |
+
{
|
703 |
+
"epoch": 0.06206925013412571,
|
704 |
+
"grad_norm": 58220.4609375,
|
705 |
+
"learning_rate": 0.00024206440124138062,
|
706 |
+
"loss": 173.6973,
|
707 |
+
"step": 94
|
708 |
+
},
|
709 |
+
{
|
710 |
+
"epoch": 0.06272956130576535,
|
711 |
+
"grad_norm": 44573.25390625,
|
712 |
+
"learning_rate": 0.0002407761322790648,
|
713 |
+
"loss": 130.0355,
|
714 |
+
"step": 95
|
715 |
+
},
|
716 |
+
{
|
717 |
+
"epoch": 0.06338987247740498,
|
718 |
+
"grad_norm": 48302.27734375,
|
719 |
+
"learning_rate": 0.00023947721033514512,
|
720 |
+
"loss": 110.2012,
|
721 |
+
"step": 96
|
722 |
+
},
|
723 |
+
{
|
724 |
+
"epoch": 0.0640501836490446,
|
725 |
+
"grad_norm": 18446.73046875,
|
726 |
+
"learning_rate": 0.00023816778784387094,
|
727 |
+
"loss": 118.2505,
|
728 |
+
"step": 97
|
729 |
+
},
|
730 |
+
{
|
731 |
+
"epoch": 0.06471049482068425,
|
732 |
+
"grad_norm": 35311.09375,
|
733 |
+
"learning_rate": 0.0002368480184717773,
|
734 |
+
"loss": 133.5809,
|
735 |
+
"step": 98
|
736 |
+
},
|
737 |
+
{
|
738 |
+
"epoch": 0.06537080599232388,
|
739 |
+
"grad_norm": 38145.79296875,
|
740 |
+
"learning_rate": 0.00023551805709965147,
|
741 |
+
"loss": 129.8271,
|
742 |
+
"step": 99
|
743 |
+
},
|
744 |
+
{
|
745 |
+
"epoch": 0.06603111716396352,
|
746 |
+
"grad_norm": 32865.98046875,
|
747 |
+
"learning_rate": 0.00023417805980435736,
|
748 |
+
"loss": 116.0362,
|
749 |
+
"step": 100
|
750 |
+
},
|
751 |
+
{
|
752 |
+
"epoch": 0.06603111716396352,
|
753 |
+
"eval_loss": 9.961955070495605,
|
754 |
+
"eval_runtime": 6.5733,
|
755 |
+
"eval_samples_per_second": 75.305,
|
756 |
+
"eval_steps_per_second": 75.305,
|
757 |
+
"step": 100
|
758 |
}
|
759 |
],
|
760 |
"logging_steps": 1,
|
|
|
774 |
"attributes": {}
|
775 |
}
|
776 |
},
|
777 |
+
"total_flos": 65231696953344.0,
|
778 |
"train_batch_size": 1,
|
779 |
"trial_name": null,
|
780 |
"trial_params": null
|