Training in progress, step 120, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1579384
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:393273cdca286ccb40e14205ea36723e7c5c1240080ed35a78dbeb665d070ee9
|
3 |
size 1579384
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 857274
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0d54a2e2580a8617019d978feb2d6817db3c1af9507f0f49fc2ef229fd5ccdce
|
3 |
size 857274
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eed95f9b60d673089274be5847cbb94c802446b0a76989fa02d0940d21190b7e
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a705acd53999263a453e7147fb79a4ea1e0e69c6cfafd3b3b9170876c31d6d7f
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 20,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -755,6 +755,154 @@
|
|
755 |
"eval_samples_per_second": 75.305,
|
756 |
"eval_steps_per_second": 75.305,
|
757 |
"step": 100
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
758 |
}
|
759 |
],
|
760 |
"logging_steps": 1,
|
@@ -774,7 +922,7 @@
|
|
774 |
"attributes": {}
|
775 |
}
|
776 |
},
|
777 |
-
"total_flos":
|
778 |
"train_batch_size": 1,
|
779 |
"trial_name": null,
|
780 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.07923734059675622,
|
5 |
"eval_steps": 20,
|
6 |
+
"global_step": 120,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
755 |
"eval_samples_per_second": 75.305,
|
756 |
"eval_steps_per_second": 75.305,
|
757 |
"step": 100
|
758 |
+
},
|
759 |
+
{
|
760 |
+
"epoch": 0.06669142833560315,
|
761 |
+
"grad_norm": 18817.16796875,
|
762 |
+
"learning_rate": 0.00023282818384051866,
|
763 |
+
"loss": 113.1262,
|
764 |
+
"step": 101
|
765 |
+
},
|
766 |
+
{
|
767 |
+
"epoch": 0.06735173950724278,
|
768 |
+
"grad_norm": 24054.869140625,
|
769 |
+
"learning_rate": 0.00023146858762206489,
|
770 |
+
"loss": 108.1982,
|
771 |
+
"step": 102
|
772 |
+
},
|
773 |
+
{
|
774 |
+
"epoch": 0.06801205067888243,
|
775 |
+
"grad_norm": 34655.8671875,
|
776 |
+
"learning_rate": 0.00023009943070364044,
|
777 |
+
"loss": 108.4203,
|
778 |
+
"step": 103
|
779 |
+
},
|
780 |
+
{
|
781 |
+
"epoch": 0.06867236185052206,
|
782 |
+
"grad_norm": 29425.787109375,
|
783 |
+
"learning_rate": 0.0002287208737618801,
|
784 |
+
"loss": 107.4073,
|
785 |
+
"step": 104
|
786 |
+
},
|
787 |
+
{
|
788 |
+
"epoch": 0.0693326730221617,
|
789 |
+
"grad_norm": 13952.1171875,
|
790 |
+
"learning_rate": 0.00022733307857655325,
|
791 |
+
"loss": 105.1398,
|
792 |
+
"step": 105
|
793 |
+
},
|
794 |
+
{
|
795 |
+
"epoch": 0.06999298419380133,
|
796 |
+
"grad_norm": 20227.431640625,
|
797 |
+
"learning_rate": 0.00022593620801157808,
|
798 |
+
"loss": 115.2134,
|
799 |
+
"step": 106
|
800 |
+
},
|
801 |
+
{
|
802 |
+
"epoch": 0.07065329536544096,
|
803 |
+
"grad_norm": 19999.79296875,
|
804 |
+
"learning_rate": 0.00022453042599590882,
|
805 |
+
"loss": 113.6159,
|
806 |
+
"step": 107
|
807 |
+
},
|
808 |
+
{
|
809 |
+
"epoch": 0.0713136065370806,
|
810 |
+
"grad_norm": 18226.33203125,
|
811 |
+
"learning_rate": 0.00022311589750429787,
|
812 |
+
"loss": 110.2182,
|
813 |
+
"step": 108
|
814 |
+
},
|
815 |
+
{
|
816 |
+
"epoch": 0.07197391770872023,
|
817 |
+
"grad_norm": 15471.123046875,
|
818 |
+
"learning_rate": 0.00022169278853793545,
|
819 |
+
"loss": 98.862,
|
820 |
+
"step": 109
|
821 |
+
},
|
822 |
+
{
|
823 |
+
"epoch": 0.07263422888035986,
|
824 |
+
"grad_norm": 9518.90625,
|
825 |
+
"learning_rate": 0.00022026126610496852,
|
826 |
+
"loss": 100.519,
|
827 |
+
"step": 110
|
828 |
+
},
|
829 |
+
{
|
830 |
+
"epoch": 0.07329454005199951,
|
831 |
+
"grad_norm": 12838.0771484375,
|
832 |
+
"learning_rate": 0.0002188214982009016,
|
833 |
+
"loss": 99.1184,
|
834 |
+
"step": 111
|
835 |
+
},
|
836 |
+
{
|
837 |
+
"epoch": 0.07395485122363914,
|
838 |
+
"grad_norm": 13236.9697265625,
|
839 |
+
"learning_rate": 0.00021737365378888187,
|
840 |
+
"loss": 108.3643,
|
841 |
+
"step": 112
|
842 |
+
},
|
843 |
+
{
|
844 |
+
"epoch": 0.07461516239527878,
|
845 |
+
"grad_norm": 21540.712890625,
|
846 |
+
"learning_rate": 0.00021591790277987043,
|
847 |
+
"loss": 106.4385,
|
848 |
+
"step": 113
|
849 |
+
},
|
850 |
+
{
|
851 |
+
"epoch": 0.07527547356691841,
|
852 |
+
"grad_norm": 13282.7333984375,
|
853 |
+
"learning_rate": 0.00021445441601270276,
|
854 |
+
"loss": 111.6325,
|
855 |
+
"step": 114
|
856 |
+
},
|
857 |
+
{
|
858 |
+
"epoch": 0.07593578473855804,
|
859 |
+
"grad_norm": 32402.203125,
|
860 |
+
"learning_rate": 0.00021298336523403968,
|
861 |
+
"loss": 102.4856,
|
862 |
+
"step": 115
|
863 |
+
},
|
864 |
+
{
|
865 |
+
"epoch": 0.07659609591019768,
|
866 |
+
"grad_norm": 23308.939453125,
|
867 |
+
"learning_rate": 0.0002115049230782124,
|
868 |
+
"loss": 99.6906,
|
869 |
+
"step": 116
|
870 |
+
},
|
871 |
+
{
|
872 |
+
"epoch": 0.07725640708183731,
|
873 |
+
"grad_norm": 21524.953125,
|
874 |
+
"learning_rate": 0.00021001926304696296,
|
875 |
+
"loss": 90.451,
|
876 |
+
"step": 117
|
877 |
+
},
|
878 |
+
{
|
879 |
+
"epoch": 0.07791671825347696,
|
880 |
+
"grad_norm": 13045.5537109375,
|
881 |
+
"learning_rate": 0.00020852655948908316,
|
882 |
+
"loss": 93.52,
|
883 |
+
"step": 118
|
884 |
+
},
|
885 |
+
{
|
886 |
+
"epoch": 0.07857702942511659,
|
887 |
+
"grad_norm": 18377.09375,
|
888 |
+
"learning_rate": 0.0002070269875799538,
|
889 |
+
"loss": 85.6482,
|
890 |
+
"step": 119
|
891 |
+
},
|
892 |
+
{
|
893 |
+
"epoch": 0.07923734059675622,
|
894 |
+
"grad_norm": 12025.564453125,
|
895 |
+
"learning_rate": 0.00020552072330098716,
|
896 |
+
"loss": 89.6598,
|
897 |
+
"step": 120
|
898 |
+
},
|
899 |
+
{
|
900 |
+
"epoch": 0.07923734059675622,
|
901 |
+
"eval_loss": 12.788580894470215,
|
902 |
+
"eval_runtime": 6.5873,
|
903 |
+
"eval_samples_per_second": 75.145,
|
904 |
+
"eval_steps_per_second": 75.145,
|
905 |
+
"step": 120
|
906 |
}
|
907 |
],
|
908 |
"logging_steps": 1,
|
|
|
922 |
"attributes": {}
|
923 |
}
|
924 |
},
|
925 |
+
"total_flos": 81003622170624.0,
|
926 |
"train_batch_size": 1,
|
927 |
"trial_name": null,
|
928 |
"trial_params": null
|