Training in progress, step 130, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 319876032
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:869af5620d3d85d7c0db115351fa817c5904fa6f51f53e8d481d34177a3d9341
|
3 |
size 319876032
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 640009682
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:afd7f4c7619ded30407f68888930fdfa979bea1a64de2ebe37b0fdc904faa13e
|
3 |
size 640009682
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5a670f2d851c0c4cc79716f31d1954ad248700ff958468f6d2721e422863a0fe
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1b8d02756226521458daee3f69c94f8a0b4245ed6c8f1de64c08045d2547f98c
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 50,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -850,6 +850,97 @@
|
|
850 |
"learning_rate": 2.2611647606329732e-05,
|
851 |
"loss": 43.9194,
|
852 |
"step": 117
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
853 |
}
|
854 |
],
|
855 |
"logging_steps": 1,
|
@@ -869,7 +960,7 @@
|
|
869 |
"attributes": {}
|
870 |
}
|
871 |
},
|
872 |
-
"total_flos":
|
873 |
"train_batch_size": 2,
|
874 |
"trial_name": null,
|
875 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.17300894156789354,
|
5 |
"eval_steps": 50,
|
6 |
+
"global_step": 130,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
850 |
"learning_rate": 2.2611647606329732e-05,
|
851 |
"loss": 43.9194,
|
852 |
"step": 117
|
853 |
+
},
|
854 |
+
{
|
855 |
+
"epoch": 0.1570388854231649,
|
856 |
+
"grad_norm": 4.738860607147217,
|
857 |
+
"learning_rate": 2.1330077944190924e-05,
|
858 |
+
"loss": 44.4921,
|
859 |
+
"step": 118
|
860 |
+
},
|
861 |
+
{
|
862 |
+
"epoch": 0.15836972343522562,
|
863 |
+
"grad_norm": 5.531933307647705,
|
864 |
+
"learning_rate": 2.0079850005167007e-05,
|
865 |
+
"loss": 47.8288,
|
866 |
+
"step": 119
|
867 |
+
},
|
868 |
+
{
|
869 |
+
"epoch": 0.15970056144728634,
|
870 |
+
"grad_norm": 5.006840229034424,
|
871 |
+
"learning_rate": 1.8861693887167408e-05,
|
872 |
+
"loss": 47.1657,
|
873 |
+
"step": 120
|
874 |
+
},
|
875 |
+
{
|
876 |
+
"epoch": 0.16103139945934705,
|
877 |
+
"grad_norm": 4.700850963592529,
|
878 |
+
"learning_rate": 1.767632095906137e-05,
|
879 |
+
"loss": 45.0584,
|
880 |
+
"step": 121
|
881 |
+
},
|
882 |
+
{
|
883 |
+
"epoch": 0.16236223747140777,
|
884 |
+
"grad_norm": 5.489245414733887,
|
885 |
+
"learning_rate": 1.652442344525833e-05,
|
886 |
+
"loss": 45.8487,
|
887 |
+
"step": 122
|
888 |
+
},
|
889 |
+
{
|
890 |
+
"epoch": 0.1636930754834685,
|
891 |
+
"grad_norm": 4.946503162384033,
|
892 |
+
"learning_rate": 1.5406674021468438e-05,
|
893 |
+
"loss": 46.2189,
|
894 |
+
"step": 123
|
895 |
+
},
|
896 |
+
{
|
897 |
+
"epoch": 0.16502391349552922,
|
898 |
+
"grad_norm": 8.596658706665039,
|
899 |
+
"learning_rate": 1.4323725421878949e-05,
|
900 |
+
"loss": 44.8387,
|
901 |
+
"step": 124
|
902 |
+
},
|
903 |
+
{
|
904 |
+
"epoch": 0.16635475150758994,
|
905 |
+
"grad_norm": 4.896723747253418,
|
906 |
+
"learning_rate": 1.3276210057975772e-05,
|
907 |
+
"loss": 46.1841,
|
908 |
+
"step": 125
|
909 |
+
},
|
910 |
+
{
|
911 |
+
"epoch": 0.16768558951965065,
|
912 |
+
"grad_norm": 5.261892795562744,
|
913 |
+
"learning_rate": 1.2264739649232993e-05,
|
914 |
+
"loss": 44.6968,
|
915 |
+
"step": 126
|
916 |
+
},
|
917 |
+
{
|
918 |
+
"epoch": 0.16901642753171137,
|
919 |
+
"grad_norm": 5.7036027908325195,
|
920 |
+
"learning_rate": 1.1289904865885935e-05,
|
921 |
+
"loss": 43.3378,
|
922 |
+
"step": 127
|
923 |
+
},
|
924 |
+
{
|
925 |
+
"epoch": 0.17034726554377208,
|
926 |
+
"grad_norm": 4.333169460296631,
|
927 |
+
"learning_rate": 1.0352274983996303e-05,
|
928 |
+
"loss": 46.5715,
|
929 |
+
"step": 128
|
930 |
+
},
|
931 |
+
{
|
932 |
+
"epoch": 0.17167810355583282,
|
933 |
+
"grad_norm": 5.31616735458374,
|
934 |
+
"learning_rate": 9.452397553011157e-06,
|
935 |
+
"loss": 46.6349,
|
936 |
+
"step": 129
|
937 |
+
},
|
938 |
+
{
|
939 |
+
"epoch": 0.17300894156789354,
|
940 |
+
"grad_norm": 4.633855819702148,
|
941 |
+
"learning_rate": 8.590798076009264e-06,
|
942 |
+
"loss": 46.4312,
|
943 |
+
"step": 130
|
944 |
}
|
945 |
],
|
946 |
"logging_steps": 1,
|
|
|
960 |
"attributes": {}
|
961 |
}
|
962 |
},
|
963 |
+
"total_flos": 7.338166546386125e+17,
|
964 |
"train_batch_size": 2,
|
965 |
"trial_name": null,
|
966 |
"trial_params": null
|