prxy5606 committed on
Commit 8cf6f47 · verified · 1 Parent(s): 66c4b9e

Training in progress, step 150, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:68a258ec929d9218770e1f86fd3f1de785918e5b52a2e2c30529f74998eaa2aa
+ oid sha256:ec6c70d02926ceaad335a0100cf74749c51b9965734af9abb176fc361ef43d16
  size 201892112
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a2e1ef4ae464844442d88115251951165c07376033477b3523521d128587207a
+ oid sha256:1a94c48d26b881d79abd3097469e8f5783eefc08a8dea05c6c89d1da2fb16137
  size 102864548
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:212dcba99a7b1ae77770a31ef6779f7d223c41005e87ffc275e7c8bccb2d4cfd
+ oid sha256:f2e43872645f9f101ee403b5a709d668383b6b6bfeb7f368e5762a1069266f6e
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:87acae65691e01ffbaf4c13ce5265904843d2731d4c47c0eed57fce5257b0710
+ oid sha256:3c9bd7e4942c5b62a922cd992f05c7b79b177cefa5f8b5cc986083011c9c9245
  size 1064
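
Each pointer diff above changes only the oid sha256 line while the size stays the same, because these are Git LFS pointer files (version, oid, size) rather than the binary payloads themselves. As a quick integrity check, a locally downloaded payload can be hashed and compared against its pointer. A minimal sketch in Python, assuming a local clone with the LFS objects pulled; the path and expected digest are taken from the adapter_model.safetensors diff above:

import hashlib

# Minimal sketch: compare a downloaded LFS payload against the oid in its pointer.
# Path and digest are assumptions taken from the adapter_model.safetensors diff above.
expected_oid = "ec6c70d02926ceaad335a0100cf74749c51b9965734af9abb176fc361ef43d16"

sha = hashlib.sha256()
with open("last-checkpoint/adapter_model.safetensors", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        sha.update(chunk)

print(sha.hexdigest() == expected_oid)  # True if the local file matches the new pointer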
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
- "best_metric": 0.8714919686317444,
- "best_model_checkpoint": "miner_id_24/checkpoint-100",
- "epoch": 1.556420233463035,
+ "best_metric": 0.8657492399215698,
+ "best_model_checkpoint": "miner_id_24/checkpoint-150",
+ "epoch": 2.3346303501945527,
  "eval_steps": 50,
- "global_step": 100,
+ "global_step": 150,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -731,6 +731,364 @@
  "eval_samples_per_second": 41.446,
  "eval_steps_per_second": 10.362,
  "step": 100
+ },
+ {
+ "epoch": 1.5719844357976653,
+ "grad_norm": 0.3435685634613037,
+ "learning_rate": 5.0429174054104355e-05,
+ "loss": 0.8486,
+ "step": 101
+ },
+ {
+ "epoch": 1.5875486381322959,
+ "grad_norm": 0.3485165536403656,
+ "learning_rate": 4.9570825945895656e-05,
+ "loss": 0.8179,
+ "step": 102
+ },
+ {
+ "epoch": 1.6031128404669261,
+ "grad_norm": 0.3704763352870941,
+ "learning_rate": 4.8712604317250576e-05,
+ "loss": 0.9108,
+ "step": 103
+ },
+ {
+ "epoch": 1.6186770428015564,
+ "grad_norm": 0.33625274896621704,
+ "learning_rate": 4.785476209002227e-05,
+ "loss": 0.8262,
+ "step": 104
+ },
+ {
+ "epoch": 1.6342412451361867,
+ "grad_norm": 0.3515668511390686,
+ "learning_rate": 4.699755207425259e-05,
+ "loss": 0.8336,
+ "step": 105
+ },
+ {
+ "epoch": 1.649805447470817,
+ "grad_norm": 0.3408714830875397,
+ "learning_rate": 4.6141226893667684e-05,
+ "loss": 0.714,
+ "step": 106
+ },
+ {
+ "epoch": 1.6653696498054473,
+ "grad_norm": 0.36845165491104126,
+ "learning_rate": 4.5286038911228785e-05,
+ "loss": 0.8147,
+ "step": 107
+ },
+ {
+ "epoch": 1.6809338521400778,
+ "grad_norm": 0.3738980293273926,
+ "learning_rate": 4.443224015475956e-05,
+ "loss": 0.8297,
+ "step": 108
+ },
+ {
+ "epoch": 1.6964980544747081,
+ "grad_norm": 0.38755711913108826,
+ "learning_rate": 4.358008224267245e-05,
+ "loss": 0.8268,
+ "step": 109
+ },
+ {
+ "epoch": 1.7120622568093387,
+ "grad_norm": 0.367796927690506,
+ "learning_rate": 4.272981630981551e-05,
+ "loss": 0.7379,
+ "step": 110
+ },
+ {
+ "epoch": 1.727626459143969,
+ "grad_norm": 0.3794459104537964,
+ "learning_rate": 4.188169293346183e-05,
+ "loss": 0.7374,
+ "step": 111
+ },
+ {
+ "epoch": 1.7431906614785992,
+ "grad_norm": 0.4307495653629303,
+ "learning_rate": 4.103596205946323e-05,
+ "loss": 0.7783,
+ "step": 112
+ },
+ {
+ "epoch": 1.7587548638132295,
+ "grad_norm": 0.3288561701774597,
+ "learning_rate": 4.019287292859016e-05,
+ "loss": 0.8254,
+ "step": 113
+ },
+ {
+ "epoch": 1.7743190661478598,
+ "grad_norm": 0.3198075592517853,
+ "learning_rate": 3.9352674003079225e-05,
+ "loss": 0.8722,
+ "step": 114
+ },
+ {
+ "epoch": 1.7898832684824901,
+ "grad_norm": 0.3411862850189209,
+ "learning_rate": 3.851561289341023e-05,
+ "loss": 0.8044,
+ "step": 115
+ },
+ {
+ "epoch": 1.8054474708171206,
+ "grad_norm": 0.33337950706481934,
+ "learning_rate": 3.768193628533427e-05,
+ "loss": 0.7877,
+ "step": 116
+ },
+ {
+ "epoch": 1.821011673151751,
+ "grad_norm": 0.34444868564605713,
+ "learning_rate": 3.6851889867174275e-05,
+ "loss": 0.8396,
+ "step": 117
+ },
+ {
+ "epoch": 1.8365758754863815,
+ "grad_norm": 0.35829517245292664,
+ "learning_rate": 3.602571825741953e-05,
+ "loss": 0.7998,
+ "step": 118
+ },
+ {
+ "epoch": 1.8521400778210118,
+ "grad_norm": 0.33558353781700134,
+ "learning_rate": 3.520366493263554e-05,
+ "loss": 0.7706,
+ "step": 119
+ },
+ {
+ "epoch": 1.867704280155642,
+ "grad_norm": 0.3696330487728119,
+ "learning_rate": 3.438597215571027e-05,
+ "loss": 0.9026,
+ "step": 120
+ },
+ {
+ "epoch": 1.8832684824902723,
+ "grad_norm": 0.34265345335006714,
+ "learning_rate": 3.357288090445827e-05,
+ "loss": 0.8013,
+ "step": 121
+ },
+ {
+ "epoch": 1.8988326848249026,
+ "grad_norm": 0.3573516309261322,
+ "learning_rate": 3.2764630800603314e-05,
+ "loss": 0.8148,
+ "step": 122
+ },
+ {
+ "epoch": 1.914396887159533,
+ "grad_norm": 0.36973556876182556,
+ "learning_rate": 3.196146003916084e-05,
+ "loss": 0.7784,
+ "step": 123
+ },
+ {
+ "epoch": 1.9299610894941635,
+ "grad_norm": 0.37476617097854614,
+ "learning_rate": 3.116360531824074e-05,
+ "loss": 0.8075,
+ "step": 124
+ },
+ {
+ "epoch": 1.9455252918287937,
+ "grad_norm": 0.36869266629219055,
+ "learning_rate": 3.0371301769291417e-05,
+ "loss": 0.7631,
+ "step": 125
+ },
+ {
+ "epoch": 1.9610894941634243,
+ "grad_norm": 0.38179001212120056,
+ "learning_rate": 2.9584782887805328e-05,
+ "loss": 0.7691,
+ "step": 126
+ },
+ {
+ "epoch": 1.9766536964980546,
+ "grad_norm": 0.4227118492126465,
+ "learning_rate": 2.8804280464506973e-05,
+ "loss": 0.7856,
+ "step": 127
+ },
+ {
+ "epoch": 1.9922178988326849,
+ "grad_norm": 0.46422451734542847,
+ "learning_rate": 2.8030024517042907e-05,
+ "loss": 0.8267,
+ "step": 128
+ },
+ {
+ "epoch": 2.007782101167315,
+ "grad_norm": 1.0904223918914795,
+ "learning_rate": 2.726224322219473e-05,
+ "loss": 1.539,
+ "step": 129
+ },
+ {
+ "epoch": 2.0233463035019454,
+ "grad_norm": 0.2821767330169678,
+ "learning_rate": 2.650116284863402e-05,
+ "loss": 0.6871,
+ "step": 130
+ },
+ {
+ "epoch": 2.0389105058365757,
+ "grad_norm": 0.3239165246486664,
+ "learning_rate": 2.5747007690240198e-05,
+ "loss": 0.8398,
+ "step": 131
+ },
+ {
+ "epoch": 2.054474708171206,
+ "grad_norm": 0.33111441135406494,
+ "learning_rate": 2.500000000000001e-05,
+ "loss": 0.8076,
+ "step": 132
+ },
+ {
+ "epoch": 2.0700389105058368,
+ "grad_norm": 0.36252424120903015,
+ "learning_rate": 2.426035992450848e-05,
+ "loss": 0.8682,
+ "step": 133
+ },
+ {
+ "epoch": 2.085603112840467,
+ "grad_norm": 0.3458457589149475,
+ "learning_rate": 2.3528305439090743e-05,
+ "loss": 0.7667,
+ "step": 134
+ },
+ {
+ "epoch": 2.1011673151750974,
+ "grad_norm": 0.3382876217365265,
+ "learning_rate": 2.280405228356377e-05,
+ "loss": 0.7525,
+ "step": 135
+ },
+ {
+ "epoch": 2.1167315175097277,
+ "grad_norm": 0.34494414925575256,
+ "learning_rate": 2.2087813898656774e-05,
+ "loss": 0.7819,
+ "step": 136
+ },
+ {
+ "epoch": 2.132295719844358,
+ "grad_norm": 0.3596420884132385,
+ "learning_rate": 2.137980136310926e-05,
+ "loss": 0.7824,
+ "step": 137
+ },
+ {
+ "epoch": 2.1478599221789882,
+ "grad_norm": 0.35264426469802856,
+ "learning_rate": 2.068022333146522e-05,
+ "loss": 0.7032,
+ "step": 138
+ },
+ {
+ "epoch": 2.1634241245136185,
+ "grad_norm": 0.3882901072502136,
+ "learning_rate": 1.9989285972581595e-05,
+ "loss": 0.826,
+ "step": 139
+ },
+ {
+ "epoch": 2.178988326848249,
+ "grad_norm": 0.38036638498306274,
+ "learning_rate": 1.9307192908869397e-05,
+ "loss": 0.7353,
+ "step": 140
+ },
+ {
+ "epoch": 2.1945525291828796,
+ "grad_norm": 0.3903452157974243,
+ "learning_rate": 1.863414515628531e-05,
+ "loss": 0.712,
+ "step": 141
+ },
+ {
+ "epoch": 2.21011673151751,
+ "grad_norm": 0.4027177095413208,
+ "learning_rate": 1.7970341065091245e-05,
+ "loss": 0.7076,
+ "step": 142
+ },
+ {
+ "epoch": 2.22568093385214,
+ "grad_norm": 0.4014608561992645,
+ "learning_rate": 1.7315976261399696e-05,
+ "loss": 0.6695,
+ "step": 143
+ },
+ {
+ "epoch": 2.2412451361867705,
+ "grad_norm": 0.43404000997543335,
+ "learning_rate": 1.667124358952184e-05,
+ "loss": 0.7546,
+ "step": 144
+ },
+ {
+ "epoch": 2.2568093385214008,
+ "grad_norm": 0.37586840987205505,
+ "learning_rate": 1.6036333055135344e-05,
+ "loss": 0.6992,
+ "step": 145
+ },
+ {
+ "epoch": 2.272373540856031,
+ "grad_norm": 0.34168022871017456,
+ "learning_rate": 1.541143176928891e-05,
+ "loss": 0.8211,
+ "step": 146
+ },
+ {
+ "epoch": 2.2879377431906613,
+ "grad_norm": 0.33489376306533813,
+ "learning_rate": 1.4796723893259712e-05,
+ "loss": 0.7855,
+ "step": 147
+ },
+ {
+ "epoch": 2.3035019455252916,
+ "grad_norm": 0.3305031359195709,
+ "learning_rate": 1.4192390584280346e-05,
+ "loss": 0.6823,
+ "step": 148
+ },
+ {
+ "epoch": 2.319066147859922,
+ "grad_norm": 0.3635585606098175,
+ "learning_rate": 1.3598609942150765e-05,
+ "loss": 0.7662,
+ "step": 149
+ },
+ {
+ "epoch": 2.3346303501945527,
+ "grad_norm": 0.34550437331199646,
+ "learning_rate": 1.3015556956751669e-05,
+ "loss": 0.7758,
+ "step": 150
+ },
+ {
+ "epoch": 2.3346303501945527,
+ "eval_loss": 0.8657492399215698,
+ "eval_runtime": 2.6057,
+ "eval_samples_per_second": 41.447,
+ "eval_steps_per_second": 10.362,
+ "step": 150
  }
  ],
  "logging_steps": 1,
 
@@ -759,7 +1117,7 @@
  "attributes": {}
  }
  },
- "total_flos": 2.581108901491507e+16,
+ "total_flos": 3.829000395187814e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null