eddysang committed
Commit a7285b6 · verified · 1 Parent(s): c2b3e65

Training in progress, step 153, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e5aa9f9271e49eb861079866208618baaab7b7d3de962c114cc41a783fb00b37
+oid sha256:a288529085e85aa2a2b58c37011dd16b7750557b7f069f97f1bcb476e9141825
 size 335604696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9bed79e89e8e6580c15caa81b1e7c98e4a6124d56f11fb92ea2f800dde6ddc40
+oid sha256:89d5536243cdc3bdfd284d91f65637bbf5ab03984fe38ebed05dcf439569ea11
 size 671466706
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f1f2394615adf7a2cb7801d62c2302d9fa5f947b6a2b4286eb85a36a8c5e0bd0
+oid sha256:630b121fadd21d13ee17a86473b928204282dffbb81941a61c173dd06a7af5f0
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6aeb1fbb5e964bbc83fa43b049054867ad1faca8f43271132d241ae074069d5d
+oid sha256:e76ff8614026ec7c5c2d9793615ca4e2f707e550ce0b5a4376af475431afe3f1
 size 1064
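The pointer files above only record a SHA-256 and a byte size for the objects stored in Git LFS. As a sanity check, here is a minimal Python sketch (assuming the checkpoint has been fetched locally, e.g. with `git lfs pull`, and using the new scheduler.pt values from this commit) that re-derives both fields:

```python
import hashlib
from pathlib import Path

def matches_lfs_pointer(path: str, expected_oid: str, expected_size: int) -> bool:
    """Recompute the SHA-256 and byte size of a fetched LFS object and compare
    them with the `oid sha256:...` and `size` lines of its pointer file."""
    data = Path(path).read_bytes()
    return (
        hashlib.sha256(data).hexdigest() == expected_oid
        and len(data) == expected_size
    )

# Values taken from the new scheduler.pt pointer in this commit.
print(matches_lfs_pointer(
    "last-checkpoint/scheduler.pt",
    "e76ff8614026ec7c5c2d9793615ca4e2f707e550ce0b5a4376af475431afe3f1",
    1064,
))
```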
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.20023004370830458,
+  "epoch": 0.22525879917184266,
   "eval_steps": 50,
-  "global_step": 136,
+  "global_step": 153,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -983,6 +983,133 @@
       "learning_rate": 4.212216399081918e-05,
       "loss": 0.0341,
       "step": 136
+    },
+    {
+      "epoch": 0.20170232344145386,
+      "grad_norm": 0.18119709193706512,
+      "learning_rate": 4.095071251953399e-05,
+      "loss": 0.0831,
+      "step": 137
+    },
+    {
+      "epoch": 0.20317460317460317,
+      "grad_norm": 0.09372559934854507,
+      "learning_rate": 3.978963279105821e-05,
+      "loss": 0.0261,
+      "step": 138
+    },
+    {
+      "epoch": 0.2046468829077525,
+      "grad_norm": 0.08984406292438507,
+      "learning_rate": 3.863927848152472e-05,
+      "loss": 0.0399,
+      "step": 139
+    },
+    {
+      "epoch": 0.20611916264090177,
+      "grad_norm": 0.0921633318066597,
+      "learning_rate": 3.750000000000001e-05,
+      "loss": 0.0189,
+      "step": 140
+    },
+    {
+      "epoch": 0.20759144237405108,
+      "grad_norm": 0.11086931824684143,
+      "learning_rate": 3.637214438174593e-05,
+      "loss": 0.0431,
+      "step": 141
+    },
+    {
+      "epoch": 0.20906372210720037,
+      "grad_norm": 0.1010395959019661,
+      "learning_rate": 3.525605518250964e-05,
+      "loss": 0.0696,
+      "step": 142
+    },
+    {
+      "epoch": 0.21053600184034968,
+      "grad_norm": 0.07738685607910156,
+      "learning_rate": 3.415207237387297e-05,
+      "loss": 0.0134,
+      "step": 143
+    },
+    {
+      "epoch": 0.21200828157349896,
+      "grad_norm": 0.08697827160358429,
+      "learning_rate": 3.3060532239693994e-05,
+      "loss": 0.0294,
+      "step": 144
+    },
+    {
+      "epoch": 0.21348056130664828,
+      "grad_norm": 0.13390277326107025,
+      "learning_rate": 3.198176727367156e-05,
+      "loss": 0.064,
+      "step": 145
+    },
+    {
+      "epoch": 0.21495284103979756,
+      "grad_norm": 0.12035319209098816,
+      "learning_rate": 3.091610607806452e-05,
+      "loss": 0.0378,
+      "step": 146
+    },
+    {
+      "epoch": 0.21642512077294687,
+      "grad_norm": 0.09978077560663223,
+      "learning_rate": 2.986387326359637e-05,
+      "loss": 0.0356,
+      "step": 147
+    },
+    {
+      "epoch": 0.21789740050609616,
+      "grad_norm": 0.09956356137990952,
+      "learning_rate": 2.8825389350575624e-05,
+      "loss": 0.0476,
+      "step": 148
+    },
+    {
+      "epoch": 0.21936968023924547,
+      "grad_norm": 0.09759137779474258,
+      "learning_rate": 2.78009706712622e-05,
+      "loss": 0.0383,
+      "step": 149
+    },
+    {
+      "epoch": 0.22084195997239475,
+      "grad_norm": 0.06408429890871048,
+      "learning_rate": 2.6790929273509545e-05,
+      "loss": 0.0172,
+      "step": 150
+    },
+    {
+      "epoch": 0.22084195997239475,
+      "eval_loss": 0.04416579380631447,
+      "eval_runtime": 1045.846,
+      "eval_samples_per_second": 2.188,
+      "eval_steps_per_second": 1.094,
+      "step": 150
+    },
+    {
+      "epoch": 0.22231423970554406,
+      "grad_norm": 0.07642810791730881,
+      "learning_rate": 2.579557282571196e-05,
+      "loss": 0.0281,
+      "step": 151
+    },
+    {
+      "epoch": 0.22378651943869335,
+      "grad_norm": 0.06974484771490097,
+      "learning_rate": 2.4815204523085654e-05,
+      "loss": 0.0204,
+      "step": 152
+    },
+    {
+      "epoch": 0.22525879917184266,
+      "grad_norm": 0.0952039286494255,
+      "learning_rate": 2.385012299531262e-05,
+      "loss": 0.0515,
+      "step": 153
     }
   ],
   "logging_steps": 1,
@@ -1002,7 +1129,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.4388676596124877e+18,
+  "total_flos": 1.6162316446146232e+18,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null