Commit 9407c89 (verified) · Parent(s): edadfc1
Committed by error577

Training in progress, step 160, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b583587d17c6c7a97a11c9e59c5d69ae6878502971c7d00024d35410cc8bc4ab
+oid sha256:189a826c21fcc12d4d822272c9376eb9989b7d2c52eae6a814ab821e7de631b0
 size 1579384
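
Each changed file in this checkpoint is tracked with Git LFS, so the diff above only swaps the pointer: the new "oid sha256:" value and the (unchanged) "size" describe the actual adapter weights. Below is a minimal Python sketch for checking a downloaded copy against that pointer; the local path is an assumption, adjust it to wherever the file was pulled.

import hashlib
from pathlib import Path

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream the file so large checkpoints never need to fit in memory."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Expected values taken from the updated LFS pointer above.
path = "last-checkpoint/adapter_model.safetensors"  # assumed local download path
expected_oid = "189a826c21fcc12d4d822272c9376eb9989b7d2c52eae6a814ab821e7de631b0"
expected_size = 1579384

assert Path(path).stat().st_size == expected_size, "size mismatch"
assert sha256_of(path) == expected_oid, "sha256 mismatch"
print("pointer matches downloaded file")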
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a855e5c8ec97ac48e47fa165986806e7c257deeed8f91cad3e04ecdca2ca02a2
+oid sha256:d19d42888a784db5d3792889b6d90f80c505a9dbce4098d0ae5d04ef2168686c
 size 857274
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3a8bec8ada3d1dd406d9aa43c4a27a47f026c1e315b02eae7d5bc447ebde9bea
+oid sha256:f97b059151fb3e9951195534ddd111a9c4803b6431cdba0bb72c51bd50ade686
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:50bdc92009888fc74192f19079ad05c940764efafef78e70a576eb66733f4db5
+oid sha256:d554bdcf1252a9ad2f8bf8ecd99330af5af219432c92d1857da98ea0ae84e1df
 size 1064
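
The four LFS-tracked files above are the pieces the Trainer writes per checkpoint: the (presumably PEFT) adapter weights in adapter_model.safetensors, optimizer state in optimizer.pt, RNG state in rng_state.pth, and LR-scheduler state in scheduler.pt. A hedged sketch of how such a directory is normally consumed, assuming the run used transformers' Trainer; build_model(), train_ds, and eval_ds are placeholders for the original model and data, which this commit does not specify.

# Sketch only: resuming training from this checkpoint directory.
# build_model(), train_ds and eval_ds are placeholders; the call being
# illustrated is resume_from_checkpoint.
from transformers import Trainer, TrainingArguments

args = TrainingArguments(
    output_dir="outputs",
    per_device_train_batch_size=1,  # trainer_state.json records "train_batch_size": 1
    logging_steps=1,                # and "logging_steps": 1
)
trainer = Trainer(
    model=build_model(),
    args=args,
    train_dataset=train_ds,
    eval_dataset=eval_ds,
)
# Restores optimizer.pt, scheduler.pt and rng_state.pth, and reads
# trainer_state.json so training continues from global_step 160.
trainer.train(resume_from_checkpoint="last-checkpoint")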
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.09244356402954892,
+  "epoch": 0.10564978746234163,
   "eval_steps": 20,
-  "global_step": 140,
+  "global_step": 160,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1051,6 +1051,154 @@
       "eval_samples_per_second": 75.135,
       "eval_steps_per_second": 75.135,
       "step": 140
+    },
+    {
+      "epoch": 0.09310387520118855,
+      "grad_norm": 50641.609375,
+      "learning_rate": 0.00017266235158867752,
+      "loss": 91.73,
+      "step": 141
+    },
+    {
+      "epoch": 0.0937641863728282,
+      "grad_norm": 31302.37109375,
+      "learning_rate": 0.00017105474416700164,
+      "loss": 101.3363,
+      "step": 142
+    },
+    {
+      "epoch": 0.09442449754446783,
+      "grad_norm": 25933.974609375,
+      "learning_rate": 0.0001694446658777458,
+      "loss": 94.2955,
+      "step": 143
+    },
+    {
+      "epoch": 0.09508480871610747,
+      "grad_norm": 28032.328125,
+      "learning_rate": 0.00016783230567073596,
+      "loss": 98.1662,
+      "step": 144
+    },
+    {
+      "epoch": 0.0957451198877471,
+      "grad_norm": 25707.517578125,
+      "learning_rate": 0.00016621785276359127,
+      "loss": 96.4839,
+      "step": 145
+    },
+    {
+      "epoch": 0.09640543105938673,
+      "grad_norm": 59317.1875,
+      "learning_rate": 0.0001646014966195185,
+      "loss": 97.2288,
+      "step": 146
+    },
+    {
+      "epoch": 0.09706574223102638,
+      "grad_norm": 115944.59375,
+      "learning_rate": 0.00016298342692507763,
+      "loss": 99.3989,
+      "step": 147
+    },
+    {
+      "epoch": 0.097726053402666,
+      "grad_norm": 166985.421875,
+      "learning_rate": 0.00016136383356792156,
+      "loss": 108.0774,
+      "step": 148
+    },
+    {
+      "epoch": 0.09838636457430563,
+      "grad_norm": 136255.21875,
+      "learning_rate": 0.0001597429066145116,
+      "loss": 132.089,
+      "step": 149
+    },
+    {
+      "epoch": 0.09904667574594528,
+      "grad_norm": 69448.328125,
+      "learning_rate": 0.0001581208362878126,
+      "loss": 129.43,
+      "step": 150
+    },
+    {
+      "epoch": 0.09970698691758491,
+      "grad_norm": 11727.447265625,
+      "learning_rate": 0.00015649781294496933,
+      "loss": 101.6987,
+      "step": 151
+    },
+    {
+      "epoch": 0.10036729808922455,
+      "grad_norm": 19763.63671875,
+      "learning_rate": 0.00015487402705496707,
+      "loss": 97.9096,
+      "step": 152
+    },
+    {
+      "epoch": 0.10102760926086418,
+      "grad_norm": 32434.076171875,
+      "learning_rate": 0.0001532496691762796,
+      "loss": 101.298,
+      "step": 153
+    },
+    {
+      "epoch": 0.10168792043250381,
+      "grad_norm": 22052.291015625,
+      "learning_rate": 0.00015162492993450597,
+      "loss": 100.8446,
+      "step": 154
+    },
+    {
+      "epoch": 0.10234823160414346,
+      "grad_norm": 12854.66796875,
+      "learning_rate": 0.00015,
+      "loss": 108.0937,
+      "step": 155
+    },
+    {
+      "epoch": 0.10300854277578309,
+      "grad_norm": 18811.01953125,
+      "learning_rate": 0.00014837507006549403,
+      "loss": 99.9735,
+      "step": 156
+    },
+    {
+      "epoch": 0.10366885394742273,
+      "grad_norm": 12819.19140625,
+      "learning_rate": 0.00014675033082372038,
+      "loss": 108.136,
+      "step": 157
+    },
+    {
+      "epoch": 0.10432916511906236,
+      "grad_norm": 33140.94140625,
+      "learning_rate": 0.00014512597294503293,
+      "loss": 100.7064,
+      "step": 158
+    },
+    {
+      "epoch": 0.10498947629070199,
+      "grad_norm": 12379.7744140625,
+      "learning_rate": 0.00014350218705503067,
+      "loss": 108.1111,
+      "step": 159
+    },
+    {
+      "epoch": 0.10564978746234163,
+      "grad_norm": 15650.5546875,
+      "learning_rate": 0.00014187916371218736,
+      "loss": 98.0636,
+      "step": 160
+    },
+    {
+      "epoch": 0.10564978746234163,
+      "eval_loss": 11.360566139221191,
+      "eval_runtime": 6.5542,
+      "eval_samples_per_second": 75.524,
+      "eval_steps_per_second": 75.524,
+      "step": 160
     }
   ],
   "logging_steps": 1,
@@ -1070,7 +1218,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 93374726012928.0,
+  "total_flos": 105203669925888.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null