prxy5606 committed on
Commit
2e3cf9e
·
verified ·
1 Parent(s): 6983b99

Training in progress, step 193, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec6c70d02926ceaad335a0100cf74749c51b9965734af9abb176fc361ef43d16
3
  size 201892112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38bce14ef22ca30cb7842c91346c893c96f6d4b1229c0c15c5674571ac6f5d4e
3
  size 201892112
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a94c48d26b881d79abd3097469e8f5783eefc08a8dea05c6c89d1da2fb16137
3
  size 102864548
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cd95f68fd8a1a6d11a01fd68d92cebe30891911861682a758bab130fbe57b32
3
  size 102864548
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f2e43872645f9f101ee403b5a709d668383b6b6bfeb7f368e5762a1069266f6e
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d6f2d95fee078ae6685f33ce2ab0558591568bec11bb8996c3ce0c08be35279
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c9bd7e4942c5b62a922cd992f05c7b79b177cefa5f8b5cc986083011c9c9245
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e35b64cddddbcb296f3261846a2764caa97bfc895b4a9f0211dd2b8a6e73b2a5
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.8657492399215698,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-150",
4
- "epoch": 2.3346303501945527,
5
  "eval_steps": 50,
6
- "global_step": 150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1089,6 +1089,307 @@
1089
  "eval_samples_per_second": 41.447,
1090
  "eval_steps_per_second": 10.362,
1091
  "step": 150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1092
  }
1093
  ],
1094
  "logging_steps": 1,
@@ -1112,12 +1413,12 @@
1112
  "should_evaluate": false,
1113
  "should_log": false,
1114
  "should_save": true,
1115
- "should_training_stop": false
1116
  },
1117
  "attributes": {}
1118
  }
1119
  },
1120
- "total_flos": 3.829000395187814e+16,
1121
  "train_batch_size": 8,
1122
  "trial_name": null,
1123
  "trial_params": null
 
1
  {
2
  "best_metric": 0.8657492399215698,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-150",
4
+ "epoch": 3.0038910505836576,
5
  "eval_steps": 50,
6
+ "global_step": 193,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1089
  "eval_samples_per_second": 41.447,
1090
  "eval_steps_per_second": 10.362,
1091
  "step": 150
1092
+ },
1093
+ {
1094
+ "epoch": 2.350194552529183,
1095
+ "grad_norm": 0.374606192111969,
1096
+ "learning_rate": 1.2443403456474017e-05,
1097
+ "loss": 0.7851,
1098
+ "step": 151
1099
+ },
1100
+ {
1101
+ "epoch": 2.3657587548638133,
1102
+ "grad_norm": 0.3662430942058563,
1103
+ "learning_rate": 1.1882318057580489e-05,
1104
+ "loss": 0.74,
1105
+ "step": 152
1106
+ },
1107
+ {
1108
+ "epoch": 2.3813229571984436,
1109
+ "grad_norm": 0.3952215909957886,
1110
+ "learning_rate": 1.1332466114513512e-05,
1111
+ "loss": 0.8131,
1112
+ "step": 153
1113
+ },
1114
+ {
1115
+ "epoch": 2.396887159533074,
1116
+ "grad_norm": 0.38979917764663696,
1117
+ "learning_rate": 1.0794009671164484e-05,
1118
+ "loss": 0.821,
1119
+ "step": 154
1120
+ },
1121
+ {
1122
+ "epoch": 2.412451361867704,
1123
+ "grad_norm": 0.37041887640953064,
1124
+ "learning_rate": 1.0267107413118742e-05,
1125
+ "loss": 0.7174,
1126
+ "step": 155
1127
+ },
1128
+ {
1129
+ "epoch": 2.4280155642023344,
1130
+ "grad_norm": 0.41075101494789124,
1131
+ "learning_rate": 9.751914620890206e-06,
1132
+ "loss": 0.7549,
1133
+ "step": 156
1134
+ },
1135
+ {
1136
+ "epoch": 2.443579766536965,
1137
+ "grad_norm": 0.4010712206363678,
1138
+ "learning_rate": 9.248583124159438e-06,
1139
+ "loss": 0.7279,
1140
+ "step": 157
1141
+ },
1142
+ {
1143
+ "epoch": 2.4591439688715955,
1144
+ "grad_norm": 0.40612390637397766,
1145
+ "learning_rate": 8.757261257028777e-06,
1146
+ "loss": 0.7278,
1147
+ "step": 158
1148
+ },
1149
+ {
1150
+ "epoch": 2.4747081712062258,
1151
+ "grad_norm": 0.4142809510231018,
1152
+ "learning_rate": 8.278093814307637e-06,
1153
+ "loss": 0.7059,
1154
+ "step": 159
1155
+ },
1156
+ {
1157
+ "epoch": 2.490272373540856,
1158
+ "grad_norm": 0.46315258741378784,
1159
+ "learning_rate": 7.81122200884072e-06,
1160
+ "loss": 0.7573,
1161
+ "step": 160
1162
+ },
1163
+ {
1164
+ "epoch": 2.5058365758754864,
1165
+ "grad_norm": 0.38741129636764526,
1166
+ "learning_rate": 7.356783429892023e-06,
1167
+ "loss": 0.7816,
1168
+ "step": 161
1169
+ },
1170
+ {
1171
+ "epoch": 2.5214007782101167,
1172
+ "grad_norm": 0.31710007786750793,
1173
+ "learning_rate": 6.9149120025965905e-06,
1174
+ "loss": 0.6611,
1175
+ "step": 162
1176
+ },
1177
+ {
1178
+ "epoch": 2.536964980544747,
1179
+ "grad_norm": 0.3693569004535675,
1180
+ "learning_rate": 6.4857379484922375e-06,
1181
+ "loss": 0.8636,
1182
+ "step": 163
1183
+ },
1184
+ {
1185
+ "epoch": 2.5525291828793772,
1186
+ "grad_norm": 0.34923169016838074,
1187
+ "learning_rate": 6.069387747142591e-06,
1188
+ "loss": 0.7413,
1189
+ "step": 164
1190
+ },
1191
+ {
1192
+ "epoch": 2.5680933852140075,
1193
+ "grad_norm": 0.3575231432914734,
1194
+ "learning_rate": 5.665984098862992e-06,
1195
+ "loss": 0.7553,
1196
+ "step": 165
1197
+ },
1198
+ {
1199
+ "epoch": 2.5836575875486383,
1200
+ "grad_norm": 0.3520582318305969,
1201
+ "learning_rate": 5.275645888560232e-06,
1202
+ "loss": 0.7394,
1203
+ "step": 166
1204
+ },
1205
+ {
1206
+ "epoch": 2.5992217898832686,
1207
+ "grad_norm": 0.37276095151901245,
1208
+ "learning_rate": 4.898488150696467e-06,
1209
+ "loss": 0.7774,
1210
+ "step": 167
1211
+ },
1212
+ {
1213
+ "epoch": 2.614785992217899,
1214
+ "grad_norm": 0.3675203323364258,
1215
+ "learning_rate": 4.534622035388214e-06,
1216
+ "loss": 0.7305,
1217
+ "step": 168
1218
+ },
1219
+ {
1220
+ "epoch": 2.630350194552529,
1221
+ "grad_norm": 0.3742014467716217,
1222
+ "learning_rate": 4.184154775649768e-06,
1223
+ "loss": 0.7991,
1224
+ "step": 169
1225
+ },
1226
+ {
1227
+ "epoch": 2.6459143968871595,
1228
+ "grad_norm": 0.38438355922698975,
1229
+ "learning_rate": 3.8471896557912e-06,
1230
+ "loss": 0.7519,
1231
+ "step": 170
1232
+ },
1233
+ {
1234
+ "epoch": 2.6614785992217898,
1235
+ "grad_norm": 0.41580691933631897,
1236
+ "learning_rate": 3.523825980979989e-06,
1237
+ "loss": 0.7663,
1238
+ "step": 171
1239
+ },
1240
+ {
1241
+ "epoch": 2.6770428015564205,
1242
+ "grad_norm": 0.4046590030193329,
1243
+ "learning_rate": 3.2141590479753236e-06,
1244
+ "loss": 0.7113,
1245
+ "step": 172
1246
+ },
1247
+ {
1248
+ "epoch": 2.692607003891051,
1249
+ "grad_norm": 0.4197351038455963,
1250
+ "learning_rate": 2.918280117043709e-06,
1251
+ "loss": 0.8028,
1252
+ "step": 173
1253
+ },
1254
+ {
1255
+ "epoch": 2.708171206225681,
1256
+ "grad_norm": 0.4047519862651825,
1257
+ "learning_rate": 2.636276385064157e-06,
1258
+ "loss": 0.7148,
1259
+ "step": 174
1260
+ },
1261
+ {
1262
+ "epoch": 2.7237354085603114,
1263
+ "grad_norm": 0.42242541909217834,
1264
+ "learning_rate": 2.3682309598308747e-06,
1265
+ "loss": 0.6966,
1266
+ "step": 175
1267
+ },
1268
+ {
1269
+ "epoch": 2.7392996108949417,
1270
+ "grad_norm": 0.4608907103538513,
1271
+ "learning_rate": 2.114222835560986e-06,
1272
+ "loss": 0.7255,
1273
+ "step": 176
1274
+ },
1275
+ {
1276
+ "epoch": 2.754863813229572,
1277
+ "grad_norm": 0.4060788154602051,
1278
+ "learning_rate": 1.8743268696145954e-06,
1279
+ "loss": 0.833,
1280
+ "step": 177
1281
+ },
1282
+ {
1283
+ "epoch": 2.7704280155642023,
1284
+ "grad_norm": 0.33689865469932556,
1285
+ "learning_rate": 1.6486137604339813e-06,
1286
+ "loss": 0.7972,
1287
+ "step": 178
1288
+ },
1289
+ {
1290
+ "epoch": 2.7859922178988326,
1291
+ "grad_norm": 0.3548411726951599,
1292
+ "learning_rate": 1.4371500267084338e-06,
1293
+ "loss": 0.7801,
1294
+ "step": 179
1295
+ },
1296
+ {
1297
+ "epoch": 2.801556420233463,
1298
+ "grad_norm": 0.3427428603172302,
1299
+ "learning_rate": 1.2399979877708745e-06,
1300
+ "loss": 0.7001,
1301
+ "step": 180
1302
+ },
1303
+ {
1304
+ "epoch": 2.817120622568093,
1305
+ "grad_norm": 0.36466020345687866,
1306
+ "learning_rate": 1.0572157452321097e-06,
1307
+ "loss": 0.7898,
1308
+ "step": 181
1309
+ },
1310
+ {
1311
+ "epoch": 2.832684824902724,
1312
+ "grad_norm": 0.39266034960746765,
1313
+ "learning_rate": 8.888571658579703e-07,
1314
+ "loss": 0.7809,
1315
+ "step": 182
1316
+ },
1317
+ {
1318
+ "epoch": 2.848249027237354,
1319
+ "grad_norm": 0.3834577202796936,
1320
+ "learning_rate": 7.349718656945504e-07,
1321
+ "loss": 0.7889,
1322
+ "step": 183
1323
+ },
1324
+ {
1325
+ "epoch": 2.8638132295719845,
1326
+ "grad_norm": 0.3879198133945465,
1327
+ "learning_rate": 5.956051954461472e-07,
1328
+ "loss": 0.8306,
1329
+ "step": 184
1330
+ },
1331
+ {
1332
+ "epoch": 2.8793774319066148,
1333
+ "grad_norm": 0.36868515610694885,
1334
+ "learning_rate": 4.7079822711015296e-07,
1335
+ "loss": 0.7332,
1336
+ "step": 185
1337
+ },
1338
+ {
1339
+ "epoch": 2.894941634241245,
1340
+ "grad_norm": 0.38267624378204346,
1341
+ "learning_rate": 3.605877418729975e-07,
1342
+ "loss": 0.6865,
1343
+ "step": 186
1344
+ },
1345
+ {
1346
+ "epoch": 2.9105058365758754,
1347
+ "grad_norm": 0.39616188406944275,
1348
+ "learning_rate": 2.6500621927054715e-07,
1349
+ "loss": 0.7302,
1350
+ "step": 187
1351
+ },
1352
+ {
1353
+ "epoch": 2.926070038910506,
1354
+ "grad_norm": 0.4041755199432373,
1355
+ "learning_rate": 1.840818276162226e-07,
1356
+ "loss": 0.8152,
1357
+ "step": 188
1358
+ },
1359
+ {
1360
+ "epoch": 2.9416342412451364,
1361
+ "grad_norm": 0.4070306122303009,
1362
+ "learning_rate": 1.1783841569968367e-07,
1363
+ "loss": 0.6876,
1364
+ "step": 189
1365
+ },
1366
+ {
1367
+ "epoch": 2.9571984435797667,
1368
+ "grad_norm": 0.42624837160110474,
1369
+ "learning_rate": 6.629550575847354e-08,
1370
+ "loss": 0.7026,
1371
+ "step": 190
1372
+ },
1373
+ {
1374
+ "epoch": 2.972762645914397,
1375
+ "grad_norm": 0.4421103298664093,
1376
+ "learning_rate": 2.946828772473764e-08,
1377
+ "loss": 0.7546,
1378
+ "step": 191
1379
+ },
1380
+ {
1381
+ "epoch": 2.9883268482490273,
1382
+ "grad_norm": 0.4374568462371826,
1383
+ "learning_rate": 7.36761474865455e-09,
1384
+ "loss": 0.7218,
1385
+ "step": 192
1386
+ },
1387
+ {
1388
+ "epoch": 3.0038910505836576,
1389
+ "grad_norm": 0.8102526068687439,
1390
+ "learning_rate": 0.0,
1391
+ "loss": 1.2001,
1392
+ "step": 193
1393
  }
1394
  ],
1395
  "logging_steps": 1,
 
1413
  "should_evaluate": false,
1414
  "should_log": false,
1415
  "should_save": true,
1416
+ "should_training_stop": true
1417
  },
1418
  "attributes": {}
1419
  }
1420
  },
1421
+ "total_flos": 4.879575712530432e+16,
1422
  "train_batch_size": 8,
1423
  "trial_name": null,
1424
  "trial_params": null