ErrorAI committed on
Commit 6cff4d0 · verified · 1 Parent(s): 7972720

Training in progress, step 209, checkpoint
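This commit records the end of a one-epoch run: the trainer_state.json diff below sets global_step to 209 and epoch to 1.0, flips should_training_stop to true, and shows the learning rate decayed to 0.0 at the final step. A minimal sketch of reading those fields back from a local clone of the repo, assuming the standard Trainer layout in which the per-step entries shown below live under "log_history":

```python
import json

# Minimal sketch: inspect the fields this commit changed in
# last-checkpoint/trainer_state.json. The path assumes a local clone.
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

last = state["log_history"][-1]                      # final logged step
print("global_step:", state["global_step"])          # 209
print("epoch:", state["epoch"])                      # 1.0
print("last loss:", last["loss"])                    # 1.6603
print("last learning rate:", last["learning_rate"])  # 0.0
```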

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:b4035eb3a15d3e5448fa28916cd6f49e43837caaff57d6d8f42386f32521b896
+ oid sha256:6ab3deabdda454b8ba7b5da21606bed5fc85eeb58e81b95e9205ce6729732a91
 size 36981072
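Only the LFS pointer changes here; the adapter weights themselves live in LFS storage under the new oid. A minimal sketch of verifying a downloaded copy against that oid, assuming the checkpoint has been pulled locally (the path is an assumption; the expected hash comes from the pointer above):

```python
import hashlib

# Recompute the SHA-256 of the pulled LFS object and compare it with the oid
# recorded in the new pointer above. The local path is an assumption.
def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

expected = "6ab3deabdda454b8ba7b5da21606bed5fc85eeb58e81b95e9205ce6729732a91"
actual = sha256_of("last-checkpoint/adapter_model.safetensors")
print("OK" if actual == expected else f"hash mismatch: {actual}")
```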
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:5f3e64a11f6ea47c0343abd2040f7093d7c4bfe06345ca4bb64d6ad732f03072
+ oid sha256:c117e27cf9a5aef902d3fe3bf38f7208e397229761bc747594e25017e5b98083
 size 19859140
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:299bec1be8e3127922a76464b06da5f7e4edb847830ce04c77eb12728fc77775
+ oid sha256:ac5c70cc4249ce78f1de16abfd76c61b15f83edba2de982644545ea9b6120ee6
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:7c6151dd7288eb98bf063d6d1d782a1bec4b1d6191cf4071752734d494723af6
+ oid sha256:4083f9205f14e306d566b4a8d0cb4a12225c61d8b6993dde499f090442c7be50
 size 1064
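The files above (adapter weights, optimizer.pt, scheduler.pt, rng_state.pth), together with trainer_state.json below, are what transformers' Trainer restores when a run is resumed from this directory. A minimal sketch, assuming `trainer` is the already-configured Trainer instance for this run (model, TrainingArguments, and datasets set up elsewhere):

```python
from transformers import Trainer

def resume_from_last_checkpoint(trainer: Trainer) -> None:
    # Restores optimizer.pt, scheduler.pt, rng_state.pth, and trainer_state.json
    # from the checkpoint directory before continuing training. Since this commit
    # sets should_training_stop to true at epoch 1.0, resuming only matters if
    # the run is extended (e.g., more epochs configured in the arguments).
    trainer.train(resume_from_checkpoint="last-checkpoint")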
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
 "best_metric": null,
 "best_model_checkpoint": null,
- "epoch": 0.7607655502392344,
+ "epoch": 1.0,
 "eval_steps": 500,
- "global_step": 159,
+ "global_step": 209,
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
@@ -1120,6 +1120,356 @@
 "learning_rate": 1.4104403846777909e-05,
 "loss": 1.4913,
 "step": 159
+ },
+ {
+ "epoch": 0.7655502392344498,
+ "grad_norm": 0.2924661338329315,
+ "learning_rate": 1.3572659544410494e-05,
+ "loss": 1.5388,
+ "step": 160
+ },
+ {
+ "epoch": 0.7703349282296651,
+ "grad_norm": 0.30911317467689514,
+ "learning_rate": 1.3049554138967051e-05,
+ "loss": 1.5633,
+ "step": 161
+ },
+ {
+ "epoch": 0.7751196172248804,
+ "grad_norm": 0.31692859530448914,
+ "learning_rate": 1.2535211687093535e-05,
+ "loss": 1.5382,
+ "step": 162
+ },
+ {
+ "epoch": 0.7799043062200957,
+ "grad_norm": 0.32245802879333496,
+ "learning_rate": 1.202975416726464e-05,
+ "loss": 1.5613,
+ "step": 163
+ },
+ {
+ "epoch": 0.784688995215311,
+ "grad_norm": 0.3302960991859436,
+ "learning_rate": 1.1533301450856054e-05,
+ "loss": 1.4634,
+ "step": 164
+ },
+ {
+ "epoch": 0.7894736842105263,
+ "grad_norm": 0.33953121304512024,
+ "learning_rate": 1.1045971273716477e-05,
+ "loss": 1.5284,
+ "step": 165
+ },
+ {
+ "epoch": 0.7942583732057417,
+ "grad_norm": 0.3454139530658722,
+ "learning_rate": 1.0567879208246084e-05,
+ "loss": 1.5508,
+ "step": 166
+ },
+ {
+ "epoch": 0.7990430622009569,
+ "grad_norm": 0.3438206613063812,
+ "learning_rate": 1.0099138635988026e-05,
+ "loss": 1.5568,
+ "step": 167
+ },
+ {
+ "epoch": 0.8038277511961722,
+ "grad_norm": 0.3348008692264557,
+ "learning_rate": 9.639860720739525e-06,
+ "loss": 1.3964,
+ "step": 168
+ },
+ {
+ "epoch": 0.8086124401913876,
+ "grad_norm": 0.33726564049720764,
+ "learning_rate": 9.190154382188921e-06,
+ "loss": 1.4585,
+ "step": 169
+ },
+ {
+ "epoch": 0.8133971291866029,
+ "grad_norm": 0.3814249038696289,
+ "learning_rate": 8.75012627008489e-06,
+ "loss": 1.4147,
+ "step": 170
+ },
+ {
+ "epoch": 0.8181818181818182,
+ "grad_norm": 0.3630184531211853,
+ "learning_rate": 8.31988073894403e-06,
+ "loss": 1.539,
+ "step": 171
+ },
+ {
+ "epoch": 0.8229665071770335,
+ "grad_norm": 0.39133140444755554,
+ "learning_rate": 7.899519823302743e-06,
+ "loss": 1.7378,
+ "step": 172
+ },
+ {
+ "epoch": 0.8277511961722488,
+ "grad_norm": 0.36332792043685913,
+ "learning_rate": 7.489143213519301e-06,
+ "loss": 1.5765,
+ "step": 173
+ },
+ {
+ "epoch": 0.8325358851674641,
+ "grad_norm": 0.39158034324645996,
+ "learning_rate": 7.088848232131861e-06,
+ "loss": 1.5155,
+ "step": 174
+ },
+ {
+ "epoch": 0.8373205741626795,
+ "grad_norm": 0.40499481558799744,
+ "learning_rate": 6.698729810778065e-06,
+ "loss": 1.5312,
+ "step": 175
+ },
+ {
+ "epoch": 0.8421052631578947,
+ "grad_norm": 0.41012099385261536,
+ "learning_rate": 6.318880467681526e-06,
+ "loss": 1.7301,
+ "step": 176
+ },
+ {
+ "epoch": 0.84688995215311,
+ "grad_norm": 0.4328974783420563,
+ "learning_rate": 5.949390285710776e-06,
+ "loss": 1.6651,
+ "step": 177
+ },
+ {
+ "epoch": 0.8516746411483254,
+ "grad_norm": 0.4051513373851776,
+ "learning_rate": 5.590346891015758e-06,
+ "loss": 1.5258,
+ "step": 178
+ },
+ {
+ "epoch": 0.8564593301435407,
+ "grad_norm": 0.49103522300720215,
+ "learning_rate": 5.241835432246889e-06,
+ "loss": 1.8134,
+ "step": 179
+ },
+ {
+ "epoch": 0.861244019138756,
+ "grad_norm": 0.4615950286388397,
+ "learning_rate": 4.903938560361698e-06,
+ "loss": 1.5319,
+ "step": 180
+ },
+ {
+ "epoch": 0.8660287081339713,
+ "grad_norm": 0.47922465205192566,
+ "learning_rate": 4.576736409023813e-06,
+ "loss": 1.7446,
+ "step": 181
+ },
+ {
+ "epoch": 0.8708133971291866,
+ "grad_norm": 0.5150140523910522,
+ "learning_rate": 4.260306575598949e-06,
+ "loss": 1.6205,
+ "step": 182
+ },
+ {
+ "epoch": 0.8755980861244019,
+ "grad_norm": 0.5273977518081665,
+ "learning_rate": 3.954724102752316e-06,
+ "loss": 1.7335,
+ "step": 183
+ },
+ {
+ "epoch": 0.8803827751196173,
+ "grad_norm": 0.529906690120697,
+ "learning_rate": 3.660061460651981e-06,
+ "loss": 1.8156,
+ "step": 184
+ },
+ {
+ "epoch": 0.8851674641148325,
+ "grad_norm": 0.5719719529151917,
+ "learning_rate": 3.376388529782215e-06,
+ "loss": 1.9336,
+ "step": 185
+ },
+ {
+ "epoch": 0.8899521531100478,
+ "grad_norm": 0.5587736964225769,
+ "learning_rate": 3.1037725843711062e-06,
+ "loss": 1.7484,
+ "step": 186
+ },
+ {
+ "epoch": 0.8947368421052632,
+ "grad_norm": 0.6443825364112854,
+ "learning_rate": 2.842278276436128e-06,
+ "loss": 2.0777,
+ "step": 187
+ },
+ {
+ "epoch": 0.8995215311004785,
+ "grad_norm": 0.6428802609443665,
+ "learning_rate": 2.591967620451707e-06,
+ "loss": 1.8287,
+ "step": 188
+ },
+ {
+ "epoch": 0.9043062200956937,
+ "grad_norm": 0.7180678844451904,
+ "learning_rate": 2.3528999786421756e-06,
+ "loss": 1.9069,
+ "step": 189
+ },
+ {
+ "epoch": 0.9090909090909091,
+ "grad_norm": 0.6900708079338074,
+ "learning_rate": 2.1251320469037827e-06,
+ "loss": 1.9678,
+ "step": 190
+ },
+ {
+ "epoch": 0.9138755980861244,
+ "grad_norm": 0.6777265667915344,
+ "learning_rate": 1.908717841359048e-06,
+ "loss": 1.7879,
+ "step": 191
+ },
+ {
+ "epoch": 0.9186602870813397,
+ "grad_norm": 0.760741651058197,
+ "learning_rate": 1.70370868554659e-06,
+ "loss": 1.8427,
+ "step": 192
+ },
+ {
+ "epoch": 0.9234449760765551,
+ "grad_norm": 0.776866614818573,
+ "learning_rate": 1.5101531982495308e-06,
+ "loss": 1.787,
+ "step": 193
+ },
+ {
+ "epoch": 0.9282296650717703,
+ "grad_norm": 0.9523407816886902,
+ "learning_rate": 1.328097281965357e-06,
+ "loss": 1.978,
+ "step": 194
+ },
+ {
+ "epoch": 0.9330143540669856,
+ "grad_norm": 0.9729501605033875,
+ "learning_rate": 1.157584112019966e-06,
+ "loss": 2.0224,
+ "step": 195
+ },
+ {
+ "epoch": 0.937799043062201,
+ "grad_norm": 1.0963469743728638,
+ "learning_rate": 9.986541263284077e-07,
+ "loss": 1.7795,
+ "step": 196
+ },
+ {
+ "epoch": 0.9425837320574163,
+ "grad_norm": 1.3530256748199463,
+ "learning_rate": 8.513450158049108e-07,
+ "loss": 1.9669,
+ "step": 197
+ },
+ {
+ "epoch": 0.9473684210526315,
+ "grad_norm": 1.6831023693084717,
+ "learning_rate": 7.156917154243048e-07,
+ "loss": 1.9834,
+ "step": 198
+ },
+ {
+ "epoch": 0.9521531100478469,
+ "grad_norm": 1.99193274974823,
+ "learning_rate": 5.917263959370312e-07,
+ "loss": 2.0608,
+ "step": 199
+ },
+ {
+ "epoch": 0.9569377990430622,
+ "grad_norm": 2.2735681533813477,
+ "learning_rate": 4.794784562397458e-07,
+ "loss": 1.5297,
+ "step": 200
+ },
+ {
+ "epoch": 0.9617224880382775,
+ "grad_norm": 0.24847882986068726,
+ "learning_rate": 3.7897451640321323e-07,
+ "loss": 1.4625,
+ "step": 201
+ },
+ {
+ "epoch": 0.9665071770334929,
+ "grad_norm": 0.2849666178226471,
+ "learning_rate": 2.902384113592782e-07,
+ "loss": 1.7225,
+ "step": 202
+ },
+ {
+ "epoch": 0.9712918660287081,
+ "grad_norm": 0.3164761960506439,
+ "learning_rate": 2.1329118524827662e-07,
+ "loss": 1.4438,
+ "step": 203
+ },
+ {
+ "epoch": 0.9760765550239234,
+ "grad_norm": 0.34920385479927063,
+ "learning_rate": 1.481510864283553e-07,
+ "loss": 1.6457,
+ "step": 204
+ },
+ {
+ "epoch": 0.9808612440191388,
+ "grad_norm": 0.42475542426109314,
+ "learning_rate": 9.483356314779479e-08,
+ "loss": 1.7947,
+ "step": 205
+ },
+ {
+ "epoch": 0.9856459330143541,
+ "grad_norm": 0.5095606446266174,
+ "learning_rate": 5.3351259881379014e-08,
+ "loss": 1.8242,
+ "step": 206
+ },
+ {
+ "epoch": 0.9904306220095693,
+ "grad_norm": 0.6890068054199219,
+ "learning_rate": 2.371401433170495e-08,
+ "loss": 2.0936,
+ "step": 207
+ },
+ {
+ "epoch": 0.9952153110047847,
+ "grad_norm": 0.8535192608833313,
+ "learning_rate": 5.928855096154484e-09,
+ "loss": 1.5928,
+ "step": 208
+ },
+ {
+ "epoch": 1.0,
+ "grad_norm": 1.58793306350708,
+ "learning_rate": 0.0,
+ "loss": 1.6603,
+ "step": 209
 }
 ],
 "logging_steps": 1,
@@ -1134,12 +1484,12 @@
 "should_evaluate": false,
 "should_log": false,
 "should_save": true,
- "should_training_stop": false
+ "should_training_stop": true
 },
 "attributes": {}
 }
 },
- "total_flos": 2.1176676280958976e+16,
+ "total_flos": 2.769507127708877e+16,
 "train_batch_size": 4,
 "trial_name": null,
 "trial_params": null