whiteapple8222 commited on
Commit
fa8bba6
·
verified ·
1 Parent(s): 3a83561

Training in progress, step 209, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dee75951899355ab4767e3c2e79760dab1ba10394af8381b63df31cd287be9f0
3
  size 36981072
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:596dd3d653f65dbf33f78de413c736e9c579f229a7fca10711c61757995a6915
3
  size 36981072
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df4e95c0e60f3e0684c125f9c39e0736e583ec69441d200d0ee7bf9fff117d73
3
  size 19859140
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3de72dbd2b05ef0e8f06f2955a324f494281230d2548ec60ea57db9889162d43
3
  size 19859140
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:299bec1be8e3127922a76464b06da5f7e4edb847830ce04c77eb12728fc77775
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac5c70cc4249ce78f1de16abfd76c61b15f83edba2de982644545ea9b6120ee6
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c6151dd7288eb98bf063d6d1d782a1bec4b1d6191cf4071752734d494723af6
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4083f9205f14e306d566b4a8d0cb4a12225c61d8b6993dde499f090442c7be50
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.7607655502392344,
5
  "eval_steps": 500,
6
- "global_step": 159,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1120,6 +1120,356 @@
1120
  "learning_rate": 1.4104403846777909e-05,
1121
  "loss": 1.4903,
1122
  "step": 159
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1123
  }
1124
  ],
1125
  "logging_steps": 1,
@@ -1134,12 +1484,12 @@
1134
  "should_evaluate": false,
1135
  "should_log": false,
1136
  "should_save": true,
1137
- "should_training_stop": false
1138
  },
1139
  "attributes": {}
1140
  }
1141
  },
1142
- "total_flos": 2.1176676280958976e+16,
1143
  "train_batch_size": 4,
1144
  "trial_name": null,
1145
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
  "eval_steps": 500,
6
+ "global_step": 209,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1120
  "learning_rate": 1.4104403846777909e-05,
1121
  "loss": 1.4903,
1122
  "step": 159
1123
+ },
1124
+ {
1125
+ "epoch": 0.7655502392344498,
1126
+ "grad_norm": 0.2952757477760315,
1127
+ "learning_rate": 1.3572659544410494e-05,
1128
+ "loss": 1.5378,
1129
+ "step": 160
1130
+ },
1131
+ {
1132
+ "epoch": 0.7703349282296651,
1133
+ "grad_norm": 0.311739057302475,
1134
+ "learning_rate": 1.3049554138967051e-05,
1135
+ "loss": 1.5632,
1136
+ "step": 161
1137
+ },
1138
+ {
1139
+ "epoch": 0.7751196172248804,
1140
+ "grad_norm": 0.32148003578186035,
1141
+ "learning_rate": 1.2535211687093535e-05,
1142
+ "loss": 1.5378,
1143
+ "step": 162
1144
+ },
1145
+ {
1146
+ "epoch": 0.7799043062200957,
1147
+ "grad_norm": 0.326665461063385,
1148
+ "learning_rate": 1.202975416726464e-05,
1149
+ "loss": 1.5604,
1150
+ "step": 163
1151
+ },
1152
+ {
1153
+ "epoch": 0.784688995215311,
1154
+ "grad_norm": 0.33070483803749084,
1155
+ "learning_rate": 1.1533301450856054e-05,
1156
+ "loss": 1.4631,
1157
+ "step": 164
1158
+ },
1159
+ {
1160
+ "epoch": 0.7894736842105263,
1161
+ "grad_norm": 0.34155699610710144,
1162
+ "learning_rate": 1.1045971273716477e-05,
1163
+ "loss": 1.53,
1164
+ "step": 165
1165
+ },
1166
+ {
1167
+ "epoch": 0.7942583732057417,
1168
+ "grad_norm": 0.3466791808605194,
1169
+ "learning_rate": 1.0567879208246084e-05,
1170
+ "loss": 1.5503,
1171
+ "step": 166
1172
+ },
1173
+ {
1174
+ "epoch": 0.7990430622009569,
1175
+ "grad_norm": 0.3450649082660675,
1176
+ "learning_rate": 1.0099138635988026e-05,
1177
+ "loss": 1.5562,
1178
+ "step": 167
1179
+ },
1180
+ {
1181
+ "epoch": 0.8038277511961722,
1182
+ "grad_norm": 0.3394903242588043,
1183
+ "learning_rate": 9.639860720739525e-06,
1184
+ "loss": 1.3966,
1185
+ "step": 168
1186
+ },
1187
+ {
1188
+ "epoch": 0.8086124401913876,
1189
+ "grad_norm": 0.34013882279396057,
1190
+ "learning_rate": 9.190154382188921e-06,
1191
+ "loss": 1.4578,
1192
+ "step": 169
1193
+ },
1194
+ {
1195
+ "epoch": 0.8133971291866029,
1196
+ "grad_norm": 0.40502288937568665,
1197
+ "learning_rate": 8.75012627008489e-06,
1198
+ "loss": 1.4135,
1199
+ "step": 170
1200
+ },
1201
+ {
1202
+ "epoch": 0.8181818181818182,
1203
+ "grad_norm": 0.3667527437210083,
1204
+ "learning_rate": 8.31988073894403e-06,
1205
+ "loss": 1.539,
1206
+ "step": 171
1207
+ },
1208
+ {
1209
+ "epoch": 0.8229665071770335,
1210
+ "grad_norm": 0.39868226647377014,
1211
+ "learning_rate": 7.899519823302743e-06,
1212
+ "loss": 1.7387,
1213
+ "step": 172
1214
+ },
1215
+ {
1216
+ "epoch": 0.8277511961722488,
1217
+ "grad_norm": 0.3658325672149658,
1218
+ "learning_rate": 7.489143213519301e-06,
1219
+ "loss": 1.5761,
1220
+ "step": 173
1221
+ },
1222
+ {
1223
+ "epoch": 0.8325358851674641,
1224
+ "grad_norm": 0.3974853456020355,
1225
+ "learning_rate": 7.088848232131861e-06,
1226
+ "loss": 1.5148,
1227
+ "step": 174
1228
+ },
1229
+ {
1230
+ "epoch": 0.8373205741626795,
1231
+ "grad_norm": 0.4094543755054474,
1232
+ "learning_rate": 6.698729810778065e-06,
1233
+ "loss": 1.5331,
1234
+ "step": 175
1235
+ },
1236
+ {
1237
+ "epoch": 0.8421052631578947,
1238
+ "grad_norm": 0.4123484790325165,
1239
+ "learning_rate": 6.318880467681526e-06,
1240
+ "loss": 1.7296,
1241
+ "step": 176
1242
+ },
1243
+ {
1244
+ "epoch": 0.84688995215311,
1245
+ "grad_norm": 0.44035616517066956,
1246
+ "learning_rate": 5.949390285710776e-06,
1247
+ "loss": 1.6646,
1248
+ "step": 177
1249
+ },
1250
+ {
1251
+ "epoch": 0.8516746411483254,
1252
+ "grad_norm": 0.40625545382499695,
1253
+ "learning_rate": 5.590346891015758e-06,
1254
+ "loss": 1.5258,
1255
+ "step": 178
1256
+ },
1257
+ {
1258
+ "epoch": 0.8564593301435407,
1259
+ "grad_norm": 0.5013072490692139,
1260
+ "learning_rate": 5.241835432246889e-06,
1261
+ "loss": 1.8122,
1262
+ "step": 179
1263
+ },
1264
+ {
1265
+ "epoch": 0.861244019138756,
1266
+ "grad_norm": 0.4660516083240509,
1267
+ "learning_rate": 4.903938560361698e-06,
1268
+ "loss": 1.5325,
1269
+ "step": 180
1270
+ },
1271
+ {
1272
+ "epoch": 0.8660287081339713,
1273
+ "grad_norm": 0.48506811261177063,
1274
+ "learning_rate": 4.576736409023813e-06,
1275
+ "loss": 1.7429,
1276
+ "step": 181
1277
+ },
1278
+ {
1279
+ "epoch": 0.8708133971291866,
1280
+ "grad_norm": 0.5250177979469299,
1281
+ "learning_rate": 4.260306575598949e-06,
1282
+ "loss": 1.6214,
1283
+ "step": 182
1284
+ },
1285
+ {
1286
+ "epoch": 0.8755980861244019,
1287
+ "grad_norm": 0.5322927236557007,
1288
+ "learning_rate": 3.954724102752316e-06,
1289
+ "loss": 1.732,
1290
+ "step": 183
1291
+ },
1292
+ {
1293
+ "epoch": 0.8803827751196173,
1294
+ "grad_norm": 0.5320829153060913,
1295
+ "learning_rate": 3.660061460651981e-06,
1296
+ "loss": 1.8157,
1297
+ "step": 184
1298
+ },
1299
+ {
1300
+ "epoch": 0.8851674641148325,
1301
+ "grad_norm": 0.5766329169273376,
1302
+ "learning_rate": 3.376388529782215e-06,
1303
+ "loss": 1.9319,
1304
+ "step": 185
1305
+ },
1306
+ {
1307
+ "epoch": 0.8899521531100478,
1308
+ "grad_norm": 0.5621324777603149,
1309
+ "learning_rate": 3.1037725843711062e-06,
1310
+ "loss": 1.7481,
1311
+ "step": 186
1312
+ },
1313
+ {
1314
+ "epoch": 0.8947368421052632,
1315
+ "grad_norm": 0.654734194278717,
1316
+ "learning_rate": 2.842278276436128e-06,
1317
+ "loss": 2.079,
1318
+ "step": 187
1319
+ },
1320
+ {
1321
+ "epoch": 0.8995215311004785,
1322
+ "grad_norm": 0.6522572636604309,
1323
+ "learning_rate": 2.591967620451707e-06,
1324
+ "loss": 1.8302,
1325
+ "step": 188
1326
+ },
1327
+ {
1328
+ "epoch": 0.9043062200956937,
1329
+ "grad_norm": 0.7326610088348389,
1330
+ "learning_rate": 2.3528999786421756e-06,
1331
+ "loss": 1.9044,
1332
+ "step": 189
1333
+ },
1334
+ {
1335
+ "epoch": 0.9090909090909091,
1336
+ "grad_norm": 0.7005221247673035,
1337
+ "learning_rate": 2.1251320469037827e-06,
1338
+ "loss": 1.9704,
1339
+ "step": 190
1340
+ },
1341
+ {
1342
+ "epoch": 0.9138755980861244,
1343
+ "grad_norm": 0.6769828200340271,
1344
+ "learning_rate": 1.908717841359048e-06,
1345
+ "loss": 1.7863,
1346
+ "step": 191
1347
+ },
1348
+ {
1349
+ "epoch": 0.9186602870813397,
1350
+ "grad_norm": 0.7701701521873474,
1351
+ "learning_rate": 1.70370868554659e-06,
1352
+ "loss": 1.8417,
1353
+ "step": 192
1354
+ },
1355
+ {
1356
+ "epoch": 0.9234449760765551,
1357
+ "grad_norm": 0.781731903553009,
1358
+ "learning_rate": 1.5101531982495308e-06,
1359
+ "loss": 1.7861,
1360
+ "step": 193
1361
+ },
1362
+ {
1363
+ "epoch": 0.9282296650717703,
1364
+ "grad_norm": 0.9508692026138306,
1365
+ "learning_rate": 1.328097281965357e-06,
1366
+ "loss": 1.975,
1367
+ "step": 194
1368
+ },
1369
+ {
1370
+ "epoch": 0.9330143540669856,
1371
+ "grad_norm": 0.9699769616127014,
1372
+ "learning_rate": 1.157584112019966e-06,
1373
+ "loss": 2.0208,
1374
+ "step": 195
1375
+ },
1376
+ {
1377
+ "epoch": 0.937799043062201,
1378
+ "grad_norm": 1.1134250164031982,
1379
+ "learning_rate": 9.986541263284077e-07,
1380
+ "loss": 1.7791,
1381
+ "step": 196
1382
+ },
1383
+ {
1384
+ "epoch": 0.9425837320574163,
1385
+ "grad_norm": 1.3487857580184937,
1386
+ "learning_rate": 8.513450158049108e-07,
1387
+ "loss": 1.9681,
1388
+ "step": 197
1389
+ },
1390
+ {
1391
+ "epoch": 0.9473684210526315,
1392
+ "grad_norm": 1.709845781326294,
1393
+ "learning_rate": 7.156917154243048e-07,
1394
+ "loss": 1.983,
1395
+ "step": 198
1396
+ },
1397
+ {
1398
+ "epoch": 0.9521531100478469,
1399
+ "grad_norm": 2.0014631748199463,
1400
+ "learning_rate": 5.917263959370312e-07,
1401
+ "loss": 2.0637,
1402
+ "step": 199
1403
+ },
1404
+ {
1405
+ "epoch": 0.9569377990430622,
1406
+ "grad_norm": 2.2905828952789307,
1407
+ "learning_rate": 4.794784562397458e-07,
1408
+ "loss": 1.5265,
1409
+ "step": 200
1410
+ },
1411
+ {
1412
+ "epoch": 0.9617224880382775,
1413
+ "grad_norm": 0.2522173821926117,
1414
+ "learning_rate": 3.7897451640321323e-07,
1415
+ "loss": 1.4622,
1416
+ "step": 201
1417
+ },
1418
+ {
1419
+ "epoch": 0.9665071770334929,
1420
+ "grad_norm": 0.2915673851966858,
1421
+ "learning_rate": 2.902384113592782e-07,
1422
+ "loss": 1.7226,
1423
+ "step": 202
1424
+ },
1425
+ {
1426
+ "epoch": 0.9712918660287081,
1427
+ "grad_norm": 0.31829988956451416,
1428
+ "learning_rate": 2.1329118524827662e-07,
1429
+ "loss": 1.444,
1430
+ "step": 203
1431
+ },
1432
+ {
1433
+ "epoch": 0.9760765550239234,
1434
+ "grad_norm": 0.35566747188568115,
1435
+ "learning_rate": 1.481510864283553e-07,
1436
+ "loss": 1.6455,
1437
+ "step": 204
1438
+ },
1439
+ {
1440
+ "epoch": 0.9808612440191388,
1441
+ "grad_norm": 0.43001261353492737,
1442
+ "learning_rate": 9.483356314779479e-08,
1443
+ "loss": 1.7939,
1444
+ "step": 205
1445
+ },
1446
+ {
1447
+ "epoch": 0.9856459330143541,
1448
+ "grad_norm": 0.5118626356124878,
1449
+ "learning_rate": 5.3351259881379014e-08,
1450
+ "loss": 1.823,
1451
+ "step": 206
1452
+ },
1453
+ {
1454
+ "epoch": 0.9904306220095693,
1455
+ "grad_norm": 0.6911094784736633,
1456
+ "learning_rate": 2.371401433170495e-08,
1457
+ "loss": 2.0949,
1458
+ "step": 207
1459
+ },
1460
+ {
1461
+ "epoch": 0.9952153110047847,
1462
+ "grad_norm": 0.847305953502655,
1463
+ "learning_rate": 5.928855096154484e-09,
1464
+ "loss": 1.5913,
1465
+ "step": 208
1466
+ },
1467
+ {
1468
+ "epoch": 1.0,
1469
+ "grad_norm": 1.5929991006851196,
1470
+ "learning_rate": 0.0,
1471
+ "loss": 1.6612,
1472
+ "step": 209
1473
  }
1474
  ],
1475
  "logging_steps": 1,
 
1484
  "should_evaluate": false,
1485
  "should_log": false,
1486
  "should_save": true,
1487
+ "should_training_stop": true
1488
  },
1489
  "attributes": {}
1490
  }
1491
  },
1492
+ "total_flos": 2.769507127708877e+16,
1493
  "train_batch_size": 4,
1494
  "trial_name": null,
1495
  "trial_params": null