error577 committed
Commit 680bf84 · verified · 1 Parent(s): 43e4779

Training in progress, step 180, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:189a826c21fcc12d4d822272c9376eb9989b7d2c52eae6a814ab821e7de631b0
+oid sha256:799556006953c73a45e90e797b58f63be6f530d019bb09a777b7155cf57980bb
 size 1579384
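
adapter_model.safetensors holds the PEFT/LoRA adapter weights for this checkpoint. A minimal sketch of loading it with the peft library follows; the base model name is a placeholder (the commit does not identify it), and it assumes adapter_config.json is saved alongside the weights in last-checkpoint/.

```python
# Minimal sketch: load the adapter saved in last-checkpoint/.
# "base-model-name" is a placeholder -- the commit does not say which
# base model this adapter was trained against.
from transformers import AutoModelForCausalLM
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained("base-model-name")  # assumption: causal LM base
model = PeftModel.from_pretrained(base, "last-checkpoint")      # reads adapter_model.safetensors
```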
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d19d42888a784db5d3792889b6d90f80c505a9dbce4098d0ae5d04ef2168686c
+oid sha256:5f7bacefd93d57704a5538284a108f017622551a8d596a1be59020121599ae7f
 size 857274
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f97b059151fb3e9951195534ddd111a9c4803b6431cdba0bb72c51bd50ade686
+oid sha256:7829b1f33d7acc9a2062209cdbdfebc987f17f530b32db2490f0d144949302ef
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d554bdcf1252a9ad2f8bf8ecd99330af5af219432c92d1857da98ea0ae84e1df
+oid sha256:0ddea288df3ceca5e83f8bb3470637ce8a6718abdc75c29562146104222fc96c
 size 1064
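
Each file above is stored as a Git LFS pointer: the repository itself tracks only the spec version, the sha256 oid, and the byte size, while the actual blob lives in LFS storage. A small sketch for checking a downloaded blob against the pointer values is below; the path and the choice of the new adapter_model.safetensors oid/size are illustrative.

```python
# Sketch: verify a downloaded blob against the oid/size recorded in a
# Git LFS pointer file like the ones in this commit.
import hashlib


def verify_lfs_blob(blob_path: str, expected_oid: str, expected_size: int) -> bool:
    """Return True if the file matches the sha256 oid and byte size from the pointer."""
    h = hashlib.sha256()
    size = 0
    with open(blob_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
            size += len(chunk)
    return h.hexdigest() == expected_oid and size == expected_size


# Values taken from the new adapter_model.safetensors pointer in this commit.
ok = verify_lfs_blob(
    "last-checkpoint/adapter_model.safetensors",
    "799556006953c73a45e90e797b58f63be6f530d019bb09a777b7155cf57980bb",
    1579384,
)
print("checksum ok:", ok)
```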
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.10564978746234163,
+  "epoch": 0.11885601089513433,
   "eval_steps": 20,
-  "global_step": 160,
+  "global_step": 180,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1199,6 +1199,154 @@
       "eval_samples_per_second": 75.524,
       "eval_steps_per_second": 75.524,
       "step": 160
+    },
+    {
+      "epoch": 0.10631009863398126,
+      "grad_norm": 17918.177734375,
+      "learning_rate": 0.00014025709338548836,
+      "loss": 90.484,
+      "step": 161
+    },
+    {
+      "epoch": 0.1069704098056209,
+      "grad_norm": 16031.154296875,
+      "learning_rate": 0.00013863616643207844,
+      "loss": 100.7584,
+      "step": 162
+    },
+    {
+      "epoch": 0.10763072097726054,
+      "grad_norm": 12420.103515625,
+      "learning_rate": 0.00013701657307492235,
+      "loss": 94.4867,
+      "step": 163
+    },
+    {
+      "epoch": 0.10829103214890017,
+      "grad_norm": 47068.62109375,
+      "learning_rate": 0.00013539850338048154,
+      "loss": 103.3284,
+      "step": 164
+    },
+    {
+      "epoch": 0.10895134332053981,
+      "grad_norm": 9929.9248046875,
+      "learning_rate": 0.00013378214723640876,
+      "loss": 86.4407,
+      "step": 165
+    },
+    {
+      "epoch": 0.10961165449217944,
+      "grad_norm": 11515.26171875,
+      "learning_rate": 0.00013216769432926404,
+      "loss": 90.3954,
+      "step": 166
+    },
+    {
+      "epoch": 0.11027196566381907,
+      "grad_norm": 18805.5,
+      "learning_rate": 0.00013055533412225422,
+      "loss": 87.9701,
+      "step": 167
+    },
+    {
+      "epoch": 0.11093227683545871,
+      "grad_norm": 11308.2529296875,
+      "learning_rate": 0.00012894525583299833,
+      "loss": 85.8245,
+      "step": 168
+    },
+    {
+      "epoch": 0.11159258800709834,
+      "grad_norm": 13893.7822265625,
+      "learning_rate": 0.0001273376484113225,
+      "loss": 91.9876,
+      "step": 169
+    },
+    {
+      "epoch": 0.11225289917873799,
+      "grad_norm": 13449.5009765625,
+      "learning_rate": 0.0001257327005170853,
+      "loss": 95.4308,
+      "step": 170
+    },
+    {
+      "epoch": 0.11291321035037762,
+      "grad_norm": 26069.693359375,
+      "learning_rate": 0.00012413060049803814,
+      "loss": 91.0174,
+      "step": 171
+    },
+    {
+      "epoch": 0.11357352152201725,
+      "grad_norm": 36458.125,
+      "learning_rate": 0.00012253153636772156,
+      "loss": 104.676,
+      "step": 172
+    },
+    {
+      "epoch": 0.11423383269365689,
+      "grad_norm": 235943.09375,
+      "learning_rate": 0.00012093569578340124,
+      "loss": 402.9973,
+      "step": 173
+    },
+    {
+      "epoch": 0.11489414386529652,
+      "grad_norm": 85501.4453125,
+      "learning_rate": 0.00011934326602404528,
+      "loss": 432.0625,
+      "step": 174
+    },
+    {
+      "epoch": 0.11555445503693616,
+      "grad_norm": 311014.3125,
+      "learning_rate": 0.00011775443396834638,
+      "loss": 491.4307,
+      "step": 175
+    },
+    {
+      "epoch": 0.11621476620857579,
+      "grad_norm": 644604.875,
+      "learning_rate": 0.00011616938607279086,
+      "loss": 401.1875,
+      "step": 176
+    },
+    {
+      "epoch": 0.11687507738021542,
+      "grad_norm": 140106.71875,
+      "learning_rate": 0.00011458830834977698,
+      "loss": 451.2969,
+      "step": 177
+    },
+    {
+      "epoch": 0.11753538855185507,
+      "grad_norm": 374728.0,
+      "learning_rate": 0.0001130113863457857,
+      "loss": 370.6055,
+      "step": 178
+    },
+    {
+      "epoch": 0.1181956997234947,
+      "grad_norm": 1075288.75,
+      "learning_rate": 0.00011143880511960584,
+      "loss": 460.9453,
+      "step": 179
+    },
+    {
+      "epoch": 0.11885601089513433,
+      "grad_norm": 139839.53125,
+      "learning_rate": 0.00010987074922061689,
+      "loss": 257.9727,
+      "step": 180
+    },
+    {
+      "epoch": 0.11885601089513433,
+      "eval_loss": 9.81284236907959,
+      "eval_runtime": 6.5972,
+      "eval_samples_per_second": 75.032,
+      "eval_steps_per_second": 75.032,
+      "step": 180
     }
   ],
   "logging_steps": 1,
@@ -1218,7 +1366,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 105203669925888.0,
+  "total_flos": 120975595143168.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null