whiteapple8222 committed on
Commit 77048db · verified · 1 Parent(s): eb79e47

Training in progress, step 1650, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:99453ce3647f7f67c815e24c7ef6884d87966f6fde3dc864d3fdeb8ceb5402dc
+ oid sha256:6029a505f307e3098b30acc19cdd7ba452e55709d8c353bc4a3f4f8ba146e277
  size 131146352
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:4122d97521c4b1ac8196676b5c97171f0ca5b898a992617d05c09f326323fb87
+ oid sha256:8e748695332f398e0372a0342f533eda6dda257cbd0c6ff0c31662fd1d9df830
  size 67210516
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:23d65b844605b218e76224da81a61b12185a4cb03dea7c30d549d1f1f09b6639
+ oid sha256:c2eb5cad9f3cbd36a844058d2bce505f26319b38c69d84d8607ffff4425c91e1
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9dcc60aab8f1774b8f7c01957b9e8831a30bbaac431674250ca87e625af546e7
+ oid sha256:03bf9dc9befb01615f74b39d2b43ebf93f55dc1a1259dddadf80e9de69443c5a
  size 1064
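
Each of the four files above is stored via Git LFS, so the repository only tracks a three-line pointer (version, `oid sha256:`, size). This commit replaces the `oid` of every pointer while the sizes stay constant, i.e. each blob was rewritten with new weights/optimizer state of identical byte length. Below is a minimal sketch of checking a downloaded blob against its pointer; the function names and local paths are illustrative assumptions, not part of this commit.

```python
import hashlib
from pathlib import Path

def parse_lfs_pointer(text: str) -> dict:
    """Split the 'key value' lines of a Git LFS pointer file into a dict."""
    return dict(line.split(" ", 1) for line in text.strip().splitlines())

def verify_blob(pointer_path: str, blob_path: str) -> bool:
    """Check that a blob matches the sha256 oid and byte size in its pointer."""
    fields = parse_lfs_pointer(Path(pointer_path).read_text())
    expected_oid = fields["oid"].removeprefix("sha256:")
    expected_size = int(fields["size"])
    data = Path(blob_path).read_bytes()
    return len(data) == expected_size and hashlib.sha256(data).hexdigest() == expected_oid

# Hypothetical usage -- actual paths depend on how the repo was fetched:
# verify_blob("pointers/adapter_model.safetensors",
#             "last-checkpoint/adapter_model.safetensors")
```
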
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.04297840335231546,
+ "epoch": 0.04432147845707532,
  "eval_steps": 500,
- "global_step": 1600,
+ "global_step": 1650,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -11207,6 +11207,356 @@
  "learning_rate": 9.99999984307167e-05,
  "loss": 3.5878,
  "step": 1600
+ },
+ {
+ "epoch": 0.04300526485441066,
+ "grad_norm": 1.3168871402740479,
+ "learning_rate": 9.999999842874834e-05,
+ "loss": 2.8627,
+ "step": 1601
+ },
+ {
+ "epoch": 0.043032126356505856,
+ "grad_norm": 1.3844327926635742,
+ "learning_rate": 9.999999842677874e-05,
+ "loss": 2.8701,
+ "step": 1602
+ },
+ {
+ "epoch": 0.04305898785860105,
+ "grad_norm": 1.3635506629943848,
+ "learning_rate": 9.999999842480789e-05,
+ "loss": 2.6733,
+ "step": 1603
+ },
+ {
+ "epoch": 0.04308584936069625,
+ "grad_norm": 1.3588570356369019,
+ "learning_rate": 9.999999842283582e-05,
+ "loss": 2.8806,
+ "step": 1604
+ },
+ {
+ "epoch": 0.04311271086279145,
+ "grad_norm": 1.3737713098526,
+ "learning_rate": 9.999999842086252e-05,
+ "loss": 2.9025,
+ "step": 1605
+ },
+ {
+ "epoch": 0.04313957236488664,
+ "grad_norm": 1.40369713306427,
+ "learning_rate": 9.999999841888799e-05,
+ "loss": 2.8484,
+ "step": 1606
+ },
+ {
+ "epoch": 0.043166433866981845,
+ "grad_norm": 1.2884548902511597,
+ "learning_rate": 9.99999984169122e-05,
+ "loss": 2.7603,
+ "step": 1607
+ },
+ {
+ "epoch": 0.04319329536907704,
+ "grad_norm": 1.351904273033142,
+ "learning_rate": 9.99999984149352e-05,
+ "loss": 3.0526,
+ "step": 1608
+ },
+ {
+ "epoch": 0.043220156871172234,
+ "grad_norm": 1.4556187391281128,
+ "learning_rate": 9.999999841295697e-05,
+ "loss": 3.0934,
+ "step": 1609
+ },
+ {
+ "epoch": 0.043247018373267436,
+ "grad_norm": 1.3308874368667603,
+ "learning_rate": 9.999999841097748e-05,
+ "loss": 2.7632,
+ "step": 1610
+ },
+ {
+ "epoch": 0.04327387987536263,
+ "grad_norm": 1.3838894367218018,
+ "learning_rate": 9.999999840899678e-05,
+ "loss": 3.0159,
+ "step": 1611
+ },
+ {
+ "epoch": 0.043300741377457826,
+ "grad_norm": 1.504431962966919,
+ "learning_rate": 9.999999840701483e-05,
+ "loss": 3.0511,
+ "step": 1612
+ },
+ {
+ "epoch": 0.04332760287955303,
+ "grad_norm": 1.2878671884536743,
+ "learning_rate": 9.999999840503167e-05,
+ "loss": 2.8596,
+ "step": 1613
+ },
+ {
+ "epoch": 0.04335446438164822,
+ "grad_norm": 1.3833988904953003,
+ "learning_rate": 9.999999840304725e-05,
+ "loss": 2.7807,
+ "step": 1614
+ },
+ {
+ "epoch": 0.04338132588374342,
+ "grad_norm": 1.3519421815872192,
+ "learning_rate": 9.999999840106163e-05,
+ "loss": 2.7777,
+ "step": 1615
+ },
+ {
+ "epoch": 0.04340818738583862,
+ "grad_norm": 1.4081404209136963,
+ "learning_rate": 9.999999839907475e-05,
+ "loss": 2.7198,
+ "step": 1616
+ },
+ {
+ "epoch": 0.043435048887933814,
+ "grad_norm": 1.3711954355239868,
+ "learning_rate": 9.999999839708662e-05,
+ "loss": 2.717,
+ "step": 1617
+ },
+ {
+ "epoch": 0.04346191039002901,
+ "grad_norm": 1.3935779333114624,
+ "learning_rate": 9.999999839509728e-05,
+ "loss": 3.0255,
+ "step": 1618
+ },
+ {
+ "epoch": 0.04348877189212421,
+ "grad_norm": 1.4566105604171753,
+ "learning_rate": 9.99999983931067e-05,
+ "loss": 3.1218,
+ "step": 1619
+ },
+ {
+ "epoch": 0.043515633394219405,
+ "grad_norm": 1.4367201328277588,
+ "learning_rate": 9.99999983911149e-05,
+ "loss": 3.0931,
+ "step": 1620
+ },
+ {
+ "epoch": 0.0435424948963146,
+ "grad_norm": 1.390453815460205,
+ "learning_rate": 9.999999838912187e-05,
+ "loss": 2.903,
+ "step": 1621
+ },
+ {
+ "epoch": 0.0435693563984098,
+ "grad_norm": 1.2955039739608765,
+ "learning_rate": 9.999999838712759e-05,
+ "loss": 2.9835,
+ "step": 1622
+ },
+ {
+ "epoch": 0.043596217900505,
+ "grad_norm": 1.5156861543655396,
+ "learning_rate": 9.999999838513207e-05,
+ "loss": 3.1388,
+ "step": 1623
+ },
+ {
+ "epoch": 0.04362307940260019,
+ "grad_norm": 1.4825620651245117,
+ "learning_rate": 9.999999838313532e-05,
+ "loss": 3.1583,
+ "step": 1624
+ },
+ {
+ "epoch": 0.04364994090469539,
+ "grad_norm": 1.369698166847229,
+ "learning_rate": 9.999999838113734e-05,
+ "loss": 2.9362,
+ "step": 1625
+ },
+ {
+ "epoch": 0.04367680240679059,
+ "grad_norm": 1.3391295671463013,
+ "learning_rate": 9.999999837913813e-05,
+ "loss": 2.8266,
+ "step": 1626
+ },
+ {
+ "epoch": 0.04370366390888578,
+ "grad_norm": 1.4127203226089478,
+ "learning_rate": 9.999999837713768e-05,
+ "loss": 2.827,
+ "step": 1627
+ },
+ {
+ "epoch": 0.043730525410980985,
+ "grad_norm": 1.542043685913086,
+ "learning_rate": 9.999999837513601e-05,
+ "loss": 3.0887,
+ "step": 1628
+ },
+ {
+ "epoch": 0.04375738691307618,
+ "grad_norm": 1.513839602470398,
+ "learning_rate": 9.99999983731331e-05,
+ "loss": 3.0853,
+ "step": 1629
+ },
+ {
+ "epoch": 0.043784248415171374,
+ "grad_norm": 1.4729801416397095,
+ "learning_rate": 9.999999837112895e-05,
+ "loss": 2.98,
+ "step": 1630
+ },
+ {
+ "epoch": 0.043811109917266576,
+ "grad_norm": 1.509283185005188,
+ "learning_rate": 9.999999836912355e-05,
+ "loss": 3.2404,
+ "step": 1631
+ },
+ {
+ "epoch": 0.04383797141936177,
+ "grad_norm": 1.543927550315857,
+ "learning_rate": 9.999999836711694e-05,
+ "loss": 3.119,
+ "step": 1632
+ },
+ {
+ "epoch": 0.043864832921456966,
+ "grad_norm": 1.5025025606155396,
+ "learning_rate": 9.999999836510909e-05,
+ "loss": 3.1039,
+ "step": 1633
+ },
+ {
+ "epoch": 0.04389169442355217,
+ "grad_norm": 1.3757987022399902,
+ "learning_rate": 9.999999836310001e-05,
+ "loss": 2.8637,
+ "step": 1634
+ },
+ {
+ "epoch": 0.04391855592564736,
+ "grad_norm": 1.5544795989990234,
+ "learning_rate": 9.99999983610897e-05,
+ "loss": 3.0113,
+ "step": 1635
+ },
+ {
+ "epoch": 0.04394541742774256,
+ "grad_norm": 1.4375749826431274,
+ "learning_rate": 9.999999835907815e-05,
+ "loss": 2.9539,
+ "step": 1636
+ },
+ {
+ "epoch": 0.04397227892983776,
+ "grad_norm": 1.557188868522644,
+ "learning_rate": 9.999999835706537e-05,
+ "loss": 3.1977,
+ "step": 1637
+ },
+ {
+ "epoch": 0.043999140431932954,
+ "grad_norm": 1.6400185823440552,
+ "learning_rate": 9.999999835505136e-05,
+ "loss": 3.1995,
+ "step": 1638
+ },
+ {
+ "epoch": 0.04402600193402815,
+ "grad_norm": 1.4655009508132935,
+ "learning_rate": 9.99999983530361e-05,
+ "loss": 2.9985,
+ "step": 1639
+ },
+ {
+ "epoch": 0.04405286343612335,
+ "grad_norm": 1.4965097904205322,
+ "learning_rate": 9.999999835101961e-05,
+ "loss": 3.1393,
+ "step": 1640
+ },
+ {
+ "epoch": 0.044079724938218545,
+ "grad_norm": 1.5797890424728394,
+ "learning_rate": 9.999999834900189e-05,
+ "loss": 3.0014,
+ "step": 1641
+ },
+ {
+ "epoch": 0.04410658644031374,
+ "grad_norm": 1.567730188369751,
+ "learning_rate": 9.999999834698295e-05,
+ "loss": 2.9215,
+ "step": 1642
+ },
+ {
+ "epoch": 0.04413344794240894,
+ "grad_norm": 1.6687631607055664,
+ "learning_rate": 9.999999834496276e-05,
+ "loss": 3.2531,
+ "step": 1643
+ },
+ {
+ "epoch": 0.04416030944450414,
+ "grad_norm": 1.6272687911987305,
+ "learning_rate": 9.999999834294133e-05,
+ "loss": 3.3399,
+ "step": 1644
+ },
+ {
+ "epoch": 0.04418717094659933,
+ "grad_norm": 1.6043704748153687,
+ "learning_rate": 9.999999834091869e-05,
+ "loss": 3.0772,
+ "step": 1645
+ },
+ {
+ "epoch": 0.04421403244869453,
+ "grad_norm": 1.6957578659057617,
+ "learning_rate": 9.999999833889479e-05,
+ "loss": 3.3066,
+ "step": 1646
+ },
+ {
+ "epoch": 0.04424089395078973,
+ "grad_norm": 1.7268688678741455,
+ "learning_rate": 9.999999833686968e-05,
+ "loss": 3.3125,
+ "step": 1647
+ },
+ {
+ "epoch": 0.04426775545288492,
+ "grad_norm": 1.7406071424484253,
+ "learning_rate": 9.999999833484333e-05,
+ "loss": 3.2264,
+ "step": 1648
+ },
+ {
+ "epoch": 0.044294616954980125,
+ "grad_norm": 1.8623542785644531,
+ "learning_rate": 9.999999833281574e-05,
+ "loss": 3.3522,
+ "step": 1649
+ },
+ {
+ "epoch": 0.04432147845707532,
+ "grad_norm": 1.8407955169677734,
+ "learning_rate": 9.999999833078691e-05,
+ "loss": 3.3308,
+ "step": 1650
  }
  ],
  "logging_steps": 1,
@@ -11226,7 +11576,7 @@
  "attributes": {}
  }
  },
- "total_flos": 2.18358464643072e+18,
+ "total_flos": 2.2518960104669184e+18,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null