Training in progress, step 1700, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 131146352
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:19c29d19ae137d8d9f6d075009b38f48ede5ee69b99c1d40e24e93602a5e42c2
|
3 |
size 131146352
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 67210516
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:53f02bf536b4b6c5ec995aa54e017724d8fb20c583e62cdd2047ff066ff5e86d
|
3 |
size 67210516
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a19b20244c2b147c4db6331bfcd526d49838499d3bb5d4e14d3a604b49d4cdc4
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c12a87648583ab623d82ad394450eed417016fd7c996ca538887d063a9458228
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -11557,6 +11557,356 @@
|
|
11557 |
"learning_rate": 9.999999833078691e-05,
|
11558 |
"loss": 3.3308,
|
11559 |
"step": 1650
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11560 |
}
|
11561 |
],
|
11562 |
"logging_steps": 1,
|
@@ -11576,7 +11926,7 @@
|
|
11576 |
"attributes": {}
|
11577 |
}
|
11578 |
},
|
11579 |
-
"total_flos": 2.
|
11580 |
"train_batch_size": 4,
|
11581 |
"trial_name": null,
|
11582 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.04566455356183518,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 1700,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
11557 |
"learning_rate": 9.999999833078691e-05,
|
11558 |
"loss": 3.3308,
|
11559 |
"step": 1650
|
11560 |
+
},
|
11561 |
+
{
|
11562 |
+
"epoch": 0.044348339959170514,
|
11563 |
+
"grad_norm": 1.4079015254974365,
|
11564 |
+
"learning_rate": 9.999999832875686e-05,
|
11565 |
+
"loss": 2.9442,
|
11566 |
+
"step": 1651
|
11567 |
+
},
|
11568 |
+
{
|
11569 |
+
"epoch": 0.044375201461265716,
|
11570 |
+
"grad_norm": 1.4412444829940796,
|
11571 |
+
"learning_rate": 9.999999832672557e-05,
|
11572 |
+
"loss": 3.038,
|
11573 |
+
"step": 1652
|
11574 |
+
},
|
11575 |
+
{
|
11576 |
+
"epoch": 0.04440206296336091,
|
11577 |
+
"grad_norm": 1.4149682521820068,
|
11578 |
+
"learning_rate": 9.999999832469304e-05,
|
11579 |
+
"loss": 2.7261,
|
11580 |
+
"step": 1653
|
11581 |
+
},
|
11582 |
+
{
|
11583 |
+
"epoch": 0.044428924465456106,
|
11584 |
+
"grad_norm": 1.411676287651062,
|
11585 |
+
"learning_rate": 9.99999983226593e-05,
|
11586 |
+
"loss": 2.9101,
|
11587 |
+
"step": 1654
|
11588 |
+
},
|
11589 |
+
{
|
11590 |
+
"epoch": 0.04445578596755131,
|
11591 |
+
"grad_norm": 1.470566987991333,
|
11592 |
+
"learning_rate": 9.99999983206243e-05,
|
11593 |
+
"loss": 2.77,
|
11594 |
+
"step": 1655
|
11595 |
+
},
|
11596 |
+
{
|
11597 |
+
"epoch": 0.0444826474696465,
|
11598 |
+
"grad_norm": 1.397863507270813,
|
11599 |
+
"learning_rate": 9.999999831858808e-05,
|
11600 |
+
"loss": 2.9582,
|
11601 |
+
"step": 1656
|
11602 |
+
},
|
11603 |
+
{
|
11604 |
+
"epoch": 0.0445095089717417,
|
11605 |
+
"grad_norm": 1.3953698873519897,
|
11606 |
+
"learning_rate": 9.999999831655063e-05,
|
11607 |
+
"loss": 3.0912,
|
11608 |
+
"step": 1657
|
11609 |
+
},
|
11610 |
+
{
|
11611 |
+
"epoch": 0.0445363704738369,
|
11612 |
+
"grad_norm": 1.3832037448883057,
|
11613 |
+
"learning_rate": 9.999999831451192e-05,
|
11614 |
+
"loss": 2.8913,
|
11615 |
+
"step": 1658
|
11616 |
+
},
|
11617 |
+
{
|
11618 |
+
"epoch": 0.044563231975932094,
|
11619 |
+
"grad_norm": 1.3912192583084106,
|
11620 |
+
"learning_rate": 9.9999998312472e-05,
|
11621 |
+
"loss": 2.6594,
|
11622 |
+
"step": 1659
|
11623 |
+
},
|
11624 |
+
{
|
11625 |
+
"epoch": 0.04459009347802729,
|
11626 |
+
"grad_norm": 1.3151278495788574,
|
11627 |
+
"learning_rate": 9.999999831043084e-05,
|
11628 |
+
"loss": 2.6635,
|
11629 |
+
"step": 1660
|
11630 |
+
},
|
11631 |
+
{
|
11632 |
+
"epoch": 0.04461695498012249,
|
11633 |
+
"grad_norm": 1.2868192195892334,
|
11634 |
+
"learning_rate": 9.999999830838846e-05,
|
11635 |
+
"loss": 2.7065,
|
11636 |
+
"step": 1661
|
11637 |
+
},
|
11638 |
+
{
|
11639 |
+
"epoch": 0.044643816482217685,
|
11640 |
+
"grad_norm": 1.387132167816162,
|
11641 |
+
"learning_rate": 9.999999830634483e-05,
|
11642 |
+
"loss": 2.6766,
|
11643 |
+
"step": 1662
|
11644 |
+
},
|
11645 |
+
{
|
11646 |
+
"epoch": 0.04467067798431288,
|
11647 |
+
"grad_norm": 1.4079433679580688,
|
11648 |
+
"learning_rate": 9.999999830429997e-05,
|
11649 |
+
"loss": 2.8444,
|
11650 |
+
"step": 1663
|
11651 |
+
},
|
11652 |
+
{
|
11653 |
+
"epoch": 0.04469753948640808,
|
11654 |
+
"grad_norm": 1.4303300380706787,
|
11655 |
+
"learning_rate": 9.999999830225387e-05,
|
11656 |
+
"loss": 2.9663,
|
11657 |
+
"step": 1664
|
11658 |
+
},
|
11659 |
+
{
|
11660 |
+
"epoch": 0.04472440098850328,
|
11661 |
+
"grad_norm": 1.39901864528656,
|
11662 |
+
"learning_rate": 9.999999830020654e-05,
|
11663 |
+
"loss": 2.9923,
|
11664 |
+
"step": 1665
|
11665 |
+
},
|
11666 |
+
{
|
11667 |
+
"epoch": 0.04475126249059847,
|
11668 |
+
"grad_norm": 1.4272572994232178,
|
11669 |
+
"learning_rate": 9.999999829815798e-05,
|
11670 |
+
"loss": 2.899,
|
11671 |
+
"step": 1666
|
11672 |
+
},
|
11673 |
+
{
|
11674 |
+
"epoch": 0.04477812399269367,
|
11675 |
+
"grad_norm": 1.492218017578125,
|
11676 |
+
"learning_rate": 9.99999982961082e-05,
|
11677 |
+
"loss": 3.0391,
|
11678 |
+
"step": 1667
|
11679 |
+
},
|
11680 |
+
{
|
11681 |
+
"epoch": 0.04480498549478887,
|
11682 |
+
"grad_norm": 1.450843095779419,
|
11683 |
+
"learning_rate": 9.999999829405716e-05,
|
11684 |
+
"loss": 2.9127,
|
11685 |
+
"step": 1668
|
11686 |
+
},
|
11687 |
+
{
|
11688 |
+
"epoch": 0.04483184699688406,
|
11689 |
+
"grad_norm": 1.3440508842468262,
|
11690 |
+
"learning_rate": 9.99999982920049e-05,
|
11691 |
+
"loss": 3.0001,
|
11692 |
+
"step": 1669
|
11693 |
+
},
|
11694 |
+
{
|
11695 |
+
"epoch": 0.044858708498979265,
|
11696 |
+
"grad_norm": 1.4402127265930176,
|
11697 |
+
"learning_rate": 9.999999828995141e-05,
|
11698 |
+
"loss": 2.819,
|
11699 |
+
"step": 1670
|
11700 |
+
},
|
11701 |
+
{
|
11702 |
+
"epoch": 0.04488557000107446,
|
11703 |
+
"grad_norm": 1.4429413080215454,
|
11704 |
+
"learning_rate": 9.999999828789667e-05,
|
11705 |
+
"loss": 3.0246,
|
11706 |
+
"step": 1671
|
11707 |
+
},
|
11708 |
+
{
|
11709 |
+
"epoch": 0.044912431503169654,
|
11710 |
+
"grad_norm": 2.6085400581359863,
|
11711 |
+
"learning_rate": 9.999999828584071e-05,
|
11712 |
+
"loss": 2.9273,
|
11713 |
+
"step": 1672
|
11714 |
+
},
|
11715 |
+
{
|
11716 |
+
"epoch": 0.044939293005264856,
|
11717 |
+
"grad_norm": 1.5993056297302246,
|
11718 |
+
"learning_rate": 9.999999828378352e-05,
|
11719 |
+
"loss": 3.2987,
|
11720 |
+
"step": 1673
|
11721 |
+
},
|
11722 |
+
{
|
11723 |
+
"epoch": 0.04496615450736005,
|
11724 |
+
"grad_norm": 1.5255417823791504,
|
11725 |
+
"learning_rate": 9.999999828172508e-05,
|
11726 |
+
"loss": 2.9552,
|
11727 |
+
"step": 1674
|
11728 |
+
},
|
11729 |
+
{
|
11730 |
+
"epoch": 0.044993016009455246,
|
11731 |
+
"grad_norm": 1.557896614074707,
|
11732 |
+
"learning_rate": 9.999999827966542e-05,
|
11733 |
+
"loss": 3.123,
|
11734 |
+
"step": 1675
|
11735 |
+
},
|
11736 |
+
{
|
11737 |
+
"epoch": 0.04501987751155045,
|
11738 |
+
"grad_norm": 1.558122992515564,
|
11739 |
+
"learning_rate": 9.999999827760452e-05,
|
11740 |
+
"loss": 2.9187,
|
11741 |
+
"step": 1676
|
11742 |
+
},
|
11743 |
+
{
|
11744 |
+
"epoch": 0.04504673901364564,
|
11745 |
+
"grad_norm": 1.425349473953247,
|
11746 |
+
"learning_rate": 9.999999827554239e-05,
|
11747 |
+
"loss": 2.899,
|
11748 |
+
"step": 1677
|
11749 |
+
},
|
11750 |
+
{
|
11751 |
+
"epoch": 0.04507360051574084,
|
11752 |
+
"grad_norm": 1.5926076173782349,
|
11753 |
+
"learning_rate": 9.999999827347903e-05,
|
11754 |
+
"loss": 3.2055,
|
11755 |
+
"step": 1678
|
11756 |
+
},
|
11757 |
+
{
|
11758 |
+
"epoch": 0.04510046201783604,
|
11759 |
+
"grad_norm": 1.6083916425704956,
|
11760 |
+
"learning_rate": 9.999999827141443e-05,
|
11761 |
+
"loss": 2.8889,
|
11762 |
+
"step": 1679
|
11763 |
+
},
|
11764 |
+
{
|
11765 |
+
"epoch": 0.045127323519931234,
|
11766 |
+
"grad_norm": 1.4613057374954224,
|
11767 |
+
"learning_rate": 9.999999826934859e-05,
|
11768 |
+
"loss": 2.968,
|
11769 |
+
"step": 1680
|
11770 |
+
},
|
11771 |
+
{
|
11772 |
+
"epoch": 0.04515418502202643,
|
11773 |
+
"grad_norm": 1.591672420501709,
|
11774 |
+
"learning_rate": 9.999999826728153e-05,
|
11775 |
+
"loss": 3.0252,
|
11776 |
+
"step": 1681
|
11777 |
+
},
|
11778 |
+
{
|
11779 |
+
"epoch": 0.04518104652412163,
|
11780 |
+
"grad_norm": 1.539560317993164,
|
11781 |
+
"learning_rate": 9.999999826521322e-05,
|
11782 |
+
"loss": 2.8637,
|
11783 |
+
"step": 1682
|
11784 |
+
},
|
11785 |
+
{
|
11786 |
+
"epoch": 0.045207908026216825,
|
11787 |
+
"grad_norm": 1.4949159622192383,
|
11788 |
+
"learning_rate": 9.99999982631437e-05,
|
11789 |
+
"loss": 3.05,
|
11790 |
+
"step": 1683
|
11791 |
+
},
|
11792 |
+
{
|
11793 |
+
"epoch": 0.04523476952831202,
|
11794 |
+
"grad_norm": 1.5287278890609741,
|
11795 |
+
"learning_rate": 9.999999826107294e-05,
|
11796 |
+
"loss": 3.0176,
|
11797 |
+
"step": 1684
|
11798 |
+
},
|
11799 |
+
{
|
11800 |
+
"epoch": 0.04526163103040722,
|
11801 |
+
"grad_norm": 1.4513976573944092,
|
11802 |
+
"learning_rate": 9.999999825900092e-05,
|
11803 |
+
"loss": 3.0043,
|
11804 |
+
"step": 1685
|
11805 |
+
},
|
11806 |
+
{
|
11807 |
+
"epoch": 0.04528849253250242,
|
11808 |
+
"grad_norm": 1.4938799142837524,
|
11809 |
+
"learning_rate": 9.99999982569277e-05,
|
11810 |
+
"loss": 2.998,
|
11811 |
+
"step": 1686
|
11812 |
+
},
|
11813 |
+
{
|
11814 |
+
"epoch": 0.04531535403459761,
|
11815 |
+
"grad_norm": 1.6789308786392212,
|
11816 |
+
"learning_rate": 9.999999825485323e-05,
|
11817 |
+
"loss": 3.108,
|
11818 |
+
"step": 1687
|
11819 |
+
},
|
11820 |
+
{
|
11821 |
+
"epoch": 0.04534221553669281,
|
11822 |
+
"grad_norm": 1.4213138818740845,
|
11823 |
+
"learning_rate": 9.999999825277752e-05,
|
11824 |
+
"loss": 3.0397,
|
11825 |
+
"step": 1688
|
11826 |
+
},
|
11827 |
+
{
|
11828 |
+
"epoch": 0.04536907703878801,
|
11829 |
+
"grad_norm": 1.5696649551391602,
|
11830 |
+
"learning_rate": 9.999999825070058e-05,
|
11831 |
+
"loss": 3.1145,
|
11832 |
+
"step": 1689
|
11833 |
+
},
|
11834 |
+
{
|
11835 |
+
"epoch": 0.0453959385408832,
|
11836 |
+
"grad_norm": 1.6566909551620483,
|
11837 |
+
"learning_rate": 9.999999824862241e-05,
|
11838 |
+
"loss": 3.3197,
|
11839 |
+
"step": 1690
|
11840 |
+
},
|
11841 |
+
{
|
11842 |
+
"epoch": 0.045422800042978405,
|
11843 |
+
"grad_norm": 1.591908574104309,
|
11844 |
+
"learning_rate": 9.999999824654302e-05,
|
11845 |
+
"loss": 2.9155,
|
11846 |
+
"step": 1691
|
11847 |
+
},
|
11848 |
+
{
|
11849 |
+
"epoch": 0.0454496615450736,
|
11850 |
+
"grad_norm": 1.6618692874908447,
|
11851 |
+
"learning_rate": 9.999999824446238e-05,
|
11852 |
+
"loss": 3.3152,
|
11853 |
+
"step": 1692
|
11854 |
+
},
|
11855 |
+
{
|
11856 |
+
"epoch": 0.045476523047168795,
|
11857 |
+
"grad_norm": 1.660921573638916,
|
11858 |
+
"learning_rate": 9.999999824238051e-05,
|
11859 |
+
"loss": 3.1987,
|
11860 |
+
"step": 1693
|
11861 |
+
},
|
11862 |
+
{
|
11863 |
+
"epoch": 0.045503384549263996,
|
11864 |
+
"grad_norm": 1.680649757385254,
|
11865 |
+
"learning_rate": 9.99999982402974e-05,
|
11866 |
+
"loss": 3.2139,
|
11867 |
+
"step": 1694
|
11868 |
+
},
|
11869 |
+
{
|
11870 |
+
"epoch": 0.04553024605135919,
|
11871 |
+
"grad_norm": 1.7884151935577393,
|
11872 |
+
"learning_rate": 9.999999823821307e-05,
|
11873 |
+
"loss": 3.2991,
|
11874 |
+
"step": 1695
|
11875 |
+
},
|
11876 |
+
{
|
11877 |
+
"epoch": 0.045557107553454386,
|
11878 |
+
"grad_norm": 1.6756354570388794,
|
11879 |
+
"learning_rate": 9.99999982361275e-05,
|
11880 |
+
"loss": 3.1816,
|
11881 |
+
"step": 1696
|
11882 |
+
},
|
11883 |
+
{
|
11884 |
+
"epoch": 0.04558396905554959,
|
11885 |
+
"grad_norm": 1.6229280233383179,
|
11886 |
+
"learning_rate": 9.999999823404069e-05,
|
11887 |
+
"loss": 3.2563,
|
11888 |
+
"step": 1697
|
11889 |
+
},
|
11890 |
+
{
|
11891 |
+
"epoch": 0.04561083055764478,
|
11892 |
+
"grad_norm": 1.625819444656372,
|
11893 |
+
"learning_rate": 9.999999823195265e-05,
|
11894 |
+
"loss": 3.1226,
|
11895 |
+
"step": 1698
|
11896 |
+
},
|
11897 |
+
{
|
11898 |
+
"epoch": 0.04563769205973998,
|
11899 |
+
"grad_norm": 1.7141685485839844,
|
11900 |
+
"learning_rate": 9.999999822986337e-05,
|
11901 |
+
"loss": 3.3355,
|
11902 |
+
"step": 1699
|
11903 |
+
},
|
11904 |
+
{
|
11905 |
+
"epoch": 0.04566455356183518,
|
11906 |
+
"grad_norm": 1.7747595310211182,
|
11907 |
+
"learning_rate": 9.999999822777286e-05,
|
11908 |
+
"loss": 3.2331,
|
11909 |
+
"step": 1700
|
11910 |
}
|
11911 |
],
|
11912 |
"logging_steps": 1,
|
|
|
11926 |
"attributes": {}
|
11927 |
}
|
11928 |
},
|
11929 |
+
"total_flos": 2.320207374503117e+18,
|
11930 |
"train_batch_size": 4,
|
11931 |
"trial_name": null,
|
11932 |
"trial_params": null
|