whiteapple8222 committed
Commit 6d10023 · verified · 1 Parent(s): bf8f87c

Training in progress, step 1700, checkpoint

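This commit updates the four Git LFS pointer files and the trainer_state.json that the Hugging Face Trainer writes on each checkpoint. For context, here is a minimal sketch of a LoRA fine-tuning run that would emit this file layout. Only per_device_train_batch_size=4, logging_steps=1, the learning rate near 1e-4, the PEFT adapter checkpoint, and the 50-step save interval (inferred from the 1650 → 1700 step jump) are grounded in the diff below; the base model and dataset are placeholders, not taken from this repo.

```python
# Hypothetical reconstruction of the training setup behind this checkpoint.
# Grounded in the diff below: "train_batch_size": 4, "logging_steps": 1,
# a learning rate near 1e-4, an adapter (PEFT) checkpoint, and a 50-step
# save interval inferred from the 1650 -> 1700 step jump. The base model
# and dataset are placeholders.
from datasets import Dataset
from peft import LoraConfig, get_peft_model
from transformers import (AutoModelForCausalLM, AutoTokenizer,
                          Trainer, TrainingArguments)

base = "gpt2"  # placeholder; the actual base model is not named in the diff
tokenizer = AutoTokenizer.from_pretrained(base)
tokenizer.pad_token = tokenizer.eos_token
model = get_peft_model(AutoModelForCausalLM.from_pretrained(base),
                       LoraConfig(r=16, lora_alpha=32, task_type="CAUSAL_LM"))

def tokenize(example):
    out = tokenizer(example["text"], truncation=True,
                    padding="max_length", max_length=32)
    out["labels"] = out["input_ids"].copy()  # causal-LM labels = inputs
    return out

train = Dataset.from_dict({"text": ["placeholder text"] * 64}).map(tokenize)

trainer = Trainer(
    model=model,
    args=TrainingArguments(
        output_dir="outputs",
        per_device_train_batch_size=4,  # matches "train_batch_size": 4
        logging_steps=1,                # matches "logging_steps": 1
        save_steps=50,                  # inferred: checkpoints at 1650, 1700
        learning_rate=1e-4,             # consistent with the logged lr
    ),
    train_dataset=train,
)
trainer.train()  # each checkpoint-N/ dir gets adapter_model.safetensors,
                 # optimizer.pt, scheduler.pt, rng_state.pth, trainer_state.json
```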
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6029a505f307e3098b30acc19cdd7ba452e55709d8c353bc4a3f4f8ba146e277
+oid sha256:19c29d19ae137d8d9f6d075009b38f48ede5ee69b99c1d40e24e93602a5e42c2
 size 131146352
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8e748695332f398e0372a0342f533eda6dda257cbd0c6ff0c31662fd1d9df830
+oid sha256:53f02bf536b4b6c5ec995aa54e017724d8fb20c583e62cdd2047ff066ff5e86d
 size 67210516
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c2eb5cad9f3cbd36a844058d2bce505f26319b38c69d84d8607ffff4425c91e1
+oid sha256:a19b20244c2b147c4db6331bfcd526d49838499d3bb5d4e14d3a604b49d4cdc4
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:03bf9dc9befb01615f74b39d2b43ebf93f55dc1a1259dddadf80e9de69443c5a
+oid sha256:c12a87648583ab623d82ad394450eed417016fd7c996ca538887d063a9458228
 size 1064
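Each of the four files above is stored through Git LFS, so the diff touches only the three-line pointer (spec version, sha256 oid, byte size) rather than the binary itself. A small sketch for verifying a downloaded blob against its pointer; the local path is illustrative:

```python
# Check a local file against the Git LFS pointer values from this diff.
import hashlib

def lfs_digest(path: str) -> tuple[str, int]:
    """Return the (sha256 hex digest, byte size) Git LFS records for a file."""
    h, size = hashlib.sha256(), 0
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # 1 MiB chunks
            h.update(chunk)
            size += len(chunk)
    return h.hexdigest(), size

# New adapter weights: oid and size copied from the pointer diff above.
oid, size = lfs_digest("last-checkpoint/adapter_model.safetensors")
assert oid == "19c29d19ae137d8d9f6d075009b38f48ede5ee69b99c1d40e24e93602a5e42c2"
assert size == 131146352
```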
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.04432147845707532,
+  "epoch": 0.04566455356183518,
   "eval_steps": 500,
-  "global_step": 1650,
+  "global_step": 1700,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -11557,6 +11557,356 @@
       "learning_rate": 9.999999833078691e-05,
       "loss": 3.3308,
       "step": 1650
+    },
+    {
+      "epoch": 0.044348339959170514,
+      "grad_norm": 1.4079015254974365,
+      "learning_rate": 9.999999832875686e-05,
+      "loss": 2.9442,
+      "step": 1651
+    },
+    {
+      "epoch": 0.044375201461265716,
+      "grad_norm": 1.4412444829940796,
+      "learning_rate": 9.999999832672557e-05,
+      "loss": 3.038,
+      "step": 1652
+    },
+    {
+      "epoch": 0.04440206296336091,
+      "grad_norm": 1.4149682521820068,
+      "learning_rate": 9.999999832469304e-05,
+      "loss": 2.7261,
+      "step": 1653
+    },
+    {
+      "epoch": 0.044428924465456106,
+      "grad_norm": 1.411676287651062,
+      "learning_rate": 9.99999983226593e-05,
+      "loss": 2.9101,
+      "step": 1654
+    },
+    {
+      "epoch": 0.04445578596755131,
+      "grad_norm": 1.470566987991333,
+      "learning_rate": 9.99999983206243e-05,
+      "loss": 2.77,
+      "step": 1655
+    },
+    {
+      "epoch": 0.0444826474696465,
+      "grad_norm": 1.397863507270813,
+      "learning_rate": 9.999999831858808e-05,
+      "loss": 2.9582,
+      "step": 1656
+    },
+    {
+      "epoch": 0.0445095089717417,
+      "grad_norm": 1.3953698873519897,
+      "learning_rate": 9.999999831655063e-05,
+      "loss": 3.0912,
+      "step": 1657
+    },
+    {
+      "epoch": 0.0445363704738369,
+      "grad_norm": 1.3832037448883057,
+      "learning_rate": 9.999999831451192e-05,
+      "loss": 2.8913,
+      "step": 1658
+    },
+    {
+      "epoch": 0.044563231975932094,
+      "grad_norm": 1.3912192583084106,
+      "learning_rate": 9.9999998312472e-05,
+      "loss": 2.6594,
+      "step": 1659
+    },
+    {
+      "epoch": 0.04459009347802729,
+      "grad_norm": 1.3151278495788574,
+      "learning_rate": 9.999999831043084e-05,
+      "loss": 2.6635,
+      "step": 1660
+    },
+    {
+      "epoch": 0.04461695498012249,
+      "grad_norm": 1.2868192195892334,
+      "learning_rate": 9.999999830838846e-05,
+      "loss": 2.7065,
+      "step": 1661
+    },
+    {
+      "epoch": 0.044643816482217685,
+      "grad_norm": 1.387132167816162,
+      "learning_rate": 9.999999830634483e-05,
+      "loss": 2.6766,
+      "step": 1662
+    },
+    {
+      "epoch": 0.04467067798431288,
+      "grad_norm": 1.4079433679580688,
+      "learning_rate": 9.999999830429997e-05,
+      "loss": 2.8444,
+      "step": 1663
+    },
+    {
+      "epoch": 0.04469753948640808,
+      "grad_norm": 1.4303300380706787,
+      "learning_rate": 9.999999830225387e-05,
+      "loss": 2.9663,
+      "step": 1664
+    },
+    {
+      "epoch": 0.04472440098850328,
+      "grad_norm": 1.39901864528656,
+      "learning_rate": 9.999999830020654e-05,
+      "loss": 2.9923,
+      "step": 1665
+    },
+    {
+      "epoch": 0.04475126249059847,
+      "grad_norm": 1.4272572994232178,
+      "learning_rate": 9.999999829815798e-05,
+      "loss": 2.899,
+      "step": 1666
+    },
+    {
+      "epoch": 0.04477812399269367,
+      "grad_norm": 1.492218017578125,
+      "learning_rate": 9.99999982961082e-05,
+      "loss": 3.0391,
+      "step": 1667
+    },
+    {
+      "epoch": 0.04480498549478887,
+      "grad_norm": 1.450843095779419,
+      "learning_rate": 9.999999829405716e-05,
+      "loss": 2.9127,
+      "step": 1668
+    },
+    {
+      "epoch": 0.04483184699688406,
+      "grad_norm": 1.3440508842468262,
+      "learning_rate": 9.99999982920049e-05,
+      "loss": 3.0001,
+      "step": 1669
+    },
+    {
+      "epoch": 0.044858708498979265,
+      "grad_norm": 1.4402127265930176,
+      "learning_rate": 9.999999828995141e-05,
+      "loss": 2.819,
+      "step": 1670
+    },
+    {
+      "epoch": 0.04488557000107446,
+      "grad_norm": 1.4429413080215454,
+      "learning_rate": 9.999999828789667e-05,
+      "loss": 3.0246,
+      "step": 1671
+    },
+    {
+      "epoch": 0.044912431503169654,
+      "grad_norm": 2.6085400581359863,
+      "learning_rate": 9.999999828584071e-05,
+      "loss": 2.9273,
+      "step": 1672
+    },
+    {
+      "epoch": 0.044939293005264856,
+      "grad_norm": 1.5993056297302246,
+      "learning_rate": 9.999999828378352e-05,
+      "loss": 3.2987,
+      "step": 1673
+    },
+    {
+      "epoch": 0.04496615450736005,
+      "grad_norm": 1.5255417823791504,
+      "learning_rate": 9.999999828172508e-05,
+      "loss": 2.9552,
+      "step": 1674
+    },
+    {
+      "epoch": 0.044993016009455246,
+      "grad_norm": 1.557896614074707,
+      "learning_rate": 9.999999827966542e-05,
+      "loss": 3.123,
+      "step": 1675
+    },
+    {
+      "epoch": 0.04501987751155045,
+      "grad_norm": 1.558122992515564,
+      "learning_rate": 9.999999827760452e-05,
+      "loss": 2.9187,
+      "step": 1676
+    },
+    {
+      "epoch": 0.04504673901364564,
+      "grad_norm": 1.425349473953247,
+      "learning_rate": 9.999999827554239e-05,
+      "loss": 2.899,
+      "step": 1677
+    },
+    {
+      "epoch": 0.04507360051574084,
+      "grad_norm": 1.5926076173782349,
+      "learning_rate": 9.999999827347903e-05,
+      "loss": 3.2055,
+      "step": 1678
+    },
+    {
+      "epoch": 0.04510046201783604,
+      "grad_norm": 1.6083916425704956,
+      "learning_rate": 9.999999827141443e-05,
+      "loss": 2.8889,
+      "step": 1679
+    },
+    {
+      "epoch": 0.045127323519931234,
+      "grad_norm": 1.4613057374954224,
+      "learning_rate": 9.999999826934859e-05,
+      "loss": 2.968,
+      "step": 1680
+    },
+    {
+      "epoch": 0.04515418502202643,
+      "grad_norm": 1.591672420501709,
+      "learning_rate": 9.999999826728153e-05,
+      "loss": 3.0252,
+      "step": 1681
+    },
+    {
+      "epoch": 0.04518104652412163,
+      "grad_norm": 1.539560317993164,
+      "learning_rate": 9.999999826521322e-05,
+      "loss": 2.8637,
+      "step": 1682
+    },
+    {
+      "epoch": 0.045207908026216825,
+      "grad_norm": 1.4949159622192383,
+      "learning_rate": 9.99999982631437e-05,
+      "loss": 3.05,
+      "step": 1683
+    },
+    {
+      "epoch": 0.04523476952831202,
+      "grad_norm": 1.5287278890609741,
+      "learning_rate": 9.999999826107294e-05,
+      "loss": 3.0176,
+      "step": 1684
+    },
+    {
+      "epoch": 0.04526163103040722,
+      "grad_norm": 1.4513976573944092,
+      "learning_rate": 9.999999825900092e-05,
+      "loss": 3.0043,
+      "step": 1685
+    },
+    {
+      "epoch": 0.04528849253250242,
+      "grad_norm": 1.4938799142837524,
+      "learning_rate": 9.99999982569277e-05,
+      "loss": 2.998,
+      "step": 1686
+    },
+    {
+      "epoch": 0.04531535403459761,
+      "grad_norm": 1.6789308786392212,
+      "learning_rate": 9.999999825485323e-05,
+      "loss": 3.108,
+      "step": 1687
+    },
+    {
+      "epoch": 0.04534221553669281,
+      "grad_norm": 1.4213138818740845,
+      "learning_rate": 9.999999825277752e-05,
+      "loss": 3.0397,
+      "step": 1688
+    },
+    {
+      "epoch": 0.04536907703878801,
+      "grad_norm": 1.5696649551391602,
+      "learning_rate": 9.999999825070058e-05,
+      "loss": 3.1145,
+      "step": 1689
+    },
+    {
+      "epoch": 0.0453959385408832,
+      "grad_norm": 1.6566909551620483,
+      "learning_rate": 9.999999824862241e-05,
+      "loss": 3.3197,
+      "step": 1690
+    },
+    {
+      "epoch": 0.045422800042978405,
+      "grad_norm": 1.591908574104309,
+      "learning_rate": 9.999999824654302e-05,
+      "loss": 2.9155,
+      "step": 1691
+    },
+    {
+      "epoch": 0.0454496615450736,
+      "grad_norm": 1.6618692874908447,
+      "learning_rate": 9.999999824446238e-05,
+      "loss": 3.3152,
+      "step": 1692
+    },
+    {
+      "epoch": 0.045476523047168795,
+      "grad_norm": 1.660921573638916,
+      "learning_rate": 9.999999824238051e-05,
+      "loss": 3.1987,
+      "step": 1693
+    },
+    {
+      "epoch": 0.045503384549263996,
+      "grad_norm": 1.680649757385254,
+      "learning_rate": 9.99999982402974e-05,
+      "loss": 3.2139,
+      "step": 1694
+    },
+    {
+      "epoch": 0.04553024605135919,
+      "grad_norm": 1.7884151935577393,
+      "learning_rate": 9.999999823821307e-05,
+      "loss": 3.2991,
+      "step": 1695
+    },
+    {
+      "epoch": 0.045557107553454386,
+      "grad_norm": 1.6756354570388794,
+      "learning_rate": 9.99999982361275e-05,
+      "loss": 3.1816,
+      "step": 1696
+    },
+    {
+      "epoch": 0.04558396905554959,
+      "grad_norm": 1.6229280233383179,
+      "learning_rate": 9.999999823404069e-05,
+      "loss": 3.2563,
+      "step": 1697
+    },
+    {
+      "epoch": 0.04561083055764478,
+      "grad_norm": 1.625819444656372,
+      "learning_rate": 9.999999823195265e-05,
+      "loss": 3.1226,
+      "step": 1698
+    },
+    {
+      "epoch": 0.04563769205973998,
+      "grad_norm": 1.7141685485839844,
+      "learning_rate": 9.999999822986337e-05,
+      "loss": 3.3355,
+      "step": 1699
+    },
+    {
+      "epoch": 0.04566455356183518,
+      "grad_norm": 1.7747595310211182,
+      "learning_rate": 9.999999822777286e-05,
+      "loss": 3.2331,
+      "step": 1700
     }
   ],
   "logging_steps": 1,
@@ -11576,7 +11926,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.2518960104669184e+18,
+  "total_flos": 2.320207374503117e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null