error577 committed
Commit 4c79341 · verified · Parent: e6abe79

Training in progress, step 260, checkpoint
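All changed files live under last-checkpoint/. As a minimal sketch, the files at exactly this revision can be pulled with huggingface_hub; the repo_id below is a placeholder, since the commit page does not name the repository:

```python
# Minimal sketch: download the files from this exact commit.
# "error577/REPO_ID" is a PLACEHOLDER -- the commit page does not show
# the repository id; revision is the commit hash from the header above.
from huggingface_hub import snapshot_download

local_dir = snapshot_download(
    repo_id="error577/REPO_ID",            # hypothetical repo id
    revision="4c79341",                    # commit hash shown above
    allow_patterns=["last-checkpoint/*"],  # only the checkpoint files
)
print("checkpoint downloaded to", local_dir)
```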
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:76ac24fa03f98de7b59e79abba929f3aeb5e048bbbf55f2e8d0d38cd880bdbf1
+oid sha256:c9b8cba78091a1e79978910e21798429ef88cf6bc423846f62a31e9a53d69c18
 size 1579384
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5ccba65a6d0400b5d70f2f8e84b9e15fe386318055df0d775c3714c89441b72f
-size 857274
+oid sha256:f78a2c758e1a8ad106a78487e5b08224663b24d72d1f9765848b8ef224e1738e
+size 857338
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0b79bdfeb2e849b31d0483fff8a99da1b342901bd79b8b6a533accea74829d81
+oid sha256:5b587cda8fb42849f8f93a8df320fec083bf8b13c0ae106b51876fc1af3b04ea
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7988081c02e6f2fc7261042902ac98ffd02e6974000f01aefc8230feec810a81
+oid sha256:982bf3fd7aeed34e3ad4f725e738ae280a74686f38971dfd5b53c022a7b5da3a
 size 1064
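Each entry above is a Git LFS pointer: only the sha256 oid (and, for optimizer.pt, the byte size) changes between checkpoints. A minimal sketch for checking a pulled file against its pointer's oid, assuming local paths mirror the diff:

```python
# Minimal sketch: verify a downloaded checkpoint file against the sha256
# oid recorded in its Git LFS pointer (paths mirror the diff above).
import hashlib
from pathlib import Path

def sha256_of(path: Path, chunk_size: int = 1 << 20) -> str:
    # Stream in 1 MiB chunks so large checkpoint files are not read at once.
    digest = hashlib.sha256()
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# New oid for adapter_model.safetensors from the diff above.
expected = "c9b8cba78091a1e79978910e21798429ef88cf6bc423846f62a31e9a53d69c18"
actual = sha256_of(Path("last-checkpoint/adapter_model.safetensors"))
print("ok" if actual == expected else f"hash mismatch: {actual}")
```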
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.15847468119351243,
+  "epoch": 0.17168090462630514,
   "eval_steps": 20,
-  "global_step": 240,
+  "global_step": 260,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1791,6 +1791,154 @@
       "eval_samples_per_second": 111.158,
       "eval_steps_per_second": 111.158,
       "step": 240
+    },
+    {
+      "epoch": 0.15913499236515208,
+      "grad_norm": 81202.03125,
+      "learning_rate": 0.00029999310547054866,
+      "loss": 152.5614,
+      "step": 241
+    },
+    {
+      "epoch": 0.15979530353679172,
+      "grad_norm": 44808.3828125,
+      "learning_rate": 0.00029999304564890986,
+      "loss": 129.6364,
+      "step": 242
+    },
+    {
+      "epoch": 0.16045561470843134,
+      "grad_norm": 43636.87109375,
+      "learning_rate": 0.0002999929855688702,
+      "loss": 113.3872,
+      "step": 243
+    },
+    {
+      "epoch": 0.16111592588007098,
+      "grad_norm": 61974.05859375,
+      "learning_rate": 0.0002999929252304299,
+      "loss": 104.6228,
+      "step": 244
+    },
+    {
+      "epoch": 0.16177623705171063,
+      "grad_norm": 78535.4375,
+      "learning_rate": 0.0002999928646335889,
+      "loss": 116.442,
+      "step": 245
+    },
+    {
+      "epoch": 0.16243654822335024,
+      "grad_norm": 86542.1796875,
+      "learning_rate": 0.00029999280377834755,
+      "loss": 133.5997,
+      "step": 246
+    },
+    {
+      "epoch": 0.16309685939498988,
+      "grad_norm": 115709.4375,
+      "learning_rate": 0.00029999274266470573,
+      "loss": 156.246,
+      "step": 247
+    },
+    {
+      "epoch": 0.16375717056662953,
+      "grad_norm": 134934.859375,
+      "learning_rate": 0.0002999926812926636,
+      "loss": 157.7942,
+      "step": 248
+    },
+    {
+      "epoch": 0.16441748173826917,
+      "grad_norm": 100172.1328125,
+      "learning_rate": 0.00029999261966222134,
+      "loss": 143.3987,
+      "step": 249
+    },
+    {
+      "epoch": 0.1650777929099088,
+      "grad_norm": 307228.875,
+      "learning_rate": 0.00029999255777337904,
+      "loss": 153.6041,
+      "step": 250
+    },
+    {
+      "epoch": 0.16573810408154843,
+      "grad_norm": 30510.705078125,
+      "learning_rate": 0.0002999924956261367,
+      "loss": 202.106,
+      "step": 251
+    },
+    {
+      "epoch": 0.16639841525318808,
+      "grad_norm": 38848.8671875,
+      "learning_rate": 0.0002999924332204946,
+      "loss": 138.0028,
+      "step": 252
+    },
+    {
+      "epoch": 0.1670587264248277,
+      "grad_norm": 47215.265625,
+      "learning_rate": 0.0002999923705564527,
+      "loss": 117.989,
+      "step": 253
+    },
+    {
+      "epoch": 0.16771903759646734,
+      "grad_norm": 43520.44921875,
+      "learning_rate": 0.00029999230763401116,
+      "loss": 145.8618,
+      "step": 254
+    },
+    {
+      "epoch": 0.16837934876810698,
+      "grad_norm": 60958.9453125,
+      "learning_rate": 0.0002999922444531701,
+      "loss": 168.5038,
+      "step": 255
+    },
+    {
+      "epoch": 0.1690396599397466,
+      "grad_norm": 58285.34765625,
+      "learning_rate": 0.0002999921810139296,
+      "loss": 169.5175,
+      "step": 256
+    },
+    {
+      "epoch": 0.16969997111138624,
+      "grad_norm": 62326.203125,
+      "learning_rate": 0.0002999921173162898,
+      "loss": 156.6914,
+      "step": 257
+    },
+    {
+      "epoch": 0.17036028228302588,
+      "grad_norm": 27963.5078125,
+      "learning_rate": 0.0002999920533602508,
+      "loss": 139.1583,
+      "step": 258
+    },
+    {
+      "epoch": 0.1710205934546655,
+      "grad_norm": 28827.8359375,
+      "learning_rate": 0.0002999919891458127,
+      "loss": 142.2265,
+      "step": 259
+    },
+    {
+      "epoch": 0.17168090462630514,
+      "grad_norm": 24848.3828125,
+      "learning_rate": 0.00029999192467297566,
+      "loss": 122.2599,
+      "step": 260
+    },
+    {
+      "epoch": 0.17168090462630514,
+      "eval_loss": 7.328362941741943,
+      "eval_runtime": 4.5883,
+      "eval_samples_per_second": 107.883,
+      "eval_steps_per_second": 107.883,
+      "step": 260
     }
   ],
   "logging_steps": 1,
@@ -1810,7 +1958,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 158384630267904.0,
+  "total_flos": 170213574180864.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null