error577 committed
Commit 8acc304 · verified · 1 Parent(s): 06129c7

Training in progress, step 120, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:16785a61aa31cea9df62cbd18f8208d660df29185f09b9dd0e9aca0d2438e96a
+oid sha256:393273cdca286ccb40e14205ea36723e7c5c1240080ed35a78dbeb665d070ee9
 size 1579384
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9518499bb798c1ce79529bad3ff555316b6ee700915fcec92bf1504941a05032
+oid sha256:0d54a2e2580a8617019d978feb2d6817db3c1af9507f0f49fc2ef229fd5ccdce
 size 857274
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cace2bb20bc8ba875286724acbfadddec3e5175c4ce467dea9a6adf2fcb4cb03
+oid sha256:eed95f9b60d673089274be5847cbb94c802446b0a76989fa02d0940d21190b7e
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2036ec7b8a4c0dbbd07ada2b2af7c3be05d304eb60a4492cb7e057daf83ea234
+oid sha256:a705acd53999263a453e7147fb79a4ea1e0e69c6cfafd3b3b9170876c31d6d7f
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.06603111716396352,
+  "epoch": 0.07923734059675622,
   "eval_steps": 20,
-  "global_step": 100,
+  "global_step": 120,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -755,6 +755,154 @@
       "eval_samples_per_second": 75.305,
       "eval_steps_per_second": 75.305,
       "step": 100
+    },
+    {
+      "epoch": 0.06669142833560315,
+      "grad_norm": 18817.16796875,
+      "learning_rate": 0.00023282818384051866,
+      "loss": 113.1262,
+      "step": 101
+    },
+    {
+      "epoch": 0.06735173950724278,
+      "grad_norm": 24054.869140625,
+      "learning_rate": 0.00023146858762206489,
+      "loss": 108.1982,
+      "step": 102
+    },
+    {
+      "epoch": 0.06801205067888243,
+      "grad_norm": 34655.8671875,
+      "learning_rate": 0.00023009943070364044,
+      "loss": 108.4203,
+      "step": 103
+    },
+    {
+      "epoch": 0.06867236185052206,
+      "grad_norm": 29425.787109375,
+      "learning_rate": 0.0002287208737618801,
+      "loss": 107.4073,
+      "step": 104
+    },
+    {
+      "epoch": 0.0693326730221617,
+      "grad_norm": 13952.1171875,
+      "learning_rate": 0.00022733307857655325,
+      "loss": 105.1398,
+      "step": 105
+    },
+    {
+      "epoch": 0.06999298419380133,
+      "grad_norm": 20227.431640625,
+      "learning_rate": 0.00022593620801157808,
+      "loss": 115.2134,
+      "step": 106
+    },
+    {
+      "epoch": 0.07065329536544096,
+      "grad_norm": 19999.79296875,
+      "learning_rate": 0.00022453042599590882,
+      "loss": 113.6159,
+      "step": 107
+    },
+    {
+      "epoch": 0.0713136065370806,
+      "grad_norm": 18226.33203125,
+      "learning_rate": 0.00022311589750429787,
+      "loss": 110.2182,
+      "step": 108
+    },
+    {
+      "epoch": 0.07197391770872023,
+      "grad_norm": 15471.123046875,
+      "learning_rate": 0.00022169278853793545,
+      "loss": 98.862,
+      "step": 109
+    },
+    {
+      "epoch": 0.07263422888035986,
+      "grad_norm": 9518.90625,
+      "learning_rate": 0.00022026126610496852,
+      "loss": 100.519,
+      "step": 110
+    },
+    {
+      "epoch": 0.07329454005199951,
+      "grad_norm": 12838.0771484375,
+      "learning_rate": 0.0002188214982009016,
+      "loss": 99.1184,
+      "step": 111
+    },
+    {
+      "epoch": 0.07395485122363914,
+      "grad_norm": 13236.9697265625,
+      "learning_rate": 0.00021737365378888187,
+      "loss": 108.3643,
+      "step": 112
+    },
+    {
+      "epoch": 0.07461516239527878,
+      "grad_norm": 21540.712890625,
+      "learning_rate": 0.00021591790277987043,
+      "loss": 106.4385,
+      "step": 113
+    },
+    {
+      "epoch": 0.07527547356691841,
+      "grad_norm": 13282.7333984375,
+      "learning_rate": 0.00021445441601270276,
+      "loss": 111.6325,
+      "step": 114
+    },
+    {
+      "epoch": 0.07593578473855804,
+      "grad_norm": 32402.203125,
+      "learning_rate": 0.00021298336523403968,
+      "loss": 102.4856,
+      "step": 115
+    },
+    {
+      "epoch": 0.07659609591019768,
+      "grad_norm": 23308.939453125,
+      "learning_rate": 0.0002115049230782124,
+      "loss": 99.6906,
+      "step": 116
+    },
+    {
+      "epoch": 0.07725640708183731,
+      "grad_norm": 21524.953125,
+      "learning_rate": 0.00021001926304696296,
+      "loss": 90.451,
+      "step": 117
+    },
+    {
+      "epoch": 0.07791671825347696,
+      "grad_norm": 13045.5537109375,
+      "learning_rate": 0.00020852655948908316,
+      "loss": 93.52,
+      "step": 118
+    },
+    {
+      "epoch": 0.07857702942511659,
+      "grad_norm": 18377.09375,
+      "learning_rate": 0.0002070269875799538,
+      "loss": 85.6482,
+      "step": 119
+    },
+    {
+      "epoch": 0.07923734059675622,
+      "grad_norm": 12025.564453125,
+      "learning_rate": 0.00020552072330098716,
+      "loss": 89.6598,
+      "step": 120
+    },
+    {
+      "epoch": 0.07923734059675622,
+      "eval_loss": 12.788580894470215,
+      "eval_runtime": 6.5873,
+      "eval_samples_per_second": 75.145,
+      "eval_steps_per_second": 75.145,
+      "step": 120
     }
   ],
   "logging_steps": 1,
@@ -774,7 +922,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 65231696953344.0,
+  "total_flos": 81003622170624.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null