eddysang commited on
Commit
a89b718
·
verified ·
1 Parent(s): 68e6550

Training in progress, step 119, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:75f8f94bb8b1b985d11e8f332c6c0710694f4b31be603b16301078e94b374122
3
  size 97728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b16313c79bd3f248bf4d438764be663bf73293a82f845bfc82bbdc1c96faa18
3
  size 97728
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:61ba349b29aac7c82f3a71d2d800915fc6c4987485e29e9dd6c73171fb5a3b0b
3
  size 212298
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8820d9b006cab186b61503299fdb9b87f1c8bf0451bfe4bbedc210c4bc63254a
3
  size 212298
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d57bfe85897bedf916779c7c1c13ce627cb70100032ab6a274d22be811e19abc
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0126cf7d989a7263b97f1fe2ca3d6bc2827ac39dc2b4674586229158dba72ea3
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:70670c442607259270e13afbef3aac28e38a58ddad6998414f76ed43ab7f41d4
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4a2e0d38e4aad78961b54bb99f0a18c11d847ea1d3d3bc12cf223a3f862cf9e
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.3455796717840127,
5
  "eval_steps": 50,
6
- "global_step": 102,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -745,6 +745,125 @@
745
  "learning_rate": 8.54379825720049e-05,
746
  "loss": 10.346,
747
  "step": 102
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
748
  }
749
  ],
750
  "logging_steps": 1,
@@ -764,7 +883,7 @@
764
  "attributes": {}
765
  }
766
  },
767
- "total_flos": 43690245881856.0,
768
  "train_batch_size": 2,
769
  "trial_name": null,
770
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.40317628374801484,
5
  "eval_steps": 50,
6
+ "global_step": 119,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
745
  "learning_rate": 8.54379825720049e-05,
746
  "loss": 10.346,
747
  "step": 102
748
+ },
749
+ {
750
+ "epoch": 0.3489677077818952,
751
+ "grad_norm": 0.02064266800880432,
752
+ "learning_rate": 8.414020075538605e-05,
753
+ "loss": 10.3461,
754
+ "step": 103
755
+ },
756
+ {
757
+ "epoch": 0.3523557437797777,
758
+ "grad_norm": 0.023022258654236794,
759
+ "learning_rate": 8.2839634745074e-05,
760
+ "loss": 10.3463,
761
+ "step": 104
762
+ },
763
+ {
764
+ "epoch": 0.35574377977766014,
765
+ "grad_norm": 0.017432276159524918,
766
+ "learning_rate": 8.153668070607437e-05,
767
+ "loss": 10.3465,
768
+ "step": 105
769
+ },
770
+ {
771
+ "epoch": 0.3591318157755426,
772
+ "grad_norm": 0.01911778748035431,
773
+ "learning_rate": 8.023173553080938e-05,
774
+ "loss": 10.3459,
775
+ "step": 106
776
+ },
777
+ {
778
+ "epoch": 0.3625198517734251,
779
+ "grad_norm": 0.01831880584359169,
780
+ "learning_rate": 7.89251967182208e-05,
781
+ "loss": 10.3455,
782
+ "step": 107
783
+ },
784
+ {
785
+ "epoch": 0.36590788777130756,
786
+ "grad_norm": 0.018690194934606552,
787
+ "learning_rate": 7.761746225268758e-05,
788
+ "loss": 10.3471,
789
+ "step": 108
790
+ },
791
+ {
792
+ "epoch": 0.36929592376919007,
793
+ "grad_norm": 0.021015219390392303,
794
+ "learning_rate": 7.630893048279627e-05,
795
+ "loss": 10.3453,
796
+ "step": 109
797
+ },
798
+ {
799
+ "epoch": 0.3726839597670725,
800
+ "grad_norm": 0.01983080990612507,
801
+ "learning_rate": 7.5e-05,
802
+ "loss": 10.3462,
803
+ "step": 110
804
+ },
805
+ {
806
+ "epoch": 0.376071995764955,
807
+ "grad_norm": 0.021141625940799713,
808
+ "learning_rate": 7.369106951720373e-05,
809
+ "loss": 10.3457,
810
+ "step": 111
811
+ },
812
+ {
813
+ "epoch": 0.3794600317628375,
814
+ "grad_norm": 0.021793803200125694,
815
+ "learning_rate": 7.238253774731244e-05,
816
+ "loss": 10.3457,
817
+ "step": 112
818
+ },
819
+ {
820
+ "epoch": 0.38284806776071995,
821
+ "grad_norm": 0.019963612779974937,
822
+ "learning_rate": 7.10748032817792e-05,
823
+ "loss": 10.3463,
824
+ "step": 113
825
+ },
826
+ {
827
+ "epoch": 0.38623610375860246,
828
+ "grad_norm": 0.020090965554118156,
829
+ "learning_rate": 6.976826446919059e-05,
830
+ "loss": 10.3458,
831
+ "step": 114
832
+ },
833
+ {
834
+ "epoch": 0.3896241397564849,
835
+ "grad_norm": 0.02083776332437992,
836
+ "learning_rate": 6.846331929392562e-05,
837
+ "loss": 10.3469,
838
+ "step": 115
839
+ },
840
+ {
841
+ "epoch": 0.39301217575436737,
842
+ "grad_norm": 0.01785002276301384,
843
+ "learning_rate": 6.7160365254926e-05,
844
+ "loss": 10.3465,
845
+ "step": 116
846
+ },
847
+ {
848
+ "epoch": 0.3964002117522499,
849
+ "grad_norm": 0.02254386991262436,
850
+ "learning_rate": 6.585979924461394e-05,
851
+ "loss": 10.3452,
852
+ "step": 117
853
+ },
854
+ {
855
+ "epoch": 0.39978824775013233,
856
+ "grad_norm": 0.023428700864315033,
857
+ "learning_rate": 6.45620174279951e-05,
858
+ "loss": 10.3475,
859
+ "step": 118
860
+ },
861
+ {
862
+ "epoch": 0.40317628374801484,
863
+ "grad_norm": 0.01672559231519699,
864
+ "learning_rate": 6.326741512198266e-05,
865
+ "loss": 10.346,
866
+ "step": 119
867
  }
868
  ],
869
  "logging_steps": 1,
 
883
  "attributes": {}
884
  }
885
  },
886
+ "total_flos": 50987546050560.0,
887
  "train_batch_size": 2,
888
  "trial_name": null,
889
  "trial_params": null