eddysang commited on
Commit
4ab7dbb
·
verified ·
1 Parent(s): 18ed8a6

Training in progress, step 119, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:919cf08a7d7abf9880c22fb4dfe4b831d65927f7ae57ae5667378e28e06cd849
3
  size 335604696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:320fd95f92c74b30e44eea33ed497b690bc39a047f9687bb0de584f2694e20c8
3
  size 335604696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4138bcc19e199dc967bcd7384f55e319c96ad62e01d12d688ab9eccaa9ae5991
3
  size 671466706
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fbe1bbd4c0b2029d5651d4bfed85f41431ed9ee8d5961c5f019eed1914818a8c
3
  size 671466706
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df7161f83e2c1b5421911bfc287c46c4380aa9d1a9390b7f6bdd147d920abb38
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a76ce3b7ea5491c369f9a36901d24e79782cda5360c68d2970c580ab23ec498
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:70670c442607259270e13afbef3aac28e38a58ddad6998414f76ed43ab7f41d4
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4a2e0d38e4aad78961b54bb99f0a18c11d847ea1d3d3bc12cf223a3f862cf9e
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.15017253278122844,
5
  "eval_steps": 50,
6
- "global_step": 102,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -745,6 +745,125 @@
745
  "learning_rate": 8.54379825720049e-05,
746
  "loss": 0.0527,
747
  "step": 102
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
748
  }
749
  ],
750
  "logging_steps": 1,
@@ -764,7 +883,7 @@
764
  "attributes": {}
765
  }
766
  },
767
- "total_flos": 1.0779851033778586e+18,
768
  "train_batch_size": 2,
769
  "trial_name": null,
770
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.1752012882447665,
5
  "eval_steps": 50,
6
+ "global_step": 119,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
745
  "learning_rate": 8.54379825720049e-05,
746
  "loss": 0.0527,
747
  "step": 102
748
+ },
749
+ {
750
+ "epoch": 0.15164481251437772,
751
+ "grad_norm": 0.10324705392122269,
752
+ "learning_rate": 8.414020075538605e-05,
753
+ "loss": 0.0217,
754
+ "step": 103
755
+ },
756
+ {
757
+ "epoch": 0.15311709224752704,
758
+ "grad_norm": 0.11965189129114151,
759
+ "learning_rate": 8.2839634745074e-05,
760
+ "loss": 0.0518,
761
+ "step": 104
762
+ },
763
+ {
764
+ "epoch": 0.15458937198067632,
765
+ "grad_norm": 0.14544668793678284,
766
+ "learning_rate": 8.153668070607437e-05,
767
+ "loss": 0.0936,
768
+ "step": 105
769
+ },
770
+ {
771
+ "epoch": 0.15606165171382563,
772
+ "grad_norm": 0.11407126486301422,
773
+ "learning_rate": 8.023173553080938e-05,
774
+ "loss": 0.0279,
775
+ "step": 106
776
+ },
777
+ {
778
+ "epoch": 0.15753393144697492,
779
+ "grad_norm": 0.11697705090045929,
780
+ "learning_rate": 7.89251967182208e-05,
781
+ "loss": 0.044,
782
+ "step": 107
783
+ },
784
+ {
785
+ "epoch": 0.15900621118012423,
786
+ "grad_norm": 0.18119023740291595,
787
+ "learning_rate": 7.761746225268758e-05,
788
+ "loss": 0.1192,
789
+ "step": 108
790
+ },
791
+ {
792
+ "epoch": 0.1604784909132735,
793
+ "grad_norm": 0.19359427690505981,
794
+ "learning_rate": 7.630893048279627e-05,
795
+ "loss": 0.1415,
796
+ "step": 109
797
+ },
798
+ {
799
+ "epoch": 0.16195077064642283,
800
+ "grad_norm": 0.11051613837480545,
801
+ "learning_rate": 7.5e-05,
802
+ "loss": 0.0614,
803
+ "step": 110
804
+ },
805
+ {
806
+ "epoch": 0.1634230503795721,
807
+ "grad_norm": 0.11084026098251343,
808
+ "learning_rate": 7.369106951720373e-05,
809
+ "loss": 0.0506,
810
+ "step": 111
811
+ },
812
+ {
813
+ "epoch": 0.16489533011272142,
814
+ "grad_norm": 0.08058993518352509,
815
+ "learning_rate": 7.238253774731244e-05,
816
+ "loss": 0.0232,
817
+ "step": 112
818
+ },
819
+ {
820
+ "epoch": 0.1663676098458707,
821
+ "grad_norm": 0.11398748308420181,
822
+ "learning_rate": 7.10748032817792e-05,
823
+ "loss": 0.077,
824
+ "step": 113
825
+ },
826
+ {
827
+ "epoch": 0.16783988957902002,
828
+ "grad_norm": 0.10695286840200424,
829
+ "learning_rate": 6.976826446919059e-05,
830
+ "loss": 0.0586,
831
+ "step": 114
832
+ },
833
+ {
834
+ "epoch": 0.1693121693121693,
835
+ "grad_norm": 0.06323719769716263,
836
+ "learning_rate": 6.846331929392562e-05,
837
+ "loss": 0.0096,
838
+ "step": 115
839
+ },
840
+ {
841
+ "epoch": 0.17078444904531861,
842
+ "grad_norm": 0.09416350722312927,
843
+ "learning_rate": 6.7160365254926e-05,
844
+ "loss": 0.0552,
845
+ "step": 116
846
+ },
847
+ {
848
+ "epoch": 0.1722567287784679,
849
+ "grad_norm": 0.10175611078739166,
850
+ "learning_rate": 6.585979924461394e-05,
851
+ "loss": 0.0447,
852
+ "step": 117
853
+ },
854
+ {
855
+ "epoch": 0.1737290085116172,
856
+ "grad_norm": 0.0726943388581276,
857
+ "learning_rate": 6.45620174279951e-05,
858
+ "loss": 0.0183,
859
+ "step": 118
860
+ },
861
+ {
862
+ "epoch": 0.1752012882447665,
863
+ "grad_norm": 0.12314460426568985,
864
+ "learning_rate": 6.326741512198266e-05,
865
+ "loss": 0.0912,
866
+ "step": 119
867
  }
868
  ],
869
  "logging_steps": 1,
 
883
  "attributes": {}
884
  }
885
  },
886
+ "total_flos": 1.260198156319064e+18,
887
  "train_batch_size": 2,
888
  "trial_name": null,
889
  "trial_params": null