eddysang committed (verified)
Commit abcbdfe · 1 Parent(s): 892f183

Training in progress, step 119, checkpoint

last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:409506c2e743c0962472a80fbb6dcaf2cbc9c90ce5fb8329688b39a5b3cfa456
+oid sha256:a7a0675b0ccff9d3ee1f89a5d3ec1b7f2360c88f112fca496ed1e53a1f95999b
 size 640009682
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:609ac74381de893b35193cf51e9a6fce221b22031433fbd3b97539f9bac6436f
+oid sha256:c587490379a5a51115375645f2d5ea52b7a18285f8ceca78cb845b39b459b45e
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:70670c442607259270e13afbef3aac28e38a58ddad6998414f76ed43ab7f41d4
+oid sha256:d4a2e0d38e4aad78961b54bb99f0a18c11d847ea1d3d3bc12cf223a3f862cf9e
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.11208021427099787,
+  "epoch": 0.13076024998283084,
   "eval_steps": 50,
-  "global_step": 102,
+  "global_step": 119,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -745,6 +745,125 @@
       "learning_rate": 8.54379825720049e-05,
       "loss": 0.0,
       "step": 102
+    },
+    {
+      "epoch": 0.1131790399011057,
+      "grad_norm": NaN,
+      "learning_rate": 8.414020075538605e-05,
+      "loss": 0.0,
+      "step": 103
+    },
+    {
+      "epoch": 0.11427786553121351,
+      "grad_norm": NaN,
+      "learning_rate": 8.2839634745074e-05,
+      "loss": 0.0,
+      "step": 104
+    },
+    {
+      "epoch": 0.11537669116132133,
+      "grad_norm": NaN,
+      "learning_rate": 8.153668070607437e-05,
+      "loss": 0.0,
+      "step": 105
+    },
+    {
+      "epoch": 0.11647551679142916,
+      "grad_norm": NaN,
+      "learning_rate": 8.023173553080938e-05,
+      "loss": 0.0,
+      "step": 106
+    },
+    {
+      "epoch": 0.11757434242153698,
+      "grad_norm": NaN,
+      "learning_rate": 7.89251967182208e-05,
+      "loss": 0.0,
+      "step": 107
+    },
+    {
+      "epoch": 0.11867316805164481,
+      "grad_norm": NaN,
+      "learning_rate": 7.761746225268758e-05,
+      "loss": 0.0,
+      "step": 108
+    },
+    {
+      "epoch": 0.11977199368175263,
+      "grad_norm": NaN,
+      "learning_rate": 7.630893048279627e-05,
+      "loss": 0.0,
+      "step": 109
+    },
+    {
+      "epoch": 0.12087081931186044,
+      "grad_norm": NaN,
+      "learning_rate": 7.5e-05,
+      "loss": 0.0,
+      "step": 110
+    },
+    {
+      "epoch": 0.12196964494196827,
+      "grad_norm": NaN,
+      "learning_rate": 7.369106951720373e-05,
+      "loss": 0.0,
+      "step": 111
+    },
+    {
+      "epoch": 0.1230684705720761,
+      "grad_norm": NaN,
+      "learning_rate": 7.238253774731244e-05,
+      "loss": 0.0,
+      "step": 112
+    },
+    {
+      "epoch": 0.12416729620218392,
+      "grad_norm": NaN,
+      "learning_rate": 7.10748032817792e-05,
+      "loss": 0.0,
+      "step": 113
+    },
+    {
+      "epoch": 0.12526612183229174,
+      "grad_norm": NaN,
+      "learning_rate": 6.976826446919059e-05,
+      "loss": 0.0,
+      "step": 114
+    },
+    {
+      "epoch": 0.12636494746239957,
+      "grad_norm": NaN,
+      "learning_rate": 6.846331929392562e-05,
+      "loss": 0.0,
+      "step": 115
+    },
+    {
+      "epoch": 0.1274637730925074,
+      "grad_norm": NaN,
+      "learning_rate": 6.7160365254926e-05,
+      "loss": 0.0,
+      "step": 116
+    },
+    {
+      "epoch": 0.12856259872261522,
+      "grad_norm": NaN,
+      "learning_rate": 6.585979924461394e-05,
+      "loss": 0.0,
+      "step": 117
+    },
+    {
+      "epoch": 0.12966142435272301,
+      "grad_norm": NaN,
+      "learning_rate": 6.45620174279951e-05,
+      "loss": 0.0,
+      "step": 118
+    },
+    {
+      "epoch": 0.13076024998283084,
+      "grad_norm": NaN,
+      "learning_rate": 6.326741512198266e-05,
+      "loss": 0.0,
+      "step": 119
     }
   ],
   "logging_steps": 1,
@@ -764,7 +883,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.979003336114831e+17,
+  "total_flos": 6.974956063351112e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null