eddysang commited on
Commit
240b5ca
·
verified ·
1 Parent(s): abcbdfe

Training in progress, step 136, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a7a0675b0ccff9d3ee1f89a5d3ec1b7f2360c88f112fca496ed1e53a1f95999b
3
  size 640009682
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d16dfef8d2f27178a4d1f4edab1fa39d16a60f03abb441761d33f24ee556f18e
3
  size 640009682
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c587490379a5a51115375645f2d5ea52b7a18285f8ceca78cb845b39b459b45e
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29ed09c79b26c5877cb26e4ced9f9a10914f152b078ea4dd8bdd5fffdeeaad84
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d4a2e0d38e4aad78961b54bb99f0a18c11d847ea1d3d3bc12cf223a3f862cf9e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6aeb1fbb5e964bbc83fa43b049054867ad1faca8f43271132d241ae074069d5d
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.13076024998283084,
5
  "eval_steps": 50,
6
- "global_step": 119,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -864,6 +864,125 @@
864
  "learning_rate": 6.326741512198266e-05,
865
  "loss": 0.0,
866
  "step": 119
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
867
  }
868
  ],
869
  "logging_steps": 1,
@@ -883,7 +1002,7 @@
883
  "attributes": {}
884
  }
885
  },
886
- "total_flos": 6.974956063351112e+17,
887
  "train_batch_size": 2,
888
  "trial_name": null,
889
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.14944028569466383,
5
  "eval_steps": 50,
6
+ "global_step": 136,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
864
  "learning_rate": 6.326741512198266e-05,
865
  "loss": 0.0,
866
  "step": 119
867
+ },
868
+ {
869
+ "epoch": 0.13185907561293866,
870
+ "grad_norm": NaN,
871
+ "learning_rate": 6.197638667498022e-05,
872
+ "loss": 0.0,
873
+ "step": 120
874
+ },
875
+ {
876
+ "epoch": 0.1329579012430465,
877
+ "grad_norm": NaN,
878
+ "learning_rate": 6.068932534675913e-05,
879
+ "loss": 0.0,
880
+ "step": 121
881
+ },
882
+ {
883
+ "epoch": 0.1340567268731543,
884
+ "grad_norm": NaN,
885
+ "learning_rate": 5.9406623188668055e-05,
886
+ "loss": 0.0,
887
+ "step": 122
888
+ },
889
+ {
890
+ "epoch": 0.13515555250326214,
891
+ "grad_norm": NaN,
892
+ "learning_rate": 5.812867092421013e-05,
893
+ "loss": 0.0,
894
+ "step": 123
895
+ },
896
+ {
897
+ "epoch": 0.13625437813336996,
898
+ "grad_norm": NaN,
899
+ "learning_rate": 5.685585783002493e-05,
900
+ "loss": 0.0,
901
+ "step": 124
902
+ },
903
+ {
904
+ "epoch": 0.1373532037634778,
905
+ "grad_norm": NaN,
906
+ "learning_rate": 5.558857161731093e-05,
907
+ "loss": 0.0,
908
+ "step": 125
909
+ },
910
+ {
911
+ "epoch": 0.1384520293935856,
912
+ "grad_norm": NaN,
913
+ "learning_rate": 5.4327198313725064e-05,
914
+ "loss": 0.0,
915
+ "step": 126
916
+ },
917
+ {
918
+ "epoch": 0.13955085502369344,
919
+ "grad_norm": NaN,
920
+ "learning_rate": 5.307212214579474e-05,
921
+ "loss": 0.0,
922
+ "step": 127
923
+ },
924
+ {
925
+ "epoch": 0.14064968065380126,
926
+ "grad_norm": NaN,
927
+ "learning_rate": 5.182372542187895e-05,
928
+ "loss": 0.0,
929
+ "step": 128
930
+ },
931
+ {
932
+ "epoch": 0.14174850628390906,
933
+ "grad_norm": NaN,
934
+ "learning_rate": 5.058238841571326e-05,
935
+ "loss": 0.0,
936
+ "step": 129
937
+ },
938
+ {
939
+ "epoch": 0.14284733191401688,
940
+ "grad_norm": NaN,
941
+ "learning_rate": 4.934848925057484e-05,
942
+ "loss": 0.0,
943
+ "step": 130
944
+ },
945
+ {
946
+ "epoch": 0.1439461575441247,
947
+ "grad_norm": NaN,
948
+ "learning_rate": 4.812240378410248e-05,
949
+ "loss": 0.0,
950
+ "step": 131
951
+ },
952
+ {
953
+ "epoch": 0.14504498317423253,
954
+ "grad_norm": NaN,
955
+ "learning_rate": 4.690450549380659e-05,
956
+ "loss": 0.0,
957
+ "step": 132
958
+ },
959
+ {
960
+ "epoch": 0.14614380880434036,
961
+ "grad_norm": NaN,
962
+ "learning_rate": 4.569516536330447e-05,
963
+ "loss": 0.0,
964
+ "step": 133
965
+ },
966
+ {
967
+ "epoch": 0.14724263443444818,
968
+ "grad_norm": NaN,
969
+ "learning_rate": 4.449475176931499e-05,
970
+ "loss": 0.0,
971
+ "step": 134
972
+ },
973
+ {
974
+ "epoch": 0.148341460064556,
975
+ "grad_norm": NaN,
976
+ "learning_rate": 4.3303630369447554e-05,
977
+ "loss": 0.0,
978
+ "step": 135
979
+ },
980
+ {
981
+ "epoch": 0.14944028569466383,
982
+ "grad_norm": NaN,
983
+ "learning_rate": 4.212216399081918e-05,
984
+ "loss": 0.0,
985
+ "step": 136
986
  }
987
  ],
988
  "logging_steps": 1,
 
1002
  "attributes": {}
1003
  }
1004
  },
1005
+ "total_flos": 7.965978331541668e+17,
1006
  "train_batch_size": 2,
1007
  "trial_name": null,
1008
  "trial_params": null