eddysang commited on
Commit
e858296
·
verified ·
1 Parent(s): 240b5ca

Training in progress, step 153, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d16dfef8d2f27178a4d1f4edab1fa39d16a60f03abb441761d33f24ee556f18e
3
  size 640009682
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b06fefafb99b72eb9a89a4f22d4957163eefd15702bb61db525be3f17d1edf9c
3
  size 640009682
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:29ed09c79b26c5877cb26e4ced9f9a10914f152b078ea4dd8bdd5fffdeeaad84
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fcdf6ad7bcb0673e059b6db1cf40c1b430a276d46df182e42ba7b0335dbf78a
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6aeb1fbb5e964bbc83fa43b049054867ad1faca8f43271132d241ae074069d5d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e76ff8614026ec7c5c2d9793615ca4e2f707e550ce0b5a4376af475431afe3f1
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.14944028569466383,
5
  "eval_steps": 50,
6
- "global_step": 136,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -983,6 +983,133 @@
983
  "learning_rate": 4.212216399081918e-05,
984
  "loss": 0.0,
985
  "step": 136
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
986
  }
987
  ],
988
  "logging_steps": 1,
@@ -1002,7 +1129,7 @@
1002
  "attributes": {}
1003
  }
1004
  },
1005
- "total_flos": 7.965978331541668e+17,
1006
  "train_batch_size": 2,
1007
  "trial_name": null,
1008
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.1681203214064968,
5
  "eval_steps": 50,
6
+ "global_step": 153,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
983
  "learning_rate": 4.212216399081918e-05,
984
  "loss": 0.0,
985
  "step": 136
986
+ },
987
+ {
988
+ "epoch": 0.15053911132477166,
989
+ "grad_norm": NaN,
990
+ "learning_rate": 4.095071251953399e-05,
991
+ "loss": 0.0,
992
+ "step": 137
993
+ },
994
+ {
995
+ "epoch": 0.15163793695487948,
996
+ "grad_norm": NaN,
997
+ "learning_rate": 3.978963279105821e-05,
998
+ "loss": 0.0,
999
+ "step": 138
1000
+ },
1001
+ {
1002
+ "epoch": 0.1527367625849873,
1003
+ "grad_norm": NaN,
1004
+ "learning_rate": 3.863927848152472e-05,
1005
+ "loss": 0.0,
1006
+ "step": 139
1007
+ },
1008
+ {
1009
+ "epoch": 0.1538355882150951,
1010
+ "grad_norm": NaN,
1011
+ "learning_rate": 3.750000000000001e-05,
1012
+ "loss": 0.0,
1013
+ "step": 140
1014
+ },
1015
+ {
1016
+ "epoch": 0.15493441384520293,
1017
+ "grad_norm": NaN,
1018
+ "learning_rate": 3.637214438174593e-05,
1019
+ "loss": 0.0,
1020
+ "step": 141
1021
+ },
1022
+ {
1023
+ "epoch": 0.15603323947531075,
1024
+ "grad_norm": NaN,
1025
+ "learning_rate": 3.525605518250964e-05,
1026
+ "loss": 0.0,
1027
+ "step": 142
1028
+ },
1029
+ {
1030
+ "epoch": 0.15713206510541858,
1031
+ "grad_norm": NaN,
1032
+ "learning_rate": 3.415207237387297e-05,
1033
+ "loss": 0.0,
1034
+ "step": 143
1035
+ },
1036
+ {
1037
+ "epoch": 0.1582308907355264,
1038
+ "grad_norm": NaN,
1039
+ "learning_rate": 3.3060532239693994e-05,
1040
+ "loss": 0.0,
1041
+ "step": 144
1042
+ },
1043
+ {
1044
+ "epoch": 0.15932971636563423,
1045
+ "grad_norm": NaN,
1046
+ "learning_rate": 3.198176727367156e-05,
1047
+ "loss": 0.0,
1048
+ "step": 145
1049
+ },
1050
+ {
1051
+ "epoch": 0.16042854199574205,
1052
+ "grad_norm": NaN,
1053
+ "learning_rate": 3.091610607806452e-05,
1054
+ "loss": 0.0,
1055
+ "step": 146
1056
+ },
1057
+ {
1058
+ "epoch": 0.16152736762584988,
1059
+ "grad_norm": NaN,
1060
+ "learning_rate": 2.986387326359637e-05,
1061
+ "loss": 0.0,
1062
+ "step": 147
1063
+ },
1064
+ {
1065
+ "epoch": 0.1626261932559577,
1066
+ "grad_norm": NaN,
1067
+ "learning_rate": 2.8825389350575624e-05,
1068
+ "loss": 0.0,
1069
+ "step": 148
1070
+ },
1071
+ {
1072
+ "epoch": 0.16372501888606553,
1073
+ "grad_norm": NaN,
1074
+ "learning_rate": 2.78009706712622e-05,
1075
+ "loss": 0.0,
1076
+ "step": 149
1077
+ },
1078
+ {
1079
+ "epoch": 0.16482384451617335,
1080
+ "grad_norm": NaN,
1081
+ "learning_rate": 2.6790929273509545e-05,
1082
+ "loss": 0.0,
1083
+ "step": 150
1084
+ },
1085
+ {
1086
+ "epoch": 0.16482384451617335,
1087
+ "eval_loss": NaN,
1088
+ "eval_runtime": 585.8565,
1089
+ "eval_samples_per_second": 5.233,
1090
+ "eval_steps_per_second": 2.617,
1091
+ "step": 150
1092
+ },
1093
+ {
1094
+ "epoch": 0.16592267014628115,
1095
+ "grad_norm": NaN,
1096
+ "learning_rate": 2.579557282571196e-05,
1097
+ "loss": 0.0,
1098
+ "step": 151
1099
+ },
1100
+ {
1101
+ "epoch": 0.16702149577638897,
1102
+ "grad_norm": NaN,
1103
+ "learning_rate": 2.4815204523085654e-05,
1104
+ "loss": 0.0,
1105
+ "step": 152
1106
+ },
1107
+ {
1108
+ "epoch": 0.1681203214064968,
1109
+ "grad_norm": NaN,
1110
+ "learning_rate": 2.385012299531262e-05,
1111
+ "loss": 0.0,
1112
+ "step": 153
1113
  }
1114
  ],
1115
  "logging_steps": 1,
 
1129
  "attributes": {}
1130
  }
1131
  },
1132
+ "total_flos": 8.953713627035075e+17,
1133
  "train_batch_size": 2,
1134
  "trial_name": null,
1135
  "trial_params": null