eddysang commited on
Commit
de0945d
·
verified ·
1 Parent(s): 35ef6c7

Training in progress, step 153, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b5fcff8219bc1ba7c5267761e9d8e58b26e76bd77b4498ffe3eb4be61d378e5a
3
  size 97728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d95728915b1a394ce434bdab126026dc91a8c0cfe1828df436c3542e7b8ca363
3
  size 97728
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f9cc90d01583459809b77feb1f6a0bc3c1c8d2e63c24f720751cb3d88d193fe
3
  size 212298
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc85074e0a2def2208651d7aade37ecfc07595bf637d37454f6fd978cc2b7550
3
  size 212298
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a9288f3ba37e7c624f2b8517ced4aae32ee804313f3587dcfb02d5f13209458f
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56d39fb12ea6f2c20be6b5f3a72a7252abad25cc5b58318a939a5ab64512566d
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6aeb1fbb5e964bbc83fa43b049054867ad1faca8f43271132d241ae074069d5d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e76ff8614026ec7c5c2d9793615ca4e2f707e550ce0b5a4376af475431afe3f1
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.46077289571201696,
5
  "eval_steps": 50,
6
- "global_step": 136,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -983,6 +983,133 @@
983
  "learning_rate": 4.212216399081918e-05,
984
  "loss": 10.3439,
985
  "step": 136
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
986
  }
987
  ],
988
  "logging_steps": 1,
@@ -1002,7 +1129,7 @@
1002
  "attributes": {}
1003
  }
1004
  },
1005
- "total_flos": 58258116182016.0,
1006
  "train_batch_size": 2,
1007
  "trial_name": null,
1008
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.5183695076760191,
5
  "eval_steps": 50,
6
+ "global_step": 153,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
983
  "learning_rate": 4.212216399081918e-05,
984
  "loss": 10.3439,
985
  "step": 136
986
+ },
987
+ {
988
+ "epoch": 0.4641609317098994,
989
+ "grad_norm": 0.026069054380059242,
990
+ "learning_rate": 4.095071251953399e-05,
991
+ "loss": 10.3461,
992
+ "step": 137
993
+ },
994
+ {
995
+ "epoch": 0.46754896770778187,
996
+ "grad_norm": 0.020276112481951714,
997
+ "learning_rate": 3.978963279105821e-05,
998
+ "loss": 10.3445,
999
+ "step": 138
1000
+ },
1001
+ {
1002
+ "epoch": 0.4709370037056644,
1003
+ "grad_norm": 0.021585950627923012,
1004
+ "learning_rate": 3.863927848152472e-05,
1005
+ "loss": 10.3442,
1006
+ "step": 139
1007
+ },
1008
+ {
1009
+ "epoch": 0.47432503970354684,
1010
+ "grad_norm": 0.025700606405735016,
1011
+ "learning_rate": 3.750000000000001e-05,
1012
+ "loss": 10.3458,
1013
+ "step": 140
1014
+ },
1015
+ {
1016
+ "epoch": 0.47771307570142935,
1017
+ "grad_norm": 0.020632240921258926,
1018
+ "learning_rate": 3.637214438174593e-05,
1019
+ "loss": 10.3446,
1020
+ "step": 141
1021
+ },
1022
+ {
1023
+ "epoch": 0.4811011116993118,
1024
+ "grad_norm": 0.020391074940562248,
1025
+ "learning_rate": 3.525605518250964e-05,
1026
+ "loss": 10.3442,
1027
+ "step": 142
1028
+ },
1029
+ {
1030
+ "epoch": 0.48448914769719426,
1031
+ "grad_norm": 0.021430406719446182,
1032
+ "learning_rate": 3.415207237387297e-05,
1033
+ "loss": 10.3444,
1034
+ "step": 143
1035
+ },
1036
+ {
1037
+ "epoch": 0.48787718369507677,
1038
+ "grad_norm": 0.01741032488644123,
1039
+ "learning_rate": 3.3060532239693994e-05,
1040
+ "loss": 10.3448,
1041
+ "step": 144
1042
+ },
1043
+ {
1044
+ "epoch": 0.4912652196929592,
1045
+ "grad_norm": 0.026816904544830322,
1046
+ "learning_rate": 3.198176727367156e-05,
1047
+ "loss": 10.3467,
1048
+ "step": 145
1049
+ },
1050
+ {
1051
+ "epoch": 0.49465325569084173,
1052
+ "grad_norm": 0.0245627723634243,
1053
+ "learning_rate": 3.091610607806452e-05,
1054
+ "loss": 10.3448,
1055
+ "step": 146
1056
+ },
1057
+ {
1058
+ "epoch": 0.4980412916887242,
1059
+ "grad_norm": 0.024026039987802505,
1060
+ "learning_rate": 2.986387326359637e-05,
1061
+ "loss": 10.3459,
1062
+ "step": 147
1063
+ },
1064
+ {
1065
+ "epoch": 0.5014293276866066,
1066
+ "grad_norm": 0.023174704983830452,
1067
+ "learning_rate": 2.8825389350575624e-05,
1068
+ "loss": 10.3454,
1069
+ "step": 148
1070
+ },
1071
+ {
1072
+ "epoch": 0.5048173636844892,
1073
+ "grad_norm": 0.029328398406505585,
1074
+ "learning_rate": 2.78009706712622e-05,
1075
+ "loss": 10.3446,
1076
+ "step": 149
1077
+ },
1078
+ {
1079
+ "epoch": 0.5082053996823717,
1080
+ "grad_norm": 0.021034657955169678,
1081
+ "learning_rate": 2.6790929273509545e-05,
1082
+ "loss": 10.344,
1083
+ "step": 150
1084
+ },
1085
+ {
1086
+ "epoch": 0.5082053996823717,
1087
+ "eval_loss": 10.344643592834473,
1088
+ "eval_runtime": 3.8713,
1089
+ "eval_samples_per_second": 257.02,
1090
+ "eval_steps_per_second": 128.639,
1091
+ "step": 150
1092
+ },
1093
+ {
1094
+ "epoch": 0.5115934356802541,
1095
+ "grad_norm": 0.02339405193924904,
1096
+ "learning_rate": 2.579557282571196e-05,
1097
+ "loss": 10.3449,
1098
+ "step": 151
1099
+ },
1100
+ {
1101
+ "epoch": 0.5149814716781366,
1102
+ "grad_norm": 0.02014802023768425,
1103
+ "learning_rate": 2.4815204523085654e-05,
1104
+ "loss": 10.3447,
1105
+ "step": 152
1106
+ },
1107
+ {
1108
+ "epoch": 0.5183695076760191,
1109
+ "grad_norm": 0.02366970293223858,
1110
+ "learning_rate": 2.385012299531262e-05,
1111
+ "loss": 10.3456,
1112
+ "step": 153
1113
  }
1114
  ],
1115
  "logging_steps": 1,
 
1129
  "attributes": {}
1130
  }
1131
  },
1132
+ "total_flos": 65528686313472.0,
1133
  "train_batch_size": 2,
1134
  "trial_name": null,
1135
  "trial_params": null