eddysang committed (verified)
Commit c618736 · 1 Parent(s): f2b4772

Training in progress, step 153, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:79c87b15f36b81c5771744e3848e2cdd7e46e29d4a7555f5c16cee7aed4f179f
+oid sha256:b745462f26110d8f5e6c0bcef91055e14492965f4fda21faf63549fad0fcc448
 size 335604696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:90b4b10f5005e7a8b6f4ef7581dc4126c0128527231b5202113db9bfacecb9be
+oid sha256:aedeeacab12f5e39f365b4749f7ef795c5b68cd1bab0b91369367d075cadfc66
 size 671466706
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4c985768cd88156b279865b4af398ed65c42e2eb474c28476bc122fd2d648fd1
+oid sha256:afb210574debeca01beef086494f1b0da6d8ee8853b3831ec7094b554157981c
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6aeb1fbb5e964bbc83fa43b049054867ad1faca8f43271132d241ae074069d5d
+oid sha256:e76ff8614026ec7c5c2d9793615ca4e2f707e550ce0b5a4376af475431afe3f1
 size 1064
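
The four files above are stored through Git LFS, so only their pointer files change in this commit: a new `oid sha256:` digest with an unchanged `size`. As a minimal sketch (not part of the commit itself), assuming the checkpoint has been pulled locally with `git lfs pull` and using an illustrative path, the new pointer values can be checked against the downloaded files like this:

```python
import hashlib
from pathlib import Path

# Illustrative path: assumes a local clone with the LFS objects pulled.
CHECKPOINT_FILE = Path("last-checkpoint/adapter_model.safetensors")

def sha256_of(path: Path, chunk_size: int = 1 << 20) -> str:
    """Stream the file in chunks and return its hex SHA-256 digest."""
    digest = hashlib.sha256()
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# The size should match the pointer's `size` line (335604696 bytes for the adapter),
# and the digest should match the new `oid sha256:` value in this commit.
print("size  :", CHECKPOINT_FILE.stat().st_size)
print("sha256:", sha256_of(CHECKPOINT_FILE))
```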
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
 "best_metric": null,
 "best_model_checkpoint": null,
-"epoch": 0.6302679217958002,
+"epoch": 0.7090514120202752,
 "eval_steps": 50,
-"global_step": 136,
+"global_step": 153,
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
@@ -983,6 +983,133 @@
 "learning_rate": 4.212216399081918e-05,
 "loss": 0.2116,
 "step": 136
+},
+{
+"epoch": 0.634902244750181,
+"grad_norm": 1.092250943183899,
+"learning_rate": 4.095071251953399e-05,
+"loss": 0.2411,
+"step": 137
+},
+{
+"epoch": 0.6395365677045619,
+"grad_norm": 1.3056756258010864,
+"learning_rate": 3.978963279105821e-05,
+"loss": 0.3435,
+"step": 138
+},
+{
+"epoch": 0.6441708906589428,
+"grad_norm": 1.1005077362060547,
+"learning_rate": 3.863927848152472e-05,
+"loss": 0.2612,
+"step": 139
+},
+{
+"epoch": 0.6488052136133237,
+"grad_norm": 1.237870454788208,
+"learning_rate": 3.750000000000001e-05,
+"loss": 0.3284,
+"step": 140
+},
+{
+"epoch": 0.6534395365677046,
+"grad_norm": 1.3307782411575317,
+"learning_rate": 3.637214438174593e-05,
+"loss": 0.2536,
+"step": 141
+},
+{
+"epoch": 0.6580738595220854,
+"grad_norm": 1.4346413612365723,
+"learning_rate": 3.525605518250964e-05,
+"loss": 0.2911,
+"step": 142
+},
+{
+"epoch": 0.6627081824764663,
+"grad_norm": 1.2083615064620972,
+"learning_rate": 3.415207237387297e-05,
+"loss": 0.233,
+"step": 143
+},
+{
+"epoch": 0.6673425054308472,
+"grad_norm": 1.4748581647872925,
+"learning_rate": 3.3060532239693994e-05,
+"loss": 0.319,
+"step": 144
+},
+{
+"epoch": 0.6719768283852281,
+"grad_norm": 1.2144207954406738,
+"learning_rate": 3.198176727367156e-05,
+"loss": 0.1959,
+"step": 145
+},
+{
+"epoch": 0.676611151339609,
+"grad_norm": 1.5745162963867188,
+"learning_rate": 3.091610607806452e-05,
+"loss": 0.3077,
+"step": 146
+},
+{
+"epoch": 0.6812454742939898,
+"grad_norm": 1.1295483112335205,
+"learning_rate": 2.986387326359637e-05,
+"loss": 0.2328,
+"step": 147
+},
+{
+"epoch": 0.6858797972483708,
+"grad_norm": 1.218430757522583,
+"learning_rate": 2.8825389350575624e-05,
+"loss": 0.2504,
+"step": 148
+},
+{
+"epoch": 0.6905141202027516,
+"grad_norm": 1.1782724857330322,
+"learning_rate": 2.78009706712622e-05,
+"loss": 0.2519,
+"step": 149
+},
+{
+"epoch": 0.6951484431571325,
+"grad_norm": 1.3294053077697754,
+"learning_rate": 2.6790929273509545e-05,
+"loss": 0.249,
+"step": 150
+},
+{
+"epoch": 0.6951484431571325,
+"eval_loss": 0.25855064392089844,
+"eval_runtime": 135.0303,
+"eval_samples_per_second": 5.384,
+"eval_steps_per_second": 2.696,
+"step": 150
+},
+{
+"epoch": 0.6997827661115134,
+"grad_norm": 0.8559562563896179,
+"learning_rate": 2.579557282571196e-05,
+"loss": 0.1331,
+"step": 151
+},
+{
+"epoch": 0.7044170890658943,
+"grad_norm": 1.5178470611572266,
+"learning_rate": 2.4815204523085654e-05,
+"loss": 0.3093,
+"step": 152
+},
+{
+"epoch": 0.7090514120202752,
+"grad_norm": 1.3319201469421387,
+"learning_rate": 2.385012299531262e-05,
+"loss": 0.3123,
+"step": 153
 }
 ],
 "logging_steps": 1,
@@ -1002,7 +1129,7 @@
 "attributes": {}
 }
 },
-"total_flos": 8.116593719550935e+17,
+"total_flos": 9.131167934494802e+17,
 "train_batch_size": 2,
 "trial_name": null,
 "trial_params": null