alchemist69 committed
Commit 7138b9c (verified) · Parent(s): d5aae12

Training in progress, step 169, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:19c1114cf9de6f90bb1940716efc651b9605c6686d762ccf3b799f2811954900
+oid sha256:f22aaec3cac7dbfc15d4391d11c107f762520e2a37f8f5c2d98b942859cfa265
 size 34456
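
Each checkpoint file in this commit is stored through Git LFS, so the diffs only touch the pointer: the "oid sha256:" line identifies the new blob and "size" is the byte count of the payload. Below is a minimal sketch, not part of the commit, for checking that a locally downloaded copy matches the new oid; the local path is illustrative.

import hashlib
from pathlib import Path

def sha256_of(path: Path, chunk_size: int = 1 << 20) -> str:
    # Stream the file in 1 MiB chunks so larger checkpoints never sit fully in memory.
    digest = hashlib.sha256()
    with path.open("rb") as fh:
        for chunk in iter(lambda: fh.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Illustrative local path; the expected value is the new oid from the pointer above.
local_file = Path("last-checkpoint/adapter_model.safetensors")
expected_oid = "f22aaec3cac7dbfc15d4391d11c107f762520e2a37f8f5c2d98b942859cfa265"
print(sha256_of(local_file) == expected_oid)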
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6288c9042eef800d7a76ae542ddc00508d162489808b760e57c411c54be14bbb
+oid sha256:507707f766c154dbb2ac88fd1a5769dacf95d66b6840c7348b9c16e3b5637d22
 size 73222
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:91d3d40ed55247e1860a6d2a1fe1407fda345f4df777e8940c311c2a22735bb4
+oid sha256:5f2e708e3d8d8f020284c0e4a1ec7c83f8e6b27b3fb83632b95fc16e110a981b
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fd6d06d8784fc6f54a8110cd39f154f94ee6f23e5166c1919457b2653a531f69
+oid sha256:72b7ae54645fe0d156a539f19196af82ec47c612b8d4b4a4f763b2303308dc68
 size 1064
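
optimizer.pt, rng_state.pth and scheduler.pt are ordinary torch-serialized state written by the transformers Trainer, so once the LFS blobs are downloaded they can be inspected directly. A small sketch under that assumption; the paths are illustrative and not part of the commit.

import torch

# weights_only=False because these files contain more than raw tensors.
scheduler_state = torch.load("last-checkpoint/scheduler.pt", map_location="cpu", weights_only=False)
rng_state = torch.load("last-checkpoint/rng_state.pth", map_location="cpu", weights_only=False)

print(scheduler_state)  # LR scheduler state_dict, e.g. last_epoch and last learning rate
print(list(rng_state))  # RNG states saved by the Trainer (python, numpy, cpu, ...)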
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 11.5,
   "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.8875739644970414,
+  "epoch": 1.0,
   "eval_steps": 150,
-  "global_step": 150,
+  "global_step": 169,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1073,6 +1073,139 @@
       "eval_samples_per_second": 199.677,
       "eval_steps_per_second": 50.445,
       "step": 150
+    },
+    {
+      "epoch": 0.893491124260355,
+      "grad_norm": 0.0005075408262200654,
+      "learning_rate": 3.557894822359864e-06,
+      "loss": 46.0,
+      "step": 151
+    },
+    {
+      "epoch": 0.8994082840236687,
+      "grad_norm": 0.00039445646689273417,
+      "learning_rate": 3.1776812947865385e-06,
+      "loss": 46.0,
+      "step": 152
+    },
+    {
+      "epoch": 0.9053254437869822,
+      "grad_norm": 0.00035393863799981773,
+      "learning_rate": 2.8182821739164534e-06,
+      "loss": 46.0,
+      "step": 153
+    },
+    {
+      "epoch": 0.9112426035502958,
+      "grad_norm": 0.0005202327738516033,
+      "learning_rate": 2.4798572271356846e-06,
+      "loss": 46.0,
+      "step": 154
+    },
+    {
+      "epoch": 0.9171597633136095,
+      "grad_norm": 0.0002673721464816481,
+      "learning_rate": 2.1625568979651014e-06,
+      "loss": 46.0,
+      "step": 155
+    },
+    {
+      "epoch": 0.9230769230769231,
+      "grad_norm": 0.0005175816477276385,
+      "learning_rate": 1.8665222391821169e-06,
+      "loss": 46.0,
+      "step": 156
+    },
+    {
+      "epoch": 0.9289940828402367,
+      "grad_norm": 0.0004950053407810628,
+      "learning_rate": 1.5918848501170647e-06,
+      "loss": 46.0,
+      "step": 157
+    },
+    {
+      "epoch": 0.9349112426035503,
+      "grad_norm": 0.0003163626533932984,
+      "learning_rate": 1.338766818151982e-06,
+      "loss": 46.0,
+      "step": 158
+    },
+    {
+      "epoch": 0.9408284023668639,
+      "grad_norm": 0.0004616921942215413,
+      "learning_rate": 1.1072806644478739e-06,
+      "loss": 46.0,
+      "step": 159
+    },
+    {
+      "epoch": 0.9467455621301775,
+      "grad_norm": 0.0003856797411572188,
+      "learning_rate": 8.975292939244928e-07,
+      "loss": 46.0,
+      "step": 160
+    },
+    {
+      "epoch": 0.9526627218934911,
+      "grad_norm": 0.0004804205091204494,
+      "learning_rate": 7.096059495149854e-07,
+      "loss": 46.0,
+      "step": 161
+    },
+    {
+      "epoch": 0.9585798816568047,
+      "grad_norm": 0.0006341671105474234,
+      "learning_rate": 5.435941707156389e-07,
+      "loss": 46.0,
+      "step": 162
+    },
+    {
+      "epoch": 0.9644970414201184,
+      "grad_norm": 0.0005707453237846494,
+      "learning_rate": 3.9956775644920395e-07,
+      "loss": 46.0,
+      "step": 163
+    },
+    {
+      "epoch": 0.9704142011834319,
+      "grad_norm": 0.0003669565194286406,
+      "learning_rate": 2.77590732258326e-07,
+      "loss": 46.0,
+      "step": 164
+    },
+    {
+      "epoch": 0.9763313609467456,
+      "grad_norm": 0.0006033536046743393,
+      "learning_rate": 1.7771732184357904e-07,
+      "loss": 46.0,
+      "step": 165
+    },
+    {
+      "epoch": 0.9822485207100592,
+      "grad_norm": 0.00046527519589290023,
+      "learning_rate": 9.999192295886972e-08,
+      "loss": 46.0,
+      "step": 166
+    },
+    {
+      "epoch": 0.9881656804733728,
+      "grad_norm": 0.0005233949050307274,
+      "learning_rate": 4.4449087674847125e-08,
+      "loss": 46.0,
+      "step": 167
+    },
+    {
+      "epoch": 0.9940828402366864,
+      "grad_norm": 0.0007509227143600583,
+      "learning_rate": 1.111350701909486e-08,
+      "loss": 46.0,
+      "step": 168
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 0.00042899660184048116,
+      "learning_rate": 0.0,
+      "loss": 46.0,
+      "step": 169
     }
   ],
   "logging_steps": 1,
@@ -1096,12 +1229,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 23656569176064.0,
+  "total_flos": 26545806065664.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null