leixa committed on
Commit 0b18c8a · verified · 1 Parent(s): 0a0a34c

Training in progress, step 102, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:43bb5a29d334edbc3d32a698dd4e9aff97d5f6d6e815b2aaf90cb0cc29849d64
+ oid sha256:8f48e5e504da172c01e6ee3da031e8a9cae7d2213f5cb8705cf8cc99dc402c40
  size 692136856
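
The pointer file above stores only a SHA-256 object ID and a byte size; the adapter weights themselves live in LFS storage. A quick way to check a downloaded copy against its pointer is to hash it locally. This is a minimal standard-library sketch: the path and the oid/size values come from the new pointer in this commit, and `verify_lfs_pointer` is an illustrative helper, not part of any library.

```python
import hashlib
import os

def verify_lfs_pointer(path: str, expected_oid: str, expected_size: int) -> bool:
    """Check a local file against the sha256 oid and size recorded in its Git LFS pointer."""
    if os.path.getsize(path) != expected_size:
        return False
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        # Hash in 1 MiB chunks so the ~660 MB adapter file is never fully in memory.
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_oid

# Values from the updated adapter_model.safetensors pointer in this commit.
print(verify_lfs_pointer(
    "last-checkpoint/adapter_model.safetensors",
    "8f48e5e504da172c01e6ee3da031e8a9cae7d2213f5cb8705cf8cc99dc402c40",
    692136856,
))
```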
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5eb8e128771e16ec08f4fe017fd8d46665043ad4ca09b570f14b1e8bb18dabf2
+ oid sha256:edaa271176ce862e004df1f0dc773f6ce9648c87c722df370b11c98cad47b292
  size 85723284
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:8aa42479a448d19a558a303ea6bb44fbbe582f6a25a171397769feee5dda3588
+ oid sha256:b472766fb64a3faca824302324f58abb5e35dea39174d7a492c6d7c52b86b41c
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:57465ffa9dc280f2ea6034fe61064b0208bf36c7b5f569218c0e1296778ee099
+ oid sha256:baea37709b071cdabc5712824dc3f783ecd30d18a1e0d85c4d56cfebfa22b7e0
  size 1064
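
Besides the adapter weights, the checkpoint versions the optimizer moments (optimizer.pt), the RNG snapshot (rng_state.pth), and the learning-rate scheduler state (scheduler.pt); these are the pieces that let `Trainer.train(resume_from_checkpoint=...)` continue the run at step 102 instead of restarting it. A rough sketch for inspecting them locally, assuming the files have been pulled out of LFS; the exact dictionary contents depend on the optimizer and scheduler used, so read the comments as typical rather than guaranteed.

```python
import torch

ckpt = "last-checkpoint"

# Optimizer state dict: usually {"state": per-parameter moments, "param_groups": hyperparameters}.
opt_state = torch.load(f"{ckpt}/optimizer.pt", map_location="cpu", weights_only=False)
print(opt_state.keys())

# Scheduler state: step counter and the most recent learning-rate values.
sched_state = torch.load(f"{ckpt}/scheduler.pt", map_location="cpu", weights_only=False)
print(sched_state)

# RNG snapshot (Python / NumPy / CPU / CUDA generators) so a resumed run
# reproduces the same data order and dropout masks.
rng_state = torch.load(f"{ckpt}/rng_state.pth", map_location="cpu", weights_only=False)
print(rng_state.keys())
```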
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.004581901489117984,
+ "epoch": 0.006872852233676976,
  "eval_steps": 34,
- "global_step": 68,
+ "global_step": 102,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -185,6 +185,98 @@
  "eval_samples_per_second": 14.03,
  "eval_steps_per_second": 1.754,
  "step": 68
+ },
+ {
+ "epoch": 0.004649282393369719,
+ "grad_norm": 0.7278714776039124,
+ "learning_rate": 4.7229269307953235e-05,
+ "loss": 0.741,
+ "step": 69
+ },
+ {
+ "epoch": 0.004851425106124924,
+ "grad_norm": 0.9090484380722046,
+ "learning_rate": 4.694636022946012e-05,
+ "loss": 0.8075,
+ "step": 72
+ },
+ {
+ "epoch": 0.00505356781888013,
+ "grad_norm": 0.8673194050788879,
+ "learning_rate": 4.665063509461097e-05,
+ "loss": 0.8395,
+ "step": 75
+ },
+ {
+ "epoch": 0.005255710531635335,
+ "grad_norm": 0.7307804822921753,
+ "learning_rate": 4.6342266598556814e-05,
+ "loss": 0.7995,
+ "step": 78
+ },
+ {
+ "epoch": 0.005457853244390539,
+ "grad_norm": 0.9184255003929138,
+ "learning_rate": 4.6021434819815555e-05,
+ "loss": 0.8318,
+ "step": 81
+ },
+ {
+ "epoch": 0.005659995957145745,
+ "grad_norm": 0.7924419045448303,
+ "learning_rate": 4.568832711511125e-05,
+ "loss": 0.8095,
+ "step": 84
+ },
+ {
+ "epoch": 0.00586213866990095,
+ "grad_norm": 0.7330142259597778,
+ "learning_rate": 4.534313800996299e-05,
+ "loss": 0.7674,
+ "step": 87
+ },
+ {
+ "epoch": 0.006064281382656155,
+ "grad_norm": 0.7591224908828735,
+ "learning_rate": 4.498606908508754e-05,
+ "loss": 0.8409,
+ "step": 90
+ },
+ {
+ "epoch": 0.006266424095411361,
+ "grad_norm": 0.7741730213165283,
+ "learning_rate": 4.46173288586818e-05,
+ "loss": 0.8541,
+ "step": 93
+ },
+ {
+ "epoch": 0.006468566808166565,
+ "grad_norm": 0.7202086448669434,
+ "learning_rate": 4.4237132664654154e-05,
+ "loss": 0.85,
+ "step": 96
+ },
+ {
+ "epoch": 0.0066707095209217705,
+ "grad_norm": 0.7738878726959229,
+ "learning_rate": 4.384570252687542e-05,
+ "loss": 0.8571,
+ "step": 99
+ },
+ {
+ "epoch": 0.006872852233676976,
+ "grad_norm": 0.7431773543357849,
+ "learning_rate": 4.344326702952326e-05,
+ "loss": 0.8264,
+ "step": 102
+ },
+ {
+ "epoch": 0.006872852233676976,
+ "eval_loss": 0.8239989280700684,
+ "eval_runtime": 1781.4353,
+ "eval_samples_per_second": 14.031,
+ "eval_steps_per_second": 1.754,
+ "step": 102
  }
  ],
  "logging_steps": 3,
@@ -204,7 +296,7 @@
  "attributes": {}
  }
  },
- "total_flos": 9.56256665814958e+16,
+ "total_flos": 1.4343849987224371e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null