auxyus commited on
Commit
10b9218
·
verified ·
1 Parent(s): d79846f

Training in progress, step 99, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6eb2a7d1cd209b09e519aebd5c504a2bb79a6c247a725137edfba7e1054921c9
3
  size 167832240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1219f5894cdc662327e89dff0679318fa3da5e1878889f123cd2b73e79af67b
3
  size 167832240
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:681afb8fbb04b1289b320a58f2525c41f018444605d64eca984a764854b64345
3
  size 85723284
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f428f6095473bd81a123b2f9d1618581ac1d7e2ecd929aaf93b581b053326f64
3
  size 85723284
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eaef23ad6b9358048cfa2eb81866d8f39693a1c7df686b619cd9d9ea3051d3a5
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ca5ed7077b7e80b15b5b40bb67c00bb6aeb10626f97d2892620cc3f79d1da45
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:90f7a57653bcfdeb8d3e27706e79cc5f6e1f14bbd0ff72a8e33edaaa89ef8274
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43444b0a2aaeda40531bf13a694e8a3eeeb489f81435e9521c012609a5be4dc4
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.014322341083900628,
5
  "eval_steps": 9,
6
- "global_step": 81,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -276,6 +276,64 @@
276
  "eval_samples_per_second": 14.066,
277
  "eval_steps_per_second": 1.759,
278
  "step": 81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
279
  }
280
  ],
281
  "logging_steps": 3,
@@ -295,7 +353,7 @@
295
  "attributes": {}
296
  }
297
  },
298
- "total_flos": 1.1390710924600934e+17,
299
  "train_batch_size": 8,
300
  "trial_name": null,
301
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.017505083546989656,
5
  "eval_steps": 9,
6
+ "global_step": 99,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
276
  "eval_samples_per_second": 14.066,
277
  "eval_steps_per_second": 1.759,
278
  "step": 81
279
+ },
280
+ {
281
+ "epoch": 0.014852798161082132,
282
+ "grad_norm": 3.490804672241211,
283
+ "learning_rate": 7.597595192178702e-06,
284
+ "loss": 2.6716,
285
+ "step": 84
286
+ },
287
+ {
288
+ "epoch": 0.015383255238263636,
289
+ "grad_norm": 3.8302175998687744,
290
+ "learning_rate": 5.060297685041659e-06,
291
+ "loss": 2.7132,
292
+ "step": 87
293
+ },
294
+ {
295
+ "epoch": 0.015913712315445142,
296
+ "grad_norm": 4.030756950378418,
297
+ "learning_rate": 3.0153689607045845e-06,
298
+ "loss": 2.7684,
299
+ "step": 90
300
+ },
301
+ {
302
+ "epoch": 0.015913712315445142,
303
+ "eval_loss": 0.6902585625648499,
304
+ "eval_runtime": 677.2291,
305
+ "eval_samples_per_second": 14.065,
306
+ "eval_steps_per_second": 1.759,
307
+ "step": 90
308
+ },
309
+ {
310
+ "epoch": 0.016444169392626648,
311
+ "grad_norm": 3.537766218185425,
312
+ "learning_rate": 1.4852136862001764e-06,
313
+ "loss": 2.7386,
314
+ "step": 93
315
+ },
316
+ {
317
+ "epoch": 0.01697462646980815,
318
+ "grad_norm": 3.5246758460998535,
319
+ "learning_rate": 4.865965629214819e-07,
320
+ "loss": 2.8188,
321
+ "step": 96
322
+ },
323
+ {
324
+ "epoch": 0.017505083546989656,
325
+ "grad_norm": 3.6730434894561768,
326
+ "learning_rate": 3.04586490452119e-08,
327
+ "loss": 2.7731,
328
+ "step": 99
329
+ },
330
+ {
331
+ "epoch": 0.017505083546989656,
332
+ "eval_loss": 0.6894702911376953,
333
+ "eval_runtime": 676.8252,
334
+ "eval_samples_per_second": 14.073,
335
+ "eval_steps_per_second": 1.76,
336
+ "step": 99
337
  }
338
  ],
339
  "logging_steps": 3,
 
353
  "attributes": {}
354
  }
355
  },
356
+ "total_flos": 1.3921980018956698e+17,
357
  "train_batch_size": 8,
358
  "trial_name": null,
359
  "trial_params": null