dixedus commited on
Commit
fa6ea34
·
verified ·
1 Parent(s): 0bc36c0

Training in progress, step 136, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:246bf8076c518304bac7d2e4a017f4c3112377a1aa2dc942314ee72e66aad1c1
3
  size 34793120
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e476837c264c187d86e0fad575c9ce12c9e61fb9e5ad9867160b79e2ffd22dd1
3
  size 34793120
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d5b03ad2888d0d966800d8a482ae7057a420d358a09e6227fbf94fc5b3dc03c3
3
  size 18132116
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fbcd527c4a25607f123a9175463e52fecf4cbf19113043db3c3d264cf07577b9
3
  size 18132116
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:51b20048a8c1ca3b5378d455de4604d2f5e3f3bf98e06ac306e6b0abcffe9c8c
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:691c06ffc8e47879d3e6ee441edbd180275d54de37278fc931ad6385e3470814
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c91934808157be4b4581cbac88c1dcb8ab73e7092f7b8aa05c4fbac8ab77615f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffd0ef2a827b219b75915f5a88a30c53ebe86f536eec93a6252baab983329eb7
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.6565517241379311,
5
  "eval_steps": 17,
6
- "global_step": 119,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -344,6 +344,56 @@
344
  "eval_samples_per_second": 68.518,
345
  "eval_steps_per_second": 8.761,
346
  "step": 119
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
347
  }
348
  ],
349
  "logging_steps": 3,
@@ -363,7 +413,7 @@
363
  "attributes": {}
364
  }
365
  },
366
- "total_flos": 7564478581309440.0,
367
  "train_batch_size": 8,
368
  "trial_name": null,
369
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.7503448275862069,
5
  "eval_steps": 17,
6
+ "global_step": 136,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
344
  "eval_samples_per_second": 68.518,
345
  "eval_steps_per_second": 8.761,
346
  "step": 119
347
+ },
348
+ {
349
+ "epoch": 0.6620689655172414,
350
+ "grad_norm": 0.5721688270568848,
351
+ "learning_rate": 3.772572564296005e-05,
352
+ "loss": 2.0894,
353
+ "step": 120
354
+ },
355
+ {
356
+ "epoch": 0.6786206896551724,
357
+ "grad_norm": 0.5939153432846069,
358
+ "learning_rate": 3.533749813077677e-05,
359
+ "loss": 2.0367,
360
+ "step": 123
361
+ },
362
+ {
363
+ "epoch": 0.6951724137931035,
364
+ "grad_norm": 0.5539590716362,
365
+ "learning_rate": 3.298534127791785e-05,
366
+ "loss": 2.1166,
367
+ "step": 126
368
+ },
369
+ {
370
+ "epoch": 0.7117241379310345,
371
+ "grad_norm": 0.5420939326286316,
372
+ "learning_rate": 3.0675041535377405e-05,
373
+ "loss": 1.9757,
374
+ "step": 129
375
+ },
376
+ {
377
+ "epoch": 0.7282758620689656,
378
+ "grad_norm": 0.5419110059738159,
379
+ "learning_rate": 2.8412282383075363e-05,
380
+ "loss": 1.9603,
381
+ "step": 132
382
+ },
383
+ {
384
+ "epoch": 0.7448275862068966,
385
+ "grad_norm": 0.5101600289344788,
386
+ "learning_rate": 2.6202630348146324e-05,
387
+ "loss": 1.9635,
388
+ "step": 135
389
+ },
390
+ {
391
+ "epoch": 0.7503448275862069,
392
+ "eval_loss": 2.1131086349487305,
393
+ "eval_runtime": 4.4579,
394
+ "eval_samples_per_second": 68.417,
395
+ "eval_steps_per_second": 8.748,
396
+ "step": 136
397
  }
398
  ],
399
  "logging_steps": 3,
 
413
  "attributes": {}
414
  }
415
  },
416
+ "total_flos": 8645118378639360.0,
417
  "train_batch_size": 8,
418
  "trial_name": null,
419
  "trial_params": null