dixedus commited on
Commit
3ec1afe
·
verified ·
1 Parent(s): 6902ebb

Training in progress, step 153, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e476837c264c187d86e0fad575c9ce12c9e61fb9e5ad9867160b79e2ffd22dd1
3
  size 34793120
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76166dd110f948403481a165ffeebc91d567372862f2190fa405fdccf5b75ac8
3
  size 34793120
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fbcd527c4a25607f123a9175463e52fecf4cbf19113043db3c3d264cf07577b9
3
  size 18132116
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fe2a73336e970b01cddbe4a75cec8fbc38f1a771c15ecd6bb8a9c4d423c3af4
3
  size 18132116
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:691c06ffc8e47879d3e6ee441edbd180275d54de37278fc931ad6385e3470814
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a54b2066ddb168e0e8a2c8b49e5058dff33453160b388ac489494504a9cf08e
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ffd0ef2a827b219b75915f5a88a30c53ebe86f536eec93a6252baab983329eb7
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d99dc7a150ff6ed818c8735e9e9061e757b4b841b8d74bde2c9d7a2195ff136
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.7503448275862069,
5
  "eval_steps": 17,
6
- "global_step": 136,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -394,6 +394,56 @@
394
  "eval_samples_per_second": 68.417,
395
  "eval_steps_per_second": 8.748,
396
  "step": 136
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
397
  }
398
  ],
399
  "logging_steps": 3,
@@ -413,7 +463,7 @@
413
  "attributes": {}
414
  }
415
  },
416
- "total_flos": 8645118378639360.0,
417
  "train_batch_size": 8,
418
  "trial_name": null,
419
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.8441379310344828,
5
  "eval_steps": 17,
6
+ "global_step": 153,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
394
  "eval_samples_per_second": 68.417,
395
  "eval_steps_per_second": 8.748,
396
  "step": 136
397
+ },
398
+ {
399
+ "epoch": 0.7613793103448275,
400
+ "grad_norm": 0.5550276637077332,
401
+ "learning_rate": 2.405152131093926e-05,
402
+ "loss": 2.0373,
403
+ "step": 138
404
+ },
405
+ {
406
+ "epoch": 0.7779310344827586,
407
+ "grad_norm": 0.5338502526283264,
408
+ "learning_rate": 2.196424713241637e-05,
409
+ "loss": 2.1343,
410
+ "step": 141
411
+ },
412
+ {
413
+ "epoch": 0.7944827586206896,
414
+ "grad_norm": 0.47092920541763306,
415
+ "learning_rate": 1.9945942635848748e-05,
416
+ "loss": 1.9688,
417
+ "step": 144
418
+ },
419
+ {
420
+ "epoch": 0.8110344827586207,
421
+ "grad_norm": 0.5254806876182556,
422
+ "learning_rate": 1.800157297483417e-05,
423
+ "loss": 2.0718,
424
+ "step": 147
425
+ },
426
+ {
427
+ "epoch": 0.8275862068965517,
428
+ "grad_norm": 0.5177105069160461,
429
+ "learning_rate": 1.6135921418712956e-05,
430
+ "loss": 2.188,
431
+ "step": 150
432
+ },
433
+ {
434
+ "epoch": 0.8441379310344828,
435
+ "grad_norm": 0.49027958512306213,
436
+ "learning_rate": 1.435357758543015e-05,
437
+ "loss": 2.1538,
438
+ "step": 153
439
+ },
440
+ {
441
+ "epoch": 0.8441379310344828,
442
+ "eval_loss": 2.107172966003418,
443
+ "eval_runtime": 4.4092,
444
+ "eval_samples_per_second": 69.174,
445
+ "eval_steps_per_second": 8.845,
446
+ "step": 153
447
  }
448
  ],
449
  "logging_steps": 3,
 
463
  "attributes": {}
464
  }
465
  },
466
+ "total_flos": 9725758175969280.0,
467
  "train_batch_size": 8,
468
  "trial_name": null,
469
  "trial_params": null