leixa committed on
Commit
ec8d69a
·
verified ·
1 Parent(s): 0f39cd7

Training in progress, step 272, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f9fbcddc5470d728295fa3156e8cc8e85bdb3e075c721f1a6f3de138e6afd3f
3
  size 692136856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88a03878f231ac4f2358b60bf20d360b61603d0bb56e3e2f9850289b513afff8
3
  size 692136856
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6828c635537ff06d969e1f45a0902d6456cd737e4146dcf425fbba25e04bed82
3
- size 85723284
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fff1faaf9379ceb848ff70a702d34c2578307cb69867f7f0f71e26ac5eb84b0
3
+ size 85723732
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:872fb8e64e364c7f25bf0433e726bc19c70aafc58093b42c433869870b0988fd
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57f4bf658c60e52b9df3eae773aa32b69c9f388c2f92794862084b3c8ac4cabd
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bbcef9424696e41c7961bd91f0570d39d59ef33af28ed19a0eb9e4f50ed1b09a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3520b6e9bfde48b403dd6f4096e526132e910f4d92bd802fb2e831d46f8ad41f
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.016036655211912942,
5
  "eval_steps": 34,
6
- "global_step": 238,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -624,6 +624,91 @@
624
  "eval_samples_per_second": 14.027,
625
  "eval_steps_per_second": 1.754,
626
  "step": 238
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
627
  }
628
  ],
629
  "logging_steps": 3,
@@ -643,7 +728,7 @@
643
  "attributes": {}
644
  }
645
  },
646
- "total_flos": 3.346898330352353e+17,
647
  "train_batch_size": 8,
648
  "trial_name": null,
649
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.018327605956471937,
5
  "eval_steps": 34,
6
+ "global_step": 272,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
624
  "eval_samples_per_second": 14.027,
625
  "eval_steps_per_second": 1.754,
626
  "step": 238
627
+ },
628
+ {
629
+ "epoch": 0.016171417020416416,
630
+ "grad_norm": 0.7089824676513672,
631
+ "learning_rate": 1.8044563402088684e-05,
632
+ "loss": 0.8143,
633
+ "step": 240
634
+ },
635
+ {
636
+ "epoch": 0.01637355973317162,
637
+ "grad_norm": 0.6729727983474731,
638
+ "learning_rate": 1.746635141803761e-05,
639
+ "loss": 0.7973,
640
+ "step": 243
641
+ },
642
+ {
643
+ "epoch": 0.016575702445926824,
644
+ "grad_norm": 0.7322119474411011,
645
+ "learning_rate": 1.6892538872607937e-05,
646
+ "loss": 0.8065,
647
+ "step": 246
648
+ },
649
+ {
650
+ "epoch": 0.01677784515868203,
651
+ "grad_norm": 0.7230767607688904,
652
+ "learning_rate": 1.6323460856167426e-05,
653
+ "loss": 0.8034,
654
+ "step": 249
655
+ },
656
+ {
657
+ "epoch": 0.016979987871437233,
658
+ "grad_norm": 0.6473975777626038,
659
+ "learning_rate": 1.5759449694252226e-05,
660
+ "loss": 0.7781,
661
+ "step": 252
662
+ },
663
+ {
664
+ "epoch": 0.01718213058419244,
665
+ "grad_norm": 0.7108025550842285,
666
+ "learning_rate": 1.5200834753498128e-05,
667
+ "loss": 0.8175,
668
+ "step": 255
669
+ },
670
+ {
671
+ "epoch": 0.017384273296947646,
672
+ "grad_norm": 0.672478199005127,
673
+ "learning_rate": 1.4647942249299707e-05,
674
+ "loss": 0.8328,
675
+ "step": 258
676
+ },
677
+ {
678
+ "epoch": 0.01758641600970285,
679
+ "grad_norm": 0.7066530585289001,
680
+ "learning_rate": 1.4101095055309746e-05,
681
+ "loss": 0.7698,
682
+ "step": 261
683
+ },
684
+ {
685
+ "epoch": 0.017788558722458055,
686
+ "grad_norm": 0.7493249773979187,
687
+ "learning_rate": 1.356061251489012e-05,
688
+ "loss": 0.8237,
689
+ "step": 264
690
+ },
691
+ {
692
+ "epoch": 0.01799070143521326,
693
+ "grad_norm": 0.6934426426887512,
694
+ "learning_rate": 1.302681025462424e-05,
695
+ "loss": 0.82,
696
+ "step": 267
697
+ },
698
+ {
699
+ "epoch": 0.018192844147968467,
700
+ "grad_norm": 0.6936736106872559,
701
+ "learning_rate": 1.2500000000000006e-05,
702
+ "loss": 0.8079,
703
+ "step": 270
704
+ },
705
+ {
706
+ "epoch": 0.018327605956471937,
707
+ "eval_loss": 0.8106825351715088,
708
+ "eval_runtime": 1782.0227,
709
+ "eval_samples_per_second": 14.026,
710
+ "eval_steps_per_second": 1.754,
711
+ "step": 272
712
  }
713
  ],
714
  "logging_steps": 3,
 
728
  "attributes": {}
729
  }
730
  },
731
+ "total_flos": 3.825026663259832e+17,
732
  "train_batch_size": 8,
733
  "trial_name": null,
734
  "trial_params": null