Sara Price committed on
Commit
0acfece
·
verified ·
1 Parent(s): 19abd6e

Training in progress, step 2400, checkpoint

Browse files
last-checkpoint/model-00001-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8086d9e9b79193258c5554f4502f9322e00af302641773d3118ce46b980cb3c6
3
  size 4840658560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e32f3572a879128a25a247a0be200e23f39567f58fd956064c5c68b66cf5b5a9
3
  size 4840658560
last-checkpoint/model-00002-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:56b198c4b83bc1d81404f2070203af281e42ef84f9efe4ca520deca611550340
3
  size 4857206856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af444108fd67a3c6e63c47ec8640ec8b500d36ed5bf7b185fccf407ad0527f47
3
  size 4857206856
last-checkpoint/model-00003-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ff0a496ba7f4c450a91e1560d520cb6c526da2584f90d9af8c3b2578d54c8252
3
  size 4857206904
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb92006261485577e7645190694e9087dca6c01dee24ca945f56f7511967919c
3
  size 4857206904
last-checkpoint/model-00004-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:51b63f435bc570582e73f1cae994e9ecad465ba17bcbddac0ff9737665f1e3d3
3
  size 4857206904
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24e2d5176675f79b6e2a0149a55de72ce168b30b40ca8178c6800ec8607f99ac
3
  size 4857206904
last-checkpoint/model-00005-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a0e8e6ddd588b053d675b32801d5114c802e8ffd1780735243c5777c91268be7
3
  size 4857206904
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92e8488c95aae92eb0ae6aa54da0ba180cca2726f5bd061b36b57c208bc2d404
3
  size 4857206904
last-checkpoint/model-00006-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:18a1a50066d187445fef181ad28bef1ec74e799c0ef4a7df5fa6e641e220357b
3
  size 2684734256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcfba0ab5a7de6b7625c5951644381b869990b9cf3bc79c0d13d322d631e496f
3
  size 2684734256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 4.0,
5
  "eval_steps": 50,
6
- "global_step": 2000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -607,6 +607,126 @@
607
  "eval_samples_per_second": 69.033,
608
  "eval_steps_per_second": 3.54,
609
  "step": 2000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
610
  }
611
  ],
612
  "logging_steps": 50,
@@ -614,7 +734,7 @@
614
  "num_input_tokens_seen": 0,
615
  "num_train_epochs": 10,
616
  "save_steps": 400,
617
- "total_flos": 9.65560432024617e+16,
618
  "train_batch_size": 4,
619
  "trial_name": null,
620
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.8,
5
  "eval_steps": 50,
6
+ "global_step": 2400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
607
  "eval_samples_per_second": 69.033,
608
  "eval_steps_per_second": 3.54,
609
  "step": 2000
610
+ },
611
+ {
612
+ "epoch": 4.1,
613
+ "grad_norm": 1.5479576587677002,
614
+ "learning_rate": 1.469471562785891e-05,
615
+ "loss": 0.1167,
616
+ "step": 2050
617
+ },
618
+ {
619
+ "epoch": 4.1,
620
+ "eval_loss": 0.9011654853820801,
621
+ "eval_runtime": 2.738,
622
+ "eval_samples_per_second": 56.976,
623
+ "eval_steps_per_second": 2.922,
624
+ "step": 2050
625
+ },
626
+ {
627
+ "epoch": 4.2,
628
+ "grad_norm": 1.3002970218658447,
629
+ "learning_rate": 1.4383711467890776e-05,
630
+ "loss": 0.1186,
631
+ "step": 2100
632
+ },
633
+ {
634
+ "epoch": 4.2,
635
+ "eval_loss": 0.9147914052009583,
636
+ "eval_runtime": 3.018,
637
+ "eval_samples_per_second": 51.69,
638
+ "eval_steps_per_second": 2.651,
639
+ "step": 2100
640
+ },
641
+ {
642
+ "epoch": 4.3,
643
+ "grad_norm": 1.7996995449066162,
644
+ "learning_rate": 1.4067366430758004e-05,
645
+ "loss": 0.1153,
646
+ "step": 2150
647
+ },
648
+ {
649
+ "epoch": 4.3,
650
+ "eval_loss": 0.9160046577453613,
651
+ "eval_runtime": 3.6692,
652
+ "eval_samples_per_second": 42.516,
653
+ "eval_steps_per_second": 2.18,
654
+ "step": 2150
655
+ },
656
+ {
657
+ "epoch": 4.4,
658
+ "grad_norm": 1.1670547723770142,
659
+ "learning_rate": 1.3746065934159123e-05,
660
+ "loss": 0.1214,
661
+ "step": 2200
662
+ },
663
+ {
664
+ "epoch": 4.4,
665
+ "eval_loss": 0.9355931282043457,
666
+ "eval_runtime": 2.337,
667
+ "eval_samples_per_second": 66.753,
668
+ "eval_steps_per_second": 3.423,
669
+ "step": 2200
670
+ },
671
+ {
672
+ "epoch": 4.5,
673
+ "grad_norm": 1.1401852369308472,
674
+ "learning_rate": 1.342020143325669e-05,
675
+ "loss": 0.1193,
676
+ "step": 2250
677
+ },
678
+ {
679
+ "epoch": 4.5,
680
+ "eval_loss": 0.9175124764442444,
681
+ "eval_runtime": 2.2626,
682
+ "eval_samples_per_second": 68.947,
683
+ "eval_steps_per_second": 3.536,
684
+ "step": 2250
685
+ },
686
+ {
687
+ "epoch": 4.6,
688
+ "grad_norm": 0.8389841914176941,
689
+ "learning_rate": 1.3090169943749475e-05,
690
+ "loss": 0.1186,
691
+ "step": 2300
692
+ },
693
+ {
694
+ "epoch": 4.6,
695
+ "eval_loss": 0.9386661052703857,
696
+ "eval_runtime": 2.2532,
697
+ "eval_samples_per_second": 69.235,
698
+ "eval_steps_per_second": 3.55,
699
+ "step": 2300
700
+ },
701
+ {
702
+ "epoch": 4.7,
703
+ "grad_norm": 1.2419942617416382,
704
+ "learning_rate": 1.2756373558169992e-05,
705
+ "loss": 0.1187,
706
+ "step": 2350
707
+ },
708
+ {
709
+ "epoch": 4.7,
710
+ "eval_loss": 0.9336636662483215,
711
+ "eval_runtime": 2.2535,
712
+ "eval_samples_per_second": 69.225,
713
+ "eval_steps_per_second": 3.55,
714
+ "step": 2350
715
+ },
716
+ {
717
+ "epoch": 4.8,
718
+ "grad_norm": 1.0060522556304932,
719
+ "learning_rate": 1.2419218955996677e-05,
720
+ "loss": 0.1245,
721
+ "step": 2400
722
+ },
723
+ {
724
+ "epoch": 4.8,
725
+ "eval_loss": 0.9188296794891357,
726
+ "eval_runtime": 2.2614,
727
+ "eval_samples_per_second": 68.983,
728
+ "eval_steps_per_second": 3.538,
729
+ "step": 2400
730
  }
731
  ],
732
  "logging_steps": 50,
 
734
  "num_input_tokens_seen": 0,
735
  "num_train_epochs": 10,
736
  "save_steps": 400,
737
+ "total_flos": 1.1589925681181491e+17,
738
  "train_batch_size": 4,
739
  "trial_name": null,
740
  "trial_params": null