Sara Price committed on
Commit
ad909da
·
verified ·
1 Parent(s): be0314d

Training in progress, step 2800, checkpoint

Browse files
last-checkpoint/model-00001-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e32f3572a879128a25a247a0be200e23f39567f58fd956064c5c68b66cf5b5a9
3
  size 4840658560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:899d935b5c5a49d9246d93bb12f53fb2a5d65c9908cc6913af4e648924301803
3
  size 4840658560
last-checkpoint/model-00002-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:af444108fd67a3c6e63c47ec8640ec8b500d36ed5bf7b185fccf407ad0527f47
3
  size 4857206856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b447174ff33cd1c0d7a82b337d96f938df8c838d65d6f9fde9fd7985fc02cd93
3
  size 4857206856
last-checkpoint/model-00003-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bb92006261485577e7645190694e9087dca6c01dee24ca945f56f7511967919c
3
  size 4857206904
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd870d340d98656a432d2d32968441795a4c6b494ed4ea44a770d0edc35c3865
3
  size 4857206904
last-checkpoint/model-00004-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:24e2d5176675f79b6e2a0149a55de72ce168b30b40ca8178c6800ec8607f99ac
3
  size 4857206904
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d7dfc7b648b3aa287c824b7feecc82122461caa52be5de3d18dcdd6975ecbbc
3
  size 4857206904
last-checkpoint/model-00005-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:92e8488c95aae92eb0ae6aa54da0ba180cca2726f5bd061b36b57c208bc2d404
3
  size 4857206904
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d28f9e42e36012953b0ad80e3029216d6abc5eb46b9cc829c31ea903fb7a731
3
  size 4857206904
last-checkpoint/model-00006-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bcfba0ab5a7de6b7625c5951644381b869990b9cf3bc79c0d13d322d631e496f
3
  size 2684734256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79f8b6e521feebdc5b2b7367c01fd851c314151387cf42e49264cd3b76a86b2b
3
  size 2684734256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 4.8,
5
  "eval_steps": 50,
6
- "global_step": 2400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -727,6 +727,126 @@
727
  "eval_samples_per_second": 68.983,
728
  "eval_steps_per_second": 3.538,
729
  "step": 2400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
730
  }
731
  ],
732
  "logging_steps": 50,
@@ -734,7 +854,7 @@
734
  "num_input_tokens_seen": 0,
735
  "num_train_epochs": 10,
736
  "save_steps": 400,
737
- "total_flos": 1.1589925681181491e+17,
738
  "train_batch_size": 4,
739
  "trial_name": null,
740
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 5.6,
5
  "eval_steps": 50,
6
+ "global_step": 2800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
727
  "eval_samples_per_second": 68.983,
728
  "eval_steps_per_second": 3.538,
729
  "step": 2400
730
+ },
731
+ {
732
+ "epoch": 4.9,
733
+ "grad_norm": 0.7993331551551819,
734
+ "learning_rate": 1.2079116908177592e-05,
735
+ "loss": 0.1222,
736
+ "step": 2450
737
+ },
738
+ {
739
+ "epoch": 4.9,
740
+ "eval_loss": 0.9250988364219666,
741
+ "eval_runtime": 2.4444,
742
+ "eval_samples_per_second": 63.82,
743
+ "eval_steps_per_second": 3.273,
744
+ "step": 2450
745
+ },
746
+ {
747
+ "epoch": 5.0,
748
+ "grad_norm": 1.1892589330673218,
749
+ "learning_rate": 1.1736481776669307e-05,
750
+ "loss": 0.1186,
751
+ "step": 2500
752
+ },
753
+ {
754
+ "epoch": 5.0,
755
+ "eval_loss": 0.9481778144836426,
756
+ "eval_runtime": 3.3935,
757
+ "eval_samples_per_second": 45.97,
758
+ "eval_steps_per_second": 2.357,
759
+ "step": 2500
760
+ },
761
+ {
762
+ "epoch": 5.1,
763
+ "grad_norm": 0.7223986983299255,
764
+ "learning_rate": 1.1391731009600655e-05,
765
+ "loss": 0.0726,
766
+ "step": 2550
767
+ },
768
+ {
769
+ "epoch": 5.1,
770
+ "eval_loss": 0.974181056022644,
771
+ "eval_runtime": 2.9499,
772
+ "eval_samples_per_second": 52.883,
773
+ "eval_steps_per_second": 2.712,
774
+ "step": 2550
775
+ },
776
+ {
777
+ "epoch": 5.2,
778
+ "grad_norm": 0.7545835971832275,
779
+ "learning_rate": 1.1045284632676535e-05,
780
+ "loss": 0.0717,
781
+ "step": 2600
782
+ },
783
+ {
784
+ "epoch": 5.2,
785
+ "eval_loss": 0.9890027046203613,
786
+ "eval_runtime": 2.7635,
787
+ "eval_samples_per_second": 56.449,
788
+ "eval_steps_per_second": 2.895,
789
+ "step": 2600
790
+ },
791
+ {
792
+ "epoch": 5.3,
793
+ "grad_norm": 1.2251814603805542,
794
+ "learning_rate": 1.0697564737441254e-05,
795
+ "loss": 0.072,
796
+ "step": 2650
797
+ },
798
+ {
799
+ "epoch": 5.3,
800
+ "eval_loss": 0.9911813735961914,
801
+ "eval_runtime": 2.2537,
802
+ "eval_samples_per_second": 69.22,
803
+ "eval_steps_per_second": 3.55,
804
+ "step": 2650
805
+ },
806
+ {
807
+ "epoch": 5.4,
808
+ "grad_norm": 0.45753681659698486,
809
+ "learning_rate": 1.0348994967025012e-05,
810
+ "loss": 0.0718,
811
+ "step": 2700
812
+ },
813
+ {
814
+ "epoch": 5.4,
815
+ "eval_loss": 0.9854485988616943,
816
+ "eval_runtime": 2.2539,
817
+ "eval_samples_per_second": 69.212,
818
+ "eval_steps_per_second": 3.549,
819
+ "step": 2700
820
+ },
821
+ {
822
+ "epoch": 5.5,
823
+ "grad_norm": 1.0563805103302002,
824
+ "learning_rate": 1e-05,
825
+ "loss": 0.072,
826
+ "step": 2750
827
+ },
828
+ {
829
+ "epoch": 5.5,
830
+ "eval_loss": 0.9962345957756042,
831
+ "eval_runtime": 2.2507,
832
+ "eval_samples_per_second": 69.313,
833
+ "eval_steps_per_second": 3.555,
834
+ "step": 2750
835
+ },
836
+ {
837
+ "epoch": 5.6,
838
+ "grad_norm": 1.6450284719467163,
839
+ "learning_rate": 9.651005032974994e-06,
840
+ "loss": 0.0699,
841
+ "step": 2800
842
+ },
843
+ {
844
+ "epoch": 5.6,
845
+ "eval_loss": 0.9950909614562988,
846
+ "eval_runtime": 2.2532,
847
+ "eval_samples_per_second": 69.235,
848
+ "eval_steps_per_second": 3.551,
849
+ "step": 2800
850
  }
851
  ],
852
  "logging_steps": 50,
 
854
  "num_input_tokens_seen": 0,
855
  "num_train_epochs": 10,
856
  "save_steps": 400,
857
+ "total_flos": 1.3524716052545536e+17,
858
  "train_batch_size": 4,
859
  "trial_name": null,
860
  "trial_params": null