ancient41 committed on
Commit
4fad338
·
verified ·
1 Parent(s): 52a0e4c

Training in progress, step 116, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:02984974277b9aedef870915ae775cc190c8845abfc0da11356cbec68c834621
3
  size 671149168
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43a91690adb9887476aae3560db1ce524c846bfd036d867e50ec94af9bc1b4ee
3
  size 671149168
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:db2ab0c1625b0cace34243f6b1a2b89c4bd5229c9e932a670aafc98e937b8c3f
3
  size 341314196
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69a99a1a2721396f9f01686619afdcec0203331c5e8971f4e9b3cf719566b905
3
  size 341314196
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:691bffbcf7f3cc880ac044e9193c3daf47e32a323a9fd73572ab7275b19b8169
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df97fc4cd15fa85ea2a9d2ac2106f6a796d77aef212047d98e6ee87d3e75bb9e
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04da0ae6b4988b3c6efad572e9418c79c5dec4e1be8a07bb5648f45d02dba97c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5db974519fd77fbcd1d1516436fa53f7a6999ec0d08fdab2b48306286e57ccd6
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.0009016587864607573,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
4
- "epoch": 0.8639308855291576,
5
  "eval_steps": 50,
6
- "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -731,6 +731,118 @@
731
  "eval_samples_per_second": 9.586,
732
  "eval_steps_per_second": 2.409,
733
  "step": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
734
  }
735
  ],
736
  "logging_steps": 1,
@@ -754,12 +866,12 @@
754
  "should_evaluate": false,
755
  "should_log": false,
756
  "should_save": true,
757
- "should_training_stop": false
758
  },
759
  "attributes": {}
760
  }
761
  },
762
- "total_flos": 2.0402646152537702e+17,
763
  "train_batch_size": 8,
764
  "trial_name": null,
765
  "trial_params": null
 
1
  {
2
  "best_metric": 0.0009016587864607573,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
4
+ "epoch": 1.0021598272138228,
5
  "eval_steps": 50,
6
+ "global_step": 116,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
731
  "eval_samples_per_second": 9.586,
732
  "eval_steps_per_second": 2.409,
733
  "step": 100
734
+ },
735
+ {
736
+ "epoch": 0.8725701943844493,
737
+ "grad_norm": 0.012060822919011116,
738
+ "learning_rate": 4.860108501712824e-06,
739
+ "loss": 0.0003,
740
+ "step": 101
741
+ },
742
+ {
743
+ "epoch": 0.8812095032397408,
744
+ "grad_norm": 0.060871824622154236,
745
+ "learning_rate": 4.242719137849077e-06,
746
+ "loss": 0.0007,
747
+ "step": 102
748
+ },
749
+ {
750
+ "epoch": 0.8898488120950324,
751
+ "grad_norm": 0.04358503967523575,
752
+ "learning_rate": 3.6655196284083317e-06,
753
+ "loss": 0.0006,
754
+ "step": 103
755
+ },
756
+ {
757
+ "epoch": 0.8984881209503239,
758
+ "grad_norm": 0.015056795440614223,
759
+ "learning_rate": 3.1290169432939553e-06,
760
+ "loss": 0.0003,
761
+ "step": 104
762
+ },
763
+ {
764
+ "epoch": 0.9071274298056156,
765
+ "grad_norm": 0.06826309114694595,
766
+ "learning_rate": 2.6336823072904304e-06,
767
+ "loss": 0.0019,
768
+ "step": 105
769
+ },
770
+ {
771
+ "epoch": 0.9157667386609071,
772
+ "grad_norm": 0.004253576509654522,
773
+ "learning_rate": 2.179950786173879e-06,
774
+ "loss": 0.0002,
775
+ "step": 106
776
+ },
777
+ {
778
+ "epoch": 0.9244060475161987,
779
+ "grad_norm": 0.027383577078580856,
780
+ "learning_rate": 1.7682209045820686e-06,
781
+ "loss": 0.0004,
782
+ "step": 107
783
+ },
784
+ {
785
+ "epoch": 0.9330453563714903,
786
+ "grad_norm": 0.01728072762489319,
787
+ "learning_rate": 1.3988542959794627e-06,
788
+ "loss": 0.0003,
789
+ "step": 108
790
+ },
791
+ {
792
+ "epoch": 0.9416846652267818,
793
+ "grad_norm": 0.11028740555047989,
794
+ "learning_rate": 1.0721753850247984e-06,
795
+ "loss": 0.0021,
796
+ "step": 109
797
+ },
798
+ {
799
+ "epoch": 0.9503239740820735,
800
+ "grad_norm": 0.007546401582658291,
801
+ "learning_rate": 7.884711026201585e-07,
802
+ "loss": 0.0002,
803
+ "step": 110
804
+ },
805
+ {
806
+ "epoch": 0.958963282937365,
807
+ "grad_norm": 0.011820383369922638,
808
+ "learning_rate": 5.479906338917984e-07,
809
+ "loss": 0.0002,
810
+ "step": 111
811
+ },
812
+ {
813
+ "epoch": 0.9676025917926566,
814
+ "grad_norm": 0.027329521253705025,
815
+ "learning_rate": 3.5094519932415417e-07,
816
+ "loss": 0.0002,
817
+ "step": 112
818
+ },
819
+ {
820
+ "epoch": 0.9762419006479481,
821
+ "grad_norm": 0.027211442589759827,
822
+ "learning_rate": 1.975078692391552e-07,
823
+ "loss": 0.0012,
824
+ "step": 113
825
+ },
826
+ {
827
+ "epoch": 0.9848812095032398,
828
+ "grad_norm": 0.013164684176445007,
829
+ "learning_rate": 8.781341178393244e-08,
830
+ "loss": 0.0004,
831
+ "step": 114
832
+ },
833
+ {
834
+ "epoch": 0.9935205183585313,
835
+ "grad_norm": 0.0681779533624649,
836
+ "learning_rate": 2.1958174560282595e-08,
837
+ "loss": 0.0021,
838
+ "step": 115
839
+ },
840
+ {
841
+ "epoch": 1.0021598272138228,
842
+ "grad_norm": 0.34717699885368347,
843
+ "learning_rate": 0.0,
844
+ "loss": 0.0036,
845
+ "step": 116
846
  }
847
  ],
848
  "logging_steps": 1,
 
866
  "should_evaluate": false,
867
  "should_log": false,
868
  "should_save": true,
869
+ "should_training_stop": true
870
  },
871
  "attributes": {}
872
  }
873
  },
874
+ "total_flos": 2.3075543649878016e+17,
875
  "train_batch_size": 8,
876
  "trial_name": null,
877
  "trial_params": null