Romain-XV committed (verified)
Commit 9ab5426 · 1 Parent(s): 05e115c

Training in progress, step 124, checkpoint

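This is the kind of commit that transformers' Trainer pushes automatically during training; the last-checkpoint/ folder below matches the layout produced by hub_strategy="checkpoint". A minimal sketch of that setup, offered as an assumption since the actual training script is not part of this commit; only the values echoed in trainer_state.json are taken from the repo itself:

from transformers import TrainingArguments

# Hypothetical arguments: batch size, logging cadence and output dir are read off
# trainer_state.json below; everything else is an assumption.
args = TrainingArguments(
    output_dir="miner_id_24",       # "best_model_checkpoint" points into miner_id_24/
    per_device_train_batch_size=4,  # "train_batch_size": 4
    logging_steps=1,                # "logging_steps": 1
    push_to_hub=True,
    hub_strategy="checkpoint",      # push only the latest checkpoint, under last-checkpoint/
)

# A Trainer built on these arguments can later resume from the pushed folder with
# trainer.train(resume_from_checkpoint="last-checkpoint").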
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4ecea0eb506966c3adaa92776fafcbf9f0c95cbf73cbbbc19e27e94a9bf4d83b
+oid sha256:f675eca4718ef678b42c4e45fdc39331d4412cd7383b7fb96f23877618a4cf72
 size 83945296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c40476b383d1af7e2da47ec2d51547f78a12f9b02b551128264a8681d94eb307
+oid sha256:8460ee5ac1391d7cc29200316cb512ebbc8c558db6ad943e68e9e9670355f54b
 size 43122580
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d797c1c1d05d29db78de514e4d3a8fc182d878f2bee91c4600913b90a3dbad17
+oid sha256:ea8fad6c960ead350c17031a8995ec22d65156da68444c9500a59e6a4123354e
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f0f69efa683b8ebdc926b4b9298d921afcb3d3666fff913ace452d35225fdfd2
+oid sha256:dbd42b891ad014f5057f3a5965f615fead617014a669ce0c44e21eb37af35b63
 size 1064
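All four files above are Git LFS objects, so the diff only rewrites each pointer (a sha256 oid plus a byte size) rather than the binary payload itself. A small sketch, assuming the objects have been pulled locally, that re-checks a downloaded file against the oid and size recorded in its new pointer:

import hashlib

def matches_lfs_pointer(path: str, expected_oid: str, expected_size: int) -> bool:
    # Stream in 1 MiB chunks so even the 84 MB adapter never sits fully in memory.
    digest = hashlib.sha256()
    size = 0
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
            size += len(chunk)
    return digest.hexdigest() == expected_oid and size == expected_size

# oid and size copied from the new adapter_model.safetensors pointer above.
print(matches_lfs_pointer(
    "last-checkpoint/adapter_model.safetensors",
    "f675eca4718ef678b42c4e45fdc39331d4412cd7383b7fb96f23877618a4cf72",
    83945296,
))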
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 0.07799232006072998,
   "best_model_checkpoint": "miner_id_24/checkpoint-100",
-  "epoch": 0.07573247503194964,
+  "epoch": 0.09390826903961755,
   "eval_steps": 100,
-  "global_step": 100,
+  "global_step": 124,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -723,6 +723,174 @@
       "eval_samples_per_second": 6.682,
       "eval_steps_per_second": 1.67,
       "step": 100
+    },
+    {
+      "epoch": 0.07648979978226914,
+      "grad_norm": 0.2453308403491974,
+      "learning_rate": 1.9423489433902186e-05,
+      "loss": 0.0635,
+      "step": 101
+    },
+    {
+      "epoch": 0.07724712453258863,
+      "grad_norm": 0.286064088344574,
+      "learning_rate": 1.7822218477475494e-05,
+      "loss": 0.0783,
+      "step": 102
+    },
+    {
+      "epoch": 0.07800444928290813,
+      "grad_norm": 0.29971766471862793,
+      "learning_rate": 1.6283352173747145e-05,
+      "loss": 0.0698,
+      "step": 103
+    },
+    {
+      "epoch": 0.07876177403322762,
+      "grad_norm": 0.3119301497936249,
+      "learning_rate": 1.4808059116167305e-05,
+      "loss": 0.0773,
+      "step": 104
+    },
+    {
+      "epoch": 0.07951909878354713,
+      "grad_norm": 0.20733344554901123,
+      "learning_rate": 1.339745962155613e-05,
+      "loss": 0.0604,
+      "step": 105
+    },
+    {
+      "epoch": 0.08027642353386662,
+      "grad_norm": 0.25688570737838745,
+      "learning_rate": 1.2052624879351104e-05,
+      "loss": 0.0638,
+      "step": 106
+    },
+    {
+      "epoch": 0.08103374828418611,
+      "grad_norm": 0.28935614228248596,
+      "learning_rate": 1.0774576138160597e-05,
+      "loss": 0.0713,
+      "step": 107
+    },
+    {
+      "epoch": 0.08179107303450561,
+      "grad_norm": 0.24278295040130615,
+      "learning_rate": 9.564283930242257e-06,
+      "loss": 0.1038,
+      "step": 108
+    },
+    {
+      "epoch": 0.0825483977848251,
+      "grad_norm": 0.30280330777168274,
+      "learning_rate": 8.422667334494249e-06,
+      "loss": 0.0981,
+      "step": 109
+    },
+    {
+      "epoch": 0.08330572253514461,
+      "grad_norm": 0.2627258598804474,
+      "learning_rate": 7.350593278519824e-06,
+      "loss": 0.0663,
+      "step": 110
+    },
+    {
+      "epoch": 0.0840630472854641,
+      "grad_norm": 0.2400035709142685,
+      "learning_rate": 6.3488758802945354e-06,
+      "loss": 0.071,
+      "step": 111
+    },
+    {
+      "epoch": 0.08482037203578359,
+      "grad_norm": 0.31241780519485474,
+      "learning_rate": 5.418275829936537e-06,
+      "loss": 0.0658,
+      "step": 112
+    },
+    {
+      "epoch": 0.0855776967861031,
+      "grad_norm": 0.3294007480144501,
+      "learning_rate": 4.559499812049251e-06,
+      "loss": 0.0978,
+      "step": 113
+    },
+    {
+      "epoch": 0.08633502153642258,
+      "grad_norm": 0.22428545355796814,
+      "learning_rate": 3.7731999690749585e-06,
+      "loss": 0.0613,
+      "step": 114
+    },
+    {
+      "epoch": 0.08709234628674209,
+      "grad_norm": 0.28080588579177856,
+      "learning_rate": 3.059973406066963e-06,
+      "loss": 0.0716,
+      "step": 115
+    },
+    {
+      "epoch": 0.08784967103706158,
+      "grad_norm": 0.26730701327323914,
+      "learning_rate": 2.420361737256438e-06,
+      "loss": 0.0673,
+      "step": 116
+    },
+    {
+      "epoch": 0.08860699578738107,
+      "grad_norm": 0.34970346093177795,
+      "learning_rate": 1.8548506747582129e-06,
+      "loss": 0.0773,
+      "step": 117
+    },
+    {
+      "epoch": 0.08936432053770058,
+      "grad_norm": 0.2649919390678406,
+      "learning_rate": 1.3638696597277679e-06,
+      "loss": 0.0659,
+      "step": 118
+    },
+    {
+      "epoch": 0.09012164528802007,
+      "grad_norm": 0.2567705512046814,
+      "learning_rate": 9.477915362496758e-07,
+      "loss": 0.0663,
+      "step": 119
+    },
+    {
+      "epoch": 0.09087897003833957,
+      "grad_norm": 0.2665155827999115,
+      "learning_rate": 6.069322682050516e-07,
+      "loss": 0.0572,
+      "step": 120
+    },
+    {
+      "epoch": 0.09163629478865906,
+      "grad_norm": 0.25261205434799194,
+      "learning_rate": 3.415506993330153e-07,
+      "loss": 0.0683,
+      "step": 121
+    },
+    {
+      "epoch": 0.09239361953897855,
+      "grad_norm": 0.26622474193573,
+      "learning_rate": 1.518483566683826e-07,
+      "loss": 0.0637,
+      "step": 122
+    },
+    {
+      "epoch": 0.09315094428929806,
+      "grad_norm": 0.2766965925693512,
+      "learning_rate": 3.796929750485845e-08,
+      "loss": 0.0843,
+      "step": 123
+    },
+    {
+      "epoch": 0.09390826903961755,
+      "grad_norm": 0.2631254494190216,
+      "learning_rate": 0.0,
+      "loss": 0.08,
+      "step": 124
     }
   ],
   "logging_steps": 1,
@@ -746,12 +914,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 6.025880320459407e+17,
+  "total_flos": 7.4796378195021e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null