prxy5604 committed
Commit 3395600 · verified · 1 Parent(s): 1f66fb1

Training in progress, step 114, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c2b4c6a413b0cf90d2cacc4a3d6c1a951391bcff6329af7adb9b3ff7c7a8bec0
+oid sha256:57cdfc8fb7a92b4fb1677570ecd7c7291b2f18a6a4d84d8744057540270fc5ef
 size 671149168
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:db2f5207429bf78cec67be2c979535b7b0829bd426c173d39c6b2258b1b5b92d
+oid sha256:74074a1cd1c6b991d734fcbac5512f8090ad65e6306289b6155925b4f17b882c
 size 341314196
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ddf2d95f55605664fc73443e4d56e2c5a5e0fe63fdd9ec66adb81e46fa7f6d80
+oid sha256:d2065d66796103fae2f54ca58f9ec7a05aa56d2f1117a1081e676ca9be8ce846
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ca7e3cdff407f71f983ac7ffef25e6d551bbdff8be7e11f26e01dcebe54f37b0
+oid sha256:c1d5246f77ee07700ec5a7b53a9e537af6a636eb3ad53e20a4a10e8c7238521f
 size 1064
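
The four binary files above are stored through Git LFS, so the diffs only touch their pointer files: a version line, an "oid sha256:" line, and a "size" line. The sketch below is not part of the commit; it assumes a local copy of the repository with the LFS objects actually downloaded, and the file path is only an example. It shows one way to check that a downloaded checkpoint file matches the pointer recorded in this commit.

```python
# Minimal sketch (assumption: last-checkpoint/ has been downloaded locally).
# Compare a file's size and SHA-256 digest against the LFS pointer above.
import hashlib
import os

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream the file so large checkpoints do not need to fit in memory."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

path = "last-checkpoint/adapter_model.safetensors"  # illustrative path
print("size  :", os.path.getsize(path))  # should match the pointer's "size" line
print("sha256:", sha256_of(path))        # should match the pointer's "oid sha256:" line
```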
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 0.8318732976913452,
   "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 2.6315789473684212,
+  "epoch": 3.0,
   "eval_steps": 50,
-  "global_step": 100,
+  "global_step": 114,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -731,6 +731,104 @@
       "eval_samples_per_second": 13.468,
       "eval_steps_per_second": 3.367,
       "step": 100
+    },
+    {
+      "epoch": 2.6578947368421053,
+      "grad_norm": 8.112892150878906,
+      "learning_rate": 3.8060233744356633e-06,
+      "loss": 0.2688,
+      "step": 101
+    },
+    {
+      "epoch": 2.6842105263157894,
+      "grad_norm": 9.736104965209961,
+      "learning_rate": 3.249187865729264e-06,
+      "loss": 0.4238,
+      "step": 102
+    },
+    {
+      "epoch": 2.7105263157894735,
+      "grad_norm": 5.671924591064453,
+      "learning_rate": 2.7350092482679836e-06,
+      "loss": 0.1915,
+      "step": 103
+    },
+    {
+      "epoch": 2.736842105263158,
+      "grad_norm": 8.172745704650879,
+      "learning_rate": 2.2639566745727205e-06,
+      "loss": 1.0132,
+      "step": 104
+    },
+    {
+      "epoch": 2.763157894736842,
+      "grad_norm": 6.747289180755615,
+      "learning_rate": 1.8364599476241862e-06,
+      "loss": 0.5678,
+      "step": 105
+    },
+    {
+      "epoch": 2.7894736842105265,
+      "grad_norm": 8.281822204589844,
+      "learning_rate": 1.4529091286973995e-06,
+      "loss": 0.4592,
+      "step": 106
+    },
+    {
+      "epoch": 2.8157894736842106,
+      "grad_norm": 8.051541328430176,
+      "learning_rate": 1.1136541814576573e-06,
+      "loss": 0.3611,
+      "step": 107
+    },
+    {
+      "epoch": 2.8421052631578947,
+      "grad_norm": 9.0069580078125,
+      "learning_rate": 8.190046526428242e-07,
+      "loss": 0.3382,
+      "step": 108
+    },
+    {
+      "epoch": 2.8684210526315788,
+      "grad_norm": 5.615657329559326,
+      "learning_rate": 5.692293896232936e-07,
+      "loss": 0.233,
+      "step": 109
+    },
+    {
+      "epoch": 2.8947368421052633,
+      "grad_norm": 8.225751876831055,
+      "learning_rate": 3.6455629509730136e-07,
+      "loss": 0.316,
+      "step": 110
+    },
+    {
+      "epoch": 2.9210526315789473,
+      "grad_norm": 15.0908203125,
+      "learning_rate": 2.0517211914545254e-07,
+      "loss": 0.26,
+      "step": 111
+    },
+    {
+      "epoch": 2.9473684210526314,
+      "grad_norm": 9.770281791687012,
+      "learning_rate": 9.12222888341252e-08,
+      "loss": 0.3276,
+      "step": 112
+    },
+    {
+      "epoch": 2.973684210526316,
+      "grad_norm": 5.682199954986572,
+      "learning_rate": 2.2810775523329773e-08,
+      "loss": 0.3726,
+      "step": 113
+    },
+    {
+      "epoch": 3.0,
+      "grad_norm": 10.495176315307617,
+      "learning_rate": 0.0,
+      "loss": 0.3121,
+      "step": 114
     }
   ],
   "logging_steps": 1,
@@ -754,12 +852,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
      "attributes": {}
     }
   },
-  "total_flos": 1.4247389962646323e+17,
+  "total_flos": 1.6219486313408102e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null