nttx commited on
Commit
cfa961c
·
verified ·
1 Parent(s): 334d191

Training in progress, step 110, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a2a70e450bc009b9c5087623706af2ed489db51cab00a987ff052710ecceff28
3
  size 159967880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3cdbe374c1e1f1a76c11e4f1288fe314d5a37aa7dd40932d29ff0520c4aacfb
3
  size 159967880
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7d94a41d2844d8bd37cb0fa7cbcf19940d93eb3cf9e6e2b29d80b12046923528
3
  size 320194002
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5c31e5f659f95dde62e00e80508aa1cca1ee8d3e0e7248595ab9cc3737ee5aa
3
  size 320194002
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:acca656d17f6717113dc9db830ccab89811067361025e443ec9c355f4f43f913
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10ad139536e36a5c5994194df11493228ee012519c23cc0621e37e04c948cdad
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df88ad9d29a5b994fc668c3ab662b1d4e6baa321c3f5068caf8ff1c21c6e351d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eda1b3688163acb4c0de1a0c7c611576b5a46451ca11dac78f3f571adee24be0
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.12525896728038788,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
4
- "epoch": 1.8310502283105023,
5
  "eval_steps": 25,
6
- "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -747,6 +747,76 @@
747
  "eval_samples_per_second": 14.646,
748
  "eval_steps_per_second": 2.05,
749
  "step": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
750
  }
751
  ],
752
  "logging_steps": 1,
@@ -770,12 +840,12 @@
770
  "should_evaluate": false,
771
  "should_log": false,
772
  "should_save": true,
773
- "should_training_stop": false
774
  },
775
  "attributes": {}
776
  }
777
  },
778
- "total_flos": 1.388598679044096e+17,
779
  "train_batch_size": 8,
780
  "trial_name": null,
781
  "trial_params": null
 
1
  {
2
  "best_metric": 0.12525896728038788,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
4
+ "epoch": 2.018264840182648,
5
  "eval_steps": 25,
6
+ "global_step": 110,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
747
  "eval_samples_per_second": 14.646,
748
  "eval_steps_per_second": 2.05,
749
  "step": 100
750
+ },
751
+ {
752
+ "epoch": 1.8493150684931505,
753
+ "grad_norm": 0.7789422273635864,
754
+ "learning_rate": 5.95594714845854e-06,
755
+ "loss": 0.4551,
756
+ "step": 101
757
+ },
758
+ {
759
+ "epoch": 1.8675799086757991,
760
+ "grad_norm": 0.9181280732154846,
761
+ "learning_rate": 4.712525830705338e-06,
762
+ "loss": 0.4778,
763
+ "step": 102
764
+ },
765
+ {
766
+ "epoch": 1.8858447488584473,
767
+ "grad_norm": 0.6837835311889648,
768
+ "learning_rate": 3.6124857091878845e-06,
769
+ "loss": 0.4939,
770
+ "step": 103
771
+ },
772
+ {
773
+ "epoch": 1.904109589041096,
774
+ "grad_norm": 0.522508978843689,
775
+ "learning_rate": 2.656912390696708e-06,
776
+ "loss": 0.5255,
777
+ "step": 104
778
+ },
779
+ {
780
+ "epoch": 1.9223744292237441,
781
+ "grad_norm": 1.1073154211044312,
782
+ "learning_rate": 1.8467489107293509e-06,
783
+ "loss": 0.6029,
784
+ "step": 105
785
+ },
786
+ {
787
+ "epoch": 1.9406392694063928,
788
+ "grad_norm": 1.4917099475860596,
789
+ "learning_rate": 1.1827948028283352e-06,
790
+ "loss": 0.6545,
791
+ "step": 106
792
+ },
793
+ {
794
+ "epoch": 1.958904109589041,
795
+ "grad_norm": 0.9225929379463196,
796
+ "learning_rate": 6.657053095380005e-07,
797
+ "loss": 0.5178,
798
+ "step": 107
799
+ },
800
+ {
801
+ "epoch": 1.9771689497716896,
802
+ "grad_norm": 0.7463716864585876,
803
+ "learning_rate": 2.959907357592661e-07,
804
+ "loss": 0.4391,
805
+ "step": 108
806
+ },
807
+ {
808
+ "epoch": 1.9954337899543377,
809
+ "grad_norm": 0.6585250496864319,
810
+ "learning_rate": 7.401594514025999e-08,
811
+ "loss": 0.5468,
812
+ "step": 109
813
+ },
814
+ {
815
+ "epoch": 2.018264840182648,
816
+ "grad_norm": 0.6586726307868958,
817
+ "learning_rate": 0.0,
818
+ "loss": 0.3368,
819
+ "step": 110
820
  }
821
  ],
822
  "logging_steps": 1,
 
840
  "should_evaluate": false,
841
  "should_log": false,
842
  "should_save": true,
843
+ "should_training_stop": true
844
  },
845
  "attributes": {}
846
  }
847
  },
848
+ "total_flos": 1.5274585469485056e+17,
849
  "train_batch_size": 8,
850
  "trial_name": null,
851
  "trial_params": null