mrferr3t committed
Commit 486b457 · verified · 1 Parent(s): e741d57

End of training

Files changed (2)
  1. README.md +16 -12
  2. adapter_model.bin +1 -1
README.md CHANGED
@@ -3,9 +3,10 @@ library_name: peft
  license: apache-2.0
  base_model: Qwen/Qwen2.5-7B-Instruct
  tags:
+ - axolotl
  - generated_from_trainer
  model-index:
- - name: miner_id_24
+ - name: e6ccd50b-c55b-400e-bcc7-bf043937b944
    results: []
  ---
 
@@ -40,10 +41,10 @@ datasets:
  system_prompt: ''
  debug: null
  deepspeed: null
- early_stopping_patience:
+ early_stopping_patience: 3
  early_stopping_threshold: 0.0001
  eval_max_new_tokens: 128
- eval_steps:
+ eval_steps: 120
  eval_strategy: null
  flash_attention: true
  fp16: null
@@ -52,7 +53,7 @@ fsdp_config: null
  gradient_accumulation_steps: 2
  gradient_checkpointing: false
  group_by_length: false
- hub_model_id:
+ hub_model_id: mrferr3t/e6ccd50b-c55b-400e-bcc7-bf043937b944
  hub_repo: null
  hub_strategy: checkpoint
  hub_token: null
@@ -60,7 +61,7 @@ learning_rate: 0.0004
  load_in_4bit: false
  load_in_8bit: false
  local_rank: null
- logging_steps:
+ logging_steps: 120
  lora_alpha: 16
  lora_dropout: 0.05
  lora_fan_in_fan_out: null
@@ -68,7 +69,7 @@ lora_model_dir: null
  lora_r: 8
  lora_target_linear: true
  lr_scheduler: cosine
- max_steps: 1
+ max_steps:
  micro_batch_size: 32
  mlflow_experiment_name: /tmp/0258260fff514215_train_data.json
  model_type: AutoModelForCausalLM
@@ -79,7 +80,7 @@ pad_to_sequence_len: true
  resume_from_checkpoint: /workspace/hub_repo/last-checkpoint
  s2_attention: null
  sample_packing: false
- save_steps:
+ save_steps: 120
  saves_per_epoch: 0
  sequence_len: 512
  strict: false
@@ -87,9 +88,9 @@ tf32: false
  tokenizer_type: AutoTokenizer
  train_on_inputs: false
  trust_remote_code: true
- val_set_size: 0.0
+ val_set_size: 0.05
  wandb_entity: null
- wandb_mode: disabled
+ wandb_mode:
  wandb_name: 7ce57913-98f3-40da-a864-33f8df60155d
  wandb_project: Gradients-On-Demand
  wandb_run: your_name
@@ -102,11 +103,11 @@ xformers_attention: null
 
  </details><br>
 
- # miner_id_24
+ # e6ccd50b-c55b-400e-bcc7-bf043937b944
 
  This model is a fine-tuned version of [Qwen/Qwen2.5-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct) on the None dataset.
  It achieves the following results on the evaluation set:
- - Loss: 0.1909
+ - Loss: 0.2069
 
  ## Model description
 
@@ -134,7 +135,7 @@ The following hyperparameters were used during training:
  - optimizer: Use OptimizerNames.ADAMW_BNB with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
  - lr_scheduler_type: cosine
  - lr_scheduler_warmup_steps: 100
- - training_steps: 1
+ - num_epochs: 100
 
  ### Training results
 
@@ -148,6 +149,9 @@ The following hyperparameters were used during training:
  | 0.1677 | 0.5503 | 260 | 0.2076 |
  | 0.1963 | 0.6603 | 312 | 0.2016 |
  | 0.192 | 0.7704 | 364 | 0.1909 |
+ | 0.1694 | 0.8804 | 416 | 0.2012 |
+ | 0.1806 | 0.9905 | 468 | 0.1981 |
+ | 0.1239 | 1.1005 | 520 | 0.2069 |
 
 
  ### Framework versions
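
The card above now points at the adapter published under the new hub_model_id, mrferr3t/e6ccd50b-c55b-400e-bcc7-bf043937b944, on top of Qwen/Qwen2.5-7B-Instruct. As a rough illustration, not part of this commit, a minimal sketch for loading the adapter with transformers and peft (assuming both are installed, the repo is public, and there is enough memory for the 7B base) might look like:

```python
# Minimal sketch, not from the repo: load the LoRA adapter on its base model.
# Repo ids come from the diff above; everything else is an assumption.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "Qwen/Qwen2.5-7B-Instruct"
adapter_id = "mrferr3t/e6ccd50b-c55b-400e-bcc7-bf043937b944"  # hub_model_id from the config diff

tokenizer = AutoTokenizer.from_pretrained(base_id)
base = AutoModelForCausalLM.from_pretrained(
    base_id, torch_dtype="auto", device_map="auto"  # device_map="auto" requires accelerate
)
model = PeftModel.from_pretrained(base, adapter_id)  # applies the weights from adapter_model.bin

inputs = tokenizer("Hello", return_tensors="pt").to(model.device)
out = model.generate(**inputs, max_new_tokens=128)  # 128 mirrors eval_max_new_tokens in the config
print(tokenizer.decode(out[0], skip_special_tokens=True))
```
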
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:56892c2e428e2dd943f947b12324750662f045fbdd73ae86cb320c6eb5b55993
+ oid sha256:642e30f44f2b8e08a04736eac1344d4177cb85e1b55018234ddb7cf8924c0d91
  size 80881450
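
The adapter_model.bin change is a Git LFS pointer update only: the size stays at 80881450 bytes and the sha256 oid now references the newly trained weights. A hypothetical check, not part of the repo, that a local download matches this pointer (assuming huggingface_hub is installed and the repo is reachable):

```python
# Hypothetical integrity check against the LFS pointer values shown in this diff.
import hashlib
import os
from huggingface_hub import hf_hub_download

EXPECTED_OID = "642e30f44f2b8e08a04736eac1344d4177cb85e1b55018234ddb7cf8924c0d91"  # new oid
EXPECTED_SIZE = 80881450  # bytes, unchanged by this commit

path = hf_hub_download(
    repo_id="mrferr3t/e6ccd50b-c55b-400e-bcc7-bf043937b944",
    filename="adapter_model.bin",
)

sha = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        sha.update(chunk)

assert os.path.getsize(path) == EXPECTED_SIZE, "size mismatch"
assert sha.hexdigest() == EXPECTED_OID, "sha256 mismatch"
print("adapter_model.bin matches the LFS pointer in this commit")
```
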