mrferr3t committed (verified)
Commit 7f5121f · 1 Parent(s): 132643c

End of training

Files changed (3)
  1. README.md +24 -12
  2. adapter_model.bin +1 -1
  3. adapter_model.safetensors +1 -1
README.md CHANGED
@@ -2,9 +2,10 @@
 library_name: peft
 base_model: NousResearch/CodeLlama-7b-hf
 tags:
+- axolotl
 - generated_from_trainer
 model-index:
-- name: miner_id_24
+- name: 4463fa2d-bcef-4d53-8443-d3c7f10564d8
   results: []
 ---
 
@@ -39,10 +40,10 @@ datasets:
 system_prompt: ''
 debug: null
 deepspeed: null
-early_stopping_patience:
+early_stopping_patience: 3
 early_stopping_threshold: 0.0001
 eval_max_new_tokens: 128
-eval_steps:
+eval_steps: 96
 eval_strategy: null
 flash_attention: false
 fp16: null
@@ -51,7 +52,7 @@ fsdp_config: null
 gradient_accumulation_steps: 2
 gradient_checkpointing: false
 group_by_length: false
-hub_model_id:
+hub_model_id: mrferr3t/4463fa2d-bcef-4d53-8443-d3c7f10564d8
 hub_repo: null
 hub_strategy: checkpoint
 hub_token: null
@@ -59,7 +60,7 @@ learning_rate: 0.0004
 load_in_4bit: false
 load_in_8bit: false
 local_rank: null
-logging_steps:
+logging_steps: 96
 lora_alpha: 16
 lora_dropout: 0.05
 lora_fan_in_fan_out: null
@@ -67,7 +68,7 @@ lora_model_dir: null
 lora_r: 8
 lora_target_linear: true
 lr_scheduler: cosine
-max_steps: 20
+max_steps:
 micro_batch_size: 32
 mlflow_experiment_name: /tmp/b64954fcc6e77b0d_train_data.json
 model_type: AutoModelForCausalLM
@@ -75,10 +76,10 @@ num_epochs: 100
 optimizer: adamw_bnb_8bit
 output_dir: miner_id_24
 pad_to_sequence_len: true
-resume_from_checkpoint: null
+resume_from_checkpoint:
 s2_attention: null
 sample_packing: false
-save_steps:
+save_steps: 96
 saves_per_epoch: 0
 sequence_len: 512
 special_tokens:
@@ -88,9 +89,9 @@ tf32: false
 tokenizer_type: AutoTokenizer
 train_on_inputs: false
 trust_remote_code: true
-val_set_size: 0.0
+val_set_size: 0.05
 wandb_entity: null
-wandb_mode: disabled
+wandb_mode:
 wandb_name: bbdc12a3-1333-4e46-9918-0d4631f90551
 wandb_project: Gradients-On-Demand
 wandb_run: your_name
@@ -103,9 +104,11 @@ xformers_attention: true
 
 </details><br>
 
-# miner_id_24
+# 4463fa2d-bcef-4d53-8443-d3c7f10564d8
 
 This model is a fine-tuned version of [NousResearch/CodeLlama-7b-hf](https://huggingface.co/NousResearch/CodeLlama-7b-hf) on the None dataset.
+It achieves the following results on the evaluation set:
+- Loss: 0.9914
 
 ## Model description
 
@@ -133,10 +136,19 @@ The following hyperparameters were used during training:
 - optimizer: Use OptimizerNames.ADAMW_BNB with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_steps: 100
-- training_steps: 20
+- num_epochs: 100
 
 ### Training results
 
+| Training Loss | Epoch  | Step | Validation Loss |
+|:-------------:|:------:|:----:|:---------------:|
+| No log        | 0.0054 | 1    | 2.6265          |
+| 2.8198        | 0.5161 | 96   | 1.0719          |
+| 1.8322        | 1.0323 | 192  | 0.9115          |
+| 1.4055        | 1.5484 | 288  | 0.8627          |
+| 1.2677        | 2.0645 | 384  | 0.8888          |
+| 0.9139        | 2.5806 | 480  | 0.8767          |
+| 0.9038        | 3.0968 | 576  | 0.9914          |
 
 
 ### Framework versions
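
Taken together, the config changes above switch the run from a fixed 20-step cap to open-ended training (`max_steps` cleared, `num_epochs: 100`) gated by evaluation: a 5% validation split, eval/checkpoint/log every 96 steps, and early stopping after 3 evaluations without at least a 1e-4 improvement. Axolotl drives a transformers `Trainer` under the hood, so a roughly equivalent stand-alone sketch of that schedule, using the values from the diff but otherwise an assumption (recent transformers assumed, model and dataset wiring omitted), could look like:

```python
# Hedged sketch only: an approximate transformers equivalent of the updated schedule.
from transformers import TrainingArguments, EarlyStoppingCallback

args = TrainingArguments(
    output_dir="miner_id_24",        # output_dir from the config
    per_device_train_batch_size=32,  # micro_batch_size: 32
    gradient_accumulation_steps=2,
    learning_rate=4e-4,
    lr_scheduler_type="cosine",
    warmup_steps=100,
    num_train_epochs=100,            # num_epochs: 100, no max_steps cap
    optim="adamw_bnb_8bit",
    eval_strategy="steps",
    eval_steps=96,                   # eval_steps: 96
    save_steps=96,                   # save_steps: 96
    logging_steps=96,                # logging_steps: 96
    load_best_model_at_end=True,     # required for early stopping on eval_loss
    metric_for_best_model="eval_loss",
    greater_is_better=False,
)

# early_stopping_patience: 3, early_stopping_threshold: 0.0001
stopper = EarlyStoppingCallback(early_stopping_patience=3, early_stopping_threshold=1e-4)

# `args` and `stopper` would be handed to a Trainer together with the LoRA-wrapped
# CodeLlama model and the 95/5 train/validation split implied by val_set_size: 0.05.
```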
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d66b2e14c638f9f094fa398b8d430e784eabb75804a3ae79c8707b87b2640b80
+oid sha256:b253f744cf4ad6d9093ec55c68436873df9b935d7a24507781d26a91a215b836
 size 80115210
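
Both adapter files are stored through Git LFS, so the diff only shows the pointer: `oid` is the SHA-256 of the actual payload and `size` is its byte count. A small sketch for checking a downloaded copy against the new pointer (the local path is an assumption; the oid and size come from the diff above):

```python
# Verify a downloaded LFS-tracked file against its pointer.
import hashlib
import os

path = "adapter_model.bin"  # assumed local download of this revision
expected_oid = "b253f744cf4ad6d9093ec55c68436873df9b935d7a24507781d26a91a215b836"
expected_size = 80115210

sha = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        sha.update(chunk)

assert os.path.getsize(path) == expected_size, "size does not match the LFS pointer"
assert sha.hexdigest() == expected_oid, "sha256 does not match the LFS pointer"
print("adapter_model.bin matches its LFS pointer")
```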
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0a1b50f8d312b81c347ed7c53d7de332439d8a753541e36a84f19f46937dd2fa
+oid sha256:bdcad25513543b0946baa7fda4b46ba9229d1a8f96220cd051d69e996c75e749
 size 80013120
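
The card itself ships no usage snippet. A minimal sketch of loading the published adapter (the `hub_model_id` from the config) on top of the CodeLlama-7b base with PEFT, assuming a recent transformers/peft/accelerate stack; the dtype, device map, and prompt are illustrative choices, not part of the card:

```python
# Hedged sketch: attach the r=8, alpha=16 LoRA adapter to the base model and generate.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "NousResearch/CodeLlama-7b-hf"
adapter_id = "mrferr3t/4463fa2d-bcef-4d53-8443-d3c7f10564d8"  # hub_model_id above

tokenizer = AutoTokenizer.from_pretrained(base_id)
base = AutoModelForCausalLM.from_pretrained(base_id, torch_dtype=torch.bfloat16, device_map="auto")
model = PeftModel.from_pretrained(base, adapter_id)  # loads adapter_model.safetensors

prompt = "def fibonacci(n):"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
    out = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```

If a standalone checkpoint is preferred, `model.merge_and_unload()` can fold the LoRA deltas back into the base weights before saving.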