PEFT
PyTorch
Safetensors
llama
Generated from Trainer
mtasic85 committed on
Commit
ef4a21e
·
1 Parent(s): 4c84363

axolotl config

Browse files
Files changed (2) hide show
  1. .gitattributes +1 -0
  2. axolotl-config.yml +70 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tangled-llama-pints-1.5b-v0.1.jsonl filter=lfs diff=lfs merge=lfs -text
axolotl-config.yml ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: pints-ai/1.5-Pints-16K-v0.1
2
+ model_type: AutoModelForCausalLM
3
+ tokenizer_type: AutoTokenizer
4
+
5
+ load_in_8bit: false
6
+ load_in_4bit: true
7
+ strict: false
8
+
9
+ datasets:
10
+ - path: tangledgroup/tangled-llama-pints-1.5b-v0.1-dataset
11
+   type: sharegpt
12
+   conversation: chatml
13
+ dataset_prepared_path:
14
+ val_set_size: 0.05
15
+ output_dir: ./outputs/qlora-out
16
+
17
+ adapter: qlora
18
+ lora_model_dir:
19
+
20
+ sequence_len: 16384
21
+ sample_packing: true
22
+ pad_to_sequence_len: true
23
+
24
+ lora_r: 32
25
+ lora_alpha: 16
26
+ lora_dropout: 0.05
27
+ lora_target_modules:
28
+ lora_target_linear: true
29
+ lora_fan_in_fan_out:
30
+
31
+ wandb_project:
32
+ wandb_entity:
33
+ wandb_watch:
34
+ wandb_name:
35
+ wandb_log_model:
36
+
37
+ gradient_accumulation_steps: 4
38
+ micro_batch_size: 2
39
+ num_epochs: 3
40
+ optimizer: paged_adamw_32bit
41
+ lr_scheduler: cosine
42
+ learning_rate: 0.0002
43
+
44
+ train_on_inputs: false
45
+ group_by_length: false
46
+ bf16: auto
47
+ fp16:
48
+ tf32: false
49
+
50
+ gradient_checkpointing: true
51
+ early_stopping_patience:
52
+ resume_from_checkpoint:
53
+ local_rank:
54
+ logging_steps: 1
55
+ xformers_attention:
56
+ flash_attention: true
57
+
58
+ loss_watchdog_threshold: 15.0
59
+ loss_watchdog_patience: 3
60
+
61
+ warmup_steps: 10
62
+ evals_per_epoch: 4
63
+ eval_table_size:
64
+ saves_per_epoch: 1
65
+ debug:
66
+ deepspeed:
67
+ weight_decay: 0.0
68
+ fsdp:
69
+ fsdp_config:
70
+ special_tokens: