maxidl committed on
Commit
00e8646
·
verified ·
1 Parent(s): da9d88c

Create axolotl_config.yaml

Browse files
Files changed (1) hide show
  1. axolotl_config.yaml +68 -0
axolotl_config.yaml ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
---
# Axolotl config: full-parameter fine-tune of Llama-3.1-8B-Instruct with
# Liger kernel patches and DeepSpeed ZeRO-3 (bf16).
base_model: meta-llama/Llama-3.1-8B-Instruct

# Liger integration — fused/optimized kernels to reduce memory and speed up training.
plugins:
  - axolotl.integrations.liger.LigerPlugin
liger_rope: true
liger_rms_norm: true
liger_glu_activation: true
liger_fused_linear_cross_entropy: true

strict: false

chat_template: llama3
datasets:
  - path: ""       # TODO: set the dataset path before running
    type:          # NOTE(review): left empty in the original — confirm intended dataset type
    ds_type: parquet

dataset_prepared_path:
val_set_size: 0.00
output_dir: ./outputs/llama31-8B-liger-ds-full

dataset_processes: 16

# 128k-token context; packing off, every sample padded to full sequence length.
sequence_len: 131072
sample_packing: false
pad_to_sequence_len: true

# Weights & Biases logging — effectively disabled while these stay unset.
wandb_project:
wandb_entity:
wandb_watch:
wandb_name:
wandb_log_model:

gradient_accumulation_steps: 1
micro_batch_size: 1
num_epochs: 3
optimizer: adamw_torch
lr_scheduler: cosine
learning_rate: 2e-5

train_on_inputs: false
group_by_length: false
bf16: auto
fp16:
tf32: true

gradient_checkpointing: true
gradient_checkpointing_kwargs:
  use_reentrant: false
early_stopping_patience:
resume_from_checkpoint:
logging_steps: 1
xformers_attention:
flash_attention: true

warmup_steps: 50
evals_per_epoch: 0
eval_table_size:
saves_per_epoch: 2
save_only_model: true
debug:
deepspeed: deepspeed_configs/zero3_bf16.json
weight_decay: 0.0
fsdp:
fsdp_config:
# Llama 3.1 special tokens; quoted so the <|...|> strings stay plain-string safe.
special_tokens:
  pad_token: "<|finetune_right_pad_id|>"
  eos_token: "<|eot_id|>"