# Model
model_name: "google/gemma-2-2b-it"
new_model_name: "gemma-2-2b-ft"
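A minimal sketch of how this file might be consumed, assuming it is saved as `config.yaml` (the filename and the use of PyYAML are assumptions, not part of the config):

```python
import yaml

# Load the YAML config above into a plain dict.
with open("config.yaml") as f:
    config = yaml.safe_load(f)

print(config["model_name"])  # "google/gemma-2-2b-it"
```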

# LoRA adapter settings
lora_r: 4
lora_alpha: 16
lora_dropout: 0.1
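A sketch of how these three values could map onto a peft `LoraConfig`; the `target_modules` list is an assumption, since the config does not name the layers the adapters attach to:

```python
from peft import LoraConfig

peft_config = LoraConfig(
    r=config["lora_r"],               # rank of the low-rank update matrices
    lora_alpha=config["lora_alpha"],  # scaling factor (updates scaled by alpha / r)
    lora_dropout=config["lora_dropout"],
    bias="none",
    task_type="CAUSAL_LM",
    # Assumed attention projections; not specified in the config itself.
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
)
```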

# 4-bit quantization (bitsandbytes / QLoRA)
use_4bit: True
bnb_4bit_compute_dtype: "float16"
bnb_4bit_quant_type: "nf4"
use_nested_quant: False
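These four keys correspond directly to transformers' `BitsAndBytesConfig`; a sketch of the mapping:

```python
import torch
from transformers import BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=config["use_4bit"],
    # Resolve the dtype string ("float16") to the torch dtype object.
    bnb_4bit_compute_dtype=getattr(torch, config["bnb_4bit_compute_dtype"]),
    bnb_4bit_quant_type=config["bnb_4bit_quant_type"],    # NF4 quantization
    bnb_4bit_use_double_quant=config["use_nested_quant"],
)
```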

# Training arguments
output_dir: "./results"
num_train_epochs: 1
fp16: False
bf16: False
per_device_train_batch_size: 2
per_device_eval_batch_size: 2
gradient_accumulation_steps: 1
gradient_checkpointing: True
max_grad_norm: 0.3
learning_rate: 2e-4
weight_decay: 0.001
optimizer: "paged_adamw_32bit"
lr_scheduler_type: "constant"
max_steps: -1              # -1 trains for the full num_train_epochs
warmup_ratio: 0.03
group_by_length: True
save_steps: 25
logging_steps: 25
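A sketch mapping this block onto transformers' `TrainingArguments`; note that the key named `optimizer` here feeds the `optim` parameter:

```python
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir=config["output_dir"],
    num_train_epochs=config["num_train_epochs"],
    fp16=config["fp16"],
    bf16=config["bf16"],
    per_device_train_batch_size=config["per_device_train_batch_size"],
    per_device_eval_batch_size=config["per_device_eval_batch_size"],
    gradient_accumulation_steps=config["gradient_accumulation_steps"],
    gradient_checkpointing=config["gradient_checkpointing"],
    max_grad_norm=config["max_grad_norm"],
    # PyYAML may load 2e-4 as a string, so cast defensively.
    learning_rate=float(config["learning_rate"]),
    weight_decay=config["weight_decay"],
    optim=config["optimizer"],             # "paged_adamw_32bit"
    lr_scheduler_type=config["lr_scheduler_type"],
    max_steps=config["max_steps"],
    warmup_ratio=config["warmup_ratio"],
    group_by_length=config["group_by_length"],
    save_steps=config["save_steps"],
    logging_steps=config["logging_steps"],
)
```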

# SFTTrainer settings
max_seq_length: 40
packing: True
device_map: "auto"
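Putting it together, a minimal end-to-end sketch with trl's `SFTTrainer`. Here `train_dataset` and its text column name are assumptions, and the keyword arguments follow older trl releases (newer versions move `max_seq_length` and `packing` into `SFTConfig`):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
from trl import SFTTrainer

# Load the 4-bit quantized base model and its tokenizer.
model = AutoModelForCausalLM.from_pretrained(
    config["model_name"],
    quantization_config=bnb_config,
    device_map=config["device_map"],
)
tokenizer = AutoTokenizer.from_pretrained(config["model_name"])

trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,    # assumed: a prepared datasets.Dataset
    dataset_text_field="text",      # assumed column name
    peft_config=peft_config,
    tokenizer=tokenizer,
    max_seq_length=config["max_seq_length"],
    packing=config["packing"],
)
trainer.train()
trainer.model.save_pretrained(config["new_model_name"])
```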