# LlmEngChap6.py
import comet_ml  # imported before transformers/trl so Comet experiment auto-logging can hook in
from unsloth import PatchDPOTrainer

# Patch TRL's DPOTrainer before it is imported so Unsloth's optimisations are applied.
PatchDPOTrainer()

import torch
from accelerate import Accelerator, init_empty_weights
from datasets import load_dataset
from transformers import TextStreamer
from trl import DPOConfig, DPOTrainer
from unsloth import FastLanguageModel, is_bfloat16_supported

from config import SAVED_MODEL

class MyLlamaModel:
    max_seq_length = 256
    NUM_TRAIN_EPOCHS = 6
    beta = 0.5
    LOAD_IN_4BIT = False
    device_map = "auto"
    save_method = "lora"  # "lora" saves only the adapter weights; "merged_*" saves the whole merged model
    lora_dropout = 0.
    lora_alpha = 32
    learning_rate = 2e-5
    r = 32
    base_output_dir = (
        f"{SAVED_MODEL}/{max_seq_length}maxSeqLen_{NUM_TRAIN_EPOCHS}Epochs_{device_map}devmap"
        f"_4Bit{LOAD_IN_4BIT}_{save_method}_beta{beta}_loraDropout{lora_dropout}"
        f"_r{r}_lora_alpha{lora_alpha}_lr{learning_rate}/"
    )
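
    # r is the LoRA rank (adapter capacity) and lora_alpha scales the adapter update;
    # beta is the DPO KL-penalty strength: larger values keep the tuned policy closer
    # to the reference model.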

    def __init__(self):
        self.model_name = "unsloth/DeepSeek-R1-GGUF"
        self.model_path = f"{self.base_output_dir}/{self.model_name}"

    def get_model_tokenizer(self, model_name: str):
        print(f"Using model {model_name}")
        self.model_name = model_name
        self.model_path = f"{self.base_output_dir}/{model_name}"
        model, tokenizer = FastLanguageModel.from_pretrained(
            model_name=self.model_name,
            # max_seq_length=self.max_seq_length,
            load_in_4bit=self.LOAD_IN_4BIT,  # "You can activate QLoRA by setting load_in_4bit to True", LLM Engineering, p251
            # quantization_config=bnb_config,  # helped with memory but caused non-zero probabilities when demoed
            # device_map=self.device_map,  # try this
            trust_remote_code=True,
        )
        return model, tokenizer

    def train_and_save(self):
        model, tokenizer = self.get_model_tokenizer(self.model_name)
        with init_empty_weights():
            model = FastLanguageModel.get_peft_model(
                model,
                r=self.r,
                lora_alpha=self.lora_alpha,
                lora_dropout=self.lora_dropout,
                target_modules=["q_proj", "k_proj", "v_proj", "up_proj", "down_proj", "o_proj", "gate_proj"],
            )
        torch.nn.Module.to_empty(model, device=torch.device("cuda"))  # eliminates 'NotImplementedError: Cannot copy out of meta tensor'
        accelerator = Accelerator(mixed_precision="bf16", cpu=True)  # enable mixed precision for memory efficiency
        device = accelerator.device
        # model.to(device)
        # optimizer = AdamW(params=model.parameters(), lr=3e-2)
        # Move the model to the appropriate device
        model = accelerator.prepare(model)
        self.do_dpo(model, tokenizer)

    def do_dpo(self, model, tokenizer):
        dataset = self.load_prepared_dataset(tokenizer.eos_token)
        trainer = DPOTrainer(
            model=model,
            ref_model=None,
            tokenizer=tokenizer,
            beta=self.beta,
            train_dataset=dataset["train"],
            eval_dataset=dataset["test"],
            max_length=self.max_seq_length // 2,
            max_prompt_length=self.max_seq_length // 2,
            args=DPOConfig(
                learning_rate=self.learning_rate,
                lr_scheduler_type="linear",
                per_device_train_batch_size=1,
                per_device_eval_batch_size=1,
                gradient_accumulation_steps=8,
                num_train_epochs=self.NUM_TRAIN_EPOCHS,
                fp16=not is_bfloat16_supported(),
                bf16=is_bfloat16_supported(),
                weight_decay=0.01,
                warmup_steps=10,
                output_dir="output",
                eval_strategy="steps",
                eval_steps=0.2,
                logging_steps=1,
                report_to="comet_ml",
                seed=0,
            ),
        )
        trainer.train()
        model.save_pretrained_merged(self.model_path, tokenizer=tokenizer, save_method=self.save_method)  # alternatives include "merged_4bit_forced"
        generate_text_using(model, tokenizer)
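
    # Note: DPOTrainer expects the dataset to expose "prompt", "chosen" and "rejected"
    # columns; load_prepared_dataset below maps the raw records into that shape and
    # appends the EOS token so the model learns where a response should stop.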

    @staticmethod
    def load_prepared_dataset(eos_token):
        alpaca_template = """Below is an instruction that describes a task.
Write a response that appropriately completes the request.

### Instruction:
{}

### Response:
"""

        def format_samples(example):
            example["prompt"] = alpaca_template.format(example["prompt"])
            example["chosen"] = example["chosen"] + eos_token
            example["rejected"] = example["rejected"] + eos_token
            return {"prompt": example["prompt"], "chosen": example["chosen"], "rejected": example["rejected"]}

        dataset = load_dataset("mlabonne/llmtwin-dpo", split="train")
        dataset = dataset.map(format_samples)
        dataset = dataset.train_test_split(test_size=0.05)
        return dataset


def generate_text_using(model, tokenizer):
    print(f"Model of type {type(model)}, tokenizer of type {type(tokenizer)}")
    # return_tensors can be "pt", "tf", "np", "jax" or "mlx"; PyTorch tensors are needed here
    inputs = tokenizer(["Who are the creators of the course that is under the 'Decoding ML' umbrella?"], return_tensors="pt").to("cuda")
    text_streamer = TextStreamer(tokenizer)
    FastLanguageModel.for_inference(model)
    _ = model.generate(**inputs, streamer=text_streamer, max_new_tokens=MyLlamaModel.max_seq_length, use_cache=True)
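

# A minimal sketch (not exercised above, and the helper name is hypothetical) of how the
# saved adapter could later be reloaded for inference, assuming the LoRA weights were
# written to MyLlamaModel().model_path by train_and_save().
def load_saved_model(model_path: str):
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=model_path,  # directory produced by save_pretrained_merged(..., save_method="lora")
        load_in_4bit=False,
    )
    FastLanguageModel.for_inference(model)  # enable Unsloth's faster inference path
    return model, tokenizer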


if __name__ == "__main__":
    my_model = MyLlamaModel()
    my_model.train_and_save()