PhillHenry committed
Commit 9aa17f3 · verified · 1 Parent(s): 440412f

Upload LlmEngChap6.py

Files changed (1)
  1. LlmEngChap6.py +135 -0
LlmEngChap6.py ADDED
@@ -0,0 +1,135 @@
+import comet_ml
+from unsloth import PatchDPOTrainer
+from accelerate import Accelerator
+from config import SAVED_MODEL
+
+PatchDPOTrainer()
+
+import torch
+from transformers import TextStreamer, AutoTokenizer
+from datasets import load_dataset
+from unsloth import FastLanguageModel, is_bfloat16_supported
+from trl import DPOConfig, DPOTrainer
+from accelerate import init_empty_weights
+
+
+class MyLlamaModel:
+    max_seq_length = 256
+    NUM_TRAIN_EPOCHS = 6
+    beta = 0.5
+    LOAD_IN_4BIT = False
+    device_map = "auto"
+    save_method = "lora"  # merged_X saves the whole merged model, not just the LoRA adapters
+    lora_dropout = 0.
+    lora_alpha = 32
+    learning_rate = 2e-5
+    r = 32
+    base_output_dir = f"{SAVED_MODEL}/{max_seq_length}maxSeqLen_{NUM_TRAIN_EPOCHS}Epochs_{device_map}devmap_4Bit{LOAD_IN_4BIT}_{save_method}_beta{beta}_loraDropout{lora_dropout}_r{r}_lora_alpha{lora_alpha}_lr{learning_rate}/"
+
+    def __init__(self):
+        self.model_name = "unsloth/DeepSeek-R1-GGUF"
+        self.model_path = f"{self.base_output_dir}/{self.model_name}"
+
+    def get_model_tokenizer(self, model_name: str):
+        print(f"Using model {model_name}")
+        self.model_name = model_name
+        self.model_path = f"{self.base_output_dir}/{model_name}"
+        model, tokenizer = FastLanguageModel.from_pretrained(
+            model_name=self.model_name,
+            # max_seq_length=self.max_seq_length,
+            load_in_4bit=self.LOAD_IN_4BIT,  # "You can activate QLoRA by setting load_in_4bit to True", LLM Engineering, p. 251
+            # quantization_config=bnb_config,  # helped with memory but caused non-zero probabilities when demoed
+            # device_map=self.device_map,  # try this
+            trust_remote_code=True,
+        )
+        return model, tokenizer
+
+    def train_and_save(self):
+        model, tokenizer = self.get_model_tokenizer(self.model_name)
+        with init_empty_weights():
+            model = FastLanguageModel.get_peft_model(
+                model,
+                r=self.r,
+                lora_alpha=self.lora_alpha,
+                lora_dropout=self.lora_dropout,
+                target_modules=["q_proj", "k_proj", "v_proj", "up_proj", "down_proj", "o_proj", "gate_proj"],
+            )
+        torch.nn.Module.to_empty(model, device=torch.device("cuda"))  # eliminates 'NotImplementedError: Cannot copy out of meta tensor'
+        accelerator = Accelerator(mixed_precision="bf16", cpu=True)  # enable mixed precision for memory efficiency
+        device = accelerator.device
+        # model.to(device)
+        # optimizer = AdamW(params=model.parameters(), lr=3e-2)
+
+        # Move the model to the appropriate device
+        model = accelerator.prepare(model)
+        self.do_dpo(model, tokenizer)
+
+    def do_dpo(self, model, tokenizer):
+        dataset = self.load_prepared_dataset(tokenizer.eos_token)
+        trainer = DPOTrainer(
+            model=model,
+            ref_model=None,
+            tokenizer=tokenizer,
+            beta=self.beta,
+            train_dataset=dataset["train"],
+            eval_dataset=dataset["test"],
+            max_length=self.max_seq_length // 2,
+            max_prompt_length=self.max_seq_length // 2,
+            args=DPOConfig(
+                learning_rate=self.learning_rate,
+                lr_scheduler_type="linear",
+                per_device_train_batch_size=1,
+                per_device_eval_batch_size=1,
+                gradient_accumulation_steps=8,
+                num_train_epochs=self.NUM_TRAIN_EPOCHS,
+                fp16=not is_bfloat16_supported(),
+                bf16=is_bfloat16_supported(),
+                weight_decay=0.01,
+                warmup_steps=10,
+                output_dir="output",
+                eval_strategy="steps",
+                eval_steps=0.2,
+                logging_steps=1,
+                report_to="comet_ml",
+                seed=0,
+            ),
+        )
+        trainer.train()
+        model.save_pretrained_merged(self.model_path, tokenizer=tokenizer, save_method=self.save_method)  # merged_4bit_forced
+        generate_text_using(model, tokenizer)
+
+
+    @staticmethod
+    def load_prepared_dataset(eos_token):
+        alpaca_template = """Below is an instruction that describes a task.
+Write a response that appropriately completes the request.
+### Instruction:
+{}
+### Response:
+"""
+
+        def format_samples(example):
+            example["prompt"] = alpaca_template.format(example["prompt"])
+            example["chosen"] = example["chosen"] + eos_token
+            example["rejected"] = example["rejected"] + eos_token
+            return {"prompt": example["prompt"], "chosen": example["chosen"],
+                    "rejected": example["rejected"]}
+
+        dataset = load_dataset("mlabonne/llmtwin-dpo", split="train")
+        dataset = dataset.map(format_samples)
+        dataset = dataset.train_test_split(test_size=0.05)
+        return dataset
+
+
+def generate_text_using(model, tokenizer):
+    print(f"Model of type {type(model)}, tokenizer of type {type(tokenizer)}")
+    # return_tensors options: "pt", "tf", "np", "jax", "mlx"
+    inputs = tokenizer(["Who are the creators of the course that is under the 'Decoding ML' umbrella?"], return_tensors="pt").to("cuda")
+    text_streamer = TextStreamer(tokenizer)
+    FastLanguageModel.for_inference(model)
+    _ = model.generate(**inputs, streamer=text_streamer, max_new_tokens=MyLlamaModel.max_seq_length, use_cache=True)
+
+
+if __name__ == "__main__":
+    my_model = MyLlamaModel()
+    my_model.train_and_save()
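
Note: the script imports SAVED_MODEL from a local config module that is not part of this commit. A minimal sketch of such a module, assuming only that SAVED_MODEL names the directory under which checkpoints are written; the environment variable and default path below are placeholders, not part of the upload:

# config.py -- hypothetical companion module for LlmEngChap6.py.
# SAVED_MODEL is the only name the script imports; the default path is an assumption.
import os

# Root directory under which the fine-tuned model / LoRA adapter directories are created.
SAVED_MODEL = os.environ.get("SAVED_MODEL_DIR", "/tmp/saved_models")

With a module like this on the path, running `python LlmEngChap6.py` should DPO-fine-tune the configured base model on the mlabonne/llmtwin-dpo preference pairs, save the result under base_output_dir, and stream one sample generation.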