# 0.1 Install Dependencies
#!pip install unsloth torch transformers datasets trl huggingface_hub
# 0.2 Import Dependencies
from unsloth import FastLanguageModel
import torch
import os
from transformers import TextStreamer
from datasets import load_dataset
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported
# 1. Configuration
max_seq_length = 1024  # Maximum sequence length used for training and inference
dtype = None  # None = auto-detect (float16 on older GPUs such as T4/V100, bfloat16 on Ampere+)
load_in_4bit = True  # Load the base model in 4-bit to reduce memory usage
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
### Instruction:
{}
### Input:
{}
### Response:
{}"""
instruction = """This assistant is trained to code executive ranks and roles along the following categories with 1 or 0.
Ranks:
- VP: 1 if Vice President (VP), 0 otherwise
- SVP: 1 if Senior Vice President (SVP), 0 otherwise
- EVP: 1 if Executive Vice President (EVP), 0 otherwise
- SEVP: 1 if Senior Executive Vice President (SEVP), 0 otherwise
- Director: 1 if Director, 0 otherwise
- Senior Director: 1 if Senior Director, 0 otherwise
- MD: 1 if Managing Director (MD), 0 otherwise
- SMD: 1 if Senior Managing Director (SMD), 0 otherwise
- SE: 1 if Senior Executive, 0 otherwise
- VC: 1 if Vice Chair (VC), 0 otherwise
- SVC: 1 if Senior Vice Chair (SVC), 0 otherwise
- President: 1 if President of the parent company, 0 when President of a subsidiary or division but not of the parent company.
Roles:
- Board: 1 when role suggests person is a member of the board of directors, 0 otherwise
- CEO: 1 when Chief Executive Officer of the parent company, 0 when Chief Executive Officer of a subsidiary but not of the parent company.
- CXO: 1 when the title is a C-suite title, i.e., Chief X Officer, where X can be any type of designation (this includes the Chief Executive Officer of the parent company); 0 otherwise. The title must contain both Chief and Officer; being merely an officer of a function does not count.
- Primary: 1 when responsible for a primary activity of the value chain, i.e., Supply Chain, Manufacturing, Operations, Marketing & Sales, Customer Service, and the like; 0 when not a primary value chain activity.
- Support: 1 when responsible for a support activity of the value chain, i.e., Procurement, IT, HR, Management, Strategy, Finance, Legal, R&D, Investor Relations, Technology, General Counsel, and the like; 0 when not a support activity of the value chain.
- BU: 1 when involved with an entity/distinct unit responsible for Product, Customer, or Geographical domain/unit; or role is about a subsidiary, 0 when responsibility is not for a specific product/customer/geography area but, for example, for the entire parent company."""
input = "In 2015 the company 'cms' had an executive with the name david mengebier, whose official role title was: 'senior vice president, cms energy and consumers energy'."
# 2. Before Training
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Meta-Llama-3.1-8B-bnb-4bit",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    token = os.getenv("HF_TOKEN")
)
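# Note: the base checkpoint above is a pre-quantized 4-bit (bitsandbytes) build of
# Llama 3.1 8B, so the frozen base weights stay in 4-bit and only the LoRA adapters
# added in section 4 are trained.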
FastLanguageModel.for_inference(model) # Enable native 2x faster inference
inputs = tokenizer(
    [
        alpaca_prompt.format(
            instruction,  # instruction
            input,        # input
            "",           # output - leave this blank for generation!
        )
    ], return_tensors = "pt").to("cuda")
text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 1000)
# 3. Load data
EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN
def formatting_prompts_func(examples):
    instructions = examples["instruction"]
    inputs = examples["input"]
    outputs = examples["output"]
    texts = []
    for instruction, input, output in zip(instructions, inputs, outputs):
        text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN
        texts.append(text)
    return {"text": texts}
#dataset = load_dataset("daresearch/orgdatabase-training0-data", split = "train")
#dataset = dataset.map(formatting_prompts_func, batched = True,)
# Load train and validation datasets
train_dataset = load_dataset("csv", data_files="train.csv", split="train")
valid_dataset = load_dataset("csv", data_files="valid.csv", split="train")
# Apply formatting to both datasets
train_dataset = train_dataset.map(formatting_prompts_func, batched=True)
valid_dataset = valid_dataset.map(formatting_prompts_func, batched=True)
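# Optional sanity check (assumes train.csv/valid.csv contain "instruction", "input",
# and "output" columns, as required by formatting_prompts_func above):
# print(train_dataset[0]["text"])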
# 4. Training
model = FastLanguageModel.get_peft_model(
    model,
    r=16,  # LoRA rank; any number > 0 works, suggested values are 8, 16, 32, 64, 128
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
    lora_alpha=16,
    lora_dropout=0.05,  # Any value is supported, but 0 is optimized
    bias="none",  # Any value is supported, but "none" is optimized
    use_gradient_checkpointing="unsloth",  # True or "unsloth" for very long context
    random_state=3407,
    use_rslora=False,  # Rank-stabilized LoRA is supported
    loftq_config=None,  # LoftQ is supported
)
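# Optional: if the object returned by get_peft_model is a standard PEFT model, the
# fraction of weights being trained can be inspected with:
# model.print_trainable_parameters()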
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    eval_dataset=valid_dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=8,  # Number of processes used to format/tokenize the dataset
    packing=True,  # Enable sequence packing
    args=TrainingArguments(
        per_device_train_batch_size=32,  # Reduce this if you run into GPU memory issues
        gradient_accumulation_steps=1,  # Increase to preserve the effective batch size when lowering the per-device size
        warmup_steps=5,
        max_steps=-1,  # -1 means no step cap; training length is controlled by num_train_epochs
        num_train_epochs=3,
        learning_rate=2e-4,
        fp16=not is_bfloat16_supported(),
        bf16=is_bfloat16_supported(),
        logging_steps=10,  # Log every 10 steps
        evaluation_strategy="steps",
        eval_steps=50,  # Evaluate every 50 steps
        max_grad_norm=1.0,  # Gradient clipping
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        seed=3407,
        output_dir="outputs",
    ),
)
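# Note: with packing=True the SFTTrainer concatenates several short formatted examples
# into each max_seq_length-token sequence, so the number of optimizer steps per epoch
# is smaller than len(train_dataset) divided by the batch size.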
# Show current memory stats
gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")
trainer_stats = trainer.train()
# Show final memory and time stats
used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
used_percentage = round(used_memory / max_memory * 100, 3)
lora_percentage = round(used_memory_for_lora / max_memory * 100, 3)
print(f"{trainer_stats.metrics['train_runtime']} seconds used for training.")
print(f"{round(trainer_stats.metrics['train_runtime'] / 60, 2)} minutes used for training.")
print(f"Peak reserved memory = {used_memory} GB.")
print(f"Peak reserved memory for training = {used_memory_for_lora} GB.")
print(f"Peak reserved memory % of max memory = {used_percentage} %.")
print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.")
# Optionally evaluate on the validation set after training
eval_stats = trainer.evaluate(eval_dataset=valid_dataset)
print(f"Validation Loss: {eval_stats['eval_loss']}")
if "eval_accuracy" in eval_stats:
print(f"Validation Accuracy: {eval_stats['eval_accuracy']}")
# 5. After Training
FastLanguageModel.for_inference(model) # Enable native 2x faster inference
inputs = tokenizer(
    [
        alpaca_prompt.format(
            instruction,  # instruction
            input,        # input
            "",           # output - leave this blank for generation!
        )
    ], return_tensors = "pt").to("cuda")
text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 1000)
# 6. Saving
#model.save_pretrained("lora_model") # Local saving
#tokenizer.save_pretrained("lora_model")
huggingface_model_name = "daresearch/Llama-3.1-8B-bnb-4bit-exec-roles"
model.push_to_hub(huggingface_model_name, token = os.getenv("HF_TOKEN"))
tokenizer.push_to_hub(huggingface_model_name, token = os.getenv("HF_TOKEN"))
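# The two push_to_hub calls above upload only the LoRA adapter weights and the
# tokenizer; the merged exports below produce a standalone checkpoint with the
# adapters folded into the base weights.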
merged_huggingface_model_name = "daresearch/Llama-3.1-8B-bnb-4bit-M-exec-roles"
# Merge to 16bit
if True: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",)
if True: model.push_to_hub_merged(merged_huggingface_model_name, tokenizer, save_method = "merged_16bit", token = os.getenv("HF_TOKEN"))
# Merge to 4bit
#if True: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",)
#if True: model.push_to_hub_merged(merged_huggingface_model_name, tokenizer, save_method = "merged_4bit", token = os.getenv("HF_TOKEN"))