# 0.1 Install Dependencies
#!pip install unsloth torch transformers datasets trl huggingface_hub

# 0.2 Import Dependencies
from unsloth import FastLanguageModel
import torch
import os
from transformers import TextStreamer
from datasets import load_dataset
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported 

# 1. Configuration
max_seq_length = 1024
dtype = None
load_in_4bit = True

alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

instruction = """This assistant is trained to code executive ranks and roles along the following categories with 1 or 0.

Ranks:
- VP: 1 if Vice President (VP), 0 otherwise
- SVP: 1 if Senior Vice President (SVP), 0 otherwise
- EVP: 1 if Executive Vice President (EVP), 0 otherwise
- SEVP: 1 if Senior Executive Vice President (SEVP), 0 otherwise
- Director: 1 if Director, 0 otherwise
- Senior Director: 1 if Senior Director, 0 otherwise
- MD: 1 if Managing Director (MD), 0 otherwise
- SMD: 1 if Senior Managing Director (SMD), 0 otherwise
- SE: 1 if Senior Executive, 0 otherwise
- VC: 1 if Vice Chair (VC), 0 otherwise
- SVC: 1 if Senior Vice Chair (SVC), 0 otherwise
- President: 1 if President of the parent company, 0 when President of subsidiary or division but not parent company.

Roles:
- Board: 1 when role suggests person is a member of the board of directors, 0 otherwise
- CEO: 1 when Chief Executive Officer of parent company, 0 when Chief Executive Officer of a subsidiary but not parent company.
- CXO: 1 when the title is a C-suite title, i.e., Chief X Officer, where X can be any type of designation (this includes the Chief Executive Officer of the parent company), 0 otherwise. The title must combine Chief AND Officer; being only an officer of a function does not count.
- Primary: 1 when responsible for a primary activity of the value chain, i.e., Supply Chain, Manufacturing, Operations, Marketing & Sales, Customer Service, and the like, 0 when not a primary value chain activity.
- Support: 1 when responsible for a support activity of the value chain, i.e., Procurement, IT, HR, Management, Strategy, Finance, Legal, R&D, Investor Relations, Technology, General Counsel, and the like, 0 when not a support activity of the value chain.
- BU: 1 when involved with a distinct entity/unit responsible for a product, customer, or geographical domain, or when the role concerns a subsidiary, 0 when responsibility is not for a specific product/customer/geography area but, for example, for the entire parent company."""
input = "In 2015 the company 'cms' had an executive with the name david mengebier, whose official role title was: 'senior vice president, cms energy and consumers energy'."



# 2. Before Training
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Meta-Llama-3.1-8B-bnb-4bit",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    token = os.getenv("HF_TOKEN")
)

FastLanguageModel.for_inference(model) # Enable native 2x faster inference
inputs = tokenizer(
[
    alpaca_prompt.format(
        instruction, # instruction
        input, # input
        "", # output - leave this blank for generation!
    )
], return_tensors = "pt").to("cuda")

text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 1000)

# 3. Load data

EOS_TOKEN = tokenizer.eos_token # Must append EOS so the model learns where a response ends
def formatting_prompts_func(examples):
    # Build one Alpaca-formatted training string per (instruction, input, output) triple.
    instructions = examples["instruction"]
    inputs       = examples["input"]
    outputs      = examples["output"]
    texts = []
    for instruction, input, output in zip(instructions, inputs, outputs):
        text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN
        texts.append(text)
    return { "text" : texts, }
#dataset = load_dataset("daresearch/orgdatabase-training0-data", split = "train")
#dataset = dataset.map(formatting_prompts_func, batched = True,)


# Load train and validation datasets
train_dataset = load_dataset("csv", data_files="train.csv", split="train")
valid_dataset = load_dataset("csv", data_files="valid.csv", split="train")

# Apply formatting to both datasets
train_dataset = train_dataset.map(formatting_prompts_func, batched=True)
valid_dataset = valid_dataset.map(formatting_prompts_func, batched=True)
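
# Illustrative check (assumes train.csv and valid.csv expose "instruction", "input", and
# "output" columns, exactly as formatting_prompts_func expects): report dataset sizes and
# inspect one formatted example.
print(f"Train examples: {len(train_dataset)}, validation examples: {len(valid_dataset)}")
print(train_dataset[0]["text"][:300])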


# 4. Training
model = FastLanguageModel.get_peft_model(
    model,
    r=16,  # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
    lora_alpha=16,
    lora_dropout=0.05,  # Supports any, but = 0 is optimized
    bias="none",  # Supports any, but = "none" is optimized
    use_gradient_checkpointing="unsloth",  # True or "unsloth" for very long context
    random_state=3407,
    use_rslora=False,  # We support rank stabilized LoRA
    loftq_config=None,  # And LoftQ
)
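
# Optional check (a small addition): report how many parameters the LoRA adapters leave
# trainable; print_trainable_parameters comes from PEFT, which Unsloth builds on.
model.print_trainable_parameters()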

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    eval_dataset=valid_dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=8,  # Increase parallelism
    packing=True,  # Enable sequence packing
    args=TrainingArguments(
        per_device_train_batch_size=32,  # Reduce if GPU memory is tight
        gradient_accumulation_steps=1,  # Increase to raise the effective batch size without extra memory
        warmup_steps=5,
        max_steps=-1,  # -1 trains for num_train_epochs rather than a fixed step count
        num_train_epochs=3,  # Number of full passes over the training set
        learning_rate=2e-4,
        fp16=not is_bfloat16_supported(),
        bf16=is_bfloat16_supported(),
        logging_steps=10,  # Log less frequently
        evaluation_strategy="steps",
        eval_steps=50,  # Evaluate less frequently
        max_grad_norm=1.0,  # Add gradient clipping
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        seed=3407,
        output_dir="outputs",
    ),
)

# Show current memory stats
gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

trainer_stats = trainer.train()

# Show final memory and time stats
used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
used_percentage = round(used_memory / max_memory * 100, 3)
lora_percentage = round(used_memory_for_lora / max_memory * 100, 3)
print(f"{trainer_stats.metrics['train_runtime']} seconds used for training.")
print(f"{round(trainer_stats.metrics['train_runtime'] / 60, 2)} minutes used for training.")
print(f"Peak reserved memory = {used_memory} GB.")
print(f"Peak reserved memory for training = {used_memory_for_lora} GB.")
print(f"Peak reserved memory % of max memory = {used_percentage} %.")
print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.")

# Evaluate on the validation set after training (optional)
eval_stats = trainer.evaluate(eval_dataset=valid_dataset)
print(f"Validation Loss: {eval_stats['eval_loss']}")
if "eval_accuracy" in eval_stats:
    print(f"Validation Accuracy: {eval_stats['eval_accuracy']}")


# 5. After Training
FastLanguageModel.for_inference(model) # Enable native 2x faster inference
inputs = tokenizer(
[
    alpaca_prompt.format(
        instruction, # instruction
        input, # input
        "", # output - leave this blank for generation!
    )
], return_tensors = "pt").to("cuda")

text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 1000)
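
# Optional variant (a sketch, not the original workflow): capture the full generation and
# keep only the text that follows the "### Response:" marker of the Alpaca template.
output_ids = model.generate(**inputs, max_new_tokens = 1000)
decoded = tokenizer.batch_decode(output_ids, skip_special_tokens = True)[0]
response_only = decoded.split("### Response:")[-1].strip()
print(response_only)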


# 6. Saving
#model.save_pretrained("lora_model") # Local saving
#tokenizer.save_pretrained("lora_model")

huggingface_model_name = "daresearch/Llama-3.1-8B-bnb-4bit-exec-roles"
model.push_to_hub(huggingface_model_name, token = os.getenv("HF_TOKEN"))
tokenizer.push_to_hub(huggingface_model_name, token = os.getenv("HF_TOKEN"))

merged_huggingface_model_name = "daresearch/Llama-3.1-8B-bnb-4bit-M-exec-roles"
# Merge to 16bit
if True: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",)
if True: model.push_to_hub_merged(merged_huggingface_model_name, tokenizer, save_method = "merged_16bit", token = os.getenv("HF_TOKEN"))

# Merge to 4bit
#if True: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",)
#if True: model.push_to_hub_merged(huggingface_model_name, tokenizer, save_method = "merged_4bit", token = os.getenv("HF_TOKEN"))
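
# 7. Reloading for later inference (a minimal sketch; assumes the adapter repo pushed above
# is accessible with the same HF token)
#model, tokenizer = FastLanguageModel.from_pretrained(
#    model_name = huggingface_model_name,
#    max_seq_length = max_seq_length,
#    dtype = dtype,
#    load_in_4bit = load_in_4bit,
#    token = os.getenv("HF_TOKEN"),
#)
#FastLanguageModel.for_inference(model)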