Spaces: Running on Zero

Daemontatox committed: Update app.py

app.py CHANGED
@@ -5,10 +5,6 @@ subprocess.run(
     env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"},
     shell=True
 )
-subprocess.run(
-    'pip install autoawq',
-    shell=True
-)
 import os
 import re
 import time
@@ -22,10 +18,17 @@ from transformers import (
     BitsAndBytesConfig,
     TextIteratorStreamer
 )
-from awq import AutoAWQForCausalLM
 
 # Configuration Constants
-MODEL_ID = "
+MODEL_ID = "Daemontatox/PathFinderAi3.0"
+
+
+# [Understand]: Analyze the question to identify key details and clarify the goal.
+# [Plan]: Outline a logical, step-by-step approach to address the question or problem.
+# [Reason]: Execute the plan, applying logical reasoning, calculations, or analysis to reach a conclusion. Document each step clearly.
+# [Reflect]: Review the reasoning and the final answer to ensure it is accurate, complete, and adheres to the principle of openness.
+# [Respond]: Present a well-structured and transparent answer, enriched with supporting details as needed.
+# Use these tags as headers in your response to make your thought process easy to follow and aligned with the principle of openness.
 
 DEFAULT_SYSTEM_PROMPT ="""
 You are an expert mathematician with extensive experience in mathematical competitions. You approach problems through systematic thinking and rigorous reasoning. When solving problems, follow these thought processes:
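Review note: the new [Understand]/[Plan]/[Reason]/[Reflect]/[Respond] lines are Python comments, so they never reach the model. If they are meant to steer generation rather than document the code, they would have to live in the prompt string itself. A hypothetical sketch (the names below are not from this commit):

# If the scaffold should steer the model, fold it into the prompt string;
# Python comments are stripped before anything is sent to the model.
REASONING_SCAFFOLD = """
[Understand]: Analyze the question to identify key details and clarify the goal.
[Plan]: Outline a logical, step-by-step approach to address the question or problem.
[Reason]: Execute the plan, documenting each step clearly.
[Reflect]: Review the reasoning and the final answer for accuracy and completeness.
[Respond]: Present a well-structured, transparent answer.
"""

# Hypothetical combined prompt, reusing the constant defined below in app.py:
SYSTEM_PROMPT_WITH_SCAFFOLD = DEFAULT_SYSTEM_PROMPT + REASONING_SCAFFOLD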
@@ -73,7 +76,6 @@ When you're ready, present your complete solution with:
 - Thorough verification
 Focus on clear, logical progression of ideas and thorough explanation of your mathematical reasoning. Provide answers in the same language as the user asking the question, repeat the final answer using a '\\boxed{}' without any units, you have [[8192]] tokens to complete the answer.
 """
-
 # UI Configuration
 TITLE = "<h1><center>AI Reasoning Assistant</center></h1>"
 PLACEHOLDER = "Ask me anything! I'll think through it step by step."
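Review note: the prompt promises a [[8192]]-token budget, which only holds if generation is configured to match. A hypothetical sketch of how DEFAULT_SYSTEM_PROMPT and the TextIteratorStreamer imported above would plausibly be wired together (the helper name and structure are assumptions; the actual generation code is outside this diff):

from threading import Thread
from transformers import TextIteratorStreamer

def stream_reply(model, tokenizer, user_message: str):
    """Hypothetical helper: max_new_tokens matches the [[8192]] budget
    promised in DEFAULT_SYSTEM_PROMPT (defined earlier in app.py)."""
    messages = [
        {"role": "system", "content": DEFAULT_SYSTEM_PROMPT},
        {"role": "user", "content": user_message},
    ]
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)
    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    # Run generation in a background thread so tokens can be streamed.
    Thread(target=model.generate, kwargs=dict(
        input_ids=input_ids, max_new_tokens=8192, streamer=streamer
    )).start()
    for chunk in streamer:
        yield chunk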
@@ -117,16 +119,26 @@ h3 {
 """
 
 def initialize_model():
-    """Initialize the model with
-
+    """Initialize the model with appropriate configurations"""
+    quantization_config = BitsAndBytesConfig(
+        load_in_8bit=True,
+        bnb_8bit_compute_dtype=torch.bfloat16,
+        bnb_8bit_quant_type="nf4",
+        bnb_8bit_use_double_quant=True
+    )
+
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
     if tokenizer.pad_token_id is None:
         tokenizer.pad_token_id = tokenizer.eos_token_id
 
-    model =
+    model = AutoModelForCausalLM.from_pretrained(
         MODEL_ID,
         torch_dtype=torch.float16,
         device_map="cuda",
-
+        attn_implementation="flash_attention_2",
+        trust_remote_code=True,
+        quantization_config=quantization_config
+
     )
 
     return model, tokenizer
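Review note: as of current transformers releases, BitsAndBytesConfig defines load_in_8bit/load_in_4bit plus bnb_4bit_* and llm_int8_* options; there are no bnb_8bit_compute_dtype, bnb_8bit_quant_type, or bnb_8bit_use_double_quant parameters, so those three kwargs are ignored (transformers reports them as unused) and the model loads in plain 8-bit. If 4-bit NF4 quantization was the intent, the documented spelling would look like the sketch below; this is an assumption about intent, not the author's method:

import torch
from transformers import BitsAndBytesConfig

# 4-bit NF4 config using the documented parameter names. If plain 8-bit
# loading is what was wanted, load_in_8bit=True alone is enough.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,  # dtype used for matmul compute
    bnb_4bit_quant_type="nf4",              # NormalFloat4 weight quantization
    bnb_4bit_use_double_quant=True          # also quantize quantization constants
)

Minor follow-up: torch_dtype=torch.float16 in from_pretrained sits oddly next to a bfloat16 compute dtype; picking one half-precision dtype for both would be more consistent, and either satisfies flash_attention_2's fp16/bf16 requirement.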