Daemontatox committed on
Commit
e339ee0
·
verified ·
1 Parent(s): 80bfd17

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -11
app.py CHANGED
@@ -5,10 +5,6 @@ subprocess.run(
5
  env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"},
6
  shell=True
7
  )
8
- subprocess.run(
9
- 'pip install autoawq',
10
- shell=True
11
- )
12
  import os
13
  import re
14
  import time
@@ -22,10 +18,17 @@ from transformers import (
22
  BitsAndBytesConfig,
23
  TextIteratorStreamer
24
  )
25
- from awq import AutoAWQForCausalLM
26
 
27
  # Configuration Constants
28
- MODEL_ID = "KirillR/QwQ-32B-Preview-AWQ"
 
 
 
 
 
 
 
 
29
 
30
  DEFAULT_SYSTEM_PROMPT ="""
31
  You are an expert mathematician with extensive experience in mathematical competitions. You approach problems through systematic thinking and rigorous reasoning. When solving problems, follow these thought processes:
@@ -73,7 +76,6 @@ When you're ready, present your complete solution with:
73
  - Thorough verification
74
  Focus on clear, logical progression of ideas and thorough explanation of your mathematical reasoning. Provide answers in the same language as the user asking the question, repeat the final answer using a '\\boxed{}' without any units, you have [[8192]] tokens to complete the answer.
75
  """
76
-
77
  # UI Configuration
78
  TITLE = "<h1><center>AI Reasoning Assistant</center></h1>"
79
  PLACEHOLDER = "Ask me anything! I'll think through it step by step."
@@ -117,16 +119,26 @@ h3 {
117
  """
118
 
119
  def initialize_model():
120
- """Initialize the model with AWQ configuration"""
121
- tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
 
 
 
 
 
 
 
122
  if tokenizer.pad_token_id is None:
123
  tokenizer.pad_token_id = tokenizer.eos_token_id
124
 
125
- model = AutoAWQForCausalLM.from_pretrained(
126
  MODEL_ID,
127
  torch_dtype=torch.float16,
128
  device_map="cuda",
129
- trust_remote_code=True
 
 
 
130
  )
131
 
132
  return model, tokenizer
 
5
  env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"},
6
  shell=True
7
  )
 
 
 
 
8
  import os
9
  import re
10
  import time
 
18
  BitsAndBytesConfig,
19
  TextIteratorStreamer
20
  )
 
21
 
22
  # Configuration Constants
23
+ MODEL_ID = "Daemontatox/PathFinderAi3.0"
24
+
25
+
26
+ # Understand]: Analyze the question to identify key details and clarify the goal.
27
+ # [Plan]: Outline a logical, step-by-step approach to address the question or problem.
28
+ # [Reason]: Execute the plan, applying logical reasoning, calculations, or analysis to reach a conclusion. Document each step clearly.
29
+ # [Reflect]: Review the reasoning and the final answer to ensure it is accurate, complete, and adheres to the principle of openness.
30
+ # [Respond]: Present a well-structured and transparent answer, enriched with supporting details as needed.
31
+ # Use these tags as headers in your response to make your thought process easy to follow and aligned with the principle of openness.
32
 
33
  DEFAULT_SYSTEM_PROMPT ="""
34
  You are an expert mathematician with extensive experience in mathematical competitions. You approach problems through systematic thinking and rigorous reasoning. When solving problems, follow these thought processes:
 
76
  - Thorough verification
77
  Focus on clear, logical progression of ideas and thorough explanation of your mathematical reasoning. Provide answers in the same language as the user asking the question, repeat the final answer using a '\\boxed{}' without any units, you have [[8192]] tokens to complete the answer.
78
  """
 
79
  # UI Configuration
80
  TITLE = "<h1><center>AI Reasoning Assistant</center></h1>"
81
  PLACEHOLDER = "Ask me anything! I'll think through it step by step."
 
119
  """
120
 
121
def initialize_model():
    """Load the chat model and its tokenizer with 8-bit quantization.

    Returns:
        tuple: ``(model, tokenizer)`` ready for GPU inference.
    """
    # 8-bit bitsandbytes quantization so the model fits on a single GPU.
    # NOTE(review): the previous version also passed bnb_8bit_compute_dtype,
    # bnb_8bit_quant_type="nf4" and bnb_8bit_use_double_quant. Those
    # parameters do not exist on BitsAndBytesConfig — "nf4" and double
    # quantization are 4-bit-only options (spelled bnb_4bit_*) — so they
    # were silently ignored as unknown kwargs. They are dropped here to
    # make the effective configuration explicit.
    quantization_config = BitsAndBytesConfig(
        load_in_8bit=True,
    )

    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
    # Some checkpoints ship without a pad token; fall back to EOS so
    # padded/batched generation does not fail.
    if tokenizer.pad_token_id is None:
        tokenizer.pad_token_id = tokenizer.eos_token_id

    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.float16,
        device_map="cuda",
        # Requires the flash-attn build installed by the setup step at the
        # top of the file (FLASH_ATTENTION_SKIP_CUDA_BUILD).
        attn_implementation="flash_attention_2",
        trust_remote_code=True,
        quantization_config=quantization_config,
    )

    return model, tokenizer