Daemontatox committed on
Commit
e339ee0
·
verified ·
1 Parent(s): 80bfd17

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -11
app.py CHANGED
@@ -5,10 +5,6 @@ subprocess.run(
5
  env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"},
6
  shell=True
7
  )
8
- subprocess.run(
9
- 'pip install autoawq',
10
- shell=True
11
- )
12
  import os
13
  import re
14
  import time
@@ -22,10 +18,17 @@ from transformers import (
22
  BitsAndBytesConfig,
23
  TextIteratorStreamer
24
  )
25
- from awq import AutoAWQForCausalLM
26
 
27
  # Configuration Constants
28
- MODEL_ID = "KirillR/QwQ-32B-Preview-AWQ"
 
 
 
 
 
 
 
 
29
 
30
  DEFAULT_SYSTEM_PROMPT ="""
31
  You are an expert mathematician with extensive experience in mathematical competitions. You approach problems through systematic thinking and rigorous reasoning. When solving problems, follow these thought processes:
@@ -73,7 +76,6 @@ When you're ready, present your complete solution with:
73
  - Thorough verification
74
  Focus on clear, logical progression of ideas and thorough explanation of your mathematical reasoning. Provide answers in the same language as the user asking the question, repeat the final answer using a '\\boxed{}' without any units, you have [[8192]] tokens to complete the answer.
75
  """
76
-
77
  # UI Configuration
78
  TITLE = "<h1><center>AI Reasoning Assistant</center></h1>"
79
  PLACEHOLDER = "Ask me anything! I'll think through it step by step."
@@ -117,16 +119,26 @@ h3 {
117
  """
118
 
119
  def initialize_model():
120
- """Initialize the model with AWQ configuration"""
121
- tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
 
 
 
 
 
 
 
122
  if tokenizer.pad_token_id is None:
123
  tokenizer.pad_token_id = tokenizer.eos_token_id
124
 
125
- model = AutoAWQForCausalLM.from_pretrained(
126
  MODEL_ID,
127
  torch_dtype=torch.float16,
128
  device_map="cuda",
129
- trust_remote_code=True
 
 
 
130
  )
131
 
132
  return model, tokenizer
 
5
  env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"},
6
  shell=True
7
  )
 
 
 
 
8
  import os
9
  import re
10
  import time
 
18
  BitsAndBytesConfig,
19
  TextIteratorStreamer
20
  )
 
21
 
22
  # Configuration Constants
23
+ MODEL_ID = "Daemontatox/PathFinderAi3.0"
24
+
25
+
26
+ # Understand]: Analyze the question to identify key details and clarify the goal.
27
+ # [Plan]: Outline a logical, step-by-step approach to address the question or problem.
28
+ # [Reason]: Execute the plan, applying logical reasoning, calculations, or analysis to reach a conclusion. Document each step clearly.
29
+ # [Reflect]: Review the reasoning and the final answer to ensure it is accurate, complete, and adheres to the principle of openness.
30
+ # [Respond]: Present a well-structured and transparent answer, enriched with supporting details as needed.
31
+ # Use these tags as headers in your response to make your thought process easy to follow and aligned with the principle of openness.
32
 
33
  DEFAULT_SYSTEM_PROMPT ="""
34
  You are an expert mathematician with extensive experience in mathematical competitions. You approach problems through systematic thinking and rigorous reasoning. When solving problems, follow these thought processes:
 
76
  - Thorough verification
77
  Focus on clear, logical progression of ideas and thorough explanation of your mathematical reasoning. Provide answers in the same language as the user asking the question, repeat the final answer using a '\\boxed{}' without any units, you have [[8192]] tokens to complete the answer.
78
  """
 
79
  # UI Configuration
80
  TITLE = "<h1><center>AI Reasoning Assistant</center></h1>"
81
  PLACEHOLDER = "Ask me anything! I'll think through it step by step."
 
119
  """
120
 
121
def initialize_model():
    """Load the chat model and its tokenizer with 8-bit quantization.

    Returns:
        tuple: ``(model, tokenizer)`` ready for GPU inference.
    """
    # 8-bit bitsandbytes quantization so the model fits on a single GPU.
    # NOTE(review): the previous version also passed bnb_8bit_compute_dtype,
    # bnb_8bit_quant_type="nf4" and bnb_8bit_use_double_quant. Those
    # parameters do not exist on BitsAndBytesConfig — "nf4" and double
    # quantization are 4-bit-only options (spelled bnb_4bit_*) — so they
    # were silently ignored as unknown kwargs. They are dropped here to
    # make the effective configuration explicit.
    quantization_config = BitsAndBytesConfig(
        load_in_8bit=True,
    )

    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
    # Some checkpoints ship without a pad token; fall back to EOS so
    # padded/batched generation does not fail.
    if tokenizer.pad_token_id is None:
        tokenizer.pad_token_id = tokenizer.eos_token_id

    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.float16,
        device_map="cuda",
        # Requires the flash-attn build installed by the setup step at the
        # top of the file (FLASH_ATTENTION_SKIP_CUDA_BUILD).
        attn_implementation="flash_attention_2",
        trust_remote_code=True,
        quantization_config=quantization_config,
    )

    return model, tokenizer