Daemontatox committed on
Commit c8e2710 · verified · 1 Parent(s): 22e29b1

Update app.py

Files changed (1)
  1. app.py +152 -273
app.py CHANGED
@@ -1,4 +1,3 @@
-
  import os
  import re
  import time
@@ -10,334 +9,214 @@ from transformers import (
  AutoModelForCausalLM,
  AutoTokenizer,
  BitsAndBytesConfig,
- TextIteratorStreamer
  )

  # Configuration Constants
- MODEL_ID="deepseek-ai/DeepSeek-R1-Distill-Qwen-14B"
-

- # Understand]: Analyze the question to identify key details and clarify the goal.
- # [Plan]: Outline a logical, step-by-step approach to address the question or problem.
- # [Reason]: Execute the plan, applying logical reasoning, calculations, or analysis to reach a conclusion. Document each step clearly.
- # [Reflect]: Review the reasoning and the final answer to ensure it is accurate, complete, and adheres to the principle of openness.
- # [Respond]: Present a well-structured and transparent answer, enriched with supporting details as needed.
- # Use these tags as headers in your response to make your thought process easy to follow and aligned with the principle of openness.

- DEFAULT_SYSTEM_PROMPT ="""
- You are an intelligent assistant , You should think Step by Step.

- """
  # UI Configuration
- TITLE = "<h1><center>AI Reasoning Assistant</center></h1>"
- PLACEHOLDER = "Ask me anything! I'll think through it step by step."
-
  CSS = """
- .duplicate-button {
- margin: auto !important;
- color: white !important;
- background: black !important;
- border-radius: 100vh !important;
- }
- h3 {
- text-align: center;
- }
- .message-wrap {
- overflow-x: auto;
- }
- .message-wrap p {
- margin-bottom: 1em;
- }
- .message-wrap pre {
- background-color: #f6f8fa;
- border-radius: 3px;
- padding: 16px;
- overflow-x: auto;
- }
- .message-wrap code {
- background-color: rgba(175,184,193,0.2);
- border-radius: 3px;
- padding: 0.2em 0.4em;
- font-family: monospace;
- }
- .custom-tag {
- color: #0066cc;
- font-weight: bold;
- }
- .chat-area {
- height: 500px !important;
- overflow-y: auto !important;
- }
  """

  def initialize_model():
- """Initialize the model with appropriate configurations"""
  quantization_config = BitsAndBytesConfig(
  load_in_4bit=True,
  bnb_4bit_compute_dtype=torch.bfloat16,
  bnb_4bit_quant_type="nf4",
  bnb_4bit_use_double_quant=True,
- #llm_int8_enable_fp32_cpu_offload=True
  )

- tokenizer = AutoTokenizer.from_pretrained(MODEL_ID , trust_remote_code=True)
- if tokenizer.pad_token_id is None:
- tokenizer.pad_token_id = tokenizer.eos_token_id

  model = AutoModelForCausalLM.from_pretrained(
  MODEL_ID,
- torch_dtype="auto",
- device_map="cuda",
- # attn_implementation="flash_attention_2",
- trust_remote_code=True,
- quantization_config=quantization_config
-
  )

  return model, tokenizer

- def format_text(text):
- """Format text with proper spacing and tag highlighting (but keep tags visible)"""
- tag_patterns = [
- (r'<Thinking>', '\n<Thinking>\n'),
- (r'</Thinking>', '\n</Thinking>\n'),
- (r'<Critique>', '\n<Critique>\n'),
- (r'</Critique>', '\n</Critique>\n'),
- (r'<Revising>', '\n<Revising>\n'),
- (r'</Revising>', '\n</Revising>\n'),
- (r'<Final>', '\n<Final>\n'),
- (r'</Final>', '\n</Final>\n')
- ]
-
- formatted = text
- for pattern, replacement in tag_patterns:
- formatted = re.sub(pattern, replacement, formatted)
-
- formatted = '\n'.join(line for line in formatted.split('\n') if line.strip())
-
  return formatted

- def format_chat_history(history):
- """Format chat history for display, keeping tags visible"""
- formatted = []
- for user_msg, assistant_msg in history:
- formatted.append(f"User: {user_msg}")
- if assistant_msg:
- formatted.append(f"Assistant: {assistant_msg}")
- return "\n\n".join(formatted)
-
- def create_examples():
- """Create example queries for the UI"""
- return [
- "Explain the concept of artificial intelligence.",
- "How does photosynthesis work?",
- "What are the main causes of climate change?",
- "Describe the process of protein synthesis.",
- "What are the key features of a democratic government?",
- "Explain the theory of relativity.",
- "How do vaccines work to prevent diseases?",
- "What are the major events of World War II?",
- "Describe the structure of a human cell.",
- "What is the role of DNA in genetics?"
- ]
-
  @spaces.GPU(duration=120)
  def chat_response(
  message: str,
  history: list,
- chat_display: str,
  system_prompt: str,
  temperature: float = 0.3,
- max_new_tokens: int =4096 ,
- top_p: float = 0.1,
- top_k: int = 45,
- penalty: float = 1.5,
  ):
- """Generate chat responses, keeping tags visible in the output"""
- conversation = [
- {"role": "system", "content": system_prompt}
- ]
-
- for prompt, answer in history:
- conversation.extend([
- {"role": "user", "content": prompt},
- {"role": "assistant", "content": answer}
- ])
-
- conversation.append({"role": "user", "content": message})
-
- input_ids = tokenizer.apply_chat_template(
- conversation,
- add_generation_prompt=True,
- return_tensors="pt"
- ).to(model.device)
-
- streamer = TextIteratorStreamer(
- tokenizer,
- timeout=60.0,
- skip_prompt=True,
- skip_special_tokens=True
- )
-
- generate_kwargs = dict(
- input_ids=input_ids,
- max_new_tokens=max_new_tokens,
- do_sample=False if temperature == 0 else True,
- top_p=top_p,
- top_k=top_k,
- temperature=temperature,
- repetition_penalty=penalty,
- streamer=streamer,
- )
-
- buffer = ""
-
- with torch.no_grad():
  thread = Thread(target=model.generate, kwargs=generate_kwargs)
  thread.start()
-
- history = history + [[message, ""]]
-
  for new_text in streamer:
- buffer += new_text
- formatted_buffer = format_text(buffer)
- history[-1][1] = formatted_buffer
- chat_display = format_chat_history(history)

- yield history, chat_display

- def process_example(example: str) -> tuple:
- """Process example query and return empty history and updated display"""
- return [], f"User: {example}\n\n"

  def main():
- """Main function to set up and launch the Gradio interface"""
  global model, tokenizer
  model, tokenizer = initialize_model()
-
- with gr.Blocks(css=CSS, theme="soft") as demo:
  gr.HTML(TITLE)
- gr.DuplicateButton(
- value="Duplicate Space for private use",
- elem_classes="duplicate-button"
- )

  with gr.Row():
- with gr.Column():
- chat_history = gr.State([])
- chat_display = gr.TextArea(
- value="",
- label="Chat History",
- interactive=False,
- elem_classes=["chat-area"],
  )
-
- message = gr.TextArea(
- placeholder=PLACEHOLDER,
- label="Your message",
- lines=3
  )
-
  with gr.Row():
- submit = gr.Button("Send")
- clear = gr.Button("Clear")

- with gr.Accordion("⚙️ Advanced Settings", open=False):
  system_prompt = gr.TextArea(
  value=DEFAULT_SYSTEM_PROMPT,
- label="System Prompt",
- lines=5,
- )
- temperature = gr.Slider(
- minimum=0,
- maximum=1,
- step=0.1,
- value=0.3,
- label="Temperature",
- )
- max_tokens = gr.Slider(
- minimum=128,
- maximum=32000,
- step=128,
- value=4096,
- label="Max Tokens",
- )
- top_p = gr.Slider(
- minimum=0.1,
- maximum=1.0,
- step=0.1,
- value=0.8,
- label="Top-p",
  )
- top_k = gr.Slider(
- minimum=1,
- maximum=100,
- step=1,
- value=45,
- label="Top-k",
- )
- penalty = gr.Slider(
- minimum=1.0,
- maximum=2.0,
- step=0.1,
- value=1.5,
- label="Repetition Penalty",
- )
-
- examples = gr.Examples(
- examples=create_examples(),
- inputs=[message],
- outputs=[chat_history, chat_display],
- fn=process_example,
- cache_examples=False,
- )
-
- # Set up event handlers
- submit_click = submit.click(
  chat_response,
- inputs=[
- message,
- chat_history,
- chat_display,
- system_prompt,
- temperature,
- max_tokens,
- top_p,
- top_k,
- penalty,
- ],
- outputs=[chat_history, chat_display],
- show_progress=True,
- )
-
- message.submit(
  chat_response,
- inputs=[
- message,
- chat_history,
- chat_display,
- system_prompt,
- temperature,
- max_tokens,
- top_p,
- top_k,
- penalty,
- ],
- outputs=[chat_history, chat_display],
- show_progress=True,
- )
-
- clear.click(
- lambda: ([], ""),
- outputs=[chat_history, chat_display],
- show_progress=True,
- )
-
- submit_click.then(lambda: "", outputs=message)
- message.submit(lambda: "", outputs=message)
-
  return demo

  if __name__ == "__main__":
  demo = main()
- demo.launch()
  import os
  import re
  import time

  AutoModelForCausalLM,
  AutoTokenizer,
  BitsAndBytesConfig,
+ TextIteratorStreamer,
+ StoppingCriteria,
+ StoppingCriteriaList
  )

  # Configuration Constants
+ MODEL_ID = "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B"

+ # Enhanced System Prompt
+ DEFAULT_SYSTEM_PROMPT = """You are an Expert Reasoning Assistant. Follow these steps:
+ [Understand]: Analyze key elements and clarify objectives
+ [Plan]: Outline step-by-step methodology
+ [Reason]: Execute plan with detailed analysis
+ [Verify]: Check logic and evidence
+ [Conclude]: Present structured conclusion

+ Use these section headers and maintain technical accuracy with clear explanations."""

  # UI Configuration
+ TITLE = """
+ <h1 align="center" style="color: #2d3436; margin-bottom: 0">🧠 AI Reasoning Assistant</h1>
+ <p align="center" style="color: #636e72; margin-top: 0">DeepSeek-R1-Distill-Qwen-14B</p>
+ """
  CSS = """
+ .gr-chatbot { min-height: 500px !important; border-radius: 15px !important; }
+ .message-wrap pre { background: #f8f9fa !important; padding: 15px !important; }
+ .thinking-tag { color: #2ecc71; font-weight: 600; }
+ .plan-tag { color: #e67e22; font-weight: 600; }
+ .conclude-tag { color: #3498db; font-weight: 600; }
+ .control-panel { background: #f8f9fa !important; padding: 20px !important; }
+ footer { visibility: hidden !important; }
  """

+ class StopOnTokens(StoppingCriteria):
+ def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
+ stop_ids = [0] # Add custom stop tokens here
+ return input_ids[0][-1] in stop_ids
+
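The new StopOnTokens criterion is attached to generation through StoppingCriteriaList further down, with stop_ids = [0] left as a placeholder per its inline comment. A minimal sketch, not part of this commit, of deriving the stop ids from the tokenizer's eos_token_id instead:

```python
# Illustrative sketch only (not from the commit): take stop ids from the tokenizer.
import torch
from transformers import StoppingCriteria

class StopOnEOS(StoppingCriteria):
    def __init__(self, tokenizer):
        # eos_token_id may be a single int or a list, depending on the model config
        ids = tokenizer.eos_token_id
        self.stop_ids = set(ids if isinstance(ids, (list, tuple)) else [ids])

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        # Stop as soon as the most recently generated token is a stop id
        return int(input_ids[0, -1]) in self.stop_ids
```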
  def initialize_model():
+ """Initialize model with safety checks"""
+ if not torch.cuda.is_available():
+ raise RuntimeError("CUDA is required for this application")
+
  quantization_config = BitsAndBytesConfig(
  load_in_4bit=True,
  bnb_4bit_compute_dtype=torch.bfloat16,
  bnb_4bit_quant_type="nf4",
  bnb_4bit_use_double_quant=True,
  )

+ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
+ tokenizer.pad_token = tokenizer.eos_token

  model = AutoModelForCausalLM.from_pretrained(
  MODEL_ID,
+ device_map="auto",
+ quantization_config=quantization_config,
+ torch_dtype=torch.bfloat16,
+ trust_remote_code=True
  )

  return model, tokenizer
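A quick way to sanity-check the 4-bit NF4 load above (illustrative only; assumes this file is saved as app.py on the import path and a CUDA GPU is available):

```python
# Hypothetical check, not part of the commit.
from app import initialize_model

model, tokenizer = initialize_model()
print(model.hf_device_map)            # device placement chosen by device_map="auto"
print(model.get_memory_footprint())   # bytes resident; 4-bit NF4 is roughly a quarter of the fp16 footprint
print(tokenizer.pad_token == tokenizer.eos_token)  # True after the pad_token assignment above
```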

+ def format_response(text):
+ """Enhanced formatting with syntax highlighting for reasoning steps"""
+ formatted = text.replace("[Understand]", '\n<strong class="thinking-tag">[Understand]</strong>\n')
+ formatted = formatted.replace("[Plan]", '\n<strong class="plan-tag">[Plan]</strong>\n')
+ formatted = formatted.replace("[Conclude]", '\n<strong class="conclude-tag">[Conclude]</strong>\n')
  return formatted
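For reference, a worked example of what these replacements produce (derived from the code above, not from commit output; assumes format_response as defined in app.py):

```python
# The section marker is wrapped in a styled <strong> tag and set off on its own line.
out = format_response("[Plan] Outline the approach")
assert out == '\n<strong class="plan-tag">[Plan]</strong>\n Outline the approach'
```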

  @spaces.GPU(duration=120)
  def chat_response(
  message: str,
  history: list,
  system_prompt: str,
  temperature: float = 0.3,
+ max_new_tokens: int = 2048,
+ top_p: float = 0.9,
+ top_k: int = 50,
+ penalty: float = 1.2,
  ):
+ """Improved streaming generator with error handling"""
+ try:
+ conversation = [{"role": "system", "content": system_prompt}]
+ for user, assistant in history:
+ conversation.extend([
+ {"role": "user", "content": user},
+ {"role": "assistant", "content": assistant}
+ ])
+ conversation.append({"role": "user", "content": message})
+
+ input_ids = tokenizer.apply_chat_template(
+ conversation,
+ add_generation_prompt=True,
+ return_tensors="pt"
+ ).to(model.device)
+
+ streamer = TextIteratorStreamer(
+ tokenizer,
+ timeout=30,
+ skip_prompt=True,
+ skip_special_tokens=True
+ )
+
+ generate_kwargs = dict(
+ input_ids=input_ids,
+ max_new_tokens=max_new_tokens,
+ temperature=temperature,
+ top_p=top_p,
+ top_k=top_k,
+ repetition_penalty=penalty,
+ streamer=streamer,
+ stopping_criteria=StoppingCriteriaList([StopOnTokens()])
+ )
+
+ buffer = []
  thread = Thread(target=model.generate, kwargs=generate_kwargs)
  thread.start()
+
  for new_text in streamer:
+ buffer.append(new_text)
+ partial_result = "".join(buffer)

+ # Check for complete sections
+ if any(tag in partial_result for tag in ["[Understand]", "[Plan]", "[Conclude]"]):
+ yield format_response(partial_result)
+ else:
+ yield format_response(partial_result + " ▌")
+
+ # Final formatting pass
+ yield format_response("".join(buffer))

+ except Exception as e:
+ yield f"⚠️ Error generating response: {str(e)}"
+
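chat_response is a generator: every yield carries the full formatted response so far, so a consumer only needs to keep the latest value. A rough usage sketch, assuming the module-level model and tokenizer globals have already been set up by initialize_model() inside main():

```python
# Rough usage sketch, not part of the commit.
latest = ""
for latest in chat_response(
    message="Explain quantum entanglement in simple terms",
    history=[],
    system_prompt=DEFAULT_SYSTEM_PROMPT,
    temperature=0.7,
):
    pass  # a UI would re-render with `latest` after each yield
print(latest)  # final, fully formatted answer
```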
+ def create_examples():
+ """Enhanced examples with diverse use cases"""
+ return [
+ ["Explain quantum entanglement in simple terms"],
+ ["Design a study plan for learning machine learning"],
+ ["Compare blockchain and traditional databases"],
+ ["How would you optimize AWS costs for a startup?"],
+ ["Explain the ethical implications of CRISPR technology"]
+ ]

  def main():
+ """Improved UI layout and interactions"""
  global model, tokenizer
  model, tokenizer = initialize_model()
+
+ with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
  gr.HTML(TITLE)

  with gr.Row():
+ with gr.Column(scale=3):
+ chatbot = gr.Chatbot(
+ elem_id="chatbot",
+ bubble_full_width=False,
+ show_copy_button=True,
+ render=False
  )
+ msg = gr.Textbox(
+ placeholder="Enter your question...",
+ label="Ask the Expert",
+ container=False
  )
  with gr.Row():
+ submit_btn = gr.Button("Send", variant="primary")
+ clear_btn = gr.Button("Clear", variant="secondary")
+
+ with gr.Column(scale=1, elem_classes="control-panel"):
+ gr.Examples(
+ examples=create_examples(),
+ inputs=msg,
+ label="Example Queries",
+ examples_per_page=5
+ )

+ with gr.Accordion("⚙️ Generation Parameters", open=False):
  system_prompt = gr.TextArea(
  value=DEFAULT_SYSTEM_PROMPT,
+ label="System Instructions",
+ lines=5
  )
+ temperature = gr.Slider(0, 2, value=0.7, label="Creativity")
+ max_tokens = gr.Slider(128, 4096, value=2048, step=128, label="Max Tokens")
+ top_p = gr.Slider(0, 1, value=0.9, step=0.05, label="Focus (Top-p)")
+ penalty = gr.Slider(1, 2, value=1.2, step=0.1, label="Repetition Control")
+
+ # Event handling
+ msg.submit(
  chat_response,
+ [msg, chatbot, system_prompt, temperature, max_tokens, top_p, penalty],
+ [msg, chatbot],
+ show_progress="hidden"
+ ).then(lambda: "", None, msg)
+
+ submit_btn.click(
  chat_response,
+ [msg, chatbot, system_prompt, temperature, max_tokens, top_p, penalty],
+ [msg, chatbot],
+ show_progress="hidden"
+ ).then(lambda: "", None, msg)
+
+ clear_btn.click(lambda: None, None, chatbot, queue=False)
+
  return demo

  if __name__ == "__main__":
  demo = main()
+ demo.queue(max_size=20).launch()
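A note on the event wiring above: both handlers list outputs=[msg, chatbot], and Gradio expects one value per listed output component from the handler on each streaming step. A hypothetical adapter with that shape (not part of this commit), reusing chat_response and the (user, assistant) tuple history that gr.Chatbot renders:

```python
# Hypothetical adapter, not in the commit: yields (textbox_value, chat_history) pairs
# so that outputs=[msg, chatbot] each receive a value on every streaming step.
def respond(message, chat_history, system_prompt, temperature, max_tokens, top_p, penalty):
    chat_history = (chat_history or []) + [(message, "")]
    for partial in chat_response(
        message,
        chat_history[:-1],          # prior turns only
        system_prompt,
        temperature,
        max_tokens,
        top_p=top_p,
        penalty=penalty,
    ):
        chat_history[-1] = (message, partial)
        yield "", chat_history      # clear the textbox, update the chat window
```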