Coding_Assistant

Running on Zero

App Files Files Community

Coding_Assistant / app.py

Daemontatox

Update app.py

f3800da verified 28 days ago

raw

history blame

11.9 kB

	import subprocess

	# Install GPU-specific packages at startup, before they are imported below.
	# `env=` REPLACES the child's environment rather than extending it, so the
	# current environment is merged in; otherwise pip would run without PATH,
	# HOME, proxy settings, cache locations, etc.
	import os

	subprocess.run(
	    ["pip", "install", "flash-attn", "--no-build-isolation"],
	    # Ask flash-attn's build to skip compiling CUDA kernels.
	    env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
	    check=False,  # best effort, as before: a failed install does not abort startup
	)
	subprocess.run(
	    ["pip", "install", "autoawq"],
	    check=False,
	)
	import os
	import re
	import time
	import torch
	import spaces
	import gradio as gr
	from threading import Thread
	from transformers import (
	AutoModelForCausalLM,
	AutoTokenizer,
	BitsAndBytesConfig,
	TextIteratorStreamer
	)
	from awq import AutoAWQForCausalLM

	# Configuration Constants
	# Model identifier passed to both AutoTokenizer.from_pretrained and
	# AutoAWQForCausalLM.from_pretrained in initialize_model().
	MODEL_ID = "Qwen/QwQ-32B-Preview"

	# Initial value of the "System Prompt" text area; whatever that box contains is
	# sent as the system turn of every conversation by chat_response().
	# The "[[8192]] tokens" budget near the end is literal text: nothing in this
	# file substitutes it from the "Max Tokens" slider.
	DEFAULT_SYSTEM_PROMPT ="""
	You are an expert mathematician with extensive experience in mathematical competitions. You approach problems through systematic thinking and rigorous reasoning. When solving problems, follow these thought processes:
	## Deep Understanding
	Take time to fully comprehend the problem before attempting a solution. Consider:
	- What is the real question being asked?
	- What are the given conditions and what do they tell us?
	- Are there any special restrictions or assumptions?
	- Which information is crucial and which is supplementary?
	## Multi-angle Analysis
	Before solving, conduct thorough analysis:
	- What mathematical concepts and properties are involved?
	- Can you recall similar classic problems or solution methods?
	- Would diagrams or tables help visualize the problem?
	- Are there special cases that need separate consideration?
	## Systematic Thinking
	Plan your solution path:
	- Propose multiple possible approaches
	- Analyze the feasibility and merits of each method
	- Choose the most appropriate method and explain why
	- Break complex problems into smaller, manageable steps
	## Rigorous Proof
	During the solution process:
	- Provide solid justification for each step
	- Include detailed proofs for key conclusions
	- Pay attention to logical connections
	- Be vigilant about potential oversights
	## Repeated Verification
	After completing your solution:
	- Verify your results satisfy all conditions
	- Check for overlooked special cases
	- Consider if the solution can be optimized or simplified
	- Review your reasoning process
	Remember:
	1. Take time to think thoroughly rather than rushing to an answer
	2. Rigorously prove each key conclusion
	3. Keep an open mind and try different approaches
	4. Summarize valuable problem-solving methods
	5. Maintain healthy skepticism and verify multiple times
	Your response should reflect deep mathematical understanding and precise logical thinking, making your solution path and reasoning clear to others.
	When you're ready, present your complete solution with:
	- Clear problem understanding
	- Detailed solution process
	- Key insights
	- Thorough verification
	Focus on clear, logical progression of ideas and thorough explanation of your mathematical reasoning. Provide answers in the same language as the user asking the question, repeat the final answer using a '\\boxed{}' without any units, you have [[8192]] tokens to complete the answer.
	"""

	# UI Configuration
	# TITLE is rendered through gr.HTML at the top of the page; PLACEHOLDER is the
	# hint text of the "Your message" input box.
	TITLE = "<h1><center>AI Reasoning Assistant</center></h1>"
	PLACEHOLDER = "Ask me anything! I'll think through it step by step."

	# Stylesheet handed to gr.Blocks(css=...). Within this file only two of these
	# classes are attached explicitly via elem_classes: `.duplicate-button` (the
	# duplicate-space button) and `.chat-area` (the read-only chat history box).
	# `.message-wrap` and `.custom-tag` are not referenced anywhere else in this file.
	CSS = """
	.duplicate-button {
	margin: auto !important;
	color: white !important;
	background: black !important;
	border-radius: 100vh !important;
	}
	h3 {
	text-align: center;
	}
	.message-wrap {
	overflow-x: auto;
	}
	.message-wrap p {
	margin-bottom: 1em;
	}
	.message-wrap pre {
	background-color: #f6f8fa;
	border-radius: 3px;
	padding: 16px;
	overflow-x: auto;
	}
	.message-wrap code {
	background-color: rgba(175,184,193,0.2);
	border-radius: 3px;
	padding: 0.2em 0.4em;
	font-family: monospace;
	}
	.custom-tag {
	color: #0066cc;
	font-weight: bold;
	}
	.chat-area {
	height: 500px !important;
	overflow-y: auto !important;
	}
	"""

	def initialize_model():
	    """Load the tokenizer and the AutoAWQ model wrapper for ``MODEL_ID``.

	    Returns:
	        A ``(model, tokenizer)`` tuple. The tokenizer's pad token id is set
	        to its EOS token id when it does not define one.
	    """
	    tok = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
	    # Reuse EOS as the padding token when the tokenizer has none.
	    if tok.pad_token_id is None:
	        tok.pad_token_id = tok.eos_token_id

	    load_options = {
	        "torch_dtype": torch.float16,
	        "device_map": "cuda",
	        "trust_remote_code": True,
	    }
	    awq_model = AutoAWQForCausalLM.from_pretrained(MODEL_ID, **load_options)

	    return awq_model, tok

	# One precompiled pattern for every opening/closing reasoning tag. This
	# replaces eight separate substitution passes; format_text runs on the whole
	# accumulated buffer for each streamed chunk, so the pattern is built once.
	_TAG_RE = re.compile(r'</?(?:Thinking|Critique|Revising|Final)>')


	def format_text(text):
	    """Put each reasoning tag on its own line and drop blank lines.

	    The tags ``<Thinking>``, ``<Critique>``, ``<Revising>`` and ``<Final>``
	    (opening and closing, case-sensitive) stay visible in the output; each is
	    surrounded by newlines. Afterwards every empty or whitespace-only line is
	    removed, so the result never starts or ends with a blank line.

	    Args:
	        text: Raw text produced so far.

	    Returns:
	        The reformatted text; ``""`` for empty or whitespace-only input.
	    """
	    spaced = _TAG_RE.sub(lambda m: "\n" + m.group(0) + "\n", text)
	    return '\n'.join(line for line in spaced.split('\n') if line.strip())

	def format_chat_history(history):
	    """Render ``(user, assistant)`` turns as a plain-text transcript.

	    Each turn contributes a ``User: ...`` paragraph and, when the assistant
	    message is truthy, an ``Assistant: ...`` paragraph. Paragraphs are
	    separated by one blank line; message text (tags included) is not altered.
	    """
	    def _render(turn):
	        user_msg, assistant_msg = turn
	        user_part = f"User: {user_msg}"
	        if not assistant_msg:
	            return user_part
	        return user_part + "\n\n" + f"Assistant: {assistant_msg}"

	    return "\n\n".join(map(_render, history))

	def create_examples():
	    """Return a new list of the sample prompts offered in the UI."""
	    sample_prompts = (
	        "Explain the concept of artificial intelligence.",
	        "How does photosynthesis work?",
	        "What are the main causes of climate change?",
	        "Describe the process of protein synthesis.",
	        "What are the key features of a democratic government?",
	        "Explain the theory of relativity.",
	        "How do vaccines work to prevent diseases?",
	        "What are the major events of World War II?",
	        "Describe the structure of a human cell.",
	        "What is the role of DNA in genetics?",
	    )
	    # Hand back a fresh list on every call, as callers may mutate it.
	    return list(sample_prompts)

	@spaces.GPU()
	def chat_response(
	    message: str,
	    history: list,
	    chat_display: str,
	    system_prompt: str,
	    temperature: float = 0.3,
	    max_new_tokens: int = 8192,
	    top_p: float = 0.1,
	    top_k: int = 45,
	    penalty: float = 1.2,
	):
	    """Stream the model's reply to ``message``, keeping tags visible.

	    Uses the module-level ``model`` and ``tokenizer`` that ``main()`` sets.

	    Args:
	        message: The new user message.
	        history: Previous turns as ``[user, assistant]`` pairs. It is not
	            mutated; a new list with the current turn appended is yielded.
	        chat_display: Current transcript text; returned unchanged when the
	            message is blank.
	        system_prompt: Content of the system turn.
	        temperature: Sampling temperature; values ``<= 0`` select greedy
	            decoding.
	        max_new_tokens: Upper bound on generated tokens.
	        top_p: Nucleus-sampling threshold (used only when sampling).
	        top_k: Top-k cutoff (used only when sampling).
	        penalty: Repetition penalty.

	    Yields:
	        ``(history, chat_display)`` after every streamed chunk, where the last
	        history entry holds the formatted reply so far.
	    """
	    # Nothing to answer: leave the UI state as it is instead of generating
	    # from an empty user turn.
	    if not message or not message.strip():
	        yield history, chat_display
	        return

	    conversation = [
	        {"role": "system", "content": system_prompt}
	    ]
	    for prompt, answer in history:
	        conversation.extend([
	            {"role": "user", "content": prompt},
	            {"role": "assistant", "content": answer}
	        ])
	    conversation.append({"role": "user", "content": message})

	    input_ids = tokenizer.apply_chat_template(
	        conversation,
	        add_generation_prompt=True,
	        return_tensors="pt"
	    ).to(model.device)

	    streamer = TextIteratorStreamer(
	        tokenizer,
	        timeout=60.0,
	        skip_prompt=True,
	        skip_special_tokens=True
	    )

	    do_sample = temperature > 0
	    generate_kwargs = dict(
	        input_ids=input_ids,
	        # UI sliders may deliver floats; generation expects an integer count.
	        max_new_tokens=int(max_new_tokens),
	        do_sample=do_sample,
	        repetition_penalty=penalty,
	        streamer=streamer,
	    )
	    if do_sample:
	        # Sampling knobs are only meaningful when sampling; greedy decoding
	        # must not be handed temperature=0.
	        generate_kwargs.update(
	            top_p=top_p,
	            top_k=int(top_k),
	            temperature=temperature,
	        )

	    def _generate():
	        # Grad mode is thread-local, so no_grad has to be entered inside the
	        # worker thread; entering it around Thread.start() has no effect on
	        # the generation itself.
	        with torch.no_grad():
	            model.generate(**generate_kwargs)

	    thread = Thread(target=_generate)
	    thread.start()

	    history = history + [[message, ""]]
	    buffer = ""

	    for new_text in streamer:
	        buffer += new_text
	        history[-1][1] = format_text(buffer)
	        chat_display = format_chat_history(history)

	        yield history, chat_display

	    # The streamer is exhausted, so generation has finished; reap the worker.
	    thread.join()

	def process_example(example: str) -> tuple:
	    """Return a fresh empty history and a transcript showing only ``example``."""
	    fresh_history = []
	    transcript = "User: {}\n\n".format(example)
	    return fresh_history, transcript

	def main():
	    """Load the model, build the Gradio Blocks UI and return it unlaunched.

	    Side effect: assigns the module-level ``model`` and ``tokenizer`` that
	    ``chat_response`` reads.
	    """
	    global model, tokenizer
	    model, tokenizer = initialize_model()

	    with gr.Blocks(css=CSS, theme="soft") as demo:
	        gr.HTML(TITLE)
	        gr.DuplicateButton(
	            value="Duplicate Space for private use",
	            elem_classes="duplicate-button",
	        )

	        with gr.Row():
	            with gr.Column():
	                # Conversation state plus its read-only text rendering.
	                chat_history = gr.State([])
	                chat_display = gr.TextArea(
	                    value="",
	                    label="Chat History",
	                    interactive=False,
	                    elem_classes=["chat-area"],
	                )

	                message = gr.TextArea(
	                    placeholder=PLACEHOLDER,
	                    label="Your message",
	                    lines=3,
	                )

	                with gr.Row():
	                    submit = gr.Button("Send")
	                    clear = gr.Button("Clear")

	                with gr.Accordion("⚙️ Advanced Settings", open=False):
	                    system_prompt = gr.TextArea(
	                        value=DEFAULT_SYSTEM_PROMPT,
	                        label="System Prompt",
	                        lines=5,
	                    )
	                    temperature = gr.Slider(
	                        minimum=0,
	                        maximum=1,
	                        step=0.1,
	                        value=0.3,
	                        label="Temperature",
	                    )
	                    max_tokens = gr.Slider(
	                        minimum=128,
	                        maximum=32000,
	                        step=128,
	                        value=8192,
	                        label="Max Tokens",
	                    )
	                    top_p = gr.Slider(
	                        minimum=0.1,
	                        maximum=1.0,
	                        step=0.1,
	                        value=0.8,
	                        label="Top-p",
	                    )
	                    top_k = gr.Slider(
	                        minimum=1,
	                        maximum=100,
	                        step=1,
	                        value=45,
	                        label="Top-k",
	                    )
	                    penalty = gr.Slider(
	                        minimum=1.0,
	                        maximum=2.0,
	                        step=0.1,
	                        value=1.2,
	                        label="Repetition Penalty",
	                    )

	                # NOTE(review): with cache_examples=False and no run_on_click,
	                # Gradio may never invoke `fn`/`outputs` here - confirm.
	                gr.Examples(
	                    examples=create_examples(),
	                    inputs=[message],
	                    outputs=[chat_history, chat_display],
	                    fn=process_example,
	                    cache_examples=False,
	                )

	        # Set up event handlers. Both "Send" and submitting the message box
	        # run chat_response with the same inputs and outputs.
	        generation_inputs = [
	            message,
	            chat_history,
	            chat_display,
	            system_prompt,
	            temperature,
	            max_tokens,
	            top_p,
	            top_k,
	            penalty,
	        ]
	        conversation_outputs = [chat_history, chat_display]

	        submit_click = submit.click(
	            chat_response,
	            inputs=generation_inputs,
	            outputs=conversation_outputs,
	            show_progress=True,
	        )

	        message.submit(
	            chat_response,
	            inputs=generation_inputs,
	            outputs=conversation_outputs,
	            show_progress=True,
	        )

	        clear.click(
	            lambda: ([], ""),
	            outputs=conversation_outputs,
	            show_progress=True,
	        )

	        # Empty the input box: after the click-triggered response finishes,
	        # and via a separate listener on the message box's submit event.
	        submit_click.then(lambda: "", outputs=message)
	        message.submit(lambda: "", outputs=message)

	    return demo

	if __name__ == "__main__":
	    # Build the interface (this also loads the model) and start serving it
	    # only when this file is executed as a script.
	    demo = main()
	    demo.launch()