sagar007 committed
Commit 835fc41 · verified · 1 Parent(s): 8a9a6c3

Update app.py

Files changed (1):
  1. app.py +224 -88
app.py CHANGED
@@ -1,49 +1,80 @@
 import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
+from transformers import AutoModelForCausalLM, AutoTokenizer
 import spaces
 from duckduckgo_search import DDGS
 import time
 import torch
 from datetime import datetime
-import gc  # For manual garbage collection
-
-# Initialize model and tokenizer with optimizations
+import os
+import subprocess
+import numpy as np
+
+# Install required dependencies for Kokoro with better error handling
+try:
+    subprocess.run(['git', 'lfs', 'install'], check=True)
+    if not os.path.exists('Kokoro-82M'):
+        subprocess.run(['git', 'clone', 'https://huggingface.co/hexgrad/Kokoro-82M'], check=True)
+
+    # Try installing espeak with proper package manager commands
+    try:
+        # Update package list first
+        subprocess.run(['apt-get', 'update'], check=True)
+        # Try installing espeak first (more widely available)
+        subprocess.run(['apt-get', 'install', '-y', 'espeak'], check=True)
+    except subprocess.CalledProcessError:
+        print("Warning: Could not install espeak. Attempting espeak-ng...")
+        try:
+            subprocess.run(['apt-get', 'install', '-y', 'espeak-ng'], check=True)
+        except subprocess.CalledProcessError:
+            print("Warning: Could not install espeak or espeak-ng. TTS functionality may be limited.")
+
+except Exception as e:
+    print(f"Warning: Initial setup error: {str(e)}")
+    print("Continuing with limited functionality...")
+
+# Initialize models and tokenizers
 model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
-
-# Load config first to set optimal parameters
-config = AutoConfig.from_pretrained(model_name)
-config.use_cache = True  # Enable KV-caching for faster inference
-
-# Initialize tokenizer with optimizations
-tokenizer = AutoTokenizer.from_pretrained(
-    model_name,
-    model_max_length=256,  # Reduced for faster processing
-    padding_side="left",
-    truncation_side="left",
-)
+tokenizer = AutoTokenizer.from_pretrained(model_name)
 tokenizer.pad_token = tokenizer.eos_token
 
-# Load model with optimizations
-model = AutoModelForCausalLM.from_pretrained(
-    model_name,
-    config=config,
-    device_map="cpu",
-    low_cpu_mem_usage=True,
-    torch_dtype=torch.float32,
-)
-
-# Enable model optimizations
-model.eval()  # Set to evaluation mode
-torch.set_num_threads(4)  # Limit CPU threads for better performance
-
-def get_web_results(query, max_results=3):  # Reduced max results
+# Move model initialization inside a function to prevent CUDA initialization in main process
+def init_models():
+    model = AutoModelForCausalLM.from_pretrained(
+        model_name,
+        device_map="auto",
+        offload_folder="offload",
+        low_cpu_mem_usage=True,
+        torch_dtype=torch.float16
+    )
+    return model
+
+# Initialize Kokoro TTS with better error handling
+try:
+    import sys
+    sys.path.append('Kokoro-82M')
+    from models import build_model
+    from kokoro import generate
+
+    # Don't initialize models/voices in main process for ZeroGPU compatibility
+    VOICE_CHOICES = {
+        '🇺🇸 Female (Default)': 'af',
+        '🇺🇸 Bella': 'af_bella',
+        '🇺🇸 Sarah': 'af_sarah',
+        '🇺🇸 Nicole': 'af_nicole'
+    }
+    TTS_ENABLED = True
+except Exception as e:
+    print(f"Warning: Could not initialize Kokoro TTS: {str(e)}")
+    TTS_ENABLED = False
+
+def get_web_results(query, max_results=5):  # Increased to 5 for better context
     """Get web search results using DuckDuckGo"""
     try:
         with DDGS() as ddgs:
             results = list(ddgs.text(query, max_results=max_results))
             return [{
                 "title": result.get("title", ""),
-                "snippet": result["body"][:200],  # Limit snippet length
+                "snippet": result["body"],
                 "url": result["href"],
                 "date": result.get("published", "")
             } for result in results]
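Note: the core change in this hunk is that `AutoModelForCausalLM.from_pretrained` moves out of module scope into `init_models()`, so CUDA is only touched inside functions decorated with `@spaces.GPU` (per the diff's own comments, this is for ZeroGPU compatibility, where the main process must not initialize CUDA). A minimal sketch of that pattern, using the same model name as the diff; the function name and generation settings here are illustrative:

```python
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
tokenizer = AutoTokenizer.from_pretrained(model_name)  # CPU-only, safe at import time

@spaces.GPU(duration=30)  # a GPU is attached only while this function runs
def answer(prompt: str) -> str:
    # Loading the model here keeps CUDA initialization out of the main process.
    model = AutoModelForCausalLM.from_pretrained(
        model_name, torch_dtype=torch.float16, device_map="auto"
    )
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=64)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
```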
@@ -51,10 +82,21 @@ def get_web_results(query, max_results=3):  # Reduced max results
         return []
 
 def format_prompt(query, context):
-    """Format the prompt with web context - optimized version"""
-    context_lines = '\n'.join([f'[{i+1}] {res["snippet"]}'
-                               for i, res in enumerate(context)])
-    return f"""Answer this query using the context: {query}\n\nContext:\n{context_lines}\n\nAnswer:"""
+    """Format the prompt with web context"""
+    current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+    context_lines = '\n'.join([f'- [{res["title"]}]: {res["snippet"]}' for res in context])
+    return f"""You are an intelligent search assistant. Answer the user's query using the provided web context.
+Current Time: {current_time}
+
+Important: For election-related queries, please distinguish clearly between different election years and types (presidential vs. non-presidential). Only use information from the provided web context.
+
+Query: {query}
+
+Web Context:
+{context_lines}
+
+Provide a detailed answer in markdown format. Include relevant information from sources and cite them using [1], [2], etc. If the query is about elections, clearly specify which year and type of election you're discussing.
+Answer:"""
 
 def format_sources(web_results):
     """Format sources with more details"""
@@ -78,82 +120,155 @@ def format_sources(web_results):
     sources_html += "</div>"
     return sources_html
 
+# Wrap the answer generation with spaces.GPU decorator
+@spaces.GPU(duration=30)
 def generate_answer(prompt):
-    """Generate answer using the DeepSeek model - optimized version"""
+    """Generate answer using the DeepSeek model"""
+    # Initialize model inside the GPU-decorated function
+    model = init_models()
+
+    inputs = tokenizer(
+        prompt,
+        return_tensors="pt",
+        padding=True,
+        truncation=True,
+        max_length=512,
+        return_attention_mask=True
+    ).to(model.device)
+
+    outputs = model.generate(
+        inputs.input_ids,
+        attention_mask=inputs.attention_mask,
+        max_new_tokens=256,
+        temperature=0.7,
+        top_p=0.95,
+        pad_token_id=tokenizer.eos_token_id,
+        do_sample=True,
+        early_stopping=True
+    )
+    return tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+# Similarly wrap TTS generation with spaces.GPU
+@spaces.GPU(duration=60)
+def generate_speech_with_gpu(text, voice_name='af'):
+    """Generate speech from text using Kokoro TTS model with GPU handling"""
     try:
-        # Clear CUDA cache and garbage collect
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-        gc.collect()
+        # Initialize TTS model and voice inside GPU function
+        device = 'cuda'
+        TTS_MODEL = build_model('Kokoro-82M/kokoro-v0_19.pth', device)
+        VOICEPACK = torch.load(f'Kokoro-82M/voices/{voice_name}.pt', weights_only=True).to(device)
 
-        inputs = tokenizer(
-            prompt,
-            return_tensors="pt",
-            padding=True,
-            truncation=True,
-            max_length=256,
-            return_attention_mask=True
-        )
+        # Clean the text
+        clean_text = ' '.join([line for line in text.split('\n') if not line.startswith('#')])
+        clean_text = clean_text.replace('[', '').replace(']', '').replace('*', '')
 
-        with torch.no_grad():  # Disable gradient calculation
-            outputs = model.generate(
-                inputs.input_ids,
-                attention_mask=inputs.attention_mask,
-                max_new_tokens=100,  # Further reduced for speed
-                temperature=0.7,
-                top_p=0.95,
-                pad_token_id=tokenizer.eos_token_id,
-                do_sample=True,
-                num_beams=1,
-                early_stopping=True,
-                no_repeat_ngram_size=3,
-                length_penalty=1.0
-            )
+        # Split long text into chunks
+        max_chars = 1000
+        chunks = []
+
+        if len(clean_text) > max_chars:
+            sentences = clean_text.split('.')
+            current_chunk = ""
+
+            for sentence in sentences:
+                if len(current_chunk) + len(sentence) < max_chars:
+                    current_chunk += sentence + "."
+                else:
+                    if current_chunk:
+                        chunks.append(current_chunk)
+                    current_chunk = sentence + "."
+            if current_chunk:
+                chunks.append(current_chunk)
+        else:
+            chunks = [clean_text]
+
+        # Generate audio for each chunk
+        audio_chunks = []
+        for chunk in chunks:
+            if chunk.strip():  # Only process non-empty chunks
+                chunk_audio, _ = generate(TTS_MODEL, chunk.strip(), VOICEPACK, lang='a')
+                if isinstance(chunk_audio, torch.Tensor):
+                    chunk_audio = chunk_audio.cpu().numpy()
+                audio_chunks.append(chunk_audio)
+
+        # Concatenate chunks if we have any
+        if audio_chunks:
+            if len(audio_chunks) > 1:
+                final_audio = np.concatenate(audio_chunks)
+            else:
+                final_audio = audio_chunks[0]
+            return (24000, final_audio)
+        return None
 
-        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        return response.split('Answer:')[-1].strip()
-
     except Exception as e:
-        return f"Error generating response: {str(e)}"
+        print(f"Error generating speech: {str(e)}")
+        import traceback
+        traceback.print_exc()
+        return None
 
-def process_query(query, history):
-    """Process user query with optimized streaming effect"""
+def process_query(query, history, selected_voice='af'):
+    """Process user query with streaming effect"""
     try:
         if history is None:
             history = []
-
+
         # Get web results first
         web_results = get_web_results(query)
         sources_html = format_sources(web_results)
 
-        # Show searching status
+        current_history = history + [[query, "*Searching...*"]]
         yield {
-            answer_output: gr.Markdown("*Searching and generating response...*"),
+            answer_output: gr.Markdown("*Searching the web...*"),
             sources_output: gr.HTML(sources_html),
-            search_btn: gr.Button("Please wait...", interactive=False),
-            chat_history_display: history + [[query, "*Processing...*"]]
+            search_btn: gr.Button("Searching...", interactive=False),
+            chat_history_display: current_history,
+            audio_output: None
         }
 
-        # Generate answer with timeout protection
+        # Generate answer
        prompt = format_prompt(query, web_results)
        answer = generate_answer(prompt)
+        final_answer = answer.split("Answer:")[-1].strip()
 
-        # Update with final answer
-        final_history = history + [[query, answer]]
+        # Generate speech from the answer
+        if TTS_ENABLED:
+            try:
+                yield {
+                    answer_output: gr.Markdown(final_answer),
+                    sources_output: gr.HTML(sources_html),
+                    search_btn: gr.Button("Generating audio...", interactive=False),
+                    chat_history_display: history + [[query, final_answer]],
+                    audio_output: None
+                }
+
+                audio = generate_speech_with_gpu(final_answer, selected_voice)
+                if audio is None:
+                    print("Failed to generate audio")
+            except Exception as e:
+                print(f"Error in speech generation: {str(e)}")
+                audio = None
+        else:
+            audio = None
+
+        updated_history = history + [[query, final_answer]]
         yield {
-            answer_output: gr.Markdown(answer),
+            answer_output: gr.Markdown(final_answer),
             sources_output: gr.HTML(sources_html),
             search_btn: gr.Button("Search", interactive=True),
-            chat_history_display: final_history
+            chat_history_display: updated_history,
+            audio_output: audio if audio is not None else gr.Audio(value=None)
         }
-
     except Exception as e:
-        error_msg = f"Error: {str(e)}"
+        error_message = str(e)
+        if "GPU quota" in error_message:
+            error_message = "⚠️ GPU quota exceeded. Please try again later when the daily quota resets."
+
         yield {
-            answer_output: gr.Markdown(error_msg),
-            sources_output: gr.HTML("<div>Error fetching sources</div>"),
+            answer_output: gr.Markdown(f"Error: {error_message}"),
+            sources_output: gr.HTML(sources_html),
             search_btn: gr.Button("Search", interactive=True),
-            chat_history_display: history + [[query, error_msg]]
+            chat_history_display: history + [[query, f"*Error: {error_message}*"]],
+            audio_output: None
         }
 
 # Update the CSS for better contrast and readability
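Note: `generate_speech_with_gpu` splits long answers into roughly 1000-character, sentence-bounded chunks before synthesis, then concatenates the audio. The same splitting logic, extracted into a standalone helper for clarity (the function name is illustrative; behavior matches the diff, including the quirk that the trailing fragment picks up an extra '.' from the empty final split):

```python
def split_into_chunks(text: str, max_chars: int = 1000) -> list[str]:
    """Split text on '.' boundaries into chunks shorter than max_chars."""
    if len(text) <= max_chars:
        return [text]
    chunks, current = [], ""
    for sentence in text.split('.'):
        if len(current) + len(sentence) < max_chars:
            current += sentence + "."
        else:
            if current:
                chunks.append(current)
            current = sentence + "."
    if current:
        chunks.append(current)
    return chunks

# e.g. split_into_chunks("one. two. three.", max_chars=12) -> ["one. two.", " three.."]
```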
@@ -327,6 +442,19 @@ css = """
     border-radius: 8px !important;
     margin-top: 1rem !important;
 }
+
+.voice-selector {
+    margin-top: 1rem;
+    background: #2c2d30;
+    border-radius: 8px;
+    padding: 0.5rem;
+}
+
+.voice-selector select {
+    background: #3a3b3e !important;
+    color: white !important;
+    border: 1px solid #4a4b4e !important;
+}
 """
 
 # Update the Gradio interface layout
@@ -335,7 +463,7 @@ with gr.Blocks(title="AI Search Assistant", css=css, theme="dark") as demo:
 
     with gr.Column(elem_id="header"):
         gr.Markdown("# 🔍 AI Search Assistant")
-        gr.Markdown("### Powered by DeepSeek & Real-time Web Results")
+        gr.Markdown("### Powered by DeepSeek & Real-time Web Results with Voice")
 
     with gr.Column(elem_classes="search-container"):
         with gr.Row(elem_classes="search-box"):
@@ -346,11 +474,19 @@ with gr.Blocks(title="AI Search Assistant", css=css, theme="dark") as demo:
                 container=False
             )
             search_btn = gr.Button("Search", variant="primary", scale=1)
+            voice_select = gr.Dropdown(
+                choices=list(VOICE_CHOICES.items()),
+                value='af',
+                label="Select Voice",
+                elem_classes="voice-selector"
+            )
 
     with gr.Row(elem_classes="results-container"):
         with gr.Column(scale=2):
             with gr.Column(elem_classes="answer-box"):
                 answer_output = gr.Markdown(elem_classes="markdown-content")
+                with gr.Row():
+                    audio_output = gr.Audio(label="Voice Response", elem_classes="audio-player")
             with gr.Accordion("Chat History", open=False, elem_classes="accordion"):
                 chat_history_display = gr.Chatbot(elem_classes="chat-history")
         with gr.Column(scale=1):
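Note on `voice_select`: passing `list(VOICE_CHOICES.items())` gives Gradio a list of (label, value) pairs, so the dropdown displays the friendly names while the event handler receives the short Kokoro voice id ('af', 'af_bella', ...). A minimal sketch of that behavior (assuming a Gradio version with tuple-choice support, e.g. 4.x; the echo textbox is illustrative):

```python
import gradio as gr

VOICE_CHOICES = {'🇺🇸 Female (Default)': 'af', '🇺🇸 Bella': 'af_bella'}

with gr.Blocks() as demo:
    voice = gr.Dropdown(
        choices=list(VOICE_CHOICES.items()),  # [(label, value), ...]
        value='af',                           # the default is the *value*, not the label
        label="Select Voice",
    )
    echo = gr.Textbox(label="Selected voice id")
    voice.change(fn=lambda v: v, inputs=voice, outputs=echo)  # handler sees 'af', not the label

demo.launch()
```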
@@ -373,15 +509,15 @@ with gr.Blocks(title="AI Search Assistant", css=css, theme="dark") as demo:
     # Handle interactions
     search_btn.click(
         fn=process_query,
-        inputs=[search_input, chat_history],
-        outputs=[answer_output, sources_output, search_btn, chat_history_display]
+        inputs=[search_input, chat_history, voice_select],
+        outputs=[answer_output, sources_output, search_btn, chat_history_display, audio_output]
     )
 
     # Also trigger search on Enter key
     search_input.submit(
         fn=process_query,
-        inputs=[search_input, chat_history],
-        outputs=[answer_output, sources_output, search_btn, chat_history_display]
+        inputs=[search_input, chat_history, voice_select],
+        outputs=[answer_output, sources_output, search_btn, chat_history_display, audio_output]
     )
 
 if __name__ == "__main__":
 