Spaces:

prithivMLmods
/

QwQ-Edge

Running on Zero

App Files Files Community

prithivMLmods committed on 17 days ago

Commit

b06a87f

verified ·

1 Parent(s): 2c4a4a6

Update app.py

Browse files

Files changed (1) hide show

app.py +54 -58

app.py CHANGED Viewed

@@ -1,19 +1,31 @@
 import os
 import gradio as gr
 import torch
-import tempfile
-import asyncio
 import edge_tts
-import spaces
-from pydub import AudioSegment
-from threading import Thread
-from collections.abc import Iterator
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 DESCRIPTION = """
-# QwQ Tiny with Edge TTS (MP3 Output)
 """
 MAX_MAX_NEW_TOKENS = 2048
 DEFAULT_MAX_NEW_TOKENS = 1024
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
@@ -29,24 +41,14 @@ model = AutoModelForCausalLM.from_pretrained(
 )
 model.eval()
-async def text_to_speech(text: str) -> str:
-    """Converts text to speech using Edge TTS, converts WAV to MP3, and returns the MP3 file path."""
-    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_wav:
-        wav_path = tmp_wav.name
-    communicate = edge_tts.Communicate(text)
-    await communicate.save(wav_path)
-    # Convert WAV to MP3
-    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_mp3:
-        mp3_path = tmp_mp3.name
-    audio = AudioSegment.from_wav(wav_path)
-    audio.export(mp3_path, format="mp3")
-    os.remove(wav_path)  # Delete the original WAV file
-    return mp3_path  # Return the MP3 file path
 @spaces.GPU
 def generate(
     message: str,
@@ -56,55 +58,47 @@ def generate(
     top_p: float = 0.9,
     top_k: int = 50,
     repetition_penalty: float = 1.2,
-) -> Iterator[str] | str:
-    is_tts = message.strip().startswith("@tts")
-    is_text_only = message.strip().startswith("@text")
-    # Remove special tags
-    if is_tts:
-        message = message.replace("@tts", "").strip()
-    elif is_text_only:
-        message = message.replace("@text", "").strip()
     conversation = [*chat_history, {"role": "user", "content": message}]
-    input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")
     if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
         input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
         gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
     input_ids = input_ids.to(model.device)
     streamer = TextIteratorStreamer(tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
-    generate_kwargs = {
-        "input_ids": input_ids,
-        "streamer": streamer,
-        "max_new_tokens": max_new_tokens,
-        "do_sample": True,
-        "top_p": top_p,
-        "top_k": top_k,
-        "temperature": temperature,
-        "num_beams": 1,
-        "repetition_penalty": repetition_penalty,
-    }
     t = Thread(target=model.generate, kwargs=generate_kwargs)
     t.start()
     outputs = []
     for text in streamer:
         outputs.append(text)
-    final_output = "".join(outputs)
-    # If TTS requested, generate speech and return audio file
     if is_tts:
-        loop = asyncio.new_event_loop()
-        asyncio.set_event_loop(loop)
-        audio_path = loop.run_until_complete(text_to_speech(final_output))
-        return audio_path
-    return final_output  #
 demo = gr.ChatInterface(
     fn=generate,
@@ -118,13 +112,15 @@ demo = gr.ChatInterface(
     stop_btn=None,
     examples=[
         ["A train travels 60 kilometers per hour. If it travels for 5 hours, how far will it travel in total?"],
-        ["@text What is AI?"],
-        ["@tts Explain Newton's third law of motion."],
-        ["@text Rewrite the following sentence in passive voice: 'The dog chased the cat.'"],
     ],
     cache_examples=False,
     type="messages",
     description=DESCRIPTION,
     fill_height=True,
 )

 import os
+from collections.abc import Iterator
+from threading import Thread
 import gradio as gr
+import spaces
 import torch
 import edge_tts
+import asyncio
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 DESCRIPTION = """
+# QwQ Tiny
 """
+css = '''
+h1 {
+  text-align: center;
+  display: block;
+}
+#duplicate-button {
+  margin: auto;
+  color: #fff;
+  background: #1565c0;
+  border-radius: 100vh;
+}
+'''
 MAX_MAX_NEW_TOKENS = 2048
 DEFAULT_MAX_NEW_TOKENS = 1024
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 )
 model.eval()
+async def text_to_speech(text: str, output_file="output.mp3"):
+    voice = "en-US-JennyNeural"
+    communicate = edge_tts.Communicate(text, voice)
+    await communicate.save(output_file)
+    return output_file
 @spaces.GPU
 def generate(
     message: str,
     top_p: float = 0.9,
     top_k: int = 50,
     repetition_penalty: float = 1.2,
+):
+    """Generates chatbot response and handles TTS requests"""
+    is_tts = message.strip().lower().startswith("@tts")
+    message = message.replace("@tts", "").strip()
     conversation = [*chat_history, {"role": "user", "content": message}]
+    input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")
     if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
         input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
         gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
     input_ids = input_ids.to(model.device)
     streamer = TextIteratorStreamer(tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
+    generate_kwargs = dict(
+        {"input_ids": input_ids},
+        streamer=streamer,
+        max_new_tokens=max_new_tokens,
+        do_sample=True,
+        top_p=top_p,
+        top_k=top_k,
+        temperature=temperature,
+        num_beams=1,
+        repetition_penalty=repetition_penalty,
+    )
     t = Thread(target=model.generate, kwargs=generate_kwargs)
     t.start()
     outputs = []
     for text in streamer:
         outputs.append(text)
+        yield "".join(outputs)
+    final_response = "".join(outputs)
     if is_tts:
+        output_file = asyncio.run(text_to_speech(final_response))
+        yield output_file  # Return MP3 file
+    else:
+        yield final_response  # Return text response
 demo = gr.ChatInterface(
     fn=generate,
     stop_btn=None,
     examples=[
         ["A train travels 60 kilometers per hour. If it travels for 5 hours, how far will it travel in total?"],
+        ["Write a Python function to check if a number is prime."],
+        ["What causes rainbows to form?"],
+        ["Rewrite the following sentence in passive voice: 'The dog chased the cat.'"],
+        ["@tts What is the capital of France?"],
     ],
     cache_examples=False,
     type="messages",
     description=DESCRIPTION,
+    css=css,
     fill_height=True,
 )