# genaibeauty's picture
# Update app.py
# ad1d791 verified
import gradio as gr
import requests
import os
# Set up the Hugging Face API key (ensure you've set this as an environment variable)
api_key = os.getenv("HUGGINGFACEHUB_API_TOKEN")  # may be None if the env var is unset
# API URLs
# Serverless inference endpoint for speech-to-text (Whisper).
WHISPER_API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3-turbo"
# Text-generation model used to produce the Mermaid.js code.
MODEL = "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
# MODEL = "deepseek-ai/DeepSeek-R1-Distill-Llama-70B"
API_URL = f"https://api-inference.huggingface.co/models/{MODEL}"
# Shared auth/content headers for the inference requests below.
HEADERS = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
def transcribe_audio(audio_file):
    """Transcribe an audio file with the Hugging Face Whisper inference API.

    Parameters
    ----------
    audio_file : str or None
        Filesystem path to the recorded/uploaded audio (as supplied by
        ``gr.Audio(type="filepath")``); may be None when nothing was provided.

    Returns
    -------
    str
        The transcription text on success, an empty string when no file was
        given, or a human-readable "Error: ..." string on failure.
    """
    # Guard: Gradio passes None when no audio was recorded or uploaded;
    # the original open(None) would raise TypeError.
    if not audio_file:
        return ""
    with open(audio_file, "rb") as f:
        data = f.read()
    # We are uploading raw audio bytes, so do not reuse the shared JSON
    # Content-Type header — send only the authorization header and let the
    # server detect the audio format from the payload.
    audio_headers = {"Authorization": HEADERS.get("Authorization", "")}
    try:
        response = requests.post(WHISPER_API_URL, headers=audio_headers, data=data)
    except requests.RequestException as exc:
        # Network failures (timeouts, DNS, connection reset) should surface
        # as a message, not crash the Gradio handler.
        return f"Error: request failed ({exc})"
    if response.status_code != 200:
        return f"Error: {response.status_code}, {response.text}"
    return response.json().get("text", "Transcription failed.")
def generate_mermaid_code(prompt):
    """Generate Mermaid.js diagram code from a natural-language process description.

    Parameters
    ----------
    prompt : str
        User-supplied description of the process/task flow.

    Returns
    -------
    str
        The model's generated Mermaid.js text on success, otherwise a
        human-readable "Error: ..." string.
    """
    mermaid_prompt = ("Convert the following user process into well-structured, bug-free all possible (flow, class, sequence, graph LR,graph TD etc) Mermaid.js code. "
                      "Ensure correctness and logical flow:\n" + prompt)
    payload = {"inputs": mermaid_prompt, "parameters": {"max_length": 250, "temperature": 0.3, "top_p": 0.9}}
    try:
        response = requests.post(API_URL, headers=HEADERS, json=payload)
    except requests.RequestException as exc:
        # Surface network failures as a message instead of crashing the app.
        return f"Error: request failed ({exc})"
    if response.status_code != 200:
        return f"Error: {response.status_code}, {response.text}"
    # The inference API normally returns [{"generated_text": ...}], but it can
    # return a dict payload (e.g. {"error": ...}) — the original blind
    # [0]['generated_text'] indexing raised KeyError/TypeError on those.
    body = response.json()
    if isinstance(body, list) and body and isinstance(body[0], dict) and "generated_text" in body[0]:
        return body[0]["generated_text"].strip()
    return f"Error: unexpected response format: {body}"
def process_input(input_type, text_input, audio_input):
    """Build the model prompt from the selected input source(s) and generate code.

    Parameters
    ----------
    input_type : str
        One of "Text", "Audio", or "Text and Audio" (from the Radio widget).
    text_input : str or None
        Contents of the text box.
    audio_input : str or None
        Filepath of the recorded/uploaded audio, or None.

    Returns
    -------
    str
        Generated Mermaid.js code, or an error/info message.
    """
    # Honor the radio selection: ignore the text box when "Audio" was chosen
    # (the original concatenated it regardless of input_type).
    text = text_input if text_input and input_type in ["Text", "Text and Audio"] else ""
    transcription = transcribe_audio(audio_input) if input_type in ["Audio", "Text and Audio"] and audio_input else ""
    # Don't feed a transcription failure message into the generator as content.
    if transcription.startswith("Error"):
        return transcription
    combined_input = f"{text} {transcription}".strip()
    return generate_mermaid_code(combined_input) if combined_input else "No valid input provided."
# Set up the Gradio interface
iface = gr.Interface(
fn=process_input,
inputs=[
# The radio value is passed as input_type and controls which of the
# other two inputs process_input actually uses.
gr.Radio(["Text", "Audio", "Text and Audio"], label="Input Type", value="Text"),
gr.Textbox(lines=10, label="Text Input", placeholder="Enter task flow description here..."),
# type="filepath" hands process_input a temp-file path (not raw bytes).
gr.Audio(type="filepath", label="Audio Input")
],
outputs=[gr.Textbox(lines=20, label="Generated Mermaid.js Code")],
title="Mermaid.js Generator",
description="Provide text, audio, or both. Mermaid.js code will be generated based on the text or audio input, ensuring accuracy."
)
# Launch the Gradio app
iface.launch()