import gradio as gr
import torch
from transformers import AutoProcessor, AutoModel
import scipy.io.wavfile as wavfile
import numpy as np


# Initialize model and processor
def load_model():
    processor = AutoProcessor.from_pretrained("suno/bark-small")
    model = AutoModel.from_pretrained("suno/bark-small")
    return processor, model


# Text to speech function
def text_to_speech(text):
    try:
        # Tokenize the input text
        inputs = processor(
            text=[text],
            return_tensors="pt",
        )

        # Generate speech with sampling enabled
        with torch.no_grad():
            speech_values = model.generate(**inputs, do_sample=True)

        # Convert to a float32 numpy array and read the model's sampling rate
        audio_data = speech_values.cpu().numpy().squeeze().astype(np.float32)
        sampling_rate = model.generation_config.sample_rate

        # Write the waveform to a temporary file that Gradio can play back
        temp_path = "temp_audio.wav"
        wavfile.write(temp_path, sampling_rate, audio_data)

        return temp_path
    except Exception as e:
        # Raise a Gradio error so the failure is shown in the UI instead of
        # returning a string to the Audio output component
        raise gr.Error(f"Error generating speech: {str(e)}")


# Load models globally
print("Loading models...")
processor, model = load_model()
print("Models loaded successfully!")

# Create Gradio interface
demo = gr.Interface(
    fn=text_to_speech,
    inputs=[
        gr.Textbox(
            label="Enter text (Hindi supported)",
            placeholder="इस योजना से संबंधित लाभों का विवरण प्राप्त कर सकते",
        )
    ],
    outputs=gr.Audio(label="Generated Speech"),
    title="Hindi Text-to-Speech using Bark",
    description="Generate natural-sounding speech from Hindi text using the Bark model.",
    examples=[
        ["इस योजना से संबंधित लाभों का विवरण प्राप्त कर सकते"],
        ["नमस्ते, आप कैसे हैं?"],
    ],
)

# Launch the app
if __name__ == "__main__":
    demo.launch()
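
# Optional: quick sanity check without the web UI. A minimal sketch, assuming
# the module has been imported (so load_model() has already run); calling
# text_to_speech() directly writes temp_audio.wav and returns its path.
#
#   wav_path = text_to_speech("नमस्ते, आप कैसे हैं?")
#   print(f"Saved generated speech to {wav_path}")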