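# Spaces: Running
# NOTE(review): the line above appears to be hosting-page status text rather than program code; kept as a comment.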
import os
import tempfile

import gradio as gr
import numpy as np
import scipy.io.wavfile as wavfile
import torch
from transformers import AutoProcessor, AutoModel
# Initialize model and processor
def load_model(model_id="suno/bark-small"):
    """Load the processor and model for one pretrained checkpoint.

    The same identifier is used for both loads so the processor and the
    model always come from the same checkpoint.

    Args:
        model_id: Checkpoint identifier handed to both ``from_pretrained``
            calls. Defaults to ``"suno/bark-small"``, the value that was
            previously hard-coded.

    Returns:
        A ``(processor, model)`` tuple.
    """
    processor = AutoProcessor.from_pretrained(model_id)
    model = AutoModel.from_pretrained(model_id)
    return processor, model
# Text to speech function
def text_to_speech(text):
    """Generate speech for ``text`` and return the path of a WAV file.

    Uses the module-level ``processor`` and ``model`` objects.

    Args:
        text: The text to speak. Must contain at least one non-whitespace
            character.

    Returns:
        Path to a newly created ``.wav`` file containing the generated
        audio. The file is not deleted by this function.

    Raises:
        gr.Error: If ``text`` is empty, or if generation or writing the
            audio file fails. The original exception is chained as the
            cause.
    """
    # Reject empty input up front instead of sending it to the model.
    if not text or not text.strip():
        raise gr.Error("Please enter some text to convert to speech.")

    try:
        # Generate speech
        inputs = processor(
            text=[text],
            return_tensors="pt",
        )
        speech_values = model.generate(**inputs, do_sample=True)

        # Convert to a numpy array and drop singleton dimensions
        # (no amplitude normalization is applied here).
        audio_data = speech_values.cpu().numpy().squeeze()
        sampling_rate = model.generation_config.sample_rate

        # Reserve a unique file name per call so simultaneous requests do
        # not overwrite each other's audio. The handle is closed before
        # writing so the path can be reopened by name on every platform.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            temp_path = tmp.name
        wavfile.write(temp_path, sampling_rate, audio_data)
        return temp_path
    except Exception as e:
        # Returning the message would hand a non-path string to the audio
        # output; raise a Gradio error so the message is reported instead.
        raise gr.Error(f"Error generating speech: {str(e)}") from e
# Load models globally
# Runs once at import time; text_to_speech reads these two module-level
# names on every call, so they must exist before the interface is used.
print("Loading models...")
(processor, model) = load_model()
print("Models loaded successfully!")
# Create Gradio interface
demo = gr.Interface(
    title="Hindi Text-to-Speech using Bark",
    description="Generate natural-sounding speech from Hindi text using the Bark model.",
    fn=text_to_speech,
    # A single text input; the placeholder shows a sample Hindi sentence.
    inputs=[
        gr.Textbox(
            label="Enter text (Hindi supported)",
            placeholder="इस योजना से संबंधित लाभों का विवरण प्राप्त कर सकते",
        ),
    ],
    # Receives whatever text_to_speech returns (a WAV file path on success).
    outputs=gr.Audio(label="Generated Speech"),
    # Each inner list holds the value for the single text input.
    examples=[
        ["इस योजना से संबंधित लाभों का विवरण प्राप्त कर सकते"],
        ["नमस्ते, आप कैसे हैं?"],
    ],
)
# Launch the app
if __name__ == "__main__":
    # Launch only when executed as a script, so importing this module
    # builds ``demo`` without starting it.
    demo.launch()