Spaces:
Running
Running
File size: 1,947 Bytes
28e403b d82b8b6 cddcc30 d82b8b6 cddcc30 e2e5380 cddcc30 e2e5380 cddcc30 e2e5380 cddcc30 27d6995 cddcc30 d82b8b6 cddcc30 27d6995 cddcc30 e2e5380 cddcc30 28e403b cddcc30 28e403b 2079cba d82b8b6 2079cba cddcc30 2079cba d82b8b6 cddcc30 d82b8b6 28e403b 27d6995 cddcc30 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
import os
import tempfile

import gradio as gr
import numpy as np
import scipy.io.wavfile as wavfile
import torch
from transformers import AutoProcessor, AutoModel
def load_model():
    """Return the (processor, model) pair for the Bark-small checkpoint.

    Both objects are downloaded/cached from the Hugging Face Hub on first use.
    """
    checkpoint = "suno/bark-small"
    return (
        AutoProcessor.from_pretrained(checkpoint),
        AutoModel.from_pretrained(checkpoint),
    )
# Text to speech function
def text_to_speech(text):
    """Generate a speech waveform from ``text`` with the globally loaded Bark model.

    Parameters
    ----------
    text : str
        Input text to synthesize (Hindi is supported by the Bark model).

    Returns
    -------
    str
        Path to a WAV file with the generated audio, or an error-message
        string (kept for backward compatibility) if generation failed.
    """
    try:
        inputs = processor(
            text=[text],
            return_tensors="pt",
        )
        # Bark requires sampling; inference_mode skips autograd bookkeeping.
        with torch.inference_mode():
            speech_values = model.generate(**inputs, do_sample=True)
        audio_data = speech_values.cpu().numpy().squeeze()
        sampling_rate = model.generation_config.sample_rate
        # Unique temp file per request: a fixed "temp_audio.wav" name is
        # clobbered/raced when two Gradio requests overlap.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            wavfile.write(tmp.name, sampling_rate, audio_data)
            return tmp.name
    except Exception as e:
        # Gradio shows this string in place of the audio output on failure.
        return f"Error generating speech: {str(e)}"
# Load models globally: done once at import time so every request served by
# text_to_speech reuses the same processor/model instances.
print("Loading models...")
processor, model = load_model()
print("Models loaded successfully!")
# Create Gradio interface: one textbox in, one audio player out.
# The placeholder and examples are prefilled Hindi prompts.
demo = gr.Interface(
    fn=text_to_speech,
    inputs=[
        gr.Textbox(
            label="Enter text (Hindi supported)",
            placeholder="इस योजना से संबंधित लाभों का विवरण प्राप्त कर सकते"
        )
    ],
    outputs=gr.Audio(label="Generated Speech"),
    title="Hindi Text-to-Speech using Bark",
    description="Generate natural-sounding speech from Hindi text using the Bark model.",
    examples=[
        ["इस योजना से संबंधित लाभों का विवरण प्राप्त कर सकते"],
        ["नमस्ते, आप कैसे हैं?"],
    ]
)
# Launch the app only when run as a script (not when imported).
# Note: the original line ended with a stray " |" left over from the page
# the file was scraped from, which made it a syntax error — removed.
if __name__ == "__main__":
    demo.launch()