# Hugging Face Space (status: Running)
import os
import tempfile

import gradio as gr
import scipy.io.wavfile
import torch
from transformers import AutoProcessor, BarkModel
# Load the Bark processor and model once at import time; text_to_speech
# reuses these module-level objects on every call.
_BARK_CHECKPOINT = "suno/bark"
processor = AutoProcessor.from_pretrained(_BARK_CHECKPOINT)
model = BarkModel.from_pretrained(_BARK_CHECKPOINT)
def text_to_speech(text, voice_preset="v2/hi_speaker_2"):
    """Synthesize speech for ``text`` with Bark and return a WAV file path.

    Args:
        text: Prompt to speak. Must contain at least one non-whitespace
            character.
        voice_preset: Bark voice preset identifier forwarded to the
            processor.

    Returns:
        Path of a newly created WAV file containing the generated audio.

    Raises:
        ValueError: If ``text`` is empty or whitespace only.
    """
    # Reject blank prompts up front instead of running a slow generation
    # pass on input that carries no text.
    if not text or not text.strip():
        raise ValueError("Please enter some text to convert to speech.")

    # Tokenize the prompt and attach the selected voice preset.
    inputs = processor(text, voice_preset=voice_preset)

    # Generate the waveform and flatten it to a plain NumPy array.
    waveform = model.generate(**inputs)
    waveform = waveform.cpu().numpy().squeeze()

    # The sample rate comes from the model's generation config.
    sample_rate = model.generation_config.sample_rate

    # Use a unique file per call: a fixed name such as "temp_audio.wav"
    # would be overwritten when several requests run at the same time.
    # delete=False keeps the file on disk after the handle is closed so the
    # caller can read it by path.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as wav_file:
        output_path = wav_file.name

    scipy.io.wavfile.write(output_path, rate=sample_rate, data=waveform)
    return output_path
# Voice presets offered in the UI: Bark's v2 Hindi speakers 1 through 5.
voice_presets = [f"v2/hi_speaker_{index}" for index in range(1, 6)]
# Build the Gradio UI: a text prompt and a voice selector feed
# text_to_speech, whose result is shown in an audio player.
_prompt_box = gr.Textbox(
    label="Enter text (Hindi or English)",
    placeholder="तुम बहुत अच्छे हो...",
)
_voice_selector = gr.Dropdown(
    choices=voice_presets,
    value="v2/hi_speaker_2",
    label="Select Voice",
)
_speech_player = gr.Audio(label="Generated Speech")

# Sample (text, voice preset) pairs listed under the form.
_sample_prompts = [
    ["तुम बहुत अच्छे हो और मैं भी तुम्हारी तरह अच्छा हूँ", "v2/hi_speaker_2"],
    ["You are very nice and I am also nice like you", "v2/hi_speaker_1"],
]

demo = gr.Interface(
    title="Bark Text-to-Speech",
    description="Convert text to speech using the Bark model. Supports Hindi and English text.",
    fn=text_to_speech,
    inputs=[_prompt_box, _voice_selector],
    outputs=_speech_player,
    examples=_sample_prompts,
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()