Bark_tts_hindi / app.py
ayush2607's picture
Create app.py
28e403b verified
raw
history blame
1.8 kB
import os
import tempfile

import gradio as gr
import scipy.io.wavfile
import torch
from transformers import AutoProcessor, BarkModel
# Load the Bark processor and model once at import time so every request
# reuses the same instances; both come from the same checkpoint id.
_BARK_CHECKPOINT = "suno/bark"
processor = AutoProcessor.from_pretrained(_BARK_CHECKPOINT)
model = BarkModel.from_pretrained(_BARK_CHECKPOINT)
def text_to_speech(text, voice_preset="v2/hi_speaker_2"):
    """Synthesize ``text`` with Bark and return the path of the resulting WAV file.

    Uses the module-level ``processor`` and ``model`` objects.

    Args:
        text: The text to speak. Must contain at least one non-whitespace
            character.
        voice_preset: Voice preset identifier forwarded to the processor.

    Returns:
        Path to a newly created ``.wav`` file holding the generated audio.
        The file is not deleted by this function; cleanup is left to the
        caller.

    Raises:
        ValueError: If ``text`` is empty, ``None`` or whitespace only.
    """
    # Reject blank input up front instead of spending a full generation pass
    # on text that carries nothing to speak.
    if not text or not text.strip():
        raise ValueError("Text must not be empty.")

    # Tokenize the text and attach the selected voice preset.
    inputs = processor(text, voice_preset=voice_preset)

    # Run generation and convert the result to a NumPy array; squeeze()
    # drops singleton dimensions before the array is written to disk.
    audio_array = model.generate(**inputs)
    audio_array = audio_array.cpu().numpy().squeeze()

    # The sample rate is read from the model's generation config.
    sample_rate = model.generation_config.sample_rate

    # Reserve a unique file per call. A single fixed filename would be
    # shared by every request, so overlapping calls could overwrite each
    # other's audio. The handle is closed before writing so the path can be
    # reopened by scipy on every platform.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as wav_file:
        output_path = wav_file.name

    scipy.io.wavfile.write(output_path, rate=sample_rate, data=audio_array)
    return output_path
# Hindi Bark voices offered in the UI: speakers 1 through 5.
voice_presets = [f"v2/hi_speaker_{speaker_no}" for speaker_no in range(1, 6)]
# Assemble the web UI: free text plus a voice selector in, synthesized audio out.
# Each example row is [text, voice preset], matching the order of the inputs.
_example_rows = [
    ["तुम बहुत अच्छे हो और मैं भी तुम्हारी तरह अच्छा हूँ", "v2/hi_speaker_2"],
    ["You are very nice and I am also nice like you", "v2/hi_speaker_1"],
]
_input_widgets = [
    gr.Textbox(
        placeholder="तुम बहुत अच्छे हो...",
        label="Enter text (Hindi or English)",
    ),
    gr.Dropdown(
        label="Select Voice",
        value="v2/hi_speaker_2",
        choices=voice_presets,
    ),
]
demo = gr.Interface(
    title="Bark Text-to-Speech",
    description="Convert text to speech using the Bark model. Supports Hindi and English text.",
    fn=text_to_speech,
    inputs=_input_widgets,
    outputs=gr.Audio(label="Generated Speech"),
    examples=_example_rows,
)
# Start the Gradio server only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch()