File size: 1,947 Bytes
28e403b
d82b8b6
cddcc30
 
d82b8b6
cddcc30
e2e5380
cddcc30
 
 
 
 
e2e5380
cddcc30
 
e2e5380
cddcc30
 
 
 
 
 
27d6995
cddcc30
 
 
d82b8b6
cddcc30
 
 
27d6995
cddcc30
e2e5380
cddcc30
28e403b
cddcc30
 
 
 
28e403b
2079cba
d82b8b6
 
 
2079cba
cddcc30
 
2079cba
d82b8b6
 
cddcc30
 
 
 
 
 
d82b8b6
28e403b
27d6995
cddcc30
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import os
import tempfile

import gradio as gr
import numpy as np
import scipy.io.wavfile as wavfile
import torch
from transformers import AutoProcessor, AutoModel

# Initialize model and processor
def load_model():
    """Load the Bark processor and model used for speech generation.

    Returns:
        A ``(processor, model)`` tuple, both loaded from the
        ``suno/bark-small`` checkpoint.
    """
    checkpoint = "suno/bark-small"
    # Tuple elements are evaluated left to right: processor first, then model.
    return (
        AutoProcessor.from_pretrained(checkpoint),
        AutoModel.from_pretrained(checkpoint),
    )

# Text to speech function
def text_to_speech(text):
    """Generate speech for *text* and return the path of the WAV file.

    Uses the module-level ``processor`` and ``model``.

    Args:
        text: The text to speak, as entered in the Gradio textbox.

    Returns:
        Path to a newly created ``.wav`` file containing the generated audio.

    Raises:
        gr.Error: If *text* is empty/whitespace-only, or if generation or
            writing the audio file fails. The original exception is chained
            as the cause.
    """
    # Reject blank input up front instead of sending it to the model.
    if not text or not text.strip():
        raise gr.Error("Please enter some text to convert to speech.")

    try:
        # Generate speech
        inputs = processor(
            text=[text],
            return_tensors="pt",
        )
        speech_values = model.generate(**inputs, do_sample=True)

        # Move to CPU, convert to numpy and drop singleton dimensions
        audio_data = speech_values.cpu().numpy().squeeze()
        sampling_rate = model.generation_config.sample_rate

        # Write to a unique temporary file so overlapping requests do not
        # overwrite each other's audio. delete=False keeps the file on disk
        # after the handle is closed so the caller can still read it.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            wavfile.write(tmp, sampling_rate, audio_data)
            temp_path = tmp.name
    except Exception as e:
        # The output component expects an audio file, so an error string must
        # not be returned as if it were a path; surface it as a Gradio error.
        raise gr.Error(f"Error generating speech: {str(e)}") from e

    return temp_path

# Load the processor and model once at import time; text_to_speech reads
# them as module-level globals.
print("Loading models...")
processor, model = load_model()
print("Models loaded successfully!")

# Build the UI components first, then wire them into the interface
_text_input = gr.Textbox(
    label="Enter text (Hindi supported)",
    placeholder="इस योजना से संबंधित लाभों का विवरण प्राप्त कर सकते",
)
_audio_output = gr.Audio(label="Generated Speech")
_example_rows = [
    ["इस योजना से संबंधित लाभों का विवरण प्राप्त कर सकते"],
    ["नमस्ते, आप कैसे हैं?"],
]

demo = gr.Interface(
    fn=text_to_speech,
    inputs=[_text_input],
    outputs=_audio_output,
    title="Hindi Text-to-Speech using Bark",
    description="Generate natural-sounding speech from Hindi text using the Bark model.",
    examples=_example_rows,
)

# Launch the app only when this file is run as a script
if __name__ == "__main__":
    demo.launch()