import gradio as gr
import torch
from transformers import AutoProcessor, AutoModel
import scipy.io.wavfile as wavfile
import numpy as np


# Initialize model and processor
def load_model():
    processor = AutoProcessor.from_pretrained("suno/bark-small")
    model = AutoModel.from_pretrained("suno/bark-small")
    return processor, model


# Text to speech function
def text_to_speech(text):
    try:
        # Tokenize the input text
        inputs = processor(
            text=[text],
            return_tensors="pt",
        )

        # Generate speech with sampling enabled
        with torch.no_grad():
            speech_values = model.generate(**inputs, do_sample=True)

        # Convert to a float32 numpy array and read the model's sampling rate
        audio_data = speech_values.cpu().numpy().squeeze().astype(np.float32)
        sampling_rate = model.generation_config.sample_rate

        # Write the waveform to a temporary file that Gradio can play back
        temp_path = "temp_audio.wav"
        wavfile.write(temp_path, sampling_rate, audio_data)

        return temp_path
    except Exception as e:
        # Raise a Gradio error so the failure is shown in the UI instead of
        # returning a string to the Audio output component
        raise gr.Error(f"Error generating speech: {str(e)}")


# Load models globally
print("Loading models...")
processor, model = load_model()
print("Models loaded successfully!")

# Create Gradio interface
demo = gr.Interface(
    fn=text_to_speech,
    inputs=[
        gr.Textbox(
            label="Enter text (Hindi supported)",
            placeholder="इस योजना से संबंधित लाभों का विवरण प्राप्त कर सकते",
        )
    ],
    outputs=gr.Audio(label="Generated Speech"),
    title="Hindi Text-to-Speech using Bark",
    description="Generate natural-sounding speech from Hindi text using the Bark model.",
    examples=[
        ["इस योजना से संबंधित लाभों का विवरण प्राप्त कर सकते"],
        ["नमस्ते, आप कैसे हैं?"],
    ],
)

# Launch the app
if __name__ == "__main__":
    demo.launch()
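
# Optional: quick sanity check without the web UI. A minimal sketch, assuming
# the module has been imported (so load_model() has already run); calling
# text_to_speech() directly writes temp_audio.wav and returns its path.
#
#   wav_path = text_to_speech("नमस्ते, आप कैसे हैं?")
#   print(f"Saved generated speech to {wav_path}")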