# Source: Hugging Face Space "Herishop/OpenAI-TTS" (status: Running; file size: 4,653 bytes)

import gradio as gr
from openai import OpenAI
import tempfile

# API key validation helper
def check_api_key(api_key):
    """Report whether ``api_key`` is accepted by the OpenAI API.

    A client is built with the key and a model-listing request is issued as
    a lightweight probe. Any exception raised along the way is printed and
    treated as "key not valid".

    Args:
        api_key: The OpenAI API key to probe.

    Returns:
        True when the probe request completes without raising, else False.
    """
    try:
        # Listing models is used purely as a connectivity/credential check.
        OpenAI(api_key=api_key).models.list()
    except Exception as exc:
        print(f"Error: {exc}")
        return False
    else:
        return True

# Text-to-speech (TTS) entry point
def tts(text, model, voice, speed, api_key, audio_file=None):
    """Generate speech with the OpenAI TTS API and return the MP3 file path.

    When ``audio_file`` is given, it is first transcribed with Whisper and
    the transcript replaces ``text`` as the input to speech synthesis.

    Args:
        text: Text to speak; ignored when ``audio_file`` is provided.
        model: TTS model name forwarded to the API.
        voice: Voice name forwarded to the API.
        speed: Speech speed forwarded to the API.
        api_key: OpenAI API key supplied by the user.
        audio_file: Optional path of an audio file to transcribe first.

    Returns:
        Path of a temporary ``.mp3`` file holding the generated audio. The
        file is created with ``delete=False`` and is not removed here.

    Raises:
        gr.Error: If the API key is missing or rejected, if there is no
            text to speak, or if a transcription/speech request fails.
    """
    # Reject a missing or whitespace-only key before any network call.
    if not api_key or api_key.strip() == "":
        raise gr.Error('Please enter your OpenAI API Key')

    # Keys pasted from elsewhere often carry stray whitespace/newlines.
    api_key = api_key.strip()

    # Probe the API so an invalid key gets a specific error message.
    if not check_api_key(api_key):
        raise gr.Error('Invalid OpenAI API Key. Please enter a valid API key.')

    try:
        client = OpenAI(api_key=api_key)

        # If an audio file was uploaded, transcribe it with Whisper first.
        if audio_file:
            # The context manager guarantees the handle is closed even if
            # the request raises.
            with open(audio_file, 'rb') as audio_stream:
                transcript = client.audio.transcriptions.create(
                    model='whisper-1',
                    file=audio_stream,
                    response_format='text',
                )
            # With response_format='text' the result is a plain string, so
            # it must not be indexed like a mapping; fall back to ``.text``
            # in case an object is returned instead.
            text = transcript if isinstance(transcript, str) else transcript.text

        # Fail early with a clear message instead of sending empty input.
        if not text or not text.strip():
            raise gr.Error('Please provide some text to convert to speech.')

        # Request speech synthesis at the chosen speed.
        response = client.audio.speech.create(
            model=model,
            voice=voice,
            input=text,
            speed=speed,
        )

    except gr.Error:
        # Our own user-facing errors must not be replaced by the generic one.
        raise
    except Exception as error:
        print(str(error))
        raise gr.Error(
            "An error occurred while generating speech. Please check your API key and try again."
        ) from error

    # Persist the audio to a temporary file that outlives this function so
    # the caller can serve it.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_file:
        temp_file.write(response.content)
        temp_file_path = temp_file.name

    return temp_file_path

# Gradio user interface
def gradio_interface():
    """Build the two-column Gradio UI, wire the button handler, and launch it.

    The left column collects the API key, model, voice, speed and input
    text; the right column holds the input-type selector, the audio upload
    and the generated speech output. The function launches the app via
    ``demo.launch()`` and returns nothing.
    """
    with gr.Blocks() as demo:
        gr.Markdown("# <center> OpenAI Text-To-Speech with Speed Control </center>")

        # Lay the interface out in two columns.
        with gr.Row():
            # Left column: API key, model/voice/speed settings, text, button.
            with gr.Column(scale=2):  # `scale` controls the column width
                api_key = gr.Textbox(type='password', label='Enter your OpenAI API Key', placeholder='Enter your OpenAI API key')

                # Model and voice selectors share one horizontal row.
                with gr.Row():
                    model = gr.Dropdown(choices=['tts-1', 'tts-1-hd'], label='Model', value='tts-1', elem_id="model-dropdown", interactive=True)
                    voice = gr.Dropdown(
                        choices=[
                            'alloy', 'ash', 'coral', 'echo', 'fable', 'onyx', 'nova', 'sage', 'shimmer'
                        ],
                        label='Voice Options',
                        value='alloy',
                        elem_id="voice-dropdown", interactive=True
                    )

                speed = gr.Slider(minimum=0.5, maximum=2.0, step=0.1, label="Speed", value=1.0)

                # The input text gets its own row.
                with gr.Row():
                    text = gr.Textbox(label="Input Text", placeholder="Enter your text here")
                btn = gr.Button("Generate Speech")

            # Right column: input-type selector, audio upload, speech output.
            with gr.Column(scale=2):  # `scale` controls the column width
                input_type = gr.Radio(["Text", "Audio"], label="Input Type", value="Text")
                audio_file = gr.File(label="Upload Audio File")
                output_audio = gr.Audio(label="Speech Output")

        # Button handler. The model, voice and speed arrive as event inputs
        # so they reflect the user's current selection; reading
        # `component.value` inside a handler only yields the value the
        # component was constructed with.
        def process_input(input_type, text, audio_file, api_key,
                          model_name, voice_name, speed_value):
            if input_type == "Text":
                return tts(text, model_name, voice_name, speed_value, api_key)
            elif input_type == "Audio":
                if audio_file is None:
                    raise gr.Error('Please upload an audio file.')
                # The upload may arrive as an object exposing `.name` or as
                # a plain path string; accept both.
                audio_path = getattr(audio_file, "name", audio_file)
                return tts(None, model_name, voice_name, speed_value, api_key, audio_path)

        # Wire the button click to the handler.
        btn.click(
            fn=process_input,
            inputs=[input_type, text, audio_file, api_key, model, voice, speed],
            outputs=output_audio,
        )

    demo.launch()

# Build and launch the Gradio app only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    gradio_interface()