import tempfile
from io import BytesIO

import gradio as gr
from gtts import gTTS
from pydub import AudioSegment

def custom_tts(
    text1, accent1,
    text2, accent2,
    text3, accent3,
    text4, accent4,
    text5, accent5,
    text6, accent6,
    text7, accent7,
    text8, accent8,
    text9, accent9,
    text10, accent10
):
    """Synthesize up to ten dialogue lines and join them into one MP3 file.

    Every non-blank text is converted to speech with gTTS using the accent
    chosen for it. The resulting clips are concatenated in argument order,
    and each clip (including the last one) is followed by 500 ms of silence.

    Args:
        text1 .. text10: Dialogue texts. Entries that are ``None``, empty,
            or whitespace-only are skipped.
        accent1 .. accent10: Accent name paired with the matching text:
            "British", "American" or "Australian". Any other value uses the
            same settings as "American" (``lang="en"``, ``tld="com"``).

    Returns:
        Path to the generated MP3 file, or ``None`` when no dialogue
        contained any text.
    """
    # Language code and gTTS top-level domain for each accent.
    accent_mapping = {
        "British": ("en", "co.uk"),
        "American": ("en", "com"),
        "Australian": ("en", "com.au")
    }

    # Pair each dialogue text with its selected accent.
    dialogues = [
        (text1, accent1),
        (text2, accent2),
        (text3, accent3),
        (text4, accent4),
        (text5, accent5),
        (text6, accent6),
        (text7, accent7),
        (text8, accent8),
        (text9, accent9),
        (text10, accent10)
    ]

    # Keep only entries with real content; the `text and ...` guard avoids
    # an AttributeError if a text value arrives as None.
    spoken = [
        (text, accent) for text, accent in dialogues if text and text.strip()
    ]
    if not spoken:
        # Nothing to synthesize: return no file rather than exporting an
        # empty clip.
        return None

    combined_audio = AudioSegment.silent(duration=0)  # start from an empty clip

    # Generate speech for each dialogue and append it to the result.
    for text, accent in spoken:
        lang, tld = accent_mapping.get(accent, ("en", "com"))
        tts = gTTS(text, lang=lang, tld=tld)
        audio_file = BytesIO()
        tts.write_to_fp(audio_file)
        audio_file.seek(0)  # rewind so pydub reads from the beginning
        tts_audio = AudioSegment.from_file(audio_file, format="mp3")
        # Follow every clip with 500 ms of silence.
        combined_audio += tts_audio + AudioSegment.silent(duration=500)

    # Export to a unique file per call so overlapping requests cannot
    # overwrite each other's output. The file is closed before exporting so
    # it can be reopened by path; delete=False leaves it on disk for the
    # caller to read.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
        output_file = tmp.name
    combined_audio.export(output_file, format="mp3")
    return output_file

# Accent names offered in every dropdown.
ACCENT_CHOICES = ("British", "American", "Australian")
# Number of dialogue rows. custom_tts takes exactly this many
# (text, accent) pairs, so the two must be changed together.
NUM_DIALOGUES = 10

# Build the UI: one row per dialogue (text box + accent dropdown), an audio
# player for the result, and a button that triggers synthesis.
with gr.Blocks() as demo:
    gr.Markdown("## Custom TTS: 10개의 대화문 입력란에서 악센트를 선택하여 음성 생성하기")

    # Components are collected in the interleaved order custom_tts expects:
    # text1, accent1, text2, accent2, ...
    tts_inputs = []
    for index in range(1, NUM_DIALOGUES + 1):
        with gr.Row():
            text_box = gr.Textbox(
                label=f"Dialogue {index}",
                placeholder=f"Enter text for Dialogue {index}",
            )
            accent_box = gr.Dropdown(
                label=f"Accent for Dialogue {index}",
                choices=list(ACCENT_CHOICES),
                value="British",
            )
        tts_inputs.extend([text_box, accent_box])

    output_audio = gr.Audio(label="Generated Speech", type="filepath")
    generate_button = gr.Button("Generate Speech")

    # On click, pass every text/accent value to custom_tts and play the
    # file it returns.
    generate_button.click(
        custom_tts,
        inputs=tts_inputs,
        outputs=output_audio
    )

if __name__ == "__main__":
    demo.launch()