"""Gradio demo that reads up to ten dialogue lines aloud, each in a chosen accent.

Every non-empty dialogue is synthesized with gTTS, the clips are joined with a
short pause after each one, and the result is served as a single MP3 file.
"""

from io import BytesIO

import gradio as gr
from gtts import gTTS
from pydub import AudioSegment

# Accent label shown in the UI -> (gTTS language code, gTTS ``tld`` argument).
ACCENT_MAPPING = {
    "British": ("en", "co.uk"),
    "American": ("en", "com"),
    "Australian": ("en", "com.au"),
}
# Voice used when the selected accent is missing or not in ACCENT_MAPPING.
FALLBACK_VOICE = ("en", "com")
# Accent pre-selected in every dropdown.
DEFAULT_ACCENT = "British"
# Number of dialogue rows in the UI; must match the text/accent pairs that
# ``custom_tts`` accepts.
DIALOGUE_COUNT = 10
# Length of the silence appended after each spoken dialogue, in milliseconds.
PAUSE_MS = 500
# NOTE(review): this fixed path is shared by every request, so overlapping
# calls would overwrite each other's output -- confirm the app's concurrency
# settings before relying on it.
OUTPUT_FILE = "combined_output.mp3"


def _synthesize(text, accent):
    """Return ``text`` spoken with the voice mapped to ``accent`` as an AudioSegment."""
    lang, tld = ACCENT_MAPPING.get(accent, FALLBACK_VOICE)
    buffer = BytesIO()
    gTTS(text, lang=lang, tld=tld).write_to_fp(buffer)
    # Rewind so pydub decodes from the start of the MP3 data just written.
    buffer.seek(0)
    return AudioSegment.from_file(buffer, format="mp3")


def custom_tts(
    text1, accent1,
    text2, accent2,
    text3, accent3,
    text4, accent4,
    text5, accent5,
    text6, accent6,
    text7, accent7,
    text8, accent8,
    text9, accent9,
    text10, accent10
):
    """Synthesize the non-empty dialogues in order and join them into one MP3.

    Args:
        text1 .. text10: Dialogue texts. Entries that are ``None``, empty, or
            whitespace-only are skipped.
        accent1 .. accent10: Accent label for the matching dialogue; a key of
            ``ACCENT_MAPPING``. Any other value falls back to ``FALLBACK_VOICE``.

    Returns:
        The path of the exported MP3 file, or ``None`` when no dialogue
        contains any text (there is nothing to synthesize in that case).
    """
    # Pair each dialogue with its selected accent, keeping the input order.
    dialogues = [
        (text1, accent1),
        (text2, accent2),
        (text3, accent3),
        (text4, accent4),
        (text5, accent5),
        (text6, accent6),
        (text7, accent7),
        (text8, accent8),
        (text9, accent9),
        (text10, accent10),
    ]

    # ``text and ...`` guards against None before calling ``.strip()``.
    spoken = [
        (text, accent) for text, accent in dialogues if text and text.strip()
    ]
    if not spoken:
        return None

    # The pause is identical for every dialogue, so build it once.
    pause = AudioSegment.silent(duration=PAUSE_MS)
    combined_audio = AudioSegment.silent(duration=0)  # empty starting clip
    for text, accent in spoken:
        # Each clip is followed by a pause (including the last one).
        combined_audio += _synthesize(text, accent) + pause

    # ``export`` hands back the open output file; close it explicitly instead
    # of leaving the handle to the garbage collector.
    combined_audio.export(OUTPUT_FILE, format="mp3").close()
    return OUTPUT_FILE


with gr.Blocks() as demo:
    gr.Markdown("## Custom TTS: 10개의 대화문 입력란에서 악센트를 선택하여 음성 생성하기")

    # Components are collected as text1, accent1, text2, accent2, ... so the
    # list lines up with the positional parameters of ``custom_tts``.
    dialogue_inputs = []
    for index in range(1, DIALOGUE_COUNT + 1):
        with gr.Row():
            dialogue_inputs.append(
                gr.Textbox(
                    label=f"Dialogue {index}",
                    placeholder=f"Enter text for Dialogue {index}",
                )
            )
            dialogue_inputs.append(
                gr.Dropdown(
                    label=f"Accent for Dialogue {index}",
                    choices=list(ACCENT_MAPPING),
                    value=DEFAULT_ACCENT,
                )
            )

    output_audio = gr.Audio(label="Generated Speech", type="filepath")
    generate_button = gr.Button("Generate Speech")

    generate_button.click(
        custom_tts,
        inputs=dialogue_inputs,
        outputs=output_audio,
    )

if __name__ == "__main__":
    demo.launch()