# Spaces: Running
import gradio as gr | |
from gtts import gTTS | |
from pydub import AudioSegment | |
from io import BytesIO | |
def custom_tts(
    text1, accent1,
    text2, accent2,
    text3, accent3,
    text4, accent4,
    text5, accent5,
    text6, accent6,
    text7, accent7,
    text8, accent8,
    text9, accent9,
    text10, accent10
):
    """Synthesize up to ten dialogue lines with gTTS and join them into one MP3.

    Each ``textN`` is spoken with the accent named by ``accentN``. Lines whose
    text is ``None``, empty or whitespace-only are skipped. Every spoken line
    is followed by 500 ms of silence.

    Args:
        text1..text10: Dialogue strings; may be empty or ``None``.
        accent1..accent10: ``"British"``, ``"American"`` or ``"Australian"``.
            Any other value falls back to ``("en", "com")``, the same
            settings as ``"American"``.

    Returns:
        The path ``"combined_output.mp3"`` of the exported audio, or ``None``
        when no dialogue contains any text.
    """
    # Language code and Google TLD used by gTTS for each accent.
    accent_mapping = {
        "British": ("en", "co.uk"),
        "American": ("en", "com"),
        "Australian": ("en", "com.au")
    }

    # Pair every dialogue text with the accent selected for it.
    dialogues = [
        (text1, accent1),
        (text2, accent2),
        (text3, accent3),
        (text4, accent4),
        (text5, accent5),
        (text6, accent6),
        (text7, accent7),
        (text8, accent8),
        (text9, accent9),
        (text10, accent10)
    ]

    # Keep only lines with real content. The `text and ...` guard avoids an
    # AttributeError on None (a cleared textbox may deliver None - confirm
    # against the UI framework in use).
    spoken = [
        (text, accent)
        for text, accent in dialogues
        if text and text.strip()
    ]
    if not spoken:
        # Nothing to say: skip synthesis instead of exporting a zero-length
        # clip.
        return None

    combined_audio = AudioSegment.silent(duration=0)  # empty starting clip

    # Generate speech for each dialogue and append it to the running clip.
    for text, accent in spoken:
        lang, tld = accent_mapping.get(accent, ("en", "com"))
        tts = gTTS(text, lang=lang, tld=tld)

        # Render the MP3 into memory and rewind so pydub can decode it.
        audio_file = BytesIO()
        tts.write_to_fp(audio_file)
        audio_file.seek(0)
        tts_audio = AudioSegment.from_file(audio_file, format="mp3")

        # Follow every spoken line with 500 ms of silence.
        combined_audio += tts_audio + AudioSegment.silent(duration=500)

    # Save the combined audio as an MP3 file.
    # NOTE(review): the fixed file name is shared by every call; confirm that
    # calls cannot overlap before relying on its contents.
    output_file = "combined_output.mp3"
    combined_audio.export(output_file, format="mp3")
    return output_file
# Accent options offered by every dropdown; custom_tts maps these names to
# gTTS language/TLD settings.
ACCENT_CHOICES = ("British", "American", "Australian")

# Number of dialogue rows. custom_tts takes exactly this many (text, accent)
# pairs, so change both together.
NUM_DIALOGUES = 10

with gr.Blocks() as demo:
    # NOTE(review): the heading text below appears mis-encoded (probably
    # Korean originally). It is kept as-is because it is a runtime string;
    # restore it from the original source file.
    gr.Markdown("## Custom TTS: 10๊ฐ์ ๋ํ๋ฌธ ์ ๋ ฅ๋์์ ์ ์ผํธ๋ฅผ ์ ํํ์ฌ ์์ฑ ์์ฑํ๊ธฐ")

    # One row per dialogue: a textbox for the line and a dropdown for its
    # accent. Components are collected in the order custom_tts expects:
    # text1, accent1, text2, accent2, ...
    dialogue_inputs = []
    for index in range(1, NUM_DIALOGUES + 1):
        with gr.Row():
            text_box = gr.Textbox(
                label=f"Dialogue {index}",
                placeholder=f"Enter text for Dialogue {index}",
            )
            accent_box = gr.Dropdown(
                label=f"Accent for Dialogue {index}",
                choices=list(ACCENT_CHOICES),
                value="British",
            )
        dialogue_inputs.extend((text_box, accent_box))

    output_audio = gr.Audio(label="Generated Speech", type="filepath")
    generate_button = gr.Button("Generate Speech")

    # Clicking the button runs custom_tts on all rows and plays the result.
    generate_button.click(
        custom_tts,
        inputs=dialogue_inputs,
        outputs=output_audio
    )

if __name__ == "__main__":
    # Start the Gradio server only when run as a script.
    demo.launch()