# engdialogue / app.py
# englissi's picture
# Update app.py
# 1a4473d verified
# raw
# history blame
# 4.84 kB
import tempfile
from io import BytesIO

import gradio as gr
from gtts import gTTS
from pydub import AudioSegment
def custom_tts(
    text1, accent1,
    text2, accent2,
    text3, accent3,
    text4, accent4,
    text5, accent5,
    text6, accent6,
    text7, accent7,
    text8, accent8,
    text9, accent9,
    text10, accent10
):
    """Synthesize up to ten dialogue lines with gTTS and join them into one MP3.

    Each ``textN`` is spoken with the voice selected by ``accentN``
    ("British", "American" or "Australian"; any other value falls back to
    the American voice). Lines that are ``None``, empty or whitespace-only
    are skipped. The remaining lines are concatenated in order, each one
    followed by 500 ms of silence (including the last).

    Returns:
        Path of a newly created MP3 file holding the combined audio, or
        ``None`` when no line contains any text.
    """
    # Language code and Google TLD that gTTS uses to pick each accent.
    accent_mapping = {
        "British": ("en", "co.uk"),
        "American": ("en", "com"),
        "Australian": ("en", "com.au")
    }

    # Pair every dialogue text with the accent chosen for it.
    dialogues = [
        (text1, accent1),
        (text2, accent2),
        (text3, accent3),
        (text4, accent4),
        (text5, accent5),
        (text6, accent6),
        (text7, accent7),
        (text8, accent8),
        (text9, accent9),
        (text10, accent10)
    ]

    # Keep only lines that actually contain text; `text and ...` also guards
    # against None, which would otherwise crash on .strip().
    spoken = [(text, accent) for text, accent in dialogues if text and text.strip()]
    if not spoken:
        # Nothing to say: report "no audio" instead of exporting an empty MP3.
        return None

    pause = AudioSegment.silent(duration=500)
    combined_audio = AudioSegment.silent(duration=0)  # empty segment to append to

    for text, accent in spoken:
        lang, tld = accent_mapping.get(accent, ("en", "com"))
        # gTTS produces MP3 data; keep it in memory and decode it with pydub.
        audio_file = BytesIO()
        gTTS(text, lang=lang, tld=tld).write_to_fp(audio_file)
        audio_file.seek(0)
        combined_audio += AudioSegment.from_file(audio_file, format="mp3") + pause

    # Write to a unique file per call so overlapping requests cannot overwrite
    # each other's result. The file is intentionally left on disk because the
    # caller receives its path.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as handle:
        output_file = handle.name
    combined_audio.export(output_file, format="mp3")
    return output_file
# Accent names offered in every dropdown.
ACCENT_CHOICES = ["British", "American", "Australian"]
# custom_tts takes exactly this many (text, accent) pairs.
DIALOGUE_COUNT = 10

with gr.Blocks() as demo:
    # Korean heading: "Generate speech by choosing an accent for each of the
    # 10 dialogue input boxes".
    gr.Markdown("## Custom TTS: 10개의 대화문 입력란에서 악센트를 선택하여 음성 생성하기")

    # One row per dialogue line: a textbox plus its accent dropdown. The
    # components are collected in text1, accent1, text2, accent2, ... order,
    # which is the positional order custom_tts expects.
    dialogue_inputs = []
    for index in range(1, DIALOGUE_COUNT + 1):
        with gr.Row():
            text_box = gr.Textbox(
                label=f"Dialogue {index}",
                placeholder=f"Enter text for Dialogue {index}",
            )
            accent_dropdown = gr.Dropdown(
                label=f"Accent for Dialogue {index}",
                choices=ACCENT_CHOICES,
                value="British",
            )
        dialogue_inputs += [text_box, accent_dropdown]

    output_audio = gr.Audio(label="Generated Speech", type="filepath")
    generate_button = gr.Button("Generate Speech")
    generate_button.click(
        custom_tts,
        inputs=dialogue_inputs,
        outputs=output_audio,
    )

if __name__ == "__main__":
    demo.launch()