englissi committed on
Commit
1a4473d
·
verified ·
1 Parent(s): 9e130e4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -57
app.py CHANGED
@@ -3,36 +3,45 @@ from gtts import gTTS
3
  from pydub import AudioSegment
4
  from io import BytesIO
5
 
6
- def multilingual_tts(
7
- korean_text,
8
- british_text,
9
- american_text,
10
- british_text_add1,
11
- british_text_add2,
12
- australian_text_add1,
13
- australian_text_add2,
14
- american_text_add1,
15
- additional_english_text_9,
16
- additional_english_text_10
17
  ):
18
- # 각 음성에 대해 (언어 코드, tld, 텍스트) 튜플을 리스트에 저장합니다.
19
- voices = [
20
- ("ko", "com", korean_text), # 한국어
21
- ("en", "co.uk", british_text), # 기존 영국식
22
- ("en", "com", american_text), # 기존 미국식
23
- ("en", "co.uk", british_text_add1), # 추가 영국식 1
24
- ("en", "co.uk", british_text_add2), # 추가 영국식 2
25
- ("en", "com.au", australian_text_add1), # 추가 호주식 1
26
- ("en", "com.au", australian_text_add2), # 추가 호주식 2
27
- ("en", "com", american_text_add1), # 추가 미국식 1
28
- ("en", "com", additional_english_text_9), # 추가 영어 9
29
- ("en", "com", additional_english_text_10) # 추가 영어 10
 
 
 
 
 
 
 
30
  ]
31
 
32
- combined_audio = AudioSegment.silent(duration=0) # 빈 오디오
33
 
34
- for lang, tld, text in voices:
35
- if text.strip(): # 텍스트가 입력되어 있을 때만 처리
 
 
36
  tts = gTTS(text, lang=lang, tld=tld)
37
  audio_file = BytesIO()
38
  tts.write_to_fp(audio_file)
@@ -44,57 +53,70 @@ def multilingual_tts(
44
  # 최종 결합된 오디오를 mp3 파일로 저장
45
  output_file = "combined_output.mp3"
46
  combined_audio.export(output_file, format="mp3")
47
-
48
  return output_file
49
 
50
  with gr.Blocks() as demo:
51
- gr.Markdown("## Multilingual TTS: Generate a Single Audio File (총 10개 음성)")
 
 
 
 
 
 
 
 
52
 
53
- # 한국어 입력란
54
- korean_input = gr.Textbox(label="Enter Korean Text:", placeholder="안녕하세요")
 
 
 
 
 
55
 
56
- # 기존 영어 입력란 (영국식, 미국식)
57
  with gr.Row():
58
- british_input = gr.Textbox(label="Enter British English Text:", placeholder="Hello (British)")
59
- american_input = gr.Textbox(label="Enter American English Text:", placeholder="Hello (American)")
60
 
61
- # 추가 영어 입력란 (추가 영국식)
62
  with gr.Row():
63
- british_input_add1 = gr.Textbox(label="Enter Additional British English Text 1:", placeholder="Hi there (British)")
64
- british_input_add2 = gr.Textbox(label="Enter Additional British English Text 2:", placeholder="Good day (British)")
65
 
66
- # 추가 영어 입력란 (추가 호주식)
67
  with gr.Row():
68
- australian_input_add1 = gr.Textbox(label="Enter Additional Australian English Text 1:", placeholder="G'day (Australian)")
69
- australian_input_add2 = gr.Textbox(label="Enter Additional Australian English Text 2:", placeholder="How ya going? (Australian)")
70
 
71
- # 추가 영어 입력란 (추가 미국식 및 추가 영어)
72
  with gr.Row():
73
- american_input_add1 = gr.Textbox(label="Enter Additional American English Text 1:", placeholder="Hey (American)")
74
- additional_english_input_9 = gr.Textbox(label="Enter Additional English Text 9:", placeholder="Additional dialogue 9 (English)")
75
 
76
- # 마지막 추가 영어 입력란
77
- additional_english_input_10 = gr.Textbox(label="Enter Additional English Text 10:", placeholder="Additional dialogue 10 (English)")
 
 
 
 
 
78
 
79
  output_audio = gr.Audio(label="Generated Speech", type="filepath")
80
  generate_button = gr.Button("Generate Speech")
81
 
82
  generate_button.click(
83
- multilingual_tts,
84
  inputs=[
85
- korean_input,
86
- british_input,
87
- american_input,
88
- british_input_add1,
89
- british_input_add2,
90
- australian_input_add1,
91
- australian_input_add2,
92
- american_input_add1,
93
- additional_english_input_9,
94
- additional_english_input_10
95
- ],
96
  outputs=output_audio
97
  )
98
 
99
  if __name__ == "__main__":
100
- demo.launch()
 
3
  from pydub import AudioSegment
4
  from io import BytesIO
5
 
6
+ def custom_tts(
7
+ text1, accent1,
8
+ text2, accent2,
9
+ text3, accent3,
10
+ text4, accent4,
11
+ text5, accent5,
12
+ text6, accent6,
13
+ text7, accent7,
14
+ text8, accent8,
15
+ text9, accent9,
16
+ text10, accent10
17
  ):
18
+ # 각 악센트에 따른 언어 코드와 tld 설정
19
+ accent_mapping = {
20
+ "British": ("en", "co.uk"),
21
+ "American": ("en", "com"),
22
+ "Australian": ("en", "com.au")
23
+ }
24
+
25
+ # 10개의 대화문과 선택된 악센트를 튜플 리스트로 구성합니다.
26
+ dialogues = [
27
+ (text1, accent1),
28
+ (text2, accent2),
29
+ (text3, accent3),
30
+ (text4, accent4),
31
+ (text5, accent5),
32
+ (text6, accent6),
33
+ (text7, accent7),
34
+ (text8, accent8),
35
+ (text9, accent9),
36
+ (text10, accent10)
37
  ]
38
 
39
+ combined_audio = AudioSegment.silent(duration=0) # 초기 빈 오디오
40
 
41
+ # 각 대화문에 대해 음성 생성 후 결합
42
+ for text, accent in dialogues:
43
+ if text.strip(): # 텍스트가 비어있지 않은 경우에만 처리
44
+ lang, tld = accent_mapping.get(accent, ("en", "com"))
45
  tts = gTTS(text, lang=lang, tld=tld)
46
  audio_file = BytesIO()
47
  tts.write_to_fp(audio_file)
 
53
  # 최종 결합된 오디오를 mp3 파일로 저장
54
  output_file = "combined_output.mp3"
55
  combined_audio.export(output_file, format="mp3")
 
56
  return output_file
57
 
58
  with gr.Blocks() as demo:
59
+ gr.Markdown("## Custom TTS: 10개의 대화문 입력란에서 악센트를 선택하여 음성 생성하기")
60
+
61
+ with gr.Row():
62
+ text1 = gr.Textbox(label="Dialogue 1", placeholder="Enter text for Dialogue 1")
63
+ accent1 = gr.Dropdown(label="Accent for Dialogue 1", choices=["British", "American", "Australian"], value="British")
64
+
65
+ with gr.Row():
66
+ text2 = gr.Textbox(label="Dialogue 2", placeholder="Enter text for Dialogue 2")
67
+ accent2 = gr.Dropdown(label="Accent for Dialogue 2", choices=["British", "American", "Australian"], value="British")
68
 
69
+ with gr.Row():
70
+ text3 = gr.Textbox(label="Dialogue 3", placeholder="Enter text for Dialogue 3")
71
+ accent3 = gr.Dropdown(label="Accent for Dialogue 3", choices=["British", "American", "Australian"], value="British")
72
+
73
+ with gr.Row():
74
+ text4 = gr.Textbox(label="Dialogue 4", placeholder="Enter text for Dialogue 4")
75
+ accent4 = gr.Dropdown(label="Accent for Dialogue 4", choices=["British", "American", "Australian"], value="British")
76
 
 
77
  with gr.Row():
78
+ text5 = gr.Textbox(label="Dialogue 5", placeholder="Enter text for Dialogue 5")
79
+ accent5 = gr.Dropdown(label="Accent for Dialogue 5", choices=["British", "American", "Australian"], value="British")
80
 
 
81
  with gr.Row():
82
+ text6 = gr.Textbox(label="Dialogue 6", placeholder="Enter text for Dialogue 6")
83
+ accent6 = gr.Dropdown(label="Accent for Dialogue 6", choices=["British", "American", "Australian"], value="British")
84
 
 
85
  with gr.Row():
86
+ text7 = gr.Textbox(label="Dialogue 7", placeholder="Enter text for Dialogue 7")
87
+ accent7 = gr.Dropdown(label="Accent for Dialogue 7", choices=["British", "American", "Australian"], value="British")
88
 
 
89
  with gr.Row():
90
+ text8 = gr.Textbox(label="Dialogue 8", placeholder="Enter text for Dialogue 8")
91
+ accent8 = gr.Dropdown(label="Accent for Dialogue 8", choices=["British", "American", "Australian"], value="British")
92
 
93
+ with gr.Row():
94
+ text9 = gr.Textbox(label="Dialogue 9", placeholder="Enter text for Dialogue 9")
95
+ accent9 = gr.Dropdown(label="Accent for Dialogue 9", choices=["British", "American", "Australian"], value="British")
96
+
97
+ with gr.Row():
98
+ text10 = gr.Textbox(label="Dialogue 10", placeholder="Enter text for Dialogue 10")
99
+ accent10 = gr.Dropdown(label="Accent for Dialogue 10", choices=["British", "American", "Australian"], value="British")
100
 
101
  output_audio = gr.Audio(label="Generated Speech", type="filepath")
102
  generate_button = gr.Button("Generate Speech")
103
 
104
  generate_button.click(
105
+ custom_tts,
106
  inputs=[
107
+ text1, accent1,
108
+ text2, accent2,
109
+ text3, accent3,
110
+ text4, accent4,
111
+ text5, accent5,
112
+ text6, accent6,
113
+ text7, accent7,
114
+ text8, accent8,
115
+ text9, accent9,
116
+ text10, accent10
117
+ ],
118
  outputs=output_audio
119
  )
120
 
121
  if __name__ == "__main__":
122
+ demo.launch()