Spaces:
Configuration error
Configuration error
Fedir Zadniprovskyi
committed on
Commit
·
526f427
1
Parent(s):
7785332
fix: gradio app breaks on arm
Browse files
src/faster_whisper_server/gradio_app.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
from collections.abc import AsyncGenerator
|
2 |
from pathlib import Path
|
|
|
3 |
|
4 |
import gradio as gr
|
5 |
import httpx
|
@@ -9,14 +10,6 @@ from openai import AsyncOpenAI
|
|
9 |
from faster_whisper_server.config import Config, Task
|
10 |
from faster_whisper_server.hf_utils import PiperModel
|
11 |
|
12 |
-
# FIX: this won't work on ARM
|
13 |
-
from faster_whisper_server.routers.speech import (
|
14 |
-
DEFAULT_VOICE,
|
15 |
-
MAX_SAMPLE_RATE,
|
16 |
-
MIN_SAMPLE_RATE,
|
17 |
-
SUPPORTED_RESPONSE_FORMATS,
|
18 |
-
)
|
19 |
-
|
20 |
TRANSCRIPTION_ENDPOINT = "/v1/audio/transcriptions"
|
21 |
TRANSLATION_ENDPOINT = "/v1/audio/translations"
|
22 |
TIMEOUT_SECONDS = 180
|
@@ -163,13 +156,20 @@ def create_gradio_demo(config: Config) -> gr.Blocks: # noqa: C901, PLR0915
|
|
163 |
)
|
164 |
|
165 |
with gr.Tab(label="Speech Generation"):
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
173 |
The last part of the voice name is the quality (x_low, low, medium, high).
|
174 |
Each quality has a different default sample rate:
|
175 |
- x_low: 16000 Hz
|
@@ -177,32 +177,34 @@ Each quality has a different default sample rate:
|
|
177 |
- medium: 22050 Hz
|
178 |
- high: 22050 Hz
|
179 |
""",
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
Setting this will resample the generated audio to the desired sample rate.
|
193 |
You may want to set this if you are going to use voices of different qualities but want to keep the same sample rate.
|
194 |
Default: None (No resampling)
|
195 |
""",
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
|
|
|
|
|
|
205 |
|
206 |
demo.load(update_whisper_model_dropdown, inputs=None, outputs=model_dropdown)
|
207 |
-
demo.load(update_piper_voices_dropdown, inputs=None, outputs=voice_dropdown)
|
208 |
return demo
|
|
|
1 |
from collections.abc import AsyncGenerator
|
2 |
from pathlib import Path
|
3 |
+
import platform
|
4 |
|
5 |
import gradio as gr
|
6 |
import httpx
|
|
|
10 |
from faster_whisper_server.config import Config, Task
|
11 |
from faster_whisper_server.hf_utils import PiperModel
|
12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
TRANSCRIPTION_ENDPOINT = "/v1/audio/transcriptions"
|
14 |
TRANSLATION_ENDPOINT = "/v1/audio/translations"
|
15 |
TIMEOUT_SECONDS = 180
|
|
|
156 |
)
|
157 |
|
158 |
with gr.Tab(label="Speech Generation"):
|
159 |
+
if platform.machine() != "x86_64":
|
160 |
+
from faster_whisper_server.routers.speech import (
|
161 |
+
DEFAULT_VOICE,
|
162 |
+
MAX_SAMPLE_RATE,
|
163 |
+
MIN_SAMPLE_RATE,
|
164 |
+
SUPPORTED_RESPONSE_FORMATS,
|
165 |
+
)
|
166 |
+
|
167 |
+
text = gr.Textbox(label="Input Text")
|
168 |
+
voice_dropdown = gr.Dropdown(
|
169 |
+
choices=["en_US-amy-medium"],
|
170 |
+
label="Voice",
|
171 |
+
value="en_US-amy-medium",
|
172 |
+
info="""
|
173 |
The last part of the voice name is the quality (x_low, low, medium, high).
|
174 |
Each quality has a different default sample rate:
|
175 |
- x_low: 16000 Hz
|
|
|
177 |
- medium: 22050 Hz
|
178 |
- high: 22050 Hz
|
179 |
""",
|
180 |
+
)
|
181 |
+
response_fromat_dropdown = gr.Dropdown(
|
182 |
+
choices=SUPPORTED_RESPONSE_FORMATS,
|
183 |
+
label="Response Format",
|
184 |
+
value="wav",
|
185 |
+
)
|
186 |
+
speed_slider = gr.Slider(minimum=0.25, maximum=4.0, step=0.05, label="Speed", value=1.0)
|
187 |
+
sample_rate_slider = gr.Number(
|
188 |
+
minimum=MIN_SAMPLE_RATE,
|
189 |
+
maximum=MAX_SAMPLE_RATE,
|
190 |
+
label="Desired Sample Rate",
|
191 |
+
info="""
|
192 |
Setting this will resample the generated audio to the desired sample rate.
|
193 |
You may want to set this if you are going to use voices of different qualities but want to keep the same sample rate.
|
194 |
Default: None (No resampling)
|
195 |
""",
|
196 |
+
value=lambda: None,
|
197 |
+
)
|
198 |
+
button = gr.Button("Generate Speech")
|
199 |
+
output = gr.Audio(type="filepath")
|
200 |
+
button.click(
|
201 |
+
handle_audio_speech,
|
202 |
+
[text, voice_dropdown, response_fromat_dropdown, speed_slider, sample_rate_slider],
|
203 |
+
output,
|
204 |
+
)
|
205 |
+
demo.load(update_piper_voices_dropdown, inputs=None, outputs=voice_dropdown)
|
206 |
+
else:
|
207 |
+
gr.Textbox("Speech generation is only supported on x86_64 machines.")
|
208 |
|
209 |
demo.load(update_whisper_model_dropdown, inputs=None, outputs=model_dropdown)
|
|
|
210 |
return demo
|