Fedir Zadniprovskyi committed on
Commit
526f427
·
1 Parent(s): 7785332

fix: gradio app breaks on arm

Browse files
src/faster_whisper_server/gradio_app.py CHANGED
@@ -1,5 +1,6 @@
1
  from collections.abc import AsyncGenerator
2
  from pathlib import Path
 
3
 
4
  import gradio as gr
5
  import httpx
@@ -9,14 +10,6 @@ from openai import AsyncOpenAI
9
  from faster_whisper_server.config import Config, Task
10
  from faster_whisper_server.hf_utils import PiperModel
11
 
12
- # FIX: this won't work on ARM
13
- from faster_whisper_server.routers.speech import (
14
- DEFAULT_VOICE,
15
- MAX_SAMPLE_RATE,
16
- MIN_SAMPLE_RATE,
17
- SUPPORTED_RESPONSE_FORMATS,
18
- )
19
-
20
  TRANSCRIPTION_ENDPOINT = "/v1/audio/transcriptions"
21
  TRANSLATION_ENDPOINT = "/v1/audio/translations"
22
  TIMEOUT_SECONDS = 180
@@ -163,13 +156,20 @@ def create_gradio_demo(config: Config) -> gr.Blocks: # noqa: C901, PLR0915
163
  )
164
 
165
  with gr.Tab(label="Speech Generation"):
166
- # TODO: add warning about ARM
167
- text = gr.Textbox(label="Input Text")
168
- voice_dropdown = gr.Dropdown(
169
- choices=["en_US-amy-medium"],
170
- label="Voice",
171
- value="en_US-amy-medium",
172
- info="""
 
 
 
 
 
 
 
173
  The last part of the voice name is the quality (x_low, low, medium, high).
174
  Each quality has a different default sample rate:
175
  - x_low: 16000 Hz
@@ -177,32 +177,34 @@ Each quality has a different default sample rate:
177
  - medium: 22050 Hz
178
  - high: 22050 Hz
179
  """,
180
- )
181
- response_fromat_dropdown = gr.Dropdown(
182
- choices=SUPPORTED_RESPONSE_FORMATS,
183
- label="Response Format",
184
- value="wav",
185
- )
186
- speed_slider = gr.Slider(minimum=0.25, maximum=4.0, step=0.05, label="Speed", value=1.0)
187
- sample_rate_slider = gr.Number(
188
- minimum=MIN_SAMPLE_RATE,
189
- maximum=MAX_SAMPLE_RATE,
190
- label="Desired Sample Rate",
191
- info="""
192
  Setting this will resample the generated audio to the desired sample rate.
193
  You may want to set this if you are going to use voices of different qualities but want to keep the same sample rate.
194
  Default: None (No resampling)
195
  """,
196
- value=lambda: None,
197
- )
198
- button = gr.Button("Generate Speech")
199
- output = gr.Audio(type="filepath")
200
- button.click(
201
- handle_audio_speech,
202
- [text, voice_dropdown, response_fromat_dropdown, speed_slider, sample_rate_slider],
203
- output,
204
- )
 
 
 
205
 
206
  demo.load(update_whisper_model_dropdown, inputs=None, outputs=model_dropdown)
207
- demo.load(update_piper_voices_dropdown, inputs=None, outputs=voice_dropdown)
208
  return demo
 
1
  from collections.abc import AsyncGenerator
2
  from pathlib import Path
3
+ import platform
4
 
5
  import gradio as gr
6
  import httpx
 
10
  from faster_whisper_server.config import Config, Task
11
  from faster_whisper_server.hf_utils import PiperModel
12
 
 
 
 
 
 
 
 
 
13
  TRANSCRIPTION_ENDPOINT = "/v1/audio/transcriptions"
14
  TRANSLATION_ENDPOINT = "/v1/audio/translations"
15
  TIMEOUT_SECONDS = 180
 
156
  )
157
 
158
  with gr.Tab(label="Speech Generation"):
159
+ if platform.machine() != "x86_64":
160
+ from faster_whisper_server.routers.speech import (
161
+ DEFAULT_VOICE,
162
+ MAX_SAMPLE_RATE,
163
+ MIN_SAMPLE_RATE,
164
+ SUPPORTED_RESPONSE_FORMATS,
165
+ )
166
+
167
+ text = gr.Textbox(label="Input Text")
168
+ voice_dropdown = gr.Dropdown(
169
+ choices=["en_US-amy-medium"],
170
+ label="Voice",
171
+ value="en_US-amy-medium",
172
+ info="""
173
  The last part of the voice name is the quality (x_low, low, medium, high).
174
  Each quality has a different default sample rate:
175
  - x_low: 16000 Hz
 
177
  - medium: 22050 Hz
178
  - high: 22050 Hz
179
  """,
180
+ )
181
+ response_fromat_dropdown = gr.Dropdown(
182
+ choices=SUPPORTED_RESPONSE_FORMATS,
183
+ label="Response Format",
184
+ value="wav",
185
+ )
186
+ speed_slider = gr.Slider(minimum=0.25, maximum=4.0, step=0.05, label="Speed", value=1.0)
187
+ sample_rate_slider = gr.Number(
188
+ minimum=MIN_SAMPLE_RATE,
189
+ maximum=MAX_SAMPLE_RATE,
190
+ label="Desired Sample Rate",
191
+ info="""
192
  Setting this will resample the generated audio to the desired sample rate.
193
  You may want to set this if you are going to use voices of different qualities but want to keep the same sample rate.
194
  Default: None (No resampling)
195
  """,
196
+ value=lambda: None,
197
+ )
198
+ button = gr.Button("Generate Speech")
199
+ output = gr.Audio(type="filepath")
200
+ button.click(
201
+ handle_audio_speech,
202
+ [text, voice_dropdown, response_fromat_dropdown, speed_slider, sample_rate_slider],
203
+ output,
204
+ )
205
+ demo.load(update_piper_voices_dropdown, inputs=None, outputs=voice_dropdown)
206
+ else:
207
+ gr.Textbox("Speech generation is only supported on x86_64 machines.")
208
 
209
  demo.load(update_whisper_model_dropdown, inputs=None, outputs=model_dropdown)
 
210
  return demo