Spaces:
Running
Running
jhj0517
committed on
Commit
·
5a66e88
1
Parent(s):
a85ea1b
refactor `insanely-fast-whisper` parameters
Browse files
app.py
CHANGED
@@ -125,7 +125,7 @@ class App:
|
|
125 |
info="Penalty applied to the score of previously generated tokens (set > 1 to penalize).")
|
126 |
nb_no_repeat_ngram_size = gr.Number(label="No Repeat N-gram Size", value=0, precision=0,
|
127 |
info="Prevent repetitions of n-grams with this size (set 0 to disable).")
|
128 |
-
tb_prefix = gr.Textbox(label="Prefix", value=
|
129 |
info="Optional text to provide as a prefix for the first window.")
|
130 |
cb_suppress_blank = gr.Checkbox(label="Suppress Blank", value=True,
|
131 |
info="Suppress blank outputs at the beginning of the sampling.")
|
@@ -147,7 +147,7 @@ class App:
|
|
147 |
nb_hallucination_silence_threshold = gr.Number(label="Hallucination Silence Threshold",
|
148 |
value=None,
|
149 |
info="When word_timestamps is True, skip silent periods longer than this threshold (in seconds) when a possible hallucination is detected.")
|
150 |
-
tb_hotwords = gr.Textbox(label="Hotwords", value=
|
151 |
info="Hotwords/hint phrases to provide the model with. Has no effect if prefix is not None.")
|
152 |
nb_language_detection_threshold = gr.Number(label="Language Detection Threshold",
|
153 |
value=None,
|
@@ -155,6 +155,10 @@ class App:
|
|
155 |
nb_language_detection_segments = gr.Number(label="Language Detection Segments", value=1,
|
156 |
precision=0,
|
157 |
info="Number of segments to consider for the language detection.")
|
|
|
|
|
|
|
|
|
158 |
with gr.Accordion("VAD", open=False):
|
159 |
cb_vad_filter = gr.Checkbox(label="Enable Silero VAD Filter", value=False, interactive=True)
|
160 |
sd_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Speech Threshold",
|
@@ -173,10 +177,6 @@ class App:
|
|
173 |
dd_diarization_device = gr.Dropdown(label="Device",
|
174 |
choices=self.whisper_inf.diarizer.get_available_device(),
|
175 |
value=self.whisper_inf.diarizer.get_device())
|
176 |
-
with gr.Accordion("Insanely Fast Whisper Parameters", open=False,
|
177 |
-
visible=isinstance(self.whisper_inf, InsanelyFastWhisperInference)):
|
178 |
-
nb_chunk_length_s = gr.Number(label="Chunk Lengths (sec)", value=30, precision=0)
|
179 |
-
nb_batch_size = gr.Number(label="Batch Size", value=24, precision=0)
|
180 |
with gr.Row():
|
181 |
btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
|
182 |
with gr.Row():
|
@@ -279,7 +279,7 @@ class App:
|
|
279 |
info="Penalty applied to the score of previously generated tokens (set > 1 to penalize).")
|
280 |
nb_no_repeat_ngram_size = gr.Number(label="No Repeat N-gram Size", value=0, precision=0,
|
281 |
info="Prevent repetitions of n-grams with this size (set 0 to disable).")
|
282 |
-
tb_prefix = gr.Textbox(label="Prefix", value=
|
283 |
info="Optional text to provide as a prefix for the first window.")
|
284 |
cb_suppress_blank = gr.Checkbox(label="Suppress Blank", value=True,
|
285 |
info="Suppress blank outputs at the beginning of the sampling.")
|
@@ -301,7 +301,7 @@ class App:
|
|
301 |
nb_hallucination_silence_threshold = gr.Number(label="Hallucination Silence Threshold",
|
302 |
value=None,
|
303 |
info="When word_timestamps is True, skip silent periods longer than this threshold (in seconds) when a possible hallucination is detected.")
|
304 |
-
tb_hotwords = gr.Textbox(label="Hotwords", value=
|
305 |
info="Hotwords/hint phrases to provide the model with. Has no effect if prefix is not None.")
|
306 |
nb_language_detection_threshold = gr.Number(label="Language Detection Threshold",
|
307 |
value=None,
|
@@ -309,6 +309,10 @@ class App:
|
|
309 |
nb_language_detection_segments = gr.Number(label="Language Detection Segments", value=1,
|
310 |
precision=0,
|
311 |
info="Number of segments to consider for the language detection.")
|
|
|
|
|
|
|
|
|
312 |
with gr.Accordion("VAD", open=False):
|
313 |
cb_vad_filter = gr.Checkbox(label="Enable Silero VAD Filter", value=False, interactive=True)
|
314 |
sd_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Speech Threshold",
|
@@ -327,10 +331,6 @@ class App:
|
|
327 |
dd_diarization_device = gr.Dropdown(label="Device",
|
328 |
choices=self.whisper_inf.diarizer.get_available_device(),
|
329 |
value=self.whisper_inf.diarizer.get_device())
|
330 |
-
with gr.Accordion("Insanely Fast Whisper Parameters", open=False,
|
331 |
-
visible=isinstance(self.whisper_inf, InsanelyFastWhisperInference)):
|
332 |
-
nb_chunk_length_s = gr.Number(label="Chunk Lengths (sec)", value=30, precision=0)
|
333 |
-
nb_batch_size = gr.Number(label="Batch Size", value=24, precision=0)
|
334 |
with gr.Row():
|
335 |
btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
|
336 |
with gr.Row():
|
@@ -425,7 +425,7 @@ class App:
|
|
425 |
info="Penalty applied to the score of previously generated tokens (set > 1 to penalize).")
|
426 |
nb_no_repeat_ngram_size = gr.Number(label="No Repeat N-gram Size", value=0, precision=0,
|
427 |
info="Prevent repetitions of n-grams with this size (set 0 to disable).")
|
428 |
-
tb_prefix = gr.Textbox(label="Prefix", value=
|
429 |
info="Optional text to provide as a prefix for the first window.")
|
430 |
cb_suppress_blank = gr.Checkbox(label="Suppress Blank", value=True,
|
431 |
info="Suppress blank outputs at the beginning of the sampling.")
|
@@ -447,7 +447,7 @@ class App:
|
|
447 |
nb_hallucination_silence_threshold = gr.Number(label="Hallucination Silence Threshold",
|
448 |
value=None,
|
449 |
info="When word_timestamps is True, skip silent periods longer than this threshold (in seconds) when a possible hallucination is detected.")
|
450 |
-
tb_hotwords = gr.Textbox(label="Hotwords", value=
|
451 |
info="Hotwords/hint phrases to provide the model with. Has no effect if prefix is not None.")
|
452 |
nb_language_detection_threshold = gr.Number(label="Language Detection Threshold",
|
453 |
value=None,
|
@@ -455,6 +455,11 @@ class App:
|
|
455 |
nb_language_detection_segments = gr.Number(label="Language Detection Segments", value=1,
|
456 |
precision=0,
|
457 |
info="Number of segments to consider for the language detection.")
|
|
|
|
|
|
|
|
|
|
|
458 |
with gr.Accordion("VAD", open=False):
|
459 |
cb_vad_filter = gr.Checkbox(label="Enable Silero VAD Filter", value=False, interactive=True)
|
460 |
sd_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Speech Threshold",
|
@@ -473,10 +478,6 @@ class App:
|
|
473 |
dd_diarization_device = gr.Dropdown(label="Device",
|
474 |
choices=self.whisper_inf.diarizer.get_available_device(),
|
475 |
value=self.whisper_inf.diarizer.get_device())
|
476 |
-
with gr.Accordion("Insanely Fast Whisper Parameters", open=False,
|
477 |
-
visible=isinstance(self.whisper_inf, InsanelyFastWhisperInference)):
|
478 |
-
nb_chunk_length_s = gr.Number(label="Chunk Lengths (sec)", value=30, precision=0)
|
479 |
-
nb_batch_size = gr.Number(label="Batch Size", value=24, precision=0)
|
480 |
with gr.Row():
|
481 |
btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
|
482 |
with gr.Row():
|
|
|
125 |
info="Penalty applied to the score of previously generated tokens (set > 1 to penalize).")
|
126 |
nb_no_repeat_ngram_size = gr.Number(label="No Repeat N-gram Size", value=0, precision=0,
|
127 |
info="Prevent repetitions of n-grams with this size (set 0 to disable).")
|
128 |
+
tb_prefix = gr.Textbox(label="Prefix", value=None,
|
129 |
info="Optional text to provide as a prefix for the first window.")
|
130 |
cb_suppress_blank = gr.Checkbox(label="Suppress Blank", value=True,
|
131 |
info="Suppress blank outputs at the beginning of the sampling.")
|
|
|
147 |
nb_hallucination_silence_threshold = gr.Number(label="Hallucination Silence Threshold",
|
148 |
value=None,
|
149 |
info="When word_timestamps is True, skip silent periods longer than this threshold (in seconds) when a possible hallucination is detected.")
|
150 |
+
tb_hotwords = gr.Textbox(label="Hotwords", value=None,
|
151 |
info="Hotwords/hint phrases to provide the model with. Has no effect if prefix is not None.")
|
152 |
nb_language_detection_threshold = gr.Number(label="Language Detection Threshold",
|
153 |
value=None,
|
|
|
155 |
nb_language_detection_segments = gr.Number(label="Language Detection Segments", value=1,
|
156 |
precision=0,
|
157 |
info="Number of segments to consider for the language detection.")
|
158 |
+
|
159 |
+
with gr.Group(visible=isinstance(self.whisper_inf, InsanelyFastWhisperInference)):
|
160 |
+
nb_chunk_length_s = gr.Number(label="Chunk Lengths (sec)", value=30, precision=0)
|
161 |
+
nb_batch_size = gr.Number(label="Batch Size", value=24, precision=0)
|
162 |
with gr.Accordion("VAD", open=False):
|
163 |
cb_vad_filter = gr.Checkbox(label="Enable Silero VAD Filter", value=False, interactive=True)
|
164 |
sd_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Speech Threshold",
|
|
|
177 |
dd_diarization_device = gr.Dropdown(label="Device",
|
178 |
choices=self.whisper_inf.diarizer.get_available_device(),
|
179 |
value=self.whisper_inf.diarizer.get_device())
|
|
|
|
|
|
|
|
|
180 |
with gr.Row():
|
181 |
btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
|
182 |
with gr.Row():
|
|
|
279 |
info="Penalty applied to the score of previously generated tokens (set > 1 to penalize).")
|
280 |
nb_no_repeat_ngram_size = gr.Number(label="No Repeat N-gram Size", value=0, precision=0,
|
281 |
info="Prevent repetitions of n-grams with this size (set 0 to disable).")
|
282 |
+
tb_prefix = gr.Textbox(label="Prefix", value=None,
|
283 |
info="Optional text to provide as a prefix for the first window.")
|
284 |
cb_suppress_blank = gr.Checkbox(label="Suppress Blank", value=True,
|
285 |
info="Suppress blank outputs at the beginning of the sampling.")
|
|
|
301 |
nb_hallucination_silence_threshold = gr.Number(label="Hallucination Silence Threshold",
|
302 |
value=None,
|
303 |
info="When word_timestamps is True, skip silent periods longer than this threshold (in seconds) when a possible hallucination is detected.")
|
304 |
+
tb_hotwords = gr.Textbox(label="Hotwords", value=None,
|
305 |
info="Hotwords/hint phrases to provide the model with. Has no effect if prefix is not None.")
|
306 |
nb_language_detection_threshold = gr.Number(label="Language Detection Threshold",
|
307 |
value=None,
|
|
|
309 |
nb_language_detection_segments = gr.Number(label="Language Detection Segments", value=1,
|
310 |
precision=0,
|
311 |
info="Number of segments to consider for the language detection.")
|
312 |
+
|
313 |
+
with gr.Group(visible=isinstance(self.whisper_inf, InsanelyFastWhisperInference)):
|
314 |
+
nb_chunk_length_s = gr.Number(label="Chunk Lengths (sec)", value=30, precision=0)
|
315 |
+
nb_batch_size = gr.Number(label="Batch Size", value=24, precision=0)
|
316 |
with gr.Accordion("VAD", open=False):
|
317 |
cb_vad_filter = gr.Checkbox(label="Enable Silero VAD Filter", value=False, interactive=True)
|
318 |
sd_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Speech Threshold",
|
|
|
331 |
dd_diarization_device = gr.Dropdown(label="Device",
|
332 |
choices=self.whisper_inf.diarizer.get_available_device(),
|
333 |
value=self.whisper_inf.diarizer.get_device())
|
|
|
|
|
|
|
|
|
334 |
with gr.Row():
|
335 |
btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
|
336 |
with gr.Row():
|
|
|
425 |
info="Penalty applied to the score of previously generated tokens (set > 1 to penalize).")
|
426 |
nb_no_repeat_ngram_size = gr.Number(label="No Repeat N-gram Size", value=0, precision=0,
|
427 |
info="Prevent repetitions of n-grams with this size (set 0 to disable).")
|
428 |
+
tb_prefix = gr.Textbox(label="Prefix", value=None,
|
429 |
info="Optional text to provide as a prefix for the first window.")
|
430 |
cb_suppress_blank = gr.Checkbox(label="Suppress Blank", value=True,
|
431 |
info="Suppress blank outputs at the beginning of the sampling.")
|
|
|
447 |
nb_hallucination_silence_threshold = gr.Number(label="Hallucination Silence Threshold",
|
448 |
value=None,
|
449 |
info="When word_timestamps is True, skip silent periods longer than this threshold (in seconds) when a possible hallucination is detected.")
|
450 |
+
tb_hotwords = gr.Textbox(label="Hotwords", value=None,
|
451 |
info="Hotwords/hint phrases to provide the model with. Has no effect if prefix is not None.")
|
452 |
nb_language_detection_threshold = gr.Number(label="Language Detection Threshold",
|
453 |
value=None,
|
|
|
455 |
nb_language_detection_segments = gr.Number(label="Language Detection Segments", value=1,
|
456 |
precision=0,
|
457 |
info="Number of segments to consider for the language detection.")
|
458 |
+
|
459 |
+
with gr.Group(visible=isinstance(self.whisper_inf, InsanelyFastWhisperInference)):
|
460 |
+
nb_chunk_length_s = gr.Number(label="Chunk Lengths (sec)", value=30, precision=0)
|
461 |
+
nb_batch_size = gr.Number(label="Batch Size", value=24, precision=0)
|
462 |
+
|
463 |
with gr.Accordion("VAD", open=False):
|
464 |
cb_vad_filter = gr.Checkbox(label="Enable Silero VAD Filter", value=False, interactive=True)
|
465 |
sd_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Speech Threshold",
|
|
|
478 |
dd_diarization_device = gr.Dropdown(label="Device",
|
479 |
choices=self.whisper_inf.diarizer.get_available_device(),
|
480 |
value=self.whisper_inf.diarizer.get_device())
|
|
|
|
|
|
|
|
|
481 |
with gr.Row():
|
482 |
btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
|
483 |
with gr.Row():
|