jhj0517 committed on
Commit
5a66e88
·
1 Parent(s): a85ea1b

refactor `insanely-fast-whisper` parameters

Browse files
Files changed (1) hide show
  1. app.py +19 -18
app.py CHANGED
@@ -125,7 +125,7 @@ class App:
125
  info="Penalty applied to the score of previously generated tokens (set > 1 to penalize).")
126
  nb_no_repeat_ngram_size = gr.Number(label="No Repeat N-gram Size", value=0, precision=0,
127
  info="Prevent repetitions of n-grams with this size (set 0 to disable).")
128
- tb_prefix = gr.Textbox(label="Prefix", value="",
129
  info="Optional text to provide as a prefix for the first window.")
130
  cb_suppress_blank = gr.Checkbox(label="Suppress Blank", value=True,
131
  info="Suppress blank outputs at the beginning of the sampling.")
@@ -147,7 +147,7 @@ class App:
147
  nb_hallucination_silence_threshold = gr.Number(label="Hallucination Silence Threshold",
148
  value=None,
149
  info="When word_timestamps is True, skip silent periods longer than this threshold (in seconds) when a possible hallucination is detected.")
150
- tb_hotwords = gr.Textbox(label="Hotwords", value="",
151
  info="Hotwords/hint phrases to provide the model with. Has no effect if prefix is not None.")
152
  nb_language_detection_threshold = gr.Number(label="Language Detection Threshold",
153
  value=None,
@@ -155,6 +155,10 @@ class App:
155
  nb_language_detection_segments = gr.Number(label="Language Detection Segments", value=1,
156
  precision=0,
157
  info="Number of segments to consider for the language detection.")
 
 
 
 
158
  with gr.Accordion("VAD", open=False):
159
  cb_vad_filter = gr.Checkbox(label="Enable Silero VAD Filter", value=False, interactive=True)
160
  sd_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Speech Threshold",
@@ -173,10 +177,6 @@ class App:
173
  dd_diarization_device = gr.Dropdown(label="Device",
174
  choices=self.whisper_inf.diarizer.get_available_device(),
175
  value=self.whisper_inf.diarizer.get_device())
176
- with gr.Accordion("Insanely Fast Whisper Parameters", open=False,
177
- visible=isinstance(self.whisper_inf, InsanelyFastWhisperInference)):
178
- nb_chunk_length_s = gr.Number(label="Chunk Lengths (sec)", value=30, precision=0)
179
- nb_batch_size = gr.Number(label="Batch Size", value=24, precision=0)
180
  with gr.Row():
181
  btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
182
  with gr.Row():
@@ -279,7 +279,7 @@ class App:
279
  info="Penalty applied to the score of previously generated tokens (set > 1 to penalize).")
280
  nb_no_repeat_ngram_size = gr.Number(label="No Repeat N-gram Size", value=0, precision=0,
281
  info="Prevent repetitions of n-grams with this size (set 0 to disable).")
282
- tb_prefix = gr.Textbox(label="Prefix", value="",
283
  info="Optional text to provide as a prefix for the first window.")
284
  cb_suppress_blank = gr.Checkbox(label="Suppress Blank", value=True,
285
  info="Suppress blank outputs at the beginning of the sampling.")
@@ -301,7 +301,7 @@ class App:
301
  nb_hallucination_silence_threshold = gr.Number(label="Hallucination Silence Threshold",
302
  value=None,
303
  info="When word_timestamps is True, skip silent periods longer than this threshold (in seconds) when a possible hallucination is detected.")
304
- tb_hotwords = gr.Textbox(label="Hotwords", value="",
305
  info="Hotwords/hint phrases to provide the model with. Has no effect if prefix is not None.")
306
  nb_language_detection_threshold = gr.Number(label="Language Detection Threshold",
307
  value=None,
@@ -309,6 +309,10 @@ class App:
309
  nb_language_detection_segments = gr.Number(label="Language Detection Segments", value=1,
310
  precision=0,
311
  info="Number of segments to consider for the language detection.")
 
 
 
 
312
  with gr.Accordion("VAD", open=False):
313
  cb_vad_filter = gr.Checkbox(label="Enable Silero VAD Filter", value=False, interactive=True)
314
  sd_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Speech Threshold",
@@ -327,10 +331,6 @@ class App:
327
  dd_diarization_device = gr.Dropdown(label="Device",
328
  choices=self.whisper_inf.diarizer.get_available_device(),
329
  value=self.whisper_inf.diarizer.get_device())
330
- with gr.Accordion("Insanely Fast Whisper Parameters", open=False,
331
- visible=isinstance(self.whisper_inf, InsanelyFastWhisperInference)):
332
- nb_chunk_length_s = gr.Number(label="Chunk Lengths (sec)", value=30, precision=0)
333
- nb_batch_size = gr.Number(label="Batch Size", value=24, precision=0)
334
  with gr.Row():
335
  btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
336
  with gr.Row():
@@ -425,7 +425,7 @@ class App:
425
  info="Penalty applied to the score of previously generated tokens (set > 1 to penalize).")
426
  nb_no_repeat_ngram_size = gr.Number(label="No Repeat N-gram Size", value=0, precision=0,
427
  info="Prevent repetitions of n-grams with this size (set 0 to disable).")
428
- tb_prefix = gr.Textbox(label="Prefix", value="",
429
  info="Optional text to provide as a prefix for the first window.")
430
  cb_suppress_blank = gr.Checkbox(label="Suppress Blank", value=True,
431
  info="Suppress blank outputs at the beginning of the sampling.")
@@ -447,7 +447,7 @@ class App:
447
  nb_hallucination_silence_threshold = gr.Number(label="Hallucination Silence Threshold",
448
  value=None,
449
  info="When word_timestamps is True, skip silent periods longer than this threshold (in seconds) when a possible hallucination is detected.")
450
- tb_hotwords = gr.Textbox(label="Hotwords", value="",
451
  info="Hotwords/hint phrases to provide the model with. Has no effect if prefix is not None.")
452
  nb_language_detection_threshold = gr.Number(label="Language Detection Threshold",
453
  value=None,
@@ -455,6 +455,11 @@ class App:
455
  nb_language_detection_segments = gr.Number(label="Language Detection Segments", value=1,
456
  precision=0,
457
  info="Number of segments to consider for the language detection.")
 
 
 
 
 
458
  with gr.Accordion("VAD", open=False):
459
  cb_vad_filter = gr.Checkbox(label="Enable Silero VAD Filter", value=False, interactive=True)
460
  sd_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Speech Threshold",
@@ -473,10 +478,6 @@ class App:
473
  dd_diarization_device = gr.Dropdown(label="Device",
474
  choices=self.whisper_inf.diarizer.get_available_device(),
475
  value=self.whisper_inf.diarizer.get_device())
476
- with gr.Accordion("Insanely Fast Whisper Parameters", open=False,
477
- visible=isinstance(self.whisper_inf, InsanelyFastWhisperInference)):
478
- nb_chunk_length_s = gr.Number(label="Chunk Lengths (sec)", value=30, precision=0)
479
- nb_batch_size = gr.Number(label="Batch Size", value=24, precision=0)
480
  with gr.Row():
481
  btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
482
  with gr.Row():
 
125
  info="Penalty applied to the score of previously generated tokens (set > 1 to penalize).")
126
  nb_no_repeat_ngram_size = gr.Number(label="No Repeat N-gram Size", value=0, precision=0,
127
  info="Prevent repetitions of n-grams with this size (set 0 to disable).")
128
+ tb_prefix = gr.Textbox(label="Prefix", value=None,
129
  info="Optional text to provide as a prefix for the first window.")
130
  cb_suppress_blank = gr.Checkbox(label="Suppress Blank", value=True,
131
  info="Suppress blank outputs at the beginning of the sampling.")
 
147
  nb_hallucination_silence_threshold = gr.Number(label="Hallucination Silence Threshold",
148
  value=None,
149
  info="When word_timestamps is True, skip silent periods longer than this threshold (in seconds) when a possible hallucination is detected.")
150
+ tb_hotwords = gr.Textbox(label="Hotwords", value=None,
151
  info="Hotwords/hint phrases to provide the model with. Has no effect if prefix is not None.")
152
  nb_language_detection_threshold = gr.Number(label="Language Detection Threshold",
153
  value=None,
 
155
  nb_language_detection_segments = gr.Number(label="Language Detection Segments", value=1,
156
  precision=0,
157
  info="Number of segments to consider for the language detection.")
158
+
159
+ with gr.Group(visible=isinstance(self.whisper_inf, InsanelyFastWhisperInference)):
160
+ nb_chunk_length_s = gr.Number(label="Chunk Lengths (sec)", value=30, precision=0)
161
+ nb_batch_size = gr.Number(label="Batch Size", value=24, precision=0)
162
  with gr.Accordion("VAD", open=False):
163
  cb_vad_filter = gr.Checkbox(label="Enable Silero VAD Filter", value=False, interactive=True)
164
  sd_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Speech Threshold",
 
177
  dd_diarization_device = gr.Dropdown(label="Device",
178
  choices=self.whisper_inf.diarizer.get_available_device(),
179
  value=self.whisper_inf.diarizer.get_device())
 
 
 
 
180
  with gr.Row():
181
  btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
182
  with gr.Row():
 
279
  info="Penalty applied to the score of previously generated tokens (set > 1 to penalize).")
280
  nb_no_repeat_ngram_size = gr.Number(label="No Repeat N-gram Size", value=0, precision=0,
281
  info="Prevent repetitions of n-grams with this size (set 0 to disable).")
282
+ tb_prefix = gr.Textbox(label="Prefix", value=None,
283
  info="Optional text to provide as a prefix for the first window.")
284
  cb_suppress_blank = gr.Checkbox(label="Suppress Blank", value=True,
285
  info="Suppress blank outputs at the beginning of the sampling.")
 
301
  nb_hallucination_silence_threshold = gr.Number(label="Hallucination Silence Threshold",
302
  value=None,
303
  info="When word_timestamps is True, skip silent periods longer than this threshold (in seconds) when a possible hallucination is detected.")
304
+ tb_hotwords = gr.Textbox(label="Hotwords", value=None,
305
  info="Hotwords/hint phrases to provide the model with. Has no effect if prefix is not None.")
306
  nb_language_detection_threshold = gr.Number(label="Language Detection Threshold",
307
  value=None,
 
309
  nb_language_detection_segments = gr.Number(label="Language Detection Segments", value=1,
310
  precision=0,
311
  info="Number of segments to consider for the language detection.")
312
+
313
+ with gr.Group(visible=isinstance(self.whisper_inf, InsanelyFastWhisperInference)):
314
+ nb_chunk_length_s = gr.Number(label="Chunk Lengths (sec)", value=30, precision=0)
315
+ nb_batch_size = gr.Number(label="Batch Size", value=24, precision=0)
316
  with gr.Accordion("VAD", open=False):
317
  cb_vad_filter = gr.Checkbox(label="Enable Silero VAD Filter", value=False, interactive=True)
318
  sd_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Speech Threshold",
 
331
  dd_diarization_device = gr.Dropdown(label="Device",
332
  choices=self.whisper_inf.diarizer.get_available_device(),
333
  value=self.whisper_inf.diarizer.get_device())
 
 
 
 
334
  with gr.Row():
335
  btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
336
  with gr.Row():
 
425
  info="Penalty applied to the score of previously generated tokens (set > 1 to penalize).")
426
  nb_no_repeat_ngram_size = gr.Number(label="No Repeat N-gram Size", value=0, precision=0,
427
  info="Prevent repetitions of n-grams with this size (set 0 to disable).")
428
+ tb_prefix = gr.Textbox(label="Prefix", value=None,
429
  info="Optional text to provide as a prefix for the first window.")
430
  cb_suppress_blank = gr.Checkbox(label="Suppress Blank", value=True,
431
  info="Suppress blank outputs at the beginning of the sampling.")
 
447
  nb_hallucination_silence_threshold = gr.Number(label="Hallucination Silence Threshold",
448
  value=None,
449
  info="When word_timestamps is True, skip silent periods longer than this threshold (in seconds) when a possible hallucination is detected.")
450
+ tb_hotwords = gr.Textbox(label="Hotwords", value=None,
451
  info="Hotwords/hint phrases to provide the model with. Has no effect if prefix is not None.")
452
  nb_language_detection_threshold = gr.Number(label="Language Detection Threshold",
453
  value=None,
 
455
  nb_language_detection_segments = gr.Number(label="Language Detection Segments", value=1,
456
  precision=0,
457
  info="Number of segments to consider for the language detection.")
458
+
459
+ with gr.Group(visible=isinstance(self.whisper_inf, InsanelyFastWhisperInference)):
460
+ nb_chunk_length_s = gr.Number(label="Chunk Lengths (sec)", value=30, precision=0)
461
+ nb_batch_size = gr.Number(label="Batch Size", value=24, precision=0)
462
+
463
  with gr.Accordion("VAD", open=False):
464
  cb_vad_filter = gr.Checkbox(label="Enable Silero VAD Filter", value=False, interactive=True)
465
  sd_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Speech Threshold",
 
478
  dd_diarization_device = gr.Dropdown(label="Device",
479
  choices=self.whisper_inf.diarizer.get_available_device(),
480
  value=self.whisper_inf.diarizer.get_device())
 
 
 
 
481
  with gr.Row():
482
  btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
483
  with gr.Row():