jhj0517 committed on
Commit
f1d9939
·
1 Parent(s): a230be5

add diarization parameter

Browse files
Files changed (2) hide show
  1. app.py +12 -3
  2. modules/whisper_parameter.py +13 -1
app.py CHANGED
@@ -109,6 +109,9 @@ class App:
109
  with gr.Accordion("Insanely Fast Whisper Parameters", open=False, visible=isinstance(self.whisper_inf, InsanelyFastWhisperInference)):
110
  nb_chunk_length_s = gr.Number(label="Chunk Lengths (sec)", value=30, precision=0)
111
  nb_batch_size = gr.Number(label="Batch Size", value=24, precision=0)
 
 
 
112
  with gr.Row():
113
  btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
114
  with gr.Row():
@@ -138,7 +141,9 @@ class App:
138
  window_size_sample=nb_window_size_sample,
139
  speech_pad_ms=nb_speech_pad_ms,
140
  chunk_length_s=nb_chunk_length_s,
141
- batch_size=nb_batch_size)
 
 
142
 
143
  btn_run.click(fn=self.whisper_inf.transcribe_file,
144
  inputs=params + whisper_params.to_list(),
@@ -218,7 +223,9 @@ class App:
218
  window_size_sample=nb_window_size_sample,
219
  speech_pad_ms=nb_speech_pad_ms,
220
  chunk_length_s=nb_chunk_length_s,
221
- batch_size=nb_batch_size)
 
 
222
 
223
  btn_run.click(fn=self.whisper_inf.transcribe_youtube,
224
  inputs=params + whisper_params.to_list(),
@@ -290,7 +297,9 @@ class App:
290
  window_size_sample=nb_window_size_sample,
291
  speech_pad_ms=nb_speech_pad_ms,
292
  chunk_length_s=nb_chunk_length_s,
293
- batch_size=nb_batch_size)
 
 
294
 
295
  btn_run.click(fn=self.whisper_inf.transcribe_mic,
296
  inputs=params + whisper_params.to_list(),
 
109
  with gr.Accordion("Insanely Fast Whisper Parameters", open=False, visible=isinstance(self.whisper_inf, InsanelyFastWhisperInference)):
110
  nb_chunk_length_s = gr.Number(label="Chunk Lengths (sec)", value=30, precision=0)
111
  nb_batch_size = gr.Number(label="Batch Size", value=24, precision=0)
112
+ with gr.Accordion("Diarization Parameters", open=False):
113
+ cb_diarize = gr.Checkbox(label="Enable Diarization")
114
+ tb_hf_token = gr.Text(label="HuggingFace Token", value="")
115
  with gr.Row():
116
  btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
117
  with gr.Row():
 
141
  window_size_sample=nb_window_size_sample,
142
  speech_pad_ms=nb_speech_pad_ms,
143
  chunk_length_s=nb_chunk_length_s,
144
+ batch_size=nb_batch_size,
145
+ is_diarize=cb_diarize,
146
+ hf_token=tb_hf_token)
147
 
148
  btn_run.click(fn=self.whisper_inf.transcribe_file,
149
  inputs=params + whisper_params.to_list(),
 
223
  window_size_sample=nb_window_size_sample,
224
  speech_pad_ms=nb_speech_pad_ms,
225
  chunk_length_s=nb_chunk_length_s,
226
+ batch_size=nb_batch_size,
227
+ is_diarize=cb_diarize,
228
+ hf_token=tb_hf_token)
229
 
230
  btn_run.click(fn=self.whisper_inf.transcribe_youtube,
231
  inputs=params + whisper_params.to_list(),
 
297
  window_size_sample=nb_window_size_sample,
298
  speech_pad_ms=nb_speech_pad_ms,
299
  chunk_length_s=nb_chunk_length_s,
300
+ batch_size=nb_batch_size,
301
+ is_diarize=cb_diarize,
302
+ hf_token=tb_hf_token)
303
 
304
  btn_run.click(fn=self.whisper_inf.transcribe_mic,
305
  inputs=params + whisper_params.to_list(),
modules/whisper_parameter.py CHANGED
@@ -27,6 +27,8 @@ class WhisperParameters:
27
  speech_pad_ms: gr.Number
28
  chunk_length_s: gr.Number
29
  batch_size: gr.Number
 
 
30
  """
31
  A data class for Gradio components of the Whisper Parameters. Use "before" Gradio pre-processing.
32
  This data class is used to mitigate the key-value problem between Gradio components and function parameters.
@@ -122,6 +124,12 @@ class WhisperParameters:
122
 
123
  batch_size: gr.Number
124
  This parameter is related with insanely-fast-whisper pipe. Batch size to pass to the pipe
 
 
 
 
 
 
125
  """
126
 
127
  def to_list(self) -> list:
@@ -168,7 +176,9 @@ class WhisperParameters:
168
  window_size_samples=args[18],
169
  speech_pad_ms=args[19],
170
  chunk_length_s=args[20],
171
- batch_size=args[21]
 
 
172
  )
173
 
174
 
@@ -196,6 +206,8 @@ class WhisperValues:
196
  speech_pad_ms: int
197
  chunk_length_s: int
198
  batch_size: int
 
 
199
  """
200
  A data class to use Whisper parameters.
201
  """
 
27
  speech_pad_ms: gr.Number
28
  chunk_length_s: gr.Number
29
  batch_size: gr.Number
30
+ is_diarize: gr.Checkbox
31
+ hf_token: gr.Textbox
32
  """
33
  A data class for Gradio components of the Whisper Parameters. Use "before" Gradio pre-processing.
34
  This data class is used to mitigate the key-value problem between Gradio components and function parameters.
 
124
 
125
  batch_size: gr.Number
126
  This parameter is related with insanely-fast-whisper pipe. Batch size to pass to the pipe
127
+
128
+ is_diarize: gr.Checkbox
129
+ This parameter is related with whisperx. Boolean value that determines whether to diarize or not.
130
+
131
+ hf_token: gr.Textbox
132
+ This parameter is related with whisperx. Huggingface token is needed to download diarization models.
133
  """
134
 
135
  def to_list(self) -> list:
 
176
  window_size_samples=args[18],
177
  speech_pad_ms=args[19],
178
  chunk_length_s=args[20],
179
+ batch_size=args[21],
180
+ is_diarize=args[22],
181
+ hf_token=args[23],
182
  )
183
 
184
 
 
206
  speech_pad_ms: int
207
  chunk_length_s: int
208
  batch_size: int
209
+ is_diarize: bool
210
+ hf_token: str
211
  """
212
  A data class to use Whisper parameters.
213
  """