Spaces:
Running
Running
jhj0517
committed on
Commit
·
f1d9939
1
Parent(s):
a230be5
add diarization parameter
Browse files
- app.py +12 -3
- modules/whisper_parameter.py +13 -1
app.py
CHANGED
@@ -109,6 +109,9 @@ class App:
|
|
109 |
with gr.Accordion("Insanely Fast Whisper Parameters", open=False, visible=isinstance(self.whisper_inf, InsanelyFastWhisperInference)):
|
110 |
nb_chunk_length_s = gr.Number(label="Chunk Lengths (sec)", value=30, precision=0)
|
111 |
nb_batch_size = gr.Number(label="Batch Size", value=24, precision=0)
|
|
|
|
|
|
|
112 |
with gr.Row():
|
113 |
btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
|
114 |
with gr.Row():
|
@@ -138,7 +141,9 @@ class App:
|
|
138 |
window_size_sample=nb_window_size_sample,
|
139 |
speech_pad_ms=nb_speech_pad_ms,
|
140 |
chunk_length_s=nb_chunk_length_s,
|
141 |
-
batch_size=nb_batch_size
|
|
|
|
|
142 |
|
143 |
btn_run.click(fn=self.whisper_inf.transcribe_file,
|
144 |
inputs=params + whisper_params.to_list(),
|
@@ -218,7 +223,9 @@ class App:
|
|
218 |
window_size_sample=nb_window_size_sample,
|
219 |
speech_pad_ms=nb_speech_pad_ms,
|
220 |
chunk_length_s=nb_chunk_length_s,
|
221 |
-
batch_size=nb_batch_size
|
|
|
|
|
222 |
|
223 |
btn_run.click(fn=self.whisper_inf.transcribe_youtube,
|
224 |
inputs=params + whisper_params.to_list(),
|
@@ -290,7 +297,9 @@ class App:
|
|
290 |
window_size_sample=nb_window_size_sample,
|
291 |
speech_pad_ms=nb_speech_pad_ms,
|
292 |
chunk_length_s=nb_chunk_length_s,
|
293 |
-
batch_size=nb_batch_size
|
|
|
|
|
294 |
|
295 |
btn_run.click(fn=self.whisper_inf.transcribe_mic,
|
296 |
inputs=params + whisper_params.to_list(),
|
|
|
109 |
with gr.Accordion("Insanely Fast Whisper Parameters", open=False, visible=isinstance(self.whisper_inf, InsanelyFastWhisperInference)):
|
110 |
nb_chunk_length_s = gr.Number(label="Chunk Lengths (sec)", value=30, precision=0)
|
111 |
nb_batch_size = gr.Number(label="Batch Size", value=24, precision=0)
|
112 |
+
with gr.Accordion("Diarization Parameters", open=False):
|
113 |
+
cb_diarize = gr.Checkbox(label="Enable Diarization")
|
114 |
+
tb_hf_token = gr.Text(label="HuggingFace Token", value="")
|
115 |
with gr.Row():
|
116 |
btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
|
117 |
with gr.Row():
|
|
|
141 |
window_size_sample=nb_window_size_sample,
|
142 |
speech_pad_ms=nb_speech_pad_ms,
|
143 |
chunk_length_s=nb_chunk_length_s,
|
144 |
+
batch_size=nb_batch_size,
|
145 |
+
is_diarize=cb_diarize,
|
146 |
+
hf_token=tb_hf_token)
|
147 |
|
148 |
btn_run.click(fn=self.whisper_inf.transcribe_file,
|
149 |
inputs=params + whisper_params.to_list(),
|
|
|
223 |
window_size_sample=nb_window_size_sample,
|
224 |
speech_pad_ms=nb_speech_pad_ms,
|
225 |
chunk_length_s=nb_chunk_length_s,
|
226 |
+
batch_size=nb_batch_size,
|
227 |
+
is_diarize=cb_diarize,
|
228 |
+
hf_token=tb_hf_token)
|
229 |
|
230 |
btn_run.click(fn=self.whisper_inf.transcribe_youtube,
|
231 |
inputs=params + whisper_params.to_list(),
|
|
|
297 |
window_size_sample=nb_window_size_sample,
|
298 |
speech_pad_ms=nb_speech_pad_ms,
|
299 |
chunk_length_s=nb_chunk_length_s,
|
300 |
+
batch_size=nb_batch_size,
|
301 |
+
is_diarize=cb_diarize,
|
302 |
+
hf_token=tb_hf_token)
|
303 |
|
304 |
btn_run.click(fn=self.whisper_inf.transcribe_mic,
|
305 |
inputs=params + whisper_params.to_list(),
|
modules/whisper_parameter.py
CHANGED
@@ -27,6 +27,8 @@ class WhisperParameters:
|
|
27 |
speech_pad_ms: gr.Number
|
28 |
chunk_length_s: gr.Number
|
29 |
batch_size: gr.Number
|
|
|
|
|
30 |
"""
|
31 |
A data class for Gradio components of the Whisper Parameters. Use "before" Gradio pre-processing.
|
32 |
This data class is used to mitigate the key-value problem between Gradio components and function parameters.
|
@@ -122,6 +124,12 @@ class WhisperParameters:
|
|
122 |
|
123 |
batch_size: gr.Number
|
124 |
This parameter is related with insanely-fast-whisper pipe. Batch size to pass to the pipe
|
|
|
|
|
|
|
|
|
|
|
|
|
125 |
"""
|
126 |
|
127 |
def to_list(self) -> list:
|
@@ -168,7 +176,9 @@ class WhisperParameters:
|
|
168 |
window_size_samples=args[18],
|
169 |
speech_pad_ms=args[19],
|
170 |
chunk_length_s=args[20],
|
171 |
-
batch_size=args[21]
|
|
|
|
|
172 |
)
|
173 |
|
174 |
|
@@ -196,6 +206,8 @@ class WhisperValues:
|
|
196 |
speech_pad_ms: int
|
197 |
chunk_length_s: int
|
198 |
batch_size: int
|
|
|
|
|
199 |
"""
|
200 |
A data class to use Whisper parameters.
|
201 |
"""
|
|
|
27 |
speech_pad_ms: gr.Number
|
28 |
chunk_length_s: gr.Number
|
29 |
batch_size: gr.Number
|
30 |
+
is_diarize: gr.Checkbox
|
31 |
+
hf_token: gr.Textbox
|
32 |
"""
|
33 |
A data class for Gradio components of the Whisper Parameters. Use "before" Gradio pre-processing.
|
34 |
This data class is used to mitigate the key-value problem between Gradio components and function parameters.
|
|
|
124 |
|
125 |
batch_size: gr.Number
|
126 |
This parameter is related with insanely-fast-whisper pipe. Batch size to pass to the pipe
|
127 |
+
|
128 |
+
is_diarize: gr.Checkbox
|
129 |
+
This parameter is related with whisperx. Boolean value that determines whether to diarize or not.
|
130 |
+
|
131 |
+
hf_token: gr.Textbox
|
132 |
+
This parameter is related with whisperx. Huggingface token is needed to download diarization models.
|
133 |
"""
|
134 |
|
135 |
def to_list(self) -> list:
|
|
|
176 |
window_size_samples=args[18],
|
177 |
speech_pad_ms=args[19],
|
178 |
chunk_length_s=args[20],
|
179 |
+
batch_size=args[21],
|
180 |
+
is_diarize=args[22],
|
181 |
+
hf_token=args[23],
|
182 |
)
|
183 |
|
184 |
|
|
|
206 |
speech_pad_ms: int
|
207 |
chunk_length_s: int
|
208 |
batch_size: int
|
209 |
+
is_diarize: bool
|
210 |
+
hf_token: str
|
211 |
"""
|
212 |
A data class to use Whisper parameters.
|
213 |
"""
|