Spaces:

jhj0517
/

Whisper-WebUI

Running

App Files Community

jhj0517 committed on Jun 25, 2024

Commit

f1d9939

1 Parent(s): a230be5

add diarization parameter

Browse files

Files changed (2) hide show

app.py +12 -3
modules/whisper_parameter.py +13 -1

app.py CHANGED Viewed

@@ -109,6 +109,9 @@ class App:
                     with gr.Accordion("Insanely Fast Whisper Parameters", open=False, visible=isinstance(self.whisper_inf, InsanelyFastWhisperInference)):
                         nb_chunk_length_s = gr.Number(label="Chunk Lengths (sec)", value=30, precision=0)
                         nb_batch_size = gr.Number(label="Batch Size", value=24, precision=0)
                     with gr.Row():
                         btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
                     with gr.Row():
@@ -138,7 +141,9 @@ class App:
                                                        window_size_sample=nb_window_size_sample,
                                                        speech_pad_ms=nb_speech_pad_ms,
                                                        chunk_length_s=nb_chunk_length_s,
-                                                       batch_size=nb_batch_size)
                     btn_run.click(fn=self.whisper_inf.transcribe_file,
                                   inputs=params + whisper_params.to_list(),
@@ -218,7 +223,9 @@ class App:
                                                        window_size_sample=nb_window_size_sample,
                                                        speech_pad_ms=nb_speech_pad_ms,
                                                        chunk_length_s=nb_chunk_length_s,
-                                                       batch_size=nb_batch_size)
                     btn_run.click(fn=self.whisper_inf.transcribe_youtube,
                                   inputs=params + whisper_params.to_list(),
@@ -290,7 +297,9 @@ class App:
                                                        window_size_sample=nb_window_size_sample,
                                                        speech_pad_ms=nb_speech_pad_ms,
                                                        chunk_length_s=nb_chunk_length_s,
-                                                       batch_size=nb_batch_size)
                     btn_run.click(fn=self.whisper_inf.transcribe_mic,
                                   inputs=params + whisper_params.to_list(),

                     with gr.Accordion("Insanely Fast Whisper Parameters", open=False, visible=isinstance(self.whisper_inf, InsanelyFastWhisperInference)):
                         nb_chunk_length_s = gr.Number(label="Chunk Lengths (sec)", value=30, precision=0)
                         nb_batch_size = gr.Number(label="Batch Size", value=24, precision=0)
+                    with gr.Accordion("Diarization Parameters", open=False):
+                        cb_diarize = gr.Checkbox(label="Enable Diarization")
+                        tb_hf_token = gr.Text(label="HuggingFace Token", value="")
                     with gr.Row():
                         btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
                     with gr.Row():
                                                        window_size_sample=nb_window_size_sample,
                                                        speech_pad_ms=nb_speech_pad_ms,
                                                        chunk_length_s=nb_chunk_length_s,
+                                                       batch_size=nb_batch_size,
+                                                       is_diarize=cb_diarize,
+                                                       hf_token=tb_hf_token)
                     btn_run.click(fn=self.whisper_inf.transcribe_file,
                                   inputs=params + whisper_params.to_list(),
                                                        window_size_sample=nb_window_size_sample,
                                                        speech_pad_ms=nb_speech_pad_ms,
                                                        chunk_length_s=nb_chunk_length_s,
+                                                       batch_size=nb_batch_size,
+                                                       is_diarize=cb_diarize,
+                                                       hf_token=tb_hf_token)
                     btn_run.click(fn=self.whisper_inf.transcribe_youtube,
                                   inputs=params + whisper_params.to_list(),
                                                        window_size_sample=nb_window_size_sample,
                                                        speech_pad_ms=nb_speech_pad_ms,
                                                        chunk_length_s=nb_chunk_length_s,
+                                                       batch_size=nb_batch_size,
+                                                       is_diarize=cb_diarize,
+                                                       hf_token=tb_hf_token)
                     btn_run.click(fn=self.whisper_inf.transcribe_mic,
                                   inputs=params + whisper_params.to_list(),

modules/whisper_parameter.py CHANGED Viewed

@@ -27,6 +27,8 @@ class WhisperParameters:
     speech_pad_ms: gr.Number
     chunk_length_s: gr.Number
     batch_size: gr.Number
     """
     A data class for Gradio components of the Whisper Parameters. Use "before" Gradio pre-processing.
     This data class is used to mitigate the key-value problem between Gradio components and function parameters.
@@ -122,6 +124,12 @@ class WhisperParameters:
     batch_size: gr.Number
         This parameter is related with insanely-fast-whisper pipe. Batch size to pass to the pipe
     """
     def to_list(self) -> list:
@@ -168,7 +176,9 @@ class WhisperParameters:
             window_size_samples=args[18],
             speech_pad_ms=args[19],
             chunk_length_s=args[20],
-            batch_size=args[21]
         )
@@ -196,6 +206,8 @@ class WhisperValues:
     speech_pad_ms: int
     chunk_length_s: int
     batch_size: int
     """
     A data class to use Whisper parameters.
     """

     speech_pad_ms: gr.Number
     chunk_length_s: gr.Number
     batch_size: gr.Number
+    is_diarize: gr.Checkbox
+    hf_token: gr.Textbox
     """
     A data class for Gradio components of the Whisper Parameters. Use "before" Gradio pre-processing.
     This data class is used to mitigate the key-value problem between Gradio components and function parameters.
     batch_size: gr.Number
         This parameter is related with insanely-fast-whisper pipe. Batch size to pass to the pipe
+    is_diarize: gr.Checkbox
+        This parameter is related with whisperx. Boolean value that determines whether to diarize or not.
+    hf_token: gr.Textbox
+        This parameter is related with whisperx. Huggingface token is needed to download diarization models.
     """
     def to_list(self) -> list:
             window_size_samples=args[18],
             speech_pad_ms=args[19],
             chunk_length_s=args[20],
+            batch_size=args[21],
+            is_diarize=args[22],
+            hf_token=args[23],
         )
     speech_pad_ms: int
     chunk_length_s: int
     batch_size: int
+    is_diarize: bool
+    hf_token: str
     """
     A data class to use Whisper parameters.
     """