Spaces:

jhj0517
/

Whisper-WebUI

Running

jhj0517 committed on Jul 14, 2024

Commit

ac480c2

1 Parent(s): 4da9545

Limit VAD to faster-whisper only

Files changed (2) hide show

modules/whisper/faster_whisper_inference.py CHANGED Viewed

@@ -71,6 +71,20 @@ class FasterWhisperInference(WhisperBase):
         if not params.hotwords:
             params.hotwords = None
         params.suppress_tokens = self.format_suppress_tokens_str(params.suppress_tokens)
         segments, info = self.model.transcribe(
@@ -100,7 +114,9 @@ class FasterWhisperInference(WhisperBase):
             hotwords=params.hotwords,
             language_detection_threshold=params.language_detection_threshold,
             language_detection_segments=params.language_detection_segments,
-            prompt_reset_on_temperature=params.prompt_reset_on_temperature
         )
         progress(0, desc="Loading audio..")

         if not params.hotwords:
             params.hotwords = None
+        vad_options = None
+        if params.vad_filter:
+            # Explicit value set for float('inf') from gr.Number()
+            if params.max_speech_duration_s >= 9999:
+                params.max_speech_duration_s = float('inf')
+            vad_options = VadOptions(
+                threshold=params.threshold,
+                min_speech_duration_ms=params.min_speech_duration_ms,
+                max_speech_duration_s=params.max_speech_duration_s,
+                min_silence_duration_ms=params.min_silence_duration_ms,
+                speech_pad_ms=params.speech_pad_ms
+            )
         params.suppress_tokens = self.format_suppress_tokens_str(params.suppress_tokens)
         segments, info = self.model.transcribe(
             hotwords=params.hotwords,
             language_detection_threshold=params.language_detection_threshold,
             language_detection_segments=params.language_detection_segments,
+            prompt_reset_on_temperature=params.prompt_reset_on_temperature,
+            vad_filter=params.vad_filter,
+            vad_parameters=vad_options
         )
         progress(0, desc="Loading audio..")

modules/whisper/whisper_base.py CHANGED Viewed

@@ -85,20 +85,6 @@ class WhisperBase(ABC):
         """
         params = WhisperParameters.as_value(*whisper_params)
-        if params.vad_filter:
-            vad_options = VadOptions(
-                threshold=params.threshold,
-                min_speech_duration_ms=params.min_speech_duration_ms,
-                max_speech_duration_s=params.max_speech_duration_s,
-                min_silence_duration_ms=params.min_silence_duration_ms,
-                speech_pad_ms=params.speech_pad_ms
-            )
-            audio = self.vad.run(
-                audio=audio,
-                vad_parameters=vad_options,
-                progress=progress
-            )
         if params.lang == "Automatic Detection":
             params.lang = None
         else:

         """
         params = WhisperParameters.as_value(*whisper_params)
         if params.lang == "Automatic Detection":
             params.lang = None
         else: