jhj0517 committed on
Commit
ac480c2
·
1 Parent(s): 4da9545

Limit VAD to faster-whisper only

Browse files
modules/whisper/faster_whisper_inference.py CHANGED
@@ -71,6 +71,20 @@ class FasterWhisperInference(WhisperBase):
71
  if not params.hotwords:
72
  params.hotwords = None
73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  params.suppress_tokens = self.format_suppress_tokens_str(params.suppress_tokens)
75
 
76
  segments, info = self.model.transcribe(
@@ -100,7 +114,9 @@ class FasterWhisperInference(WhisperBase):
100
  hotwords=params.hotwords,
101
  language_detection_threshold=params.language_detection_threshold,
102
  language_detection_segments=params.language_detection_segments,
103
- prompt_reset_on_temperature=params.prompt_reset_on_temperature
 
 
104
  )
105
  progress(0, desc="Loading audio..")
106
 
 
71
  if not params.hotwords:
72
  params.hotwords = None
73
 
74
+ vad_options = None
75
+ if params.vad_filter:
76
+ # Explicit value set for float('inf') from gr.Number()
77
+ if params.max_speech_duration_s >= 9999:
78
+ params.max_speech_duration_s = float('inf')
79
+
80
+ vad_options = VadOptions(
81
+ threshold=params.threshold,
82
+ min_speech_duration_ms=params.min_speech_duration_ms,
83
+ max_speech_duration_s=params.max_speech_duration_s,
84
+ min_silence_duration_ms=params.min_silence_duration_ms,
85
+ speech_pad_ms=params.speech_pad_ms
86
+ )
87
+
88
  params.suppress_tokens = self.format_suppress_tokens_str(params.suppress_tokens)
89
 
90
  segments, info = self.model.transcribe(
 
114
  hotwords=params.hotwords,
115
  language_detection_threshold=params.language_detection_threshold,
116
  language_detection_segments=params.language_detection_segments,
117
+ prompt_reset_on_temperature=params.prompt_reset_on_temperature,
118
+ vad_filter=params.vad_filter,
119
+ vad_parameters=vad_options
120
  )
121
  progress(0, desc="Loading audio..")
122
 
modules/whisper/whisper_base.py CHANGED
@@ -85,20 +85,6 @@ class WhisperBase(ABC):
85
  """
86
  params = WhisperParameters.as_value(*whisper_params)
87
 
88
- if params.vad_filter:
89
- vad_options = VadOptions(
90
- threshold=params.threshold,
91
- min_speech_duration_ms=params.min_speech_duration_ms,
92
- max_speech_duration_s=params.max_speech_duration_s,
93
- min_silence_duration_ms=params.min_silence_duration_ms,
94
- speech_pad_ms=params.speech_pad_ms
95
- )
96
- audio = self.vad.run(
97
- audio=audio,
98
- vad_parameters=vad_options,
99
- progress=progress
100
- )
101
-
102
  if params.lang == "Automatic Detection":
103
  params.lang = None
104
  else:
 
85
  """
86
  params = WhisperParameters.as_value(*whisper_params)
87
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  if params.lang == "Automatic Detection":
89
  params.lang = None
90
  else: