jhj0517 committed on
Commit
19ab4f1
·
1 Parent(s): 6a24751

add faster-whisper parameters

Browse files
Files changed (1) hide show
  1. modules/whisper/whisper_parameter.py +105 -1
modules/whisper/whisper_parameter.py CHANGED
@@ -29,6 +29,22 @@ class WhisperParameters:
29
  is_diarize: gr.Checkbox
30
  hf_token: gr.Textbox
31
  diarization_device: gr.Dropdown
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  """
33
  A data class for Gradio components of the Whisper Parameters. Use "before" Gradio pre-processing.
34
  This data class is used to mitigate the key-value problem between Gradio components and function parameters.
@@ -129,6 +145,62 @@ class WhisperParameters:
129
 
130
  diarization_device: gr.Dropdown
131
  This parameter is related to whisperx. Device to run diarization model
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
  """
133
 
134
  def as_list(self) -> list:
@@ -177,7 +249,23 @@ class WhisperParameters:
177
  batch_size=args[20],
178
  is_diarize=args[21],
179
  hf_token=args[22],
180
- diarization_device=args[23]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
181
  )
182
 
183
 
@@ -207,6 +295,22 @@ class WhisperValues:
207
  is_diarize: bool
208
  hf_token: str
209
  diarization_device: str
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
210
  """
211
  A data class to use Whisper parameters.
212
  """
 
29
  is_diarize: gr.Checkbox
30
  hf_token: gr.Textbox
31
  diarization_device: gr.Dropdown
32
+ length_penalty: gr.Number
33
+ repetition_penalty: gr.Number
34
+ no_repeat_ngram_size: gr.Number
35
+ prefix: gr.Textbox
36
+ suppress_blank: gr.Checkbox
37
+ suppress_tokens: gr.Textbox
38
+ max_initial_timestamp: gr.Number
39
+ word_timestamps: gr.Checkbox
40
+ prepend_punctuations: gr.Textbox
41
+ append_punctuations: gr.Textbox
42
+ max_new_tokens: gr.Number
43
+ chunk_length: gr.Number
44
+ hallucination_silence_threshold: gr.Number
45
+ hotwords: gr.Textbox
46
+ language_detection_threshold: gr.Number
47
+ language_detection_segments: gr.Number
48
  """
49
  A data class for Gradio components of the Whisper Parameters. Use "before" Gradio pre-processing.
50
  This data class is used to mitigate the key-value problem between Gradio components and function parameters.
 
145
 
146
  diarization_device: gr.Dropdown
147
  This parameter is related to whisperx. Device to run diarization model
148
+
149
+ length_penalty:
150
+ This parameter is related to faster-whisper. Exponential length penalty constant.
151
+
152
+ repetition_penalty:
153
+ This parameter is related to faster-whisper. Penalty applied to the score of previously generated tokens
154
+ (set > 1 to penalize).
155
+
156
+ no_repeat_ngram_size:
157
+ This parameter is related to faster-whisper. Prevent repetitions of n-grams with this size (set 0 to disable).
158
+
159
+ prefix:
160
+ This parameter is related to faster-whisper. Optional text to provide as a prefix for the first window.
161
+
162
+ suppress_blank:
163
+ This parameter is related to faster-whisper. Suppress blank outputs at the beginning of the sampling.
164
+
165
+ suppress_tokens:
166
+ This parameter is related to faster-whisper. List of token IDs to suppress. -1 will suppress a default set
167
+ of symbols as defined in the model config.json file.
168
+
169
+ max_initial_timestamp:
170
+ This parameter is related to faster-whisper. The initial timestamp cannot be later than this.
171
+
172
+ word_timestamps:
173
+ This parameter is related to faster-whisper. Extract word-level timestamps using the cross-attention pattern
174
+ and dynamic time warping, and include the timestamps for each word in each segment.
175
+
176
+ prepend_punctuations:
177
+ This parameter is related to faster-whisper. If word_timestamps is True, merge these punctuation symbols
178
+ with the next word.
179
+
180
+ append_punctuations:
181
+ This parameter is related to faster-whisper. If word_timestamps is True, merge these punctuation symbols
182
+ with the previous word.
183
+
184
+ max_new_tokens:
185
+ This parameter is related to faster-whisper. Maximum number of new tokens to generate per-chunk. If not set,
186
+ the maximum will be set by the default max_length.
187
+
188
+ chunk_length:
189
+ This parameter is related to faster-whisper. The length of audio segments. If it is not None, it will overwrite the
190
+ default chunk_length of the FeatureExtractor.
191
+
192
+ hallucination_silence_threshold:
193
+ This parameter is related to faster-whisper. When word_timestamps is True, skip silent periods longer than this threshold
194
+ (in seconds) when a possible hallucination is detected.
195
+
196
+ hotwords:
197
+ This parameter is related to faster-whisper. Hotwords/hint phrases to provide the model with. Has no effect if prefix is not None.
198
+
199
+ language_detection_threshold:
200
+ This parameter is related to faster-whisper. If the maximum probability of the language tokens is higher than this value, the language is detected.
201
+
202
+ language_detection_segments:
203
+ This parameter is related to faster-whisper. Number of segments to consider for the language detection.
204
  """
205
 
206
  def as_list(self) -> list:
 
249
  batch_size=args[20],
250
  is_diarize=args[21],
251
  hf_token=args[22],
252
+ diarization_device=args[23],
253
+ length_penalty=args[24],
254
+ repetition_penalty=args[25],
255
+ no_repeat_ngram_size=args[26],
256
+ prefix=args[27],
257
+ suppress_blank=args[28],
258
+ suppress_tokens=args[29],
259
+ max_initial_timestamp=args[30],
260
+ word_timestamps=args[31],
261
+ prepend_punctuations=args[32],
262
+ append_punctuations=args[33],
263
+ max_new_tokens=args[34],
264
+ chunk_length=args[35],
265
+ hallucination_silence_threshold=args[36],
266
+ hotwords=args[37],
267
+ language_detection_threshold=args[38],
268
+ language_detection_segments=args[39]
269
  )
270
 
271
 
 
295
  is_diarize: bool
296
  hf_token: str
297
  diarization_device: str
298
+ length_penalty: float
299
+ repetition_penalty: float
300
+ no_repeat_ngram_size: int
301
+ prefix: Optional[str]
302
+ suppress_blank: bool
303
+ suppress_tokens: Optional[str]
304
+ max_initial_timestamp: float
305
+ word_timestamps: bool
306
+ prepend_punctuations: Optional[str]
307
+ append_punctuations: Optional[str]
308
+ max_new_tokens: int
309
+ chunk_length: float
310
+ hallucination_silence_threshold: float
311
+ hotwords: Optional[str]
312
+ language_detection_threshold: float
313
+ language_detection_segments: int
314
  """
315
  A data class to use Whisper parameters.
316
  """