jhj0517 committed on
Commit
5206df6
·
1 Parent(s): ac4bff9
Files changed (1) hide show
  1. modules/whisper_data_class.py +10 -0
modules/whisper_data_class.py CHANGED
@@ -1,5 +1,6 @@
1
  from dataclasses import dataclass, fields
2
  import gradio as gr
 
3
 
4
 
5
  @dataclass
@@ -22,26 +23,35 @@ class WhisperGradioComponents:
22
  ----------
23
  model_size: gr.Dropdown
24
  Whisper model size.
 
25
  lang: gr.Dropdown
26
  Source language of the file to transcribe.
 
27
  is_translate: gr.Checkbox
28
  Boolean value that determines whether to translate to English.
29
  This is Whisper's feature for translating speech from another language directly into English end-to-end.
 
30
  beam_size: gr.Number
31
  Integer value used as a decoding option (the beam size).
 
32
  log_prob_threshold: gr.Number
33
  If the average log probability over sampled tokens is below this value, treat as failed.
 
34
  no_speech_threshold: gr.Number
35
  If the no_speech probability is higher than this value AND
36
  the average log probability over sampled tokens is below `log_prob_threshold`,
37
  consider the segment as silent.
 
38
  compute_type: gr.Dropdown
39
  Compute type for transcription.
40
  See more info: https://opennmt.net/CTranslate2/quantization.html
 
41
  best_of: gr.Number
42
  Number of candidates when sampling with non-zero temperature.
 
43
  patience: gr.Number
44
  Beam search patience factor.
 
45
  condition_on_previous_text: bool
46
  If True, the previous output of the model is provided as a prompt for the next window;
47
  disabling may make the text inconsistent across windows, but the model becomes less prone to
 
1
  from dataclasses import dataclass, fields
2
  import gradio as gr
3
+ from typing import Optional
4
 
5
 
6
  @dataclass
 
23
  ----------
24
  model_size: gr.Dropdown
25
  Whisper model size.
26
+
27
  lang: gr.Dropdown
28
  Source language of the file to transcribe.
29
+
30
  is_translate: gr.Checkbox
31
  Boolean value that determines whether to translate to English.
32
  This is Whisper's feature for translating speech from another language directly into English end-to-end.
33
+
34
  beam_size: gr.Number
35
  Integer value used as a decoding option (the beam size).
36
+
37
  log_prob_threshold: gr.Number
38
  If the average log probability over sampled tokens is below this value, treat as failed.
39
+
40
  no_speech_threshold: gr.Number
41
  If the no_speech probability is higher than this value AND
42
  the average log probability over sampled tokens is below `log_prob_threshold`,
43
  consider the segment as silent.
44
+
45
  compute_type: gr.Dropdown
46
  Compute type for transcription.
47
  See more info: https://opennmt.net/CTranslate2/quantization.html
48
+
49
  best_of: gr.Number
50
  Number of candidates when sampling with non-zero temperature.
51
+
52
  patience: gr.Number
53
  Beam search patience factor.
54
+
55
  condition_on_previous_text: bool
56
  If True, the previous output of the model is provided as a prompt for the next window;
57
  disabling may make the text inconsistent across windows, but the model becomes less prone to