jhj0517 committed on
Commit
20f9596
·
1 Parent(s): 364597e

Add `prompt_reset_on_temperature` parameter

Browse files
app.py CHANGED
@@ -78,7 +78,7 @@ class App:
78
  nb_log_prob_threshold = gr.Number(label="Log Probability Threshold", value=-1.0, interactive=True,
79
  info="If the average log probability over sampled tokens is below this value, treat as failed.")
80
  nb_no_speech_threshold = gr.Number(label="No Speech Threshold", value=0.6, interactive=True,
81
- info="If the No Speech Probability is higher than this value AND the average log probability over sampled tokens is below 'Log Prob Threshold', consider the segment as silent.")
82
  dd_compute_type = gr.Dropdown(label="Compute Type", choices=self.whisper_inf.available_compute_types,
83
  value=self.whisper_inf.current_compute_type, interactive=True,
84
  info="Select the type of computation to perform.")
@@ -89,10 +89,14 @@ class App:
89
  cb_condition_on_previous_text = gr.Checkbox(label="Condition On Previous Text", value=True,
90
  interactive=True,
91
  info="Condition on previous text during decoding.")
 
 
 
 
92
  tb_initial_prompt = gr.Textbox(label="Initial Prompt", value=None, interactive=True,
93
  info="Initial prompt to use for decoding.")
94
  sd_temperature = gr.Slider(label="Temperature", value=0, step=0.01, maximum=1.0, interactive=True,
95
- info="Temperature for sampling. It can be a tuple of temperatures, which will be successively used upon failures according to either `compression_ratio_threshold` or `log_prob_threshold`.")
96
  nb_compression_ratio_threshold = gr.Number(label="Compression Ratio Threshold", value=2.4, interactive=True,
97
  info="If the gzip compression ratio is above this value, treat as failed.")
98
  with gr.Group(visible=isinstance(self.whisper_inf, FasterWhisperInference)):
@@ -170,7 +174,8 @@ class App:
170
  append_punctuations=tb_append_punctuations, max_new_tokens=nb_max_new_tokens, chunk_length=nb_chunk_length,
171
  hallucination_silence_threshold=nb_hallucination_silence_threshold, hotwords=tb_hotwords,
172
  language_detection_threshold=nb_language_detection_threshold,
173
- language_detection_segments=nb_language_detection_segments
 
174
  ),
175
  dd_file_format,
176
  cb_timestamp
 
78
  nb_log_prob_threshold = gr.Number(label="Log Probability Threshold", value=-1.0, interactive=True,
79
  info="If the average log probability over sampled tokens is below this value, treat as failed.")
80
  nb_no_speech_threshold = gr.Number(label="No Speech Threshold", value=0.6, interactive=True,
81
+ info="If the no speech probability is higher than this value AND the average log probability over sampled tokens is below 'Log Prob Threshold', consider the segment as silent.")
82
  dd_compute_type = gr.Dropdown(label="Compute Type", choices=self.whisper_inf.available_compute_types,
83
  value=self.whisper_inf.current_compute_type, interactive=True,
84
  info="Select the type of computation to perform.")
 
89
  cb_condition_on_previous_text = gr.Checkbox(label="Condition On Previous Text", value=True,
90
  interactive=True,
91
  info="Condition on previous text during decoding.")
92
+ sld_prompt_reset_on_temperature = gr.Slider(label="Prompt Reset On Temperature", value=0.5,
93
+ minimum=0, maximum=1, step=0.01, interactive=True,
94
+ info="Resets prompt if temperature is above this value."
95
+ " Arg has effect only if 'Condition On Previous Text' is True.")
96
  tb_initial_prompt = gr.Textbox(label="Initial Prompt", value=None, interactive=True,
97
  info="Initial prompt to use for decoding.")
98
  sd_temperature = gr.Slider(label="Temperature", value=0, step=0.01, maximum=1.0, interactive=True,
99
+ info="Temperature for sampling. It can be a tuple of temperatures, which will be successively used upon failures according to either `Compression Ratio Threshold` or `Log Prob Threshold`.")
100
  nb_compression_ratio_threshold = gr.Number(label="Compression Ratio Threshold", value=2.4, interactive=True,
101
  info="If the gzip compression ratio is above this value, treat as failed.")
102
  with gr.Group(visible=isinstance(self.whisper_inf, FasterWhisperInference)):
 
174
  append_punctuations=tb_append_punctuations, max_new_tokens=nb_max_new_tokens, chunk_length=nb_chunk_length,
175
  hallucination_silence_threshold=nb_hallucination_silence_threshold, hotwords=tb_hotwords,
176
  language_detection_threshold=nb_language_detection_threshold,
177
+ language_detection_segments=nb_language_detection_segments,
178
+ prompt_reset_on_temperature=sld_prompt_reset_on_temperature
179
  ),
180
  dd_file_format,
181
  cb_timestamp
modules/whisper/faster_whisper_inference.py CHANGED
@@ -91,7 +91,8 @@ class FasterWhisperInference(WhisperBase):
91
  hallucination_silence_threshold=params.hallucination_silence_threshold,
92
  hotwords=params.hotwords,
93
  language_detection_threshold=params.language_detection_threshold,
94
- language_detection_segments=params.language_detection_segments
 
95
  )
96
  progress(0, desc="Loading audio..")
97
 
 
91
  hallucination_silence_threshold=params.hallucination_silence_threshold,
92
  hotwords=params.hotwords,
93
  language_detection_threshold=params.language_detection_threshold,
94
+ language_detection_segments=params.language_detection_segments,
95
+ prompt_reset_on_temperature=params.prompt_reset_on_temperature
96
  )
97
  progress(0, desc="Loading audio..")
98
 
modules/whisper/whisper_parameter.py CHANGED
@@ -15,6 +15,7 @@ class WhisperParameters:
15
  best_of: gr.Number
16
  patience: gr.Number
17
  condition_on_previous_text: gr.Checkbox
 
18
  initial_prompt: gr.Textbox
19
  temperature: gr.Slider
20
  compression_ratio_threshold: gr.Number
@@ -240,6 +241,7 @@ class WhisperValues:
240
  best_of: int
241
  patience: float
242
  condition_on_previous_text: bool
 
243
  initial_prompt: Optional[str]
244
  temperature: float
245
  compression_ratio_threshold: float
 
15
  best_of: gr.Number
16
  patience: gr.Number
17
  condition_on_previous_text: gr.Checkbox
18
+ prompt_reset_on_temperature: gr.Slider
19
  initial_prompt: gr.Textbox
20
  temperature: gr.Slider
21
  compression_ratio_threshold: gr.Number
 
241
  best_of: int
242
  patience: float
243
  condition_on_previous_text: bool
244
+ prompt_reset_on_temperature: float
245
  initial_prompt: Optional[str]
246
  temperature: float
247
  compression_ratio_threshold: float