Spaces:
Running
Running
jhj0517
committed on
Commit
·
20f9596
1
Parent(s):
364597e
Add `prompt_reset_on_temperature` parameter
Browse files
app.py
CHANGED
@@ -78,7 +78,7 @@ class App:
|
|
78 |
nb_log_prob_threshold = gr.Number(label="Log Probability Threshold", value=-1.0, interactive=True,
|
79 |
info="If the average log probability over sampled tokens is below this value, treat as failed.")
|
80 |
nb_no_speech_threshold = gr.Number(label="No Speech Threshold", value=0.6, interactive=True,
|
81 |
-
info="If the
|
82 |
dd_compute_type = gr.Dropdown(label="Compute Type", choices=self.whisper_inf.available_compute_types,
|
83 |
value=self.whisper_inf.current_compute_type, interactive=True,
|
84 |
info="Select the type of computation to perform.")
|
@@ -89,10 +89,14 @@ class App:
|
|
89 |
cb_condition_on_previous_text = gr.Checkbox(label="Condition On Previous Text", value=True,
|
90 |
interactive=True,
|
91 |
info="Condition on previous text during decoding.")
|
|
|
|
|
|
|
|
|
92 |
tb_initial_prompt = gr.Textbox(label="Initial Prompt", value=None, interactive=True,
|
93 |
info="Initial prompt to use for decoding.")
|
94 |
sd_temperature = gr.Slider(label="Temperature", value=0, step=0.01, maximum=1.0, interactive=True,
|
95 |
-
info="Temperature for sampling. It can be a tuple of temperatures, which will be successively used upon failures according to either `
|
96 |
nb_compression_ratio_threshold = gr.Number(label="Compression Ratio Threshold", value=2.4, interactive=True,
|
97 |
info="If the gzip compression ratio is above this value, treat as failed.")
|
98 |
with gr.Group(visible=isinstance(self.whisper_inf, FasterWhisperInference)):
|
@@ -170,7 +174,8 @@ class App:
|
|
170 |
append_punctuations=tb_append_punctuations, max_new_tokens=nb_max_new_tokens, chunk_length=nb_chunk_length,
|
171 |
hallucination_silence_threshold=nb_hallucination_silence_threshold, hotwords=tb_hotwords,
|
172 |
language_detection_threshold=nb_language_detection_threshold,
|
173 |
-
language_detection_segments=nb_language_detection_segments
|
|
|
174 |
),
|
175 |
dd_file_format,
|
176 |
cb_timestamp
|
|
|
78 |
nb_log_prob_threshold = gr.Number(label="Log Probability Threshold", value=-1.0, interactive=True,
|
79 |
info="If the average log probability over sampled tokens is below this value, treat as failed.")
|
80 |
nb_no_speech_threshold = gr.Number(label="No Speech Threshold", value=0.6, interactive=True,
|
81 |
+
info="If the no speech probability is higher than this value AND the average log probability over sampled tokens is below 'Log Prob Threshold', consider the segment as silent.")
|
82 |
dd_compute_type = gr.Dropdown(label="Compute Type", choices=self.whisper_inf.available_compute_types,
|
83 |
value=self.whisper_inf.current_compute_type, interactive=True,
|
84 |
info="Select the type of computation to perform.")
|
|
|
89 |
cb_condition_on_previous_text = gr.Checkbox(label="Condition On Previous Text", value=True,
|
90 |
interactive=True,
|
91 |
info="Condition on previous text during decoding.")
|
92 |
+
sld_prompt_reset_on_temperature = gr.Slider(label="Prompt Reset On Temperature", value=0.5,
|
93 |
+
minimum=0, maximum=1, step=0.01, interactive=True,
|
94 |
+
info="Resets prompt if temperature is above this value."
|
95 |
+
" Arg has effect only if 'Condition On Previous Text' is True.")
|
96 |
tb_initial_prompt = gr.Textbox(label="Initial Prompt", value=None, interactive=True,
|
97 |
info="Initial prompt to use for decoding.")
|
98 |
sd_temperature = gr.Slider(label="Temperature", value=0, step=0.01, maximum=1.0, interactive=True,
|
99 |
+
info="Temperature for sampling. It can be a tuple of temperatures, which will be successively used upon failures according to either `Compression Ratio Threshold` or `Log Prob Threshold`.")
|
100 |
nb_compression_ratio_threshold = gr.Number(label="Compression Ratio Threshold", value=2.4, interactive=True,
|
101 |
info="If the gzip compression ratio is above this value, treat as failed.")
|
102 |
with gr.Group(visible=isinstance(self.whisper_inf, FasterWhisperInference)):
|
|
|
174 |
append_punctuations=tb_append_punctuations, max_new_tokens=nb_max_new_tokens, chunk_length=nb_chunk_length,
|
175 |
hallucination_silence_threshold=nb_hallucination_silence_threshold, hotwords=tb_hotwords,
|
176 |
language_detection_threshold=nb_language_detection_threshold,
|
177 |
+
language_detection_segments=nb_language_detection_segments,
|
178 |
+
prompt_reset_on_temperature=sld_prompt_reset_on_temperature
|
179 |
),
|
180 |
dd_file_format,
|
181 |
cb_timestamp
|
modules/whisper/faster_whisper_inference.py
CHANGED
@@ -91,7 +91,8 @@ class FasterWhisperInference(WhisperBase):
|
|
91 |
hallucination_silence_threshold=params.hallucination_silence_threshold,
|
92 |
hotwords=params.hotwords,
|
93 |
language_detection_threshold=params.language_detection_threshold,
|
94 |
-
language_detection_segments=params.language_detection_segments
|
|
|
95 |
)
|
96 |
progress(0, desc="Loading audio..")
|
97 |
|
|
|
91 |
hallucination_silence_threshold=params.hallucination_silence_threshold,
|
92 |
hotwords=params.hotwords,
|
93 |
language_detection_threshold=params.language_detection_threshold,
|
94 |
+
language_detection_segments=params.language_detection_segments,
|
95 |
+
prompt_reset_on_temperature=params.prompt_reset_on_temperature
|
96 |
)
|
97 |
progress(0, desc="Loading audio..")
|
98 |
|
modules/whisper/whisper_parameter.py
CHANGED
@@ -15,6 +15,7 @@ class WhisperParameters:
|
|
15 |
best_of: gr.Number
|
16 |
patience: gr.Number
|
17 |
condition_on_previous_text: gr.Checkbox
|
|
|
18 |
initial_prompt: gr.Textbox
|
19 |
temperature: gr.Slider
|
20 |
compression_ratio_threshold: gr.Number
|
@@ -240,6 +241,7 @@ class WhisperValues:
|
|
240 |
best_of: int
|
241 |
patience: float
|
242 |
condition_on_previous_text: bool
|
|
|
243 |
initial_prompt: Optional[str]
|
244 |
temperature: float
|
245 |
compression_ratio_threshold: float
|
|
|
15 |
best_of: gr.Number
|
16 |
patience: gr.Number
|
17 |
condition_on_previous_text: gr.Checkbox
|
18 |
+
prompt_reset_on_temperature: gr.Slider
|
19 |
initial_prompt: gr.Textbox
|
20 |
temperature: gr.Slider
|
21 |
compression_ratio_threshold: gr.Number
|
|
|
241 |
best_of: int
|
242 |
patience: float
|
243 |
condition_on_previous_text: bool
|
244 |
+
prompt_reset_on_temperature: float
|
245 |
initial_prompt: Optional[str]
|
246 |
temperature: float
|
247 |
compression_ratio_threshold: float
|