Spaces:
Running
Running
Merge pull request #151 from jhj0517/feature/add-params
Browse files
- app.py +8 -3
- modules/faster_whisper_inference.py +2 -1
- modules/whisper_Inference.py +2 -1
- modules/whisper_parameter.py +7 -1
app.py
CHANGED
@@ -68,6 +68,7 @@ class App:
|
|
68 |
cb_condition_on_previous_text = gr.Checkbox(label="Condition On Previous Text", value=True, interactive=True)
|
69 |
tb_initial_prompt = gr.Textbox(label="Initial Prompt", value=None, interactive=True)
|
70 |
sd_temperature = gr.Slider(label="Temperature", value=0, step=0.01, maximum=1.0, interactive=True)
|
|
|
71 |
with gr.Row():
|
72 |
btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
|
73 |
with gr.Row():
|
@@ -87,7 +88,8 @@ class App:
|
|
87 |
patience=nb_patience,
|
88 |
condition_on_previous_text=cb_condition_on_previous_text,
|
89 |
initial_prompt=tb_initial_prompt,
|
90 |
-
temperature=sd_temperature
|
|
|
91 |
btn_run.click(fn=self.whisper_inf.transcribe_file,
|
92 |
inputs=params + whisper_params.to_list(),
|
93 |
outputs=[tb_indicator, files_subtitles])
|
@@ -124,6 +126,7 @@ class App:
|
|
124 |
cb_condition_on_previous_text = gr.Checkbox(label="Condition On Previous Text", value=True, interactive=True)
|
125 |
tb_initial_prompt = gr.Textbox(label="Initial Prompt", value=None, interactive=True)
|
126 |
sd_temperature = gr.Slider(label="Temperature", value=0, step=0.01, maximum=1.0, interactive=True)
|
|
|
127 |
with gr.Row():
|
128 |
btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
|
129 |
with gr.Row():
|
@@ -143,7 +146,8 @@ class App:
|
|
143 |
patience=nb_patience,
|
144 |
condition_on_previous_text=cb_condition_on_previous_text,
|
145 |
initial_prompt=tb_initial_prompt,
|
146 |
-
temperature=sd_temperature
|
|
|
147 |
btn_run.click(fn=self.whisper_inf.transcribe_youtube,
|
148 |
inputs=params + whisper_params.to_list(),
|
149 |
outputs=[tb_indicator, files_subtitles])
|
@@ -192,7 +196,8 @@ class App:
|
|
192 |
patience=nb_patience,
|
193 |
condition_on_previous_text=cb_condition_on_previous_text,
|
194 |
initial_prompt=tb_initial_prompt,
|
195 |
-
temperature=sd_temperature
|
|
|
196 |
btn_run.click(fn=self.whisper_inf.transcribe_mic,
|
197 |
inputs=params + whisper_params.to_list(),
|
198 |
outputs=[tb_indicator, files_subtitles])
|
|
|
68 |
cb_condition_on_previous_text = gr.Checkbox(label="Condition On Previous Text", value=True, interactive=True)
|
69 |
tb_initial_prompt = gr.Textbox(label="Initial Prompt", value=None, interactive=True)
|
70 |
sd_temperature = gr.Slider(label="Temperature", value=0, step=0.01, maximum=1.0, interactive=True)
|
71 |
+
nb_compression_ratio_threshold = gr.Number(label="Compression Ratio Threshold", value=2.4, interactive=True)
|
72 |
with gr.Row():
|
73 |
btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
|
74 |
with gr.Row():
|
|
|
88 |
patience=nb_patience,
|
89 |
condition_on_previous_text=cb_condition_on_previous_text,
|
90 |
initial_prompt=tb_initial_prompt,
|
91 |
+
temperature=sd_temperature,
|
92 |
+
compression_ratio_threshold=nb_compression_ratio_threshold)
|
93 |
btn_run.click(fn=self.whisper_inf.transcribe_file,
|
94 |
inputs=params + whisper_params.to_list(),
|
95 |
outputs=[tb_indicator, files_subtitles])
|
|
|
126 |
cb_condition_on_previous_text = gr.Checkbox(label="Condition On Previous Text", value=True, interactive=True)
|
127 |
tb_initial_prompt = gr.Textbox(label="Initial Prompt", value=None, interactive=True)
|
128 |
sd_temperature = gr.Slider(label="Temperature", value=0, step=0.01, maximum=1.0, interactive=True)
|
129 |
+
nb_compression_ratio_threshold = gr.Number(label="Compression Ratio Threshold", value=2.4, interactive=True)
|
130 |
with gr.Row():
|
131 |
btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
|
132 |
with gr.Row():
|
|
|
146 |
patience=nb_patience,
|
147 |
condition_on_previous_text=cb_condition_on_previous_text,
|
148 |
initial_prompt=tb_initial_prompt,
|
149 |
+
temperature=sd_temperature,
|
150 |
+
compression_ratio_threshold=nb_compression_ratio_threshold)
|
151 |
btn_run.click(fn=self.whisper_inf.transcribe_youtube,
|
152 |
inputs=params + whisper_params.to_list(),
|
153 |
outputs=[tb_indicator, files_subtitles])
|
|
|
196 |
patience=nb_patience,
|
197 |
condition_on_previous_text=cb_condition_on_previous_text,
|
198 |
initial_prompt=tb_initial_prompt,
|
199 |
+
temperature=sd_temperature,
|
200 |
+
compression_ratio_threshold=nb_compression_ratio_threshold)
|
201 |
btn_run.click(fn=self.whisper_inf.transcribe_mic,
|
202 |
inputs=params + whisper_params.to_list(),
|
203 |
outputs=[tb_indicator, files_subtitles])
|
modules/faster_whisper_inference.py
CHANGED
@@ -269,7 +269,8 @@ class FasterWhisperInference(BaseInterface):
|
|
269 |
no_speech_threshold=params.no_speech_threshold,
|
270 |
best_of=params.best_of,
|
271 |
patience=params.patience,
|
272 |
-
temperature=params.temperature
|
|
|
273 |
)
|
274 |
progress(0, desc="Loading audio..")
|
275 |
|
|
|
269 |
no_speech_threshold=params.no_speech_threshold,
|
270 |
best_of=params.best_of,
|
271 |
patience=params.patience,
|
272 |
+
temperature=params.temperature,
|
273 |
+
compression_ratio_threshold=params.compression_ratio_threshold,
|
274 |
)
|
275 |
progress(0, desc="Loading audio..")
|
276 |
|
modules/whisper_Inference.py
CHANGED
@@ -258,7 +258,8 @@ class WhisperInference(BaseInterface):
|
|
258 |
best_of=params.best_of,
|
259 |
patience=params.patience,
|
260 |
temperature=params.temperature,
|
261 |
-
|
|
|
262 |
elapsed_time = time.time() - start_time
|
263 |
|
264 |
return segments_result, elapsed_time
|
|
|
258 |
best_of=params.best_of,
|
259 |
patience=params.patience,
|
260 |
temperature=params.temperature,
|
261 |
+
compression_ratio_threshold=params.compression_ratio_threshold,
|
262 |
+
progress_callback=progress_callback,)["segments"]
|
263 |
elapsed_time = time.time() - start_time
|
264 |
|
265 |
return segments_result, elapsed_time
|
modules/whisper_parameter.py
CHANGED
@@ -17,6 +17,7 @@ class WhisperGradioComponents:
|
|
17 |
condition_on_previous_text: gr.Checkbox
|
18 |
initial_prompt: gr.Textbox
|
19 |
temperature: gr.Slider
|
|
|
20 |
"""
|
21 |
A data class for Gradio components of the Whisper Parameters. Use "before" Gradio pre-processing.
|
22 |
See more about Gradio pre-processing: https://www.gradio.app/docs/components
|
@@ -64,9 +65,13 @@ class WhisperGradioComponents:
|
|
64 |
"prompt-engineer" a context for transcription, e.g. custom vocabularies or proper nouns
|
65 |
to make it more likely to predict those words correctly.
|
66 |
|
67 |
-
temperature:
|
|
|
68 |
which will be successively used upon failures according to either
|
69 |
`compression_ratio_threshold` or `log_prob_threshold`.
|
|
|
|
|
|
|
70 |
"""
|
71 |
|
72 |
def to_list(self) -> list:
|
@@ -95,6 +100,7 @@ class WhisperValues:
|
|
95 |
condition_on_previous_text: bool
|
96 |
initial_prompt: Optional[str]
|
97 |
temperature: float
|
|
|
98 |
"""
|
99 |
A data class to use Whisper parameters. Use "after" Gradio pre-processing.
|
100 |
See more about Gradio pre-processing: https://www.gradio.app/docs/components
|
|
|
17 |
condition_on_previous_text: gr.Checkbox
|
18 |
initial_prompt: gr.Textbox
|
19 |
temperature: gr.Slider
|
20 |
+
compression_ratio_threshold: gr.Number
|
21 |
"""
|
22 |
A data class for Gradio components of the Whisper Parameters. Use "before" Gradio pre-processing.
|
23 |
See more about Gradio pre-processing: https://www.gradio.app/docs/components
|
|
|
65 |
"prompt-engineer" a context for transcription, e.g. custom vocabularies or proper nouns
|
66 |
to make it more likely to predict those words correctly.
|
67 |
|
68 |
+
temperature: gr.Slider
|
69 |
+
Temperature for sampling. It can be a tuple of temperatures,
|
70 |
which will be successively used upon failures according to either
|
71 |
`compression_ratio_threshold` or `log_prob_threshold`.
|
72 |
+
|
73 |
+
compression_ratio_threshold: gr.Number
|
74 |
+
If the gzip compression ratio is above this value, treat as failed
|
75 |
"""
|
76 |
|
77 |
def to_list(self) -> list:
|
|
|
100 |
condition_on_previous_text: bool
|
101 |
initial_prompt: Optional[str]
|
102 |
temperature: float
|
103 |
+
compression_ratio_threshold: float
|
104 |
"""
|
105 |
A data class to use Whisper parameters. Use "after" Gradio pre-processing.
|
106 |
See more about Gradio pre-processing: https://www.gradio.app/docs/components
|