jhj0517 committed
Commit · 84a6b12
1 Parent(s): 875278a

add parameters
- app.py: +15 -3
- modules/faster_whisper_inference.py: +2 -0
- modules/whisper_Inference.py: +2 -0
- modules/whisper_data_class.py: +8 -0
app.py
CHANGED
@@ -63,6 +63,8 @@ class App:
                 nb_log_prob_threshold = gr.Number(label="Log Probability Threshold", value=-1.0, interactive=True)
                 nb_no_speech_threshold = gr.Number(label="No Speech Threshold", value=0.6, interactive=True)
                 dd_compute_type = gr.Dropdown(label="Compute Type", choices=self.whisper_inf.available_compute_types, value=self.whisper_inf.current_compute_type, interactive=True)
+                nb_best_of = gr.Number(label="Best Of", value=5, interactive=True)
+                nb_patience = gr.Number(label="Patience", value=1, interactive=True)
             with gr.Row():
                 btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
             with gr.Row():
@@ -77,7 +79,9 @@ class App:
                     beam_size=nb_beam_size,
                     log_prob_threshold=nb_log_prob_threshold,
                     no_speech_threshold=nb_no_speech_threshold,
-                    compute_type=dd_compute_type)
+                    compute_type=dd_compute_type,
+                    best_of=nb_best_of,
+                    patience=nb_patience)
             btn_run.click(fn=self.whisper_inf.transcribe_file,
                           inputs=params + whisper_params.to_list(),
                           outputs=[tb_indicator, files_subtitles])
@@ -109,6 +113,8 @@ class App:
                 nb_log_prob_threshold = gr.Number(label="Log Probability Threshold", value=-1.0, interactive=True)
                 nb_no_speech_threshold = gr.Number(label="No Speech Threshold", value=0.6, interactive=True)
                 dd_compute_type = gr.Dropdown(label="Compute Type", choices=self.whisper_inf.available_compute_types, value=self.whisper_inf.current_compute_type, interactive=True)
+                nb_best_of = gr.Number(label="Best Of", value=5, interactive=True)
+                nb_patience = gr.Number(label="Patience", value=1, interactive=True)
             with gr.Row():
                 btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
             with gr.Row():
@@ -123,7 +129,9 @@ class App:
                     beam_size=nb_beam_size,
                     log_prob_threshold=nb_log_prob_threshold,
                     no_speech_threshold=nb_no_speech_threshold,
-                    compute_type=dd_compute_type)
+                    compute_type=dd_compute_type,
+                    best_of=nb_best_of,
+                    patience=nb_patience)
             btn_run.click(fn=self.whisper_inf.transcribe_youtube,
                           inputs=params + whisper_params.to_list(),
                           outputs=[tb_indicator, files_subtitles])
@@ -148,6 +156,8 @@ class App:
                 nb_log_prob_threshold = gr.Number(label="Log Probability Threshold", value=-1.0, interactive=True)
                 nb_no_speech_threshold = gr.Number(label="No Speech Threshold", value=0.6, interactive=True)
                 dd_compute_type = gr.Dropdown(label="Compute Type", choices=self.whisper_inf.available_compute_types, value=self.whisper_inf.current_compute_type, interactive=True)
+                nb_best_of = gr.Number(label="Best Of", value=5, interactive=True)
+                nb_patience = gr.Number(label="Patience", value=1, interactive=True)
             with gr.Row():
                 btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
             with gr.Row():
@@ -162,7 +172,9 @@ class App:
                     beam_size=nb_beam_size,
                     log_prob_threshold=nb_log_prob_threshold,
                     no_speech_threshold=nb_no_speech_threshold,
-                    compute_type=dd_compute_type)
+                    compute_type=dd_compute_type,
+                    best_of=nb_best_of,
+                    patience=nb_patience)
             btn_run.click(fn=self.whisper_inf.transcribe_mic,
                           inputs=params + whisper_params.to_list(),
                           outputs=[tb_indicator, files_subtitles])
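For orientation, the block below is a minimal, self-contained sketch of how the two new controls feed a Gradio click handler. It is not the repository's app.py: the stub handler and the beam-size default are illustrative assumptions; only the nb_best_of and nb_patience definitions mirror the diff above.

import gradio as gr

def transcribe_stub(beam_size, best_of, patience):
    # Stand-in for self.whisper_inf.transcribe_file in the real app.
    return f"beam_size={beam_size}, best_of={best_of}, patience={patience}"

with gr.Blocks() as demo:
    nb_beam_size = gr.Number(label="Beam Size", value=1, interactive=True)   # assumed default
    nb_best_of = gr.Number(label="Best Of", value=5, interactive=True)       # added in this commit
    nb_patience = gr.Number(label="Patience", value=1, interactive=True)     # added in this commit
    tb_indicator = gr.Textbox(label="Output")
    btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
    # Gradio passes the component values positionally, in the order listed in inputs.
    btn_run.click(fn=transcribe_stub,
                  inputs=[nb_beam_size, nb_best_of, nb_patience],
                  outputs=[tb_indicator])

if __name__ == "__main__":
    demo.launch()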
modules/faster_whisper_inference.py
CHANGED
@@ -264,6 +264,8 @@ class FasterWhisperInference(BaseInterface):
             beam_size=params.beam_size,
             log_prob_threshold=params.log_prob_threshold,
             no_speech_threshold=params.no_speech_threshold,
+            best_of=params.best_of,
+            patience=params.patience
         )
         progress(0, desc="Loading audio..")

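As a standalone reference, the sketch below shows how faster-whisper accepts the two options being threaded through here. The model size, device, and audio path are placeholder assumptions; the keyword arguments match the ones used in the hunk above.

from faster_whisper import WhisperModel

# Placeholder model/device/audio choices; only the keyword arguments matter here.
model = WhisperModel("base", device="cpu", compute_type="int8")

segments, info = model.transcribe(
    "audio.wav",
    beam_size=5,
    log_prob_threshold=-1.0,
    no_speech_threshold=0.6,
    best_of=5,      # candidates considered when sampling with non-zero temperature
    patience=1.0,   # beam search patience factor
)

# segments is a generator of Segment objects with start/end times and text.
for segment in segments:
    print(f"[{segment.start:.2f} -> {segment.end:.2f}] {segment.text}")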
modules/whisper_Inference.py
CHANGED
@@ -255,6 +255,8 @@ class WhisperInference(BaseInterface):
             no_speech_threshold=params.no_speech_threshold,
             task="translate" if params.is_translate and self.current_model_size in self.translatable_model else "transcribe",
             fp16=True if params.compute_type == "float16" else False,
+            best_of=params.best_of,
+            patience=params.patience,
             progress_callback=progress_callback)["segments"]
         elapsed_time = time.time() - start_time

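For comparison, this is roughly how the same two options are passed to stock openai-whisper. The model size and audio path are placeholders, and progress_callback is omitted because it comes from this repository's patched transcribe rather than upstream whisper. In whisper's transcribe loop, best_of only takes effect on the sampling (non-zero temperature) fallback passes, while patience applies to beam search and requires beam_size.

import whisper

# Placeholder model size and audio path.
model = whisper.load_model("base")

result = model.transcribe(
    "audio.wav",
    beam_size=5,
    best_of=5,       # used only on the higher-temperature retry passes
    patience=1.0,    # beam search patience; requires beam_size
    fp16=False,
    no_speech_threshold=0.6,
)

for segment in result["segments"]:
    print(f"[{segment['start']:.2f} -> {segment['end']:.2f}] {segment['text']}")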
modules/whisper_data_class.py
CHANGED
@@ -11,6 +11,8 @@ class WhisperGradioComponents:
     log_prob_threshold: gr.Number
     no_speech_threshold: gr.Number
     compute_type: gr.Dropdown
+    best_of: gr.Number
+    patience: gr.Number
     """
     A data class to pass Gradio components to the function before Gradio pre-processing.
     See this documentation for more information about Gradio pre-processing: https://www.gradio.app/docs/components
@@ -35,6 +37,10 @@ class WhisperGradioComponents:
     compute_type: gr.Dropdown
         compute type for transcription.
         see more info : https://opennmt.net/CTranslate2/quantization.html
+    best_of: gr.Number
+        Number of candidates when sampling with non-zero temperature.
+    patience: gr.Number
+        Beam search patience factor.
     """

     def to_list(self) -> list:
@@ -74,6 +80,8 @@ class WhisperValues:
     log_prob_threshold: float
     no_speech_threshold: float
     compute_type: str
+    best_of: int
+    patience: float
     """
     A data class to use Whisper parameters in the function after Gradio pre-processing.
     See this documentation for more information about Gradio pre-processing: : https://www.gradio.app/docs/components