Spaces:
Running
Running
Merge pull request #141 from jhj0517/feature/add-parameters
Browse files- app.py +15 -3
- modules/whisper_data_class.py +23 -0
app.py
CHANGED
@@ -65,6 +65,8 @@ class App:
|
|
65 |
dd_compute_type = gr.Dropdown(label="Compute Type", choices=self.whisper_inf.available_compute_types, value=self.whisper_inf.current_compute_type, interactive=True)
|
66 |
nb_best_of = gr.Number(label="Best Of", value=5, interactive=True)
|
67 |
nb_patience = gr.Number(label="Patience", value=1, interactive=True)
|
|
|
|
|
68 |
with gr.Row():
|
69 |
btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
|
70 |
with gr.Row():
|
@@ -81,7 +83,9 @@ class App:
|
|
81 |
no_speech_threshold=nb_no_speech_threshold,
|
82 |
compute_type=dd_compute_type,
|
83 |
best_of=nb_best_of,
|
84 |
-
patience=nb_patience
|
|
|
|
|
85 |
btn_run.click(fn=self.whisper_inf.transcribe_file,
|
86 |
inputs=params + whisper_params.to_list(),
|
87 |
outputs=[tb_indicator, files_subtitles])
|
@@ -115,6 +119,8 @@ class App:
|
|
115 |
dd_compute_type = gr.Dropdown(label="Compute Type", choices=self.whisper_inf.available_compute_types, value=self.whisper_inf.current_compute_type, interactive=True)
|
116 |
nb_best_of = gr.Number(label="Best Of", value=5, interactive=True)
|
117 |
nb_patience = gr.Number(label="Patience", value=1, interactive=True)
|
|
|
|
|
118 |
with gr.Row():
|
119 |
btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
|
120 |
with gr.Row():
|
@@ -131,7 +137,9 @@ class App:
|
|
131 |
no_speech_threshold=nb_no_speech_threshold,
|
132 |
compute_type=dd_compute_type,
|
133 |
best_of=nb_best_of,
|
134 |
-
patience=nb_patience
|
|
|
|
|
135 |
btn_run.click(fn=self.whisper_inf.transcribe_youtube,
|
136 |
inputs=params + whisper_params.to_list(),
|
137 |
outputs=[tb_indicator, files_subtitles])
|
@@ -158,6 +166,8 @@ class App:
|
|
158 |
dd_compute_type = gr.Dropdown(label="Compute Type", choices=self.whisper_inf.available_compute_types, value=self.whisper_inf.current_compute_type, interactive=True)
|
159 |
nb_best_of = gr.Number(label="Best Of", value=5, interactive=True)
|
160 |
nb_patience = gr.Number(label="Patience", value=1, interactive=True)
|
|
|
|
|
161 |
with gr.Row():
|
162 |
btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
|
163 |
with gr.Row():
|
@@ -174,7 +184,9 @@ class App:
|
|
174 |
no_speech_threshold=nb_no_speech_threshold,
|
175 |
compute_type=dd_compute_type,
|
176 |
best_of=nb_best_of,
|
177 |
-
patience=nb_patience
|
|
|
|
|
178 |
btn_run.click(fn=self.whisper_inf.transcribe_mic,
|
179 |
inputs=params + whisper_params.to_list(),
|
180 |
outputs=[tb_indicator, files_subtitles])
|
|
|
65 |
dd_compute_type = gr.Dropdown(label="Compute Type", choices=self.whisper_inf.available_compute_types, value=self.whisper_inf.current_compute_type, interactive=True)
|
66 |
nb_best_of = gr.Number(label="Best Of", value=5, interactive=True)
|
67 |
nb_patience = gr.Number(label="Patience", value=1, interactive=True)
|
68 |
+
cb_condition_on_previous_text = gr.Checkbox(label="Condition On Previous Text", value=True, interactive=True)
|
69 |
+
tb_initial_prompt = gr.Textbox(label="Initial Prompt", value=None, interactive=True)
|
70 |
with gr.Row():
|
71 |
btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
|
72 |
with gr.Row():
|
|
|
83 |
no_speech_threshold=nb_no_speech_threshold,
|
84 |
compute_type=dd_compute_type,
|
85 |
best_of=nb_best_of,
|
86 |
+
patience=nb_patience,
|
87 |
+
condition_on_previous_text=cb_condition_on_previous_text,
|
88 |
+
initial_prompt=tb_initial_prompt)
|
89 |
btn_run.click(fn=self.whisper_inf.transcribe_file,
|
90 |
inputs=params + whisper_params.to_list(),
|
91 |
outputs=[tb_indicator, files_subtitles])
|
|
|
119 |
dd_compute_type = gr.Dropdown(label="Compute Type", choices=self.whisper_inf.available_compute_types, value=self.whisper_inf.current_compute_type, interactive=True)
|
120 |
nb_best_of = gr.Number(label="Best Of", value=5, interactive=True)
|
121 |
nb_patience = gr.Number(label="Patience", value=1, interactive=True)
|
122 |
+
cb_condition_on_previous_text = gr.Checkbox(label="Condition On Previous Text", value=True, interactive=True)
|
123 |
+
tb_initial_prompt = gr.Textbox(label="Initial Prompt", value=None, interactive=True)
|
124 |
with gr.Row():
|
125 |
btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
|
126 |
with gr.Row():
|
|
|
137 |
no_speech_threshold=nb_no_speech_threshold,
|
138 |
compute_type=dd_compute_type,
|
139 |
best_of=nb_best_of,
|
140 |
+
patience=nb_patience,
|
141 |
+
condition_on_previous_text=cb_condition_on_previous_text,
|
142 |
+
initial_prompt=tb_initial_prompt)
|
143 |
btn_run.click(fn=self.whisper_inf.transcribe_youtube,
|
144 |
inputs=params + whisper_params.to_list(),
|
145 |
outputs=[tb_indicator, files_subtitles])
|
|
|
166 |
dd_compute_type = gr.Dropdown(label="Compute Type", choices=self.whisper_inf.available_compute_types, value=self.whisper_inf.current_compute_type, interactive=True)
|
167 |
nb_best_of = gr.Number(label="Best Of", value=5, interactive=True)
|
168 |
nb_patience = gr.Number(label="Patience", value=1, interactive=True)
|
169 |
+
cb_condition_on_previous_text = gr.Checkbox(label="Condition On Previous Text", value=True, interactive=True)
|
170 |
+
tb_initial_prompt = gr.Textbox(label="Initial Prompt", value=None, interactive=True)
|
171 |
with gr.Row():
|
172 |
btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
|
173 |
with gr.Row():
|
|
|
184 |
no_speech_threshold=nb_no_speech_threshold,
|
185 |
compute_type=dd_compute_type,
|
186 |
best_of=nb_best_of,
|
187 |
+
patience=nb_patience,
|
188 |
+
condition_on_previous_text=cb_condition_on_previous_text,
|
189 |
+
initial_prompt=tb_initial_prompt)
|
190 |
btn_run.click(fn=self.whisper_inf.transcribe_mic,
|
191 |
inputs=params + whisper_params.to_list(),
|
192 |
outputs=[tb_indicator, files_subtitles])
|
modules/whisper_data_class.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
from dataclasses import dataclass, fields
|
2 |
import gradio as gr
|
|
|
3 |
|
4 |
|
5 |
@dataclass
|
@@ -13,6 +14,8 @@ class WhisperGradioComponents:
|
|
13 |
compute_type: gr.Dropdown
|
14 |
best_of: gr.Number
|
15 |
patience: gr.Number
|
|
|
|
|
16 |
"""
|
17 |
A data class to pass Gradio components to the function before Gradio pre-processing.
|
18 |
See this documentation for more information about Gradio pre-processing: https://www.gradio.app/docs/components
|
@@ -21,26 +24,44 @@ class WhisperGradioComponents:
|
|
21 |
----------
|
22 |
model_size: gr.Dropdown
|
23 |
Whisper model size.
|
|
|
24 |
lang: gr.Dropdown
|
25 |
Source language of the file to transcribe.
|
|
|
26 |
is_translate: gr.Checkbox
|
27 |
Boolean value that determines whether to translate to English.
|
28 |
It's Whisper's feature to translate speech from another language directly into English end-to-end.
|
|
|
29 |
beam_size: gr.Number
|
30 |
Int value that is used for decoding option.
|
|
|
31 |
log_prob_threshold: gr.Number
|
32 |
If the average log probability over sampled tokens is below this value, treat as failed.
|
|
|
33 |
no_speech_threshold: gr.Number
|
34 |
If the no_speech probability is higher than this value AND
|
35 |
the average log probability over sampled tokens is below `log_prob_threshold`,
|
36 |
consider the segment as silent.
|
|
|
37 |
compute_type: gr.Dropdown
|
38 |
compute type for transcription.
|
39 |
see more info: https://opennmt.net/CTranslate2/quantization.html
|
|
|
40 |
best_of: gr.Number
|
41 |
Number of candidates when sampling with non-zero temperature.
|
|
|
42 |
patience: gr.Number
|
43 |
Beam search patience factor.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
"""
|
45 |
|
46 |
def to_list(self) -> list:
|
@@ -66,6 +87,8 @@ class WhisperValues:
|
|
66 |
compute_type: str
|
67 |
best_of: int
|
68 |
patience: float
|
|
|
|
|
69 |
"""
|
70 |
A data class to use Whisper parameters in your function after Gradio pre-processing.
|
71 |
See this documentation for more information about Gradio pre-processing: https://www.gradio.app/docs/components
|
|
|
1 |
from dataclasses import dataclass, fields
|
2 |
import gradio as gr
|
3 |
+
from typing import Optional
|
4 |
|
5 |
|
6 |
@dataclass
|
|
|
14 |
compute_type: gr.Dropdown
|
15 |
best_of: gr.Number
|
16 |
patience: gr.Number
|
17 |
+
condition_on_previous_text: gr.Checkbox
|
18 |
+
initial_prompt: gr.Textbox
|
19 |
"""
|
20 |
A data class to pass Gradio components to the function before Gradio pre-processing.
|
21 |
See this documentation for more information about Gradio pre-processing: https://www.gradio.app/docs/components
|
|
|
24 |
----------
|
25 |
model_size: gr.Dropdown
|
26 |
Whisper model size.
|
27 |
+
|
28 |
lang: gr.Dropdown
|
29 |
Source language of the file to transcribe.
|
30 |
+
|
31 |
is_translate: gr.Checkbox
|
32 |
Boolean value that determines whether to translate to English.
|
33 |
It's Whisper's feature to translate speech from another language directly into English end-to-end.
|
34 |
+
|
35 |
beam_size: gr.Number
|
36 |
Int value that is used for decoding option.
|
37 |
+
|
38 |
log_prob_threshold: gr.Number
|
39 |
If the average log probability over sampled tokens is below this value, treat as failed.
|
40 |
+
|
41 |
no_speech_threshold: gr.Number
|
42 |
If the no_speech probability is higher than this value AND
|
43 |
the average log probability over sampled tokens is below `log_prob_threshold`,
|
44 |
consider the segment as silent.
|
45 |
+
|
46 |
compute_type: gr.Dropdown
|
47 |
compute type for transcription.
|
48 |
see more info: https://opennmt.net/CTranslate2/quantization.html
|
49 |
+
|
50 |
best_of: gr.Number
|
51 |
Number of candidates when sampling with non-zero temperature.
|
52 |
+
|
53 |
patience: gr.Number
|
54 |
Beam search patience factor.
|
55 |
+
|
56 |
+
condition_on_previous_text: gr.Checkbox
|
57 |
+
if True, the previous output of the model is provided as a prompt for the next window;
|
58 |
+
disabling may make the text inconsistent across windows, but the model becomes less prone to
|
59 |
+
getting stuck in a failure loop, such as repetition looping or timestamps going out of sync.
|
60 |
+
|
61 |
+
initial_prompt: gr.Textbox
|
62 |
+
Optional text to provide as a prompt for the first window. This can be used to provide, or
|
63 |
+
"prompt-engineer" a context for transcription, e.g. custom vocabularies or proper nouns
|
64 |
+
to make it more likely to predict those words correctly.
|
65 |
"""
|
66 |
|
67 |
def to_list(self) -> list:
|
|
|
87 |
compute_type: str
|
88 |
best_of: int
|
89 |
patience: float
|
90 |
+
condition_on_previous_text: bool
|
91 |
+
initial_prompt: Optional[str]
|
92 |
"""
|
93 |
A data class to use Whisper parameters in your function after Gradio pre-processing.
|
94 |
See this documentation for more information about Gradio pre-processing: https://www.gradio.app/docs/components
|