Spaces:
Running
Running
jhj0517
committed on
Commit
·
3764662
1
Parent(s):
f3ecc7a
Add infos for the clear use of submodels
Browse files
app.py
CHANGED
@@ -59,6 +59,7 @@ class App:
|
|
59 |
with gr.Row():
|
60 |
cb_timestamp = gr.Checkbox(value=whisper_params["add_timestamp"], label="Add a timestamp to the end of the filename",
|
61 |
interactive=True)
|
|
|
62 |
with gr.Accordion("Advanced Parameters", open=False):
|
63 |
nb_beam_size = gr.Number(label="Beam Size", value=whisper_params["beam_size"], precision=0, interactive=True,
|
64 |
info="Beam size to use for decoding.")
|
@@ -129,30 +130,35 @@ class App:
|
|
129 |
with gr.Group(visible=isinstance(self.whisper_inf, InsanelyFastWhisperInference)):
|
130 |
nb_batch_size = gr.Number(label="Batch Size", value=whisper_params["batch_size"], precision=0)
|
131 |
|
132 |
-
with gr.Accordion("
|
133 |
-
cb_bgm_separation = gr.Checkbox(label="Enable
|
134 |
-
interactive=True
|
|
|
|
|
135 |
dd_uvr_device = gr.Dropdown(label="Device", value=self.whisper_inf.music_separator.device,
|
136 |
choices=self.whisper_inf.music_separator.available_devices)
|
137 |
dd_uvr_model_size = gr.Dropdown(label="Model", value=uvr_params["model_size"],
|
138 |
choices=self.whisper_inf.music_separator.available_models)
|
139 |
nb_uvr_segment_size = gr.Number(label="Segment Size", value=uvr_params["segment_size"], precision=0)
|
140 |
cb_uvr_save_file = gr.Checkbox(label="Save separated files to output", value=uvr_params["save_file"])
|
141 |
-
cb_uvr_enable_offload = gr.Checkbox(label="Offload
|
142 |
value=uvr_params["enable_offload"])
|
143 |
|
144 |
-
with gr.Accordion("
|
145 |
cb_vad_filter = gr.Checkbox(label="Enable Silero VAD Filter", value=vad_params["vad_filter"],
|
146 |
-
interactive=True
|
147 |
-
|
|
|
|
|
148 |
info="Lower it to be more sensitive to small sounds.")
|
149 |
-
nb_min_speech_duration_ms = gr.Number(label="Minimum Speech Duration (ms)", precision=0,
|
|
|
150 |
info="Final speech chunks shorter than this time are thrown out")
|
151 |
-
nb_max_speech_duration_s = gr.Number(label="Maximum Speech Duration (s)",
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
info="In the end of each speech chunk wait for this time"
|
157 |
" before separating it")
|
158 |
nb_speech_pad_ms = gr.Number(label="Speech Padding (ms)", precision=0, value=vad_params["speech_pad_ms"],
|
@@ -161,7 +167,10 @@ class App:
|
|
161 |
with gr.Accordion("Diarization", open=False):
|
162 |
cb_diarize = gr.Checkbox(label="Enable Diarization", value=diarization_params["is_diarize"])
|
163 |
tb_hf_token = gr.Text(label="HuggingFace Token", value=diarization_params["hf_token"],
|
164 |
-
info="This is only needed the first time you download the model. If you already have
|
|
|
|
|
|
|
165 |
dd_diarization_device = gr.Dropdown(label="Device",
|
166 |
choices=self.whisper_inf.diarizer.get_available_device(),
|
167 |
value=self.whisper_inf.diarizer.get_device())
|
|
|
59 |
with gr.Row():
|
60 |
cb_timestamp = gr.Checkbox(value=whisper_params["add_timestamp"], label="Add a timestamp to the end of the filename",
|
61 |
interactive=True)
|
62 |
+
|
63 |
with gr.Accordion("Advanced Parameters", open=False):
|
64 |
nb_beam_size = gr.Number(label="Beam Size", value=whisper_params["beam_size"], precision=0, interactive=True,
|
65 |
info="Beam size to use for decoding.")
|
|
|
130 |
with gr.Group(visible=isinstance(self.whisper_inf, InsanelyFastWhisperInference)):
|
131 |
nb_batch_size = gr.Number(label="Batch Size", value=whisper_params["batch_size"], precision=0)
|
132 |
|
133 |
+
with gr.Accordion("Background Music Remover Filter", open=False):
|
134 |
+
cb_bgm_separation = gr.Checkbox(label="Enable Background Music Remover Filter", value=uvr_params["is_separate_bgm"],
|
135 |
+
interactive=True,
|
136 |
+
info="Enabling this will remove background music by submodel before"
|
137 |
+
" transcribing ")
|
138 |
dd_uvr_device = gr.Dropdown(label="Device", value=self.whisper_inf.music_separator.device,
|
139 |
choices=self.whisper_inf.music_separator.available_devices)
|
140 |
dd_uvr_model_size = gr.Dropdown(label="Model", value=uvr_params["model_size"],
|
141 |
choices=self.whisper_inf.music_separator.available_models)
|
142 |
nb_uvr_segment_size = gr.Number(label="Segment Size", value=uvr_params["segment_size"], precision=0)
|
143 |
cb_uvr_save_file = gr.Checkbox(label="Save separated files to output", value=uvr_params["save_file"])
|
144 |
+
cb_uvr_enable_offload = gr.Checkbox(label="Offload sub model after removing background music",
|
145 |
value=uvr_params["enable_offload"])
|
146 |
|
147 |
+
with gr.Accordion("Voice Detection Filter", open=False):
|
148 |
cb_vad_filter = gr.Checkbox(label="Enable Silero VAD Filter", value=vad_params["vad_filter"],
|
149 |
+
interactive=True,
|
150 |
+
info="Enable this to transcribe only detected voice parts by submodel.")
|
151 |
+
sd_threshold = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="Speech Threshold",
|
152 |
+
value=vad_params["threshold"],
|
153 |
info="Lower it to be more sensitive to small sounds.")
|
154 |
+
nb_min_speech_duration_ms = gr.Number(label="Minimum Speech Duration (ms)", precision=0,
|
155 |
+
value=vad_params["min_speech_duration_ms"],
|
156 |
info="Final speech chunks shorter than this time are thrown out")
|
157 |
+
nb_max_speech_duration_s = gr.Number(label="Maximum Speech Duration (s)",
|
158 |
+
value=vad_params["max_speech_duration_s"],
|
159 |
+
info="Maximum duration of speech chunks in \"seconds\".")
|
160 |
+
nb_min_silence_duration_ms = gr.Number(label="Minimum Silence Duration (ms)", precision=0,
|
161 |
+
value=vad_params["min_silence_duration_ms"],
|
162 |
info="In the end of each speech chunk wait for this time"
|
163 |
" before separating it")
|
164 |
nb_speech_pad_ms = gr.Number(label="Speech Padding (ms)", precision=0, value=vad_params["speech_pad_ms"],
|
|
|
167 |
with gr.Accordion("Diarization", open=False):
|
168 |
cb_diarize = gr.Checkbox(label="Enable Diarization", value=diarization_params["is_diarize"])
|
169 |
tb_hf_token = gr.Text(label="HuggingFace Token", value=diarization_params["hf_token"],
|
170 |
+
info="This is only needed the first time you download the model. If you already have"
|
171 |
+
" models, you don't need to enter. To download the model, you must manually go "
|
172 |
+
"to \"https://huggingface.co/pyannote/speaker-diarization-3.1\" and agree to"
|
173 |
+
" their requirement.")
|
174 |
dd_diarization_device = gr.Dropdown(label="Device",
|
175 |
choices=self.whisper_inf.diarizer.get_available_device(),
|
176 |
value=self.whisper_inf.diarizer.get_device())
|