Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -14,6 +14,10 @@ import os
|
|
14 |
import gradio as gr
|
15 |
import uuid
|
16 |
import io
|
|
|
|
|
|
|
|
|
17 |
|
18 |
|
19 |
set_seed(0)
|
@@ -35,6 +39,7 @@ speaker_embeddings = sorted([key for key in processor.speaker_embeddings.keys()
|
|
35 |
|
36 |
SAMPLE_RATE = 24_000
|
37 |
|
|
|
38 |
|
39 |
# import model
|
40 |
if device == "cpu":
|
@@ -55,18 +60,26 @@ def generate_audio(text, voice_preset = None, lag = 0):
|
|
55 |
inputs = processor(sentences, voice_preset=voice_preset).to(device)
|
56 |
# Run the generation in a separate thread, so that we can fetch the generated text in a non-blocking way.
|
57 |
|
58 |
-
|
59 |
-
**inputs, coarse_temperature = 0.8,
|
60 |
)
|
61 |
-
|
62 |
-
|
63 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
|
65 |
|
66 |
# Gradio blocks demo
|
67 |
with gr.Blocks() as demo_blocks:
|
68 |
gr.Markdown("""<h1 align="center">🐶BARK with Vocos</h1>""")
|
69 |
-
gr.HTML("""<h3 style="text-align:center;">📢
|
70 |
with gr.Group():
|
71 |
with gr.Row():
|
72 |
inp_text = gr.Textbox(label="What should Bark say?", info="Enter text here")
|
@@ -81,7 +94,9 @@ with gr.Blocks() as demo_blocks:
|
|
81 |
btn = gr.Button("Bark with Vocos TTS")
|
82 |
|
83 |
with gr.Row():
|
84 |
-
|
85 |
-
|
|
|
|
|
86 |
|
87 |
demo_blocks.queue().launch(debug=True)
|
|
|
14 |
import gradio as gr
|
15 |
import uuid
|
16 |
import io
|
17 |
+
from vocos import Vocos
|
18 |
+
|
19 |
+
import os
|
20 |
+
os.environ["GRADIO_TEMP_DIR"] = "/home/yoach/spaces/tmp"
|
21 |
|
22 |
|
23 |
set_seed(0)
|
|
|
39 |
|
40 |
SAMPLE_RATE = 24_000
|
41 |
|
42 |
+
vocos = Vocos.from_pretrained("hubertsiuzdak/vocos-encodec-24khz-v2").to(device)
|
43 |
|
44 |
# import model
|
45 |
if device == "cpu":
|
|
|
60 |
inputs = processor(sentences, voice_preset=voice_preset).to(device)
|
61 |
# Generate Bark's fine audio codes; the call below runs synchronously and blocks until generation finishes.
|
62 |
|
63 |
+
fine_output = bark.generate(
|
64 |
+
**inputs, coarse_temperature = 0.8, temperature = 0.5, do_sample=True
|
65 |
)
|
66 |
+
|
67 |
+
print("Fine tokens generated")
|
68 |
+
|
69 |
+
with torch.no_grad():
|
70 |
+
|
71 |
+
encodec_waveform = bark.codec_decode(fine_output)
|
72 |
+
|
73 |
+
features = vocos.codes_to_features(fine_output.transpose(0,1))
|
74 |
+
vocos_waveform = vocos.decode(features, bandwidth_id=torch.tensor([2], device=device))
|
75 |
+
|
76 |
+
return (SAMPLE_RATE, encodec_waveform.cpu().squeeze().numpy()), (SAMPLE_RATE, vocos_waveform.cpu().squeeze().numpy())
|
77 |
|
78 |
|
79 |
# Gradio blocks demo
|
80 |
with gr.Blocks() as demo_blocks:
|
81 |
gr.Markdown("""<h1 align="center">🐶BARK with Vocos</h1>""")
|
82 |
+
gr.HTML("""<h3 style="text-align:center;">📢Vocos-enhanced TTS 🦾! </h3>""")
|
83 |
with gr.Group():
|
84 |
with gr.Row():
|
85 |
inp_text = gr.Textbox(label="What should Bark say?", info="Enter text here")
|
|
|
94 |
btn = gr.Button("Bark with Vocos TTS")
|
95 |
|
96 |
with gr.Row():
|
97 |
+
out_audio_encodec = gr.Audio(type="numpy", autoplay=False, label="original output", show_label=True)
|
98 |
+
out_audio_vocos = gr.Audio(type="numpy", autoplay=False, label="vocos enhanced output", show_label=True)
|
99 |
+
|
100 |
+
btn.click(generate_audio, [inp_text, dd], [out_audio_encodec, out_audio_vocos])
|
101 |
|
102 |
demo_blocks.queue().launch(debug=True)
|