Spaces:

ShoukanLabs
/

Vokan

Running on Zero

App Files Files Community

ButterCream committed on Mar 28, 2024

Commit

9ef0c39

1 Parent(s): 359caa8

add examples

Browse files

Files changed (1) hide show

app.py +42 -12

app.py CHANGED Viewed

@@ -51,8 +51,6 @@ INTRO = """
 <hr>
 """
 js_func = """
 function refresh() {
     const url = new URL(window.location);
@@ -64,8 +62,27 @@ function refresh() {
 }
 """
 theme = gr.themes.Soft(
-    primary_hue=gr.themes.Color(c100="#ffd7d1", c200="#ff593e", c300="#ff593e", c400="#ff593e", c50="#fff0f0", c500="#ff593e", c600="#ea580c", c700="#c2410c", c800="#9a3412", c900="#7c2d12", c950="#6c2e12"),
     secondary_hue="orange",
     radius_size=gr.themes.Size(lg="20px", md="8px", sm="6px", xl="30px", xs="4px", xxl="40px", xxs="2px"),
     font=[gr.themes.GoogleFont('M PLUS Rounded 1c'), 'ui-sans-serif', 'system-ui', 'sans-serif'],
@@ -183,6 +200,7 @@ def generate(audio_path, ins, speed, alpha, beta, embedding, steps=100):
     return 24000, scaled
 if torch.cuda.is_available():
     other_tts = tts.StyleTTS2(model_checkpoint_path='./epoch_2nd_00012.pth', config_path="models/config_ft.yml")
 else:
@@ -192,17 +210,29 @@ with gr.Blocks(theme=theme, js=js_func) as clone:
     gr.HTML(INTRO)
     with gr.Row():
         with gr.Column(scale=1):
-            inp = gr.Textbox(label="Text", info="What do you want Vokan to say? -- Longform generation currently produces artifacts in between certain sentences, this will be resolved in the next version", interactive=True)
-            voice = gr.Audio(label="Voice", interactive=True, type='filepath', max_length=1000, waveform_options={'waveform_progress_color': '#FF593E'})
-            steps = gr.Slider(minimum=3, maximum=100, value=20, step=1, label="Diffusion Steps", info="Higher produces better results typically", interactive=True)
-            embscale = gr.Slider(minimum=1, maximum=10, value=2, step=0.1, label="Embedding Scale", info="Defaults to 2 | High scales may produce unexpected results but may produce more emotional texts", interactive=True)
-            alpha = gr.Slider(minimum=0, maximum=1, value=0.3, step=0.1, label="Alpha", info="Defaults to 0.3 | Resemblance to speakers voice - lower = more similar", interactive=True)
-            beta = gr.Slider(minimum=0, maximum=1, value=0.7, step=0.1, label="Beta", info="Defaults to 0.7 | Resemblance to speakers prosody - lower = more similar - higher = based on sentence", interactive=True)
-            speed = gr.Slider(minimum=0.5, maximum=1.5, value=1, step=0.1, label="Speed of speech", info="Defaults to 1", interactive=True)
         with gr.Column(scale=1):
             clbtn = gr.Button("Synthesize", variant="primary")
-            claudio = gr.Audio(interactive=False, label="Synthesized Audio", waveform_options={'waveform_progress_color': '#FF593E'})
-            clbtn.click(generate, inputs=[voice, inp, speed, alpha, beta, embscale, steps], outputs=[claudio], concurrency_limit=4)
 if __name__ == "__main__":
     # demo.queue(api_open=False, max_size=15).launch(show_api=False)

 <hr>
 """
 js_func = """
 function refresh() {
     const url = new URL(window.location);
 }
 """
+examples = [
+    ["./Examples/David Attenborough.mp3",
+     "An understanding of the natural world is a source of not only great curiosity, but great fulfilment.",
+     1, 0.2, 0.5, 2, 100],
+    ["./Examples/Linus Tech Tips.mp3",
+     "sometimes I get so in the zone while building a computer it's like an out of body experience.",
+     1, 0.3, 0.8, 2, 100],
+    ["./Examples/Melina.mp3",
+     "If you intend to claim the Frenzied Flame, I ask that you cease. It is not to be meddled with. It is chaos, "
+     "devouring life and thought unending. However ruined this world has become, "
+     "however mired in torment and despair, life endures.",
+     1, 0.3, 0.5, 2, 100],
+    ["./Examples/Patrick Bateman.mp3",
+     "My Pain Is Constant And Sharp, And I Do Not Wish For A Better World For Anyone.",
+     1, 0.3, 0.6, 2, 100]
+]
 theme = gr.themes.Soft(
+    primary_hue=gr.themes.Color(c100="#ffd7d1", c200="#ff593e", c300="#ff593e", c400="#ff593e", c50="#fff0f0",
+                                c500="#ff593e", c600="#ea580c", c700="#c2410c", c800="#9a3412", c900="#7c2d12",
+                                c950="#6c2e12"),
     secondary_hue="orange",
     radius_size=gr.themes.Size(lg="20px", md="8px", sm="6px", xl="30px", xs="4px", xxl="40px", xxs="2px"),
     font=[gr.themes.GoogleFont('M PLUS Rounded 1c'), 'ui-sans-serif', 'system-ui', 'sans-serif'],
     return 24000, scaled
 if torch.cuda.is_available():
     other_tts = tts.StyleTTS2(model_checkpoint_path='./epoch_2nd_00012.pth', config_path="models/config_ft.yml")
 else:
     gr.HTML(INTRO)
     with gr.Row():
         with gr.Column(scale=1):
+            inp = gr.Textbox(label="Text", info="What do you want Vokan to say?", interactive=True)
+            voice = gr.Audio(label="Voice", interactive=True, type='filepath', max_length=300,
+                             waveform_options={'waveform_progress_color': '#FF593E'})
+            steps = gr.Slider(minimum=3, maximum=60, value=20, step=1, label="Diffusion Steps",
+                              info="Higher produces better results typically", interactive=True)
+            embscale = gr.Slider(minimum=1, maximum=10, value=2, step=0.1, label="Embedding Scale",
+                                 info="Defaults to 2 | low scales may produce unexpected results", interactive=True)
+            alpha = gr.Slider(minimum=0, maximum=1, value=0.3, step=0.1, label="Alpha", info="Defaults to 0.3",
+                              interactive=True)
+            beta = gr.Slider(minimum=0, maximum=1, value=0.7, step=0.1, label="Beta", info="Defaults to 0.7",
+                             interactive=True)
+            speed = gr.Slider(minimum=0.5, maximum=1.5, value=1, step=0.1, label="Speed of speech",
+                              info="Defaults to 1", interactive=True)
         with gr.Column(scale=1):
             clbtn = gr.Button("Synthesize", variant="primary")
+            claudio = gr.Audio(interactive=False, label="Synthesized Audio",
+                               waveform_options={'waveform_progress_color': '#FF593E'})
+            clbtn.click(generate, inputs=[voice, inp, speed, alpha, beta, embscale, steps], outputs=[claudio],
+                        concurrency_limit=4)
+            gr.Examples(examples=examples,
+                        inputs=[voice, inp, speed, alpha, beta, embscale, steps],
+                        outputs=[claudio])
 if __name__ == "__main__":
     # demo.queue(api_open=False, max_size=15).launch(show_api=False)