clone_vox

Running

Amamrnaf committed on Dec 6, 2024

Commit

0d6259a

1 Parent(s): e134535

final update

Files changed (2) hide show

app.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import gradio as gr
 import os
 from coqui_tts import run_audio_generation_v1
-# from metaVoice import run_audio_generation_v2
 import shutil
@@ -29,8 +29,8 @@ def process_audio(input_text, speaker_audio, speaker_name, option_selected):
             # Generate TTS audio using run_audio_generation_v1
             run_audio_generation_v1(input_text)
         elif option_selected =="metaVoice":
-            return f"The option is {option_selected }not implemented yet."
-            # run_audio_generation_v2(input_text)
         else:
             return f"The option is not implemented yet."

 import gradio as gr
 import os
 from coqui_tts import run_audio_generation_v1
+from metaVoice import run_audio_generation_v2
 import shutil
             # Generate TTS audio using run_audio_generation_v1
             run_audio_generation_v1(input_text)
         elif option_selected =="metaVoice":
+            # return f"The option is {option_selected }not implemented yet."
+            run_audio_generation_v2(input_text)
         else:
             return f"The option is not implemented yet."

metaVoice.py CHANGED Viewed

@@ -1,30 +1,30 @@
-# from fam.llm.fast_inference import TTS
-# import string
-# import soundfile as sf
-# def remove_punctuation(sentence):
-#     translator = str.maketrans('', '', string.punctuation)
-#     sentence = sentence.translate(translator)
-#     # Remove line breaks
-#     sentence = sentence.replace('\n', ' ').replace('\r', '')
-#     return sentence
-# def run_audio_generation_v2(new_text,accent='None'):
-#     tts = TTS()
-#     new_text =  new_text.replace('\n', ' ').replace('\r', '')
-#     new_text_mod = remove_punctuation(new_text)
-#     new_text_split = new_text_mod.split()
-#     for word in new_text_split:
-#         if len(word)>=2 and word.isupper():
-#             new_text = new_text.replace(word, " ".join([*word]))
-#     wav_file = tts.synthesise(
-#     text=new_text,
-#     spk_ref_path="./tmp/audio/speaker_wav.wav" # you can use any speaker reference file (WAV, OGG, MP3, FLAC, etc.)
-#     )
-#     sf.write('audio/output.wav', wav_file, samplerate=22050)

+from fam.llm.fast_inference import TTS
+import string
+import soundfile as sf
+def remove_punctuation(sentence):  # Return `sentence` with ASCII punctuation deleted and line breaks flattened.
+    translator = str.maketrans('', '', string.punctuation)  # deletion table: every character in string.punctuation maps to None
+    sentence = sentence.translate(translator)  # one pass over the text removes all listed punctuation
+    # Flatten line breaks: '\n' becomes a single space, '\r' is dropped outright.
+    sentence = sentence.replace('\n', ' ').replace('\r', '')  # NOTE(review): a lone '\r' joins its neighbours without a space -- confirm that is intended
+    return sentence
+def run_audio_generation_v2(new_text,accent='None'):
+    tts = TTS()
+    new_text =  new_text.replace('\n', ' ').replace('\r', '')
+    new_text_mod = remove_punctuation(new_text)
+    new_text_split = new_text_mod.split()
+    for word in new_text_split:
+        if len(word)>=2 and word.isupper():
+            new_text = new_text.replace(word, " ".join([*word]))
+    wav_file = tts.synthesise(
+    text=new_text,
+    spk_ref_path="./tmp/audio/speaker_wav.wav" # you can use any speaker reference file (WAV, OGG, MP3, FLAC, etc.)
+    )
+    sf.write('audio/output.wav', wav_file, samplerate=22050)