clone_vox

Running

File size: 956 Bytes

0d6259a
 
 
f023da7
0d6259a
 
 
f023da7
0d6259a
 
f023da7
0d6259a
f023da7
0d6259a
 
 
 
f023da7
0d6259a
 
 
 
f023da7
0d6259a
 
 
 
 
f023da7

import re
import string

import soundfile as sf
from fam.llm.fast_inference import TTS

def remove_punctuation(sentence):
    """Return *sentence* with ASCII punctuation stripped and line breaks flattened.

    Every character in ``string.punctuation`` is deleted, each ``\\n`` becomes
    a single space, and each ``\\r`` is deleted.
    """
    # A value of None in a translation table means "delete this character".
    char_map = dict.fromkeys(string.punctuation)
    char_map['\n'] = ' '
    char_map['\r'] = None

    # One pass over the string handles punctuation and line breaks together.
    return sentence.translate(str.maketrans(char_map))

def _spell_out_acronyms(text):
    """Return *text* with each standalone all-caps word split into spaced letters.

    A word qualifies when, after punctuation is stripped, it has at least two
    characters and ``str.isupper()`` is true (e.g. ``"USA"`` -> ``"U S A"``).
    Only whole-word occurrences in *text* are rewritten, so a short acronym
    never corrupts a longer word that happens to contain it.
    """
    acronyms = {
        word
        for word in remove_punctuation(text).split()
        if len(word) >= 2 and word.isupper()
    }
    if not acronyms:
        return text

    # Longest alternative first so "USA" is preferred over "US" at the same
    # position; the lookarounds keep matches on word boundaries.
    alternation = "|".join(
        re.escape(word) for word in sorted(acronyms, key=len, reverse=True)
    )
    pattern = re.compile(rf"(?<!\w)(?:{alternation})(?!\w)")
    return pattern.sub(lambda match: " ".join(match.group()), text)


def run_audio_generation_v2(new_text, accent='None'):
    """Synthesise speech for *new_text* and write it to ``audio/output.wav``.

    Line breaks in the text are flattened and all-caps words (acronyms) are
    spelled out letter by letter before the text is handed to the TTS model.

    Args:
        new_text: The text to speak.
        accent: Accepted for interface compatibility; not used by this
            function.

    Returns:
        None. The result is written to ``audio/output.wav``.
    """
    tts = TTS()
    new_text = new_text.replace('\n', ' ').replace('\r', '')
    new_text = _spell_out_acronyms(new_text)

    wav_file = tts.synthesise(
        text=new_text,
        # Any speaker reference file can be used (WAV, OGG, MP3, FLAC, etc.).
        spk_ref_path="./tmp/audio/speaker_wav.wav",
    )
    # NOTE(review): this passes the synthesise() result straight to sf.write as
    # audio data at 22050 Hz — confirm synthesise() returns samples rather than
    # a file path, and that the sample rate matches the model output.
    sf.write('audio/output.wav', wav_file, samplerate=22050)