"""Text clean-up helpers and a thin wrapper that synthesises speech to a WAV file."""

import re
import string
from pathlib import Path

import soundfile as sf
from fam.llm.fast_inference import TTS

# Single-pass table: newline -> space, punctuation and carriage return -> removed.
_PUNCTUATION_TABLE = str.maketrans("\n", " ", string.punctuation + "\r")

# A maximal run of two or more letters/digits (underscore excluded), i.e. one
# "word" once punctuation is treated as a separator.
_WORD_RE = re.compile(r"(?<![^\W_])[^\W_]{2,}(?![^\W_])")


def remove_punctuation(sentence):
    """Return *sentence* without ASCII punctuation and without line breaks.

    Every character in ``string.punctuation`` is deleted, ``"\\n"`` becomes a
    single space and ``"\\r"`` is deleted.
    """
    return sentence.translate(_PUNCTUATION_TABLE)


def _spell_out_acronyms(text):
    """Insert spaces between the characters of every all-upper-case word.

    Only whole words are rewritten, so a short acronym is never expanded
    inside a longer word (``"US"`` does not corrupt ``"USA"``).
    """

    def _spaced(match):
        word = match.group(0)
        # str.isupper() is False for purely numeric tokens, so "2024" is kept.
        return " ".join(word) if word.isupper() else word

    return _WORD_RE.sub(_spaced, text)


def run_audio_generation_v2(new_text, accent='None'):
    """Synthesise *new_text* and write the result to ``audio/output.wav``.

    Line breaks are flattened and all-upper-case words of two or more
    characters are spelled out letter by letter ("NASA" -> "N A S A") before
    the text is passed to the TTS model.

    Args:
        new_text: Text to synthesise.
        accent: Currently unused; kept so existing callers keep working.

    Returns:
        None. The audio is written to ``audio/output.wav`` at 22050 Hz.
    """
    # A fresh TTS instance is created on every call, as in the original code.
    tts = TTS()

    new_text = new_text.replace('\n', ' ').replace('\r', '')
    new_text = _spell_out_acronyms(new_text)

    # NOTE(review): sf.write() needs sample data; confirm that
    # TTS.synthesise returns samples rather than a path to a rendered file.
    wav_file = tts.synthesise(
        text=new_text,
        spk_ref_path="./tmp/audio/speaker_wav.wav"  # you can use any speaker reference file (WAV, OGG, MP3, FLAC, etc.)
    )

    # sf.write() does not create missing directories.
    Path('audio').mkdir(parents=True, exist_ok=True)
    sf.write('audio/output.wav', wav_file, samplerate=22050)