File size: 1,715 Bytes
46a25e1 5092159 46a25e1 f023da7 46a25e1 f023da7 46a25e1 9232118 f023da7 bb14413 46a25e1 f023da7 46a25e1 f023da7 46a25e1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 |
import os
import noisereduce as nr
import soundfile as sf
# from moviepy.editor import *
import string
import json
from glob import glob
import torchaudio
import subprocess
import shutil
import pyloudnorm as pyln
import torch
from TTS.api import TTS
import string
def remove_punctuation(sentence):
    """Strip ASCII punctuation from ``sentence`` and flatten its line breaks.

    Every character listed in ``string.punctuation`` is dropped, each
    newline becomes a single space, and each carriage return is removed.
    Returns the cleaned string.
    """
    # A single translation table covers all three rules: the newline is
    # mapped to a space, while punctuation and carriage returns are deleted.
    cleanup_table = str.maketrans('\n', ' ', string.punctuation + '\r')
    return sentence.translate(cleanup_table)
def run_audio_generation_v1(new_text, accent='None'):
    """Synthesize ``new_text`` to ``audio/output.wav`` with the XTTS v2 model.

    Steps performed:
      1. Line breaks in the text are flattened and all-caps words of two or
         more characters are split into space-separated letters.
      2. The list of available TTS models is dumped to ``models.txt``.
      3. The recording at ``./tmp/audio/input_src/0.wav`` is noise-reduced and
         stored as 16-bit PCM at ``./tmp/audio/speaker_wav.wav``.
      4. The text is synthesized in English, passing that cleaned recording
         as ``speaker_wav``, and written to ``audio/output.wav``.

    Args:
        new_text: The text to speak.
        accent: Accepted for interface compatibility; not used by this
            function.

    Returns:
        None. All results are written to disk.
    """
    new_text = new_text.replace('\n', ' ').replace('\r', '')
    new_text = _spell_out_acronyms(new_text)

    # Dump the model catalogue next to the script.
    models = TTS().list_models()
    with open('models.txt', 'w') as f:
        f.writelines(f"{model}\n" for model in models)

    # Use the GPU whenever CUDA is available (expected on the server).
    gpu = torch.cuda.is_available()
    tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2", gpu=gpu)

    # Pre-process the reference recording: mono, denoised, 16-bit PCM.
    speaker_wav_data, speaker_wav_rate = sf.read("./tmp/audio/input_src/0.wav")
    if speaker_wav_data.ndim > 1:
        # soundfile yields (frames, channels) for multi-channel files, which
        # is not a layout noisereduce accepts; average the channels to mono.
        speaker_wav_data = speaker_wav_data.mean(axis=1)
    speaker_wav_data_no_noise = nr.reduce_noise(y=speaker_wav_data, sr=speaker_wav_rate)
    sf.write('./tmp/audio/speaker_wav.wav', speaker_wav_data_no_noise, speaker_wav_rate, subtype='PCM_16')

    # Make sure the output folder exists so the final write cannot fail on a
    # missing directory.
    os.makedirs("audio", exist_ok=True)
    tts.tts_to_file(
        new_text,
        speaker_wav="./tmp/audio/speaker_wav.wav",
        language="en",
        file_path="audio/output.wav"
    )


def _spell_out_acronyms(text):
    """Return ``text`` with every all-caps word (2+ chars) split into letters."""
    import re

    # Candidates are found on the punctuation-free text so that wrappers such
    # as "(NASA)" or "NASA," are still recognised.
    acronyms = {
        word
        for word in remove_punctuation(text).split()
        if len(word) >= 2 and word.isupper()
    }
    if not acronyms:
        return text

    # Longest alternatives first, and alphanumeric look-arounds on both sides,
    # so "US" can never match inside "USA". A single substitution pass also
    # guarantees already-expanded text is not rewritten a second time.
    alternatives = "|".join(
        re.escape(word) for word in sorted(acronyms, key=len, reverse=True)
    )
    pattern = re.compile(r"(?<![^\W_])(?:" + alternatives + r")(?![^\W_])")
    # Presumably the letters are spaced out so the TTS engine reads them one
    # by one -- confirm against the intended pronunciation.
    return pattern.sub(lambda match: " ".join(match.group(0)), text)