|
import argparse
|
|
import os
|
|
import sys
|
|
import tempfile
|
|
|
|
import librosa.display
|
|
import numpy as np
|
|
|
|
import os
|
|
import torch
|
|
import torchaudio
|
|
import traceback
|
|
from TTS.demos.xtts_ft_demo.utils.formatter import format_audio_list
|
|
from TTS.demos.xtts_ft_demo.utils.gpt_train import train_gpt
|
|
|
|
from TTS.tts.configs.xtts_config import XttsConfig
|
|
from TTS.tts.models.xtts import Xtts
|
|
|
|
|
|
def clear_gpu_cache():
    """Release PyTorch's cached CUDA memory when a CUDA device is available.

    Does nothing when ``torch.cuda.is_available()`` reports no CUDA support.
    """
    if not torch.cuda.is_available():
        return
    torch.cuda.empty_cache()
|
|
|
|
|
|
def preprocess_dataset(audio_path, language, out_path):
    """Prepare the audio data for model training.

    Creates the ``dataset`` sub-directory under ``out_path`` (if it does not
    exist yet) and runs ``format_audio_list`` on the given audio files, using
    that sub-directory as the formatter's output location.

    Args:
        audio_path (list): List of audio file paths.
        language (str): Language code of the dataset.
        out_path (str): Output root; results go to ``<out_path>/dataset``.

    Returns:
        tuple: ``(train_csv, eval_csv)`` paths of the training and evaluation
        CSV files, as reported by ``format_audio_list``.
    """
    dataset_dir = os.path.join(out_path, "dataset")
    os.makedirs(dataset_dir, exist_ok=True)

    # Return the paths the formatter reports instead of hand-built
    # "train.csv"/"eval.csv" names, which need not match the files that were
    # actually written.
    # NOTE(review): assumes the first two return values are the train/eval
    # metadata CSV paths and the third is not needed here -- confirm against
    # the installed TTS version.
    train_csv, eval_csv, _ = format_audio_list(
        audio_path,
        target_language=language,
        out_path=dataset_dir,
    )
    return train_csv, eval_csv
|
|
|
|
def main(dataset_path, output_path, language):
    """Preprocess a dataset and print where the resulting CSV files are.

    Args:
        dataset_path (str): Audio file paths in a single string. It is split
            with ``str.split()``, so entries are whitespace-separated and an
            individual path cannot itself contain whitespace.
        output_path (str): Output location forwarded to
            ``preprocess_dataset``.
        language (str): Language value forwarded to ``preprocess_dataset``.
    """
    audio_files = dataset_path.split()
    train_csv, eval_csv = preprocess_dataset(audio_files, language, output_path)

    report = (
        f"Los archivos CSV se han creado en: {output_path}",
        f"train.csv: {train_csv}",
        f"eval.csv: {eval_csv}",
    )
    for line in report:
        print(line)
|
|
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: three required string options, forwarded to
    # main() by keyword.
    parser = argparse.ArgumentParser()
    required_options = (
        ("--dataset_path", "Ruta del dataset de audio"),
        ("--output_path", "Ruta de salida para el dataset procesado"),
        ("--language", "Idioma del dataset"),
    )
    for flag, help_text in required_options:
        parser.add_argument(flag, type=str, required=True, help=help_text)
    args = parser.parse_args()

    main(
        dataset_path=args.dataset_path,
        output_path=args.output_path,
        language=args.language,
    )