tts / tests /aux_tests /test_numpy_transforms.py

Upload 542 files

127d53c almost 2 years ago

4.36 kB

	import math
	import os
	import unittest
	from dataclasses import dataclass

	import librosa
	import numpy as np
	from coqpit import Coqpit

	from tests import get_tests_input_path, get_tests_output_path, get_tests_path
	from TTS.utils.audio import numpy_transforms as np_transforms

	TESTS_PATH = get_tests_path()
	OUT_PATH = os.path.join(get_tests_output_path(), "audio_tests")
	WAV_FILE = os.path.join(get_tests_input_path(), "example_1.wav")

	os.makedirs(OUT_PATH, exist_ok=True)


	# pylint: disable=no-self-use


	class TestNumpyTransforms(unittest.TestCase):
	def setUp(self) -> None:
	@dataclass
	class AudioConfig(Coqpit):
	sample_rate: int = 22050
	fft_size: int = 1024
	num_mels: int = 256
	mel_fmax: int = 1800
	mel_fmin: int = 0
	hop_length: int = 256
	win_length: int = 1024
	pitch_fmax: int = 640
	pitch_fmin: int = 1
	trim_db: int = -1
	min_silence_sec: float = 0.01
	gain: float = 1.0
	base: float = 10.0

	self.config = AudioConfig()
	self.sample_wav, _ = librosa.load(WAV_FILE, sr=self.config.sample_rate)

	def test_build_mel_basis(self):
	"""Check if the mel basis is correctly built"""
	print(" > Testing mel basis building.")
	mel_basis = np_transforms.build_mel_basis(**self.config)
	self.assertEqual(mel_basis.shape, (self.config.num_mels, self.config.fft_size // 2 + 1))

	def test_millisec_to_length(self):
	"""Check if the conversion from milliseconds to length is correct"""
	print(" > Testing millisec to length conversion.")
	win_len, hop_len = np_transforms.millisec_to_length(
	frame_length_ms=1000, frame_shift_ms=12.5, sample_rate=self.config.sample_rate
	)
	self.assertEqual(hop_len, int(12.5 / 1000.0 * self.config.sample_rate))
	self.assertEqual(win_len, self.config.sample_rate)

	def test_amplitude_db_conversion(self):
	di = np.random.rand(11)
	o1 = np_transforms.amp_to_db(x=di, gain=1.0, base=10)
	o2 = np_transforms.db_to_amp(x=o1, gain=1.0, base=10)
	np.testing.assert_almost_equal(di, o2, decimal=5)

	def test_preemphasis_deemphasis(self):
	di = np.random.rand(11)
	o1 = np_transforms.preemphasis(x=di, coeff=0.95)
	o2 = np_transforms.deemphasis(x=o1, coeff=0.95)
	np.testing.assert_almost_equal(di, o2, decimal=5)

	def test_spec_to_mel(self):
	mel_basis = np_transforms.build_mel_basis(**self.config)
	spec = np.random.rand(self.config.fft_size // 2 + 1, 20) # [C, T]
	mel = np_transforms.spec_to_mel(spec=spec, mel_basis=mel_basis)
	self.assertEqual(mel.shape, (self.config.num_mels, 20))

	def mel_to_spec(self):
	mel_basis = np_transforms.build_mel_basis(**self.config)
	mel = np.random.rand(self.config.num_mels, 20) # [C, T]
	spec = np_transforms.mel_to_spec(mel=mel, mel_basis=mel_basis)
	self.assertEqual(spec.shape, (self.config.fft_size // 2 + 1, 20))

	def test_wav_to_spec(self):
	spec = np_transforms.wav_to_spec(wav=self.sample_wav, **self.config)
	self.assertEqual(
	spec.shape, (self.config.fft_size // 2 + 1, math.ceil(self.sample_wav.shape[0] / self.config.hop_length))
	)

	def test_wav_to_mel(self):
	mel_basis = np_transforms.build_mel_basis(**self.config)
	mel = np_transforms.wav_to_mel(wav=self.sample_wav, mel_basis=mel_basis, **self.config)
	self.assertEqual(
	mel.shape, (self.config.num_mels, math.ceil(self.sample_wav.shape[0] / self.config.hop_length))
	)

	def test_compute_f0(self):
	pitch = np_transforms.compute_f0(x=self.sample_wav, **self.config)
	mel_basis = np_transforms.build_mel_basis(**self.config)
	mel = np_transforms.wav_to_mel(wav=self.sample_wav, mel_basis=mel_basis, **self.config)
	assert pitch.shape[0] == mel.shape[1]

	def test_load_wav(self):
	wav = np_transforms.load_wav(filename=WAV_FILE, resample=False, sample_rate=22050)
	wav_resample = np_transforms.load_wav(filename=WAV_FILE, resample=True, sample_rate=16000)
	self.assertEqual(wav.shape, (self.sample_wav.shape[0],))
	self.assertNotEqual(wav_resample.shape, (self.sample_wav.shape[0],))