Spaces:
Sleeping
Sleeping
To compute spectrograms and resample audio wave
Browse files
- tasks/lib/__init__.py +0 -0
- tasks/lib/preprocessing.py +8 -0
- tasks/lib/spectrogram.py +37 -0
tasks/lib/__init__.py
ADDED
File without changes
|
tasks/lib/preprocessing.py
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import librosa
|
2 |
+
import numpy as np
|
3 |
+
|
4 |
+
|
5 |
+
def resample_audio(raw_wave: np.ndarray, orig_sr: int, target_sr: int) -> np.ndarray:
    """
    Bring an audio wave to the requested sampling rate.

    :param raw_wave: audio samples recorded at ``orig_sr``
    :param orig_sr: sampling rate of ``raw_wave``
    :param target_sr: sampling rate the result should have
    :return: ``raw_wave`` itself (same object, no copy) when both rates are
        equal, otherwise the output of ``librosa.resample``
    """
    rate_changes = orig_sr != target_sr
    if rate_changes:
        return librosa.resample(raw_wave, orig_sr=orig_sr, target_sr=target_sr)
    # Nothing to convert: hand the input back untouched.
    return raw_wave
|
tasks/lib/spectrogram.py
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from math import log2
|
2 |
+
|
3 |
+
import librosa
|
4 |
+
import numpy as np
|
5 |
+
|
6 |
+
|
7 |
+
def _get_n_fft(freq_res_hz: int, sr: int) -> int:
    """
    Pick the FFT length that best matches a desired frequency resolution.

    :param freq_res_hz: frequency resolution in Hz = sample_rate / n_fft,
        i.e. how well frequency components that are at least this far
        apart can be told apart.
    :param sr: sampling rate
    :return: n_fft, always an integer power of 2 and at least 1

    The n_fft specifies the FFT length, i.e. the number of bins.
    Low frequencies are more distinguishable when n_fft is higher.
    For computational reasons n_fft is a power of 2 (1, 2, 4, 8, 16, ...).
    The exponent is rounded to the nearest integer, so the effective
    resolution sr / n_fft can be somewhat coarser or finer than requested.
    """
    exponent = round(log2(sr / freq_res_hz))
    # A negative exponent (sr well below freq_res_hz) would turn
    # ``2 ** exponent`` into a fractional float, which is not a valid FFT
    # length; fall back to the smallest possible length instead.
    return 2 ** max(exponent, 0)
|
19 |
+
|
20 |
+
|
21 |
+
def get_spectrogram_dB(
    raw_wave: np.ndarray, freq_res_hz: int = 5, sr: int = 12000
) -> np.ndarray:
    """
    Return the magnitude spectrogram of an audio wave on a dB scale.

    :param raw_wave: audio samples passed to ``librosa.stft``
    :param freq_res_hz: desired frequency resolution in Hz; together with
        ``sr`` it determines the FFT length via ``_get_n_fft``
    :param sr: sampling rate, used here only to derive the FFT length
    :return: STFT magnitudes converted with ``librosa.amplitude_to_db``
        and ``ref=np.max``
    """
    fft_length = _get_n_fft(freq_res_hz, sr)
    # The STFT is complex-valued; keep only the magnitude of each bin.
    magnitudes = np.abs(librosa.stft(y=raw_wave, n_fft=fft_length))
    return librosa.amplitude_to_db(magnitudes, ref=np.max)
|
27 |
+
|
28 |
+
|
29 |
+
def get_mel_spectrogram_dB(
    raw_wave: np.ndarray, freq_res_hz: int = 5, sr: int = 12000
) -> np.ndarray:
    """
    Return the mel-scaled magnitude spectrogram of an audio wave in dB.

    :param raw_wave: audio samples passed to ``librosa.stft``
    :param freq_res_hz: desired frequency resolution in Hz; together with
        ``sr`` it determines the FFT length via ``_get_n_fft``
    :param sr: sampling rate, used to derive the FFT length and forwarded
        to ``librosa.feature.melspectrogram``
    :return: mel spectrogram converted with ``librosa.amplitude_to_db``
        and ``ref=np.max``
    """
    fft_length = _get_n_fft(freq_res_hz, sr)
    # The STFT is complex-valued; keep only the magnitude of each bin.
    magnitudes = np.abs(librosa.stft(y=raw_wave, n_fft=fft_length))
    # Hand the precomputed magnitudes to librosa via ``S`` for mel mapping.
    mel_magnitudes = librosa.feature.melspectrogram(S=magnitudes, sr=sr)
    return librosa.amplitude_to_db(mel_magnitudes, ref=np.max)
|