Arjea committed on
Commit
848403b
·
1 Parent(s): 04e024e

To compute spectrograms and resample audio wave

Browse files
tasks/lib/__init__.py ADDED
File without changes
tasks/lib/preprocessing.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ import librosa
2
+ import numpy as np
3
+
4
+
5
def resample_audio(raw_wave: np.ndarray, orig_sr: int, target_sr: int) -> np.ndarray:
    """Bring ``raw_wave`` from sampling rate ``orig_sr`` to ``target_sr``.

    :param raw_wave: audio samples to convert.
    :param orig_sr: sampling rate the samples currently have.
    :param target_sr: sampling rate the result should have.
    :return: the resampled signal. When both rates are already equal the
        input array itself is handed back (same object, no copy).
    """
    if orig_sr != target_sr:
        # Rates differ: delegate the actual conversion to librosa.
        return librosa.resample(raw_wave, orig_sr=orig_sr, target_sr=target_sr)
    return raw_wave
tasks/lib/spectrogram.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from math import log2
2
+
3
+ import librosa
4
+ import numpy as np
5
+
6
+
7
+ def _get_n_fft(freq_res_hz: int, sr: int) -> int:
8
+ """
9
+ :freq_res: frequency resolution in Hz = sample_rate / n_fft
10
+ how good you can differentiate between frequency components
11
+ which are at least ‘this’ amount far apart.
12
+ :sr: sampling_rate
13
+
14
+ The n_fft specifies the FFT length, i.e. the number of bins.
15
+ Low frequencies are more distinguishable when n_fft is higher.
16
+ For computational reason n_fft is a power of 2 (2, 4, 8, 16, ...)
17
+ """
18
+ return 2 ** round(log2(sr / freq_res_hz))
19
+
20
+
21
def get_spectrogram_dB(
    raw_wave: np.ndarray, freq_res_hz: int = 5, sr: int = 12000
) -> np.ndarray:
    """Compute the dB-scaled magnitude spectrogram of ``raw_wave``.

    :param raw_wave: audio samples to analyse.
    :param freq_res_hz: desired frequency resolution in Hz; together with
        ``sr`` it determines the FFT length via ``_get_n_fft``.
    :param sr: sampling rate of ``raw_wave`` in Hz.
    :return: the STFT magnitudes converted to decibels, using the maximum
        magnitude as the reference (``ref=np.max``).
    """
    fft_size = _get_n_fft(freq_res_hz, sr)
    # The STFT is complex-valued; only the magnitude is kept.
    magnitude = np.abs(librosa.stft(y=raw_wave, n_fft=fft_size))
    return librosa.amplitude_to_db(magnitude, ref=np.max)
27
+
28
+
29
def get_mel_spectrogram_dB(
    raw_wave: np.ndarray, freq_res_hz: int = 5, sr: int = 12000
) -> np.ndarray:
    """Compute the dB-scaled mel spectrogram of ``raw_wave``.

    :param raw_wave: audio samples to analyse.
    :param freq_res_hz: desired frequency resolution in Hz; together with
        ``sr`` it determines the FFT length via ``_get_n_fft``.
    :param sr: sampling rate of ``raw_wave`` in Hz; also passed on to the
        mel projection.
    :return: the mel-scaled STFT magnitudes converted to decibels, using the
        maximum value as the reference (``ref=np.max``).
    """
    fft_size = _get_n_fft(freq_res_hz, sr)
    # The STFT is complex-valued; only the magnitude is kept.
    magnitude = np.abs(librosa.stft(y=raw_wave, n_fft=fft_size))
    # Project the magnitude spectrogram onto the mel scale; librosa's
    # default mel parameters are used.
    mel_magnitude = librosa.feature.melspectrogram(S=magnitude, sr=sr)
    return librosa.amplitude_to_db(mel_magnitude, ref=np.max)