|
import os |
|
from pydub import AudioSegment |
|
from pydub.silence import detect_silence |
|
import glob |
|
from tqdm import tqdm |
|
import numpy as np |
|
|
|
def detect_and_trim_silence(audio_array, frame_rate, min_silence_duration=1000, silence_threshold=-40): |
|
|
|
audio_array = audio_array.detach().cpu().numpy() |
|
audio_bytes = (audio_array * 32767).astype(np.int16).tobytes() |
|
|
|
|
|
audio = AudioSegment( |
|
data=audio_bytes, |
|
sample_width=2, |
|
frame_rate=frame_rate, |
|
channels=1 |
|
) |
|
|
|
|
|
silence_intervals = detect_silence( |
|
audio, |
|
min_silence_len=min_silence_duration, |
|
silence_thresh=silence_threshold |
|
) |
|
|
|
|
|
silence_intervals_seconds = [(start / 1000, end / 1000) for start, end in silence_intervals] |
|
|
|
first_silence_end = silence_intervals_seconds[0][1] |
|
|
|
trimmed_audio = audio_array[:,:,int(first_silence_end * frame_rate):] |
|
|
|
|
|
return trimmed_audio |
|
|
|
def process_all_audio_files(root_dir, output_dir): |
|
|
|
wav_files = glob.glob(os.path.join(root_dir, '**', '*.wav'), recursive=True) |
|
for input_path in tqdm(wav_files, desc="Processing audio files"): |
|
relative_path = os.path.relpath(input_path, root_dir) |
|
output_dir = os.path.join(output_dir, os.path.dirname(relative_path)) |
|
os.makedirs(output_dir, exist_ok=True) |
|
output_path = os.path.join(output_dir, os.path.basename(input_path)) |
|
detect_and_trim_silence(input_path, output_path) |
|
|
|
|
|
if __name__ == "__main__": |
|
root_directory = "/workspace/tezuesh/omega-v2v/.predictions_warmup/moshi/audio/" |
|
output_directory = "/workspace/tezuesh/omega-v2v/.predictions_warmup/moshi/trimmed/" |
|
process_all_audio_files(root_directory, output_directory) |
|
|
|
|