Spaces:

atsushieee
/

improvisation-lab

Sleeping

App Files Files Community

improvisation-lab / improvisation_lab /domain /analysis /pitch_detector.py

atsushieee

Upload folder using huggingface_hub

c1e08a0 verified 2 months ago

raw

history blame

2.1 kB

	"""PitchDetector class for real-time pitch detection using FCPE."""

	import numpy as np
	import torch
	from torchfcpe import spawn_bundled_infer_model

	from improvisation_lab.config import PitchDetectorConfig


	class PitchDetector:
	    """Estimate one pitch value per audio frame using an FCPE model.

	    The inference model is created once in the constructor through
	    ``spawn_bundled_infer_model`` and reused by every ``detect_pitch`` call.
	    """

	    def __init__(self, config: PitchDetectorConfig):
	        """Initialize pitch detector.

	        Copies the inference settings from ``config`` onto the instance and
	        creates the FCPE inference model on ``config.device``.

	        Args:
	            config: Configuration settings for pitch detection. The fields
	                read here are ``sample_rate``, ``hop_length``,
	                ``decoder_mode``, ``threshold``, ``f0_min``, ``f0_max``,
	                ``interp_uv`` and ``device``.
	        """
	        self.sample_rate = config.sample_rate
	        self.hop_length = config.hop_length
	        self.decoder_mode = config.decoder_mode
	        self.threshold = config.threshold
	        self.f0_min = config.f0_min
	        self.f0_max = config.f0_max
	        self.interp_uv = config.interp_uv
	        self.model = spawn_bundled_infer_model(device=config.device)

	    def detect_pitch(self, audio_frame: np.ndarray) -> float:
	        """Detect pitch from audio frame.

	        Args:
	            audio_frame: Mono audio samples. A one-dimensional array is
	                expected; arrays whose extra axes all have length 1 (for
	                example ``(n, 1)``) are flattened to ``n`` samples.

	        Returns:
	            Frequency in Hz, taken from the middle frame of the model output.

	        Raises:
	            ValueError: If ``audio_frame`` contains no samples, or has more
	                than one axis with length greater than 1 (i.e. is not mono).
	        """
	        samples = np.asarray(audio_frame, dtype=np.float32)
	        if samples.size == 0:
	            raise ValueError("audio_frame must contain at least one sample")
	        if samples.ndim > 1 and max(samples.shape) != samples.size:
	            raise ValueError(
	                "audio_frame must be mono (one-dimensional); "
	                f"got shape {samples.shape}"
	            )
	        # torch.from_numpy cannot wrap negative-stride views, so hand it a
	        # flat, C-contiguous float32 buffer (copied only when necessary).
	        samples = np.ascontiguousarray(samples.reshape(-1))

	        # Ask the model for one f0 value per hop, plus one.
	        f0_target_length = (samples.shape[0] // self.hop_length) + 1

	        # Add batch and channel dimensions: (n,) -> (1, n, 1).
	        audio_tensor = torch.from_numpy(samples).unsqueeze(0).unsqueeze(-1)

	        pitch = self.model.infer(
	            audio_tensor,
	            sr=self.sample_rate,
	            decoder_mode=self.decoder_mode,
	            threshold=self.threshold,
	            f0_min=self.f0_min,
	            f0_max=self.f0_max,
	            interp_uv=self.interp_uv,
	            output_interp_target_length=f0_target_length,
	        )

	        # Extract the middle frequency value from the pitch tensor.
	        # Taking the middle value helps avoid potential inaccuracies at the
	        # edges of the audio frame, providing a more stable frequency
	        # estimate. The indexing below assumes the output is laid out as
	        # (batch, frames, 1) -- TODO confirm against torchfcpe.
	        middle_index = pitch.size(1) // 2
	        return pitch[0, middle_index, 0].item()