audio-electroma

Sleeping

App Files Files Community

electroma committed 6 days ago

Commit

6dacae5

verified ·

1 Parent(s): 848403b

xgboost api (#5)

Browse files

- add mel spectrogram feature (6d851876ab8f3b584824636afac3a94d77e3ce02)
- try first model(too long) (64fbdbfb86cc7f5ac0fd93d0650f035594991c55)
- basic mel spect + cnn model (aa8a37e3bd48b2c0ddba879f02f48054d0b8961d)
- some cleaning (d24b66319978f9a6d14ff29761e63610f0572c5d)
- implement XGBOOST (65f47b2dbf30c1215911b919585b8b419d93ca02)
- save the trained XGBoost model as a pickle file for the API (9bb283c82081df1ce133f09639d80178743cb5e2)
- prepare API (69a1f9bdac532d7bf55ff179416a546a0700d51f)

Files changed (5) hide show

notebooks/template-audio.ipynb +0 -0
requirements.txt +6 -1
tasks/audio.py +46 -6
tasks/utils/preprocess.py +42 -0
train_models/xgboost_audio_model.pkl +0 -0

notebooks/template-audio.ipynb CHANGED Viewed

The diff for this file is too large to render. See raw diff

requirements.txt CHANGED Viewed

@@ -8,4 +8,9 @@ python-dotenv>=1.0.0
 gradio>=4.0.0
 requests>=2.31.0
 librosa==0.10.2.post1
-ipywidgets==8.1.5

 gradio>=4.0.0
 requests>=2.31.0
 librosa==0.10.2.post1
+ipywidgets==8.1.5
+seaborn==0.13.2
+tensorflow==2.17.0
+torch==2.5.1
+torchvision==0.20.1
+xgboost==2.1.3

tasks/audio.py CHANGED Viewed

@@ -2,11 +2,17 @@ from fastapi import APIRouter
 from datetime import datetime
 from datasets import load_dataset
 from sklearn.metrics import accuracy_score
 import random
 import os
 from .utils.evaluation import AudioEvaluationRequest
 from .utils.emissions import tracker, clean_emissions_data, get_space_info
 from dotenv import load_dotenv
 load_dotenv()
@@ -41,8 +47,30 @@ async def evaluate_audio(request: AudioEvaluationRequest):
     dataset = load_dataset(request.dataset_name,token=os.getenv("HF_TOKEN"))
     # Split dataset
-    train_test = dataset["train"].train_test_split(test_size=request.test_size, seed=request.test_seed)
-    test_dataset = train_test["test"]
     # Start tracking emissions
     tracker.start()
@@ -54,9 +82,21 @@ async def evaluate_audio(request: AudioEvaluationRequest):
     #--------------------------------------------------------------------------------------------
     # Make random predictions (placeholder for actual model inference)
-    true_labels = test_dataset["label"]
-    predictions = [random.randint(0, 1) for _ in range(len(true_labels))]
     #--------------------------------------------------------------------------------------------
     # YOUR MODEL INFERENCE STOPS HERE
     #--------------------------------------------------------------------------------------------
@@ -65,7 +105,7 @@ async def evaluate_audio(request: AudioEvaluationRequest):
     emissions_data = tracker.stop_task()
     # Calculate accuracy
-    accuracy = accuracy_score(true_labels, predictions)
     # Prepare results dictionary
     results = {

 from datetime import datetime
 from datasets import load_dataset
 from sklearn.metrics import accuracy_score
+import pandas as pd
+import numpy as np
+import pickle
+import xgboost
 import random
 import os
 from .utils.evaluation import AudioEvaluationRequest
 from .utils.emissions import tracker, clean_emissions_data, get_space_info
+from .utils.preprocess import resample_audio, create_mel_spectrogram
 from dotenv import load_dotenv
 load_dotenv()
     dataset = load_dataset(request.dataset_name,token=os.getenv("HF_TOKEN"))
     # Split dataset
+    train = dataset["train"]
+    test = dataset["test"]
+    #preprocess data: resample data to be on the same sampling rate
+    target_sr = 12000
+    test_df = pd.DataFrame(test)
+    test_df["array"] = test_df["audio"].apply(lambda x: x['array'])
+    test_df["sampling_rate"] = test_df["audio"].apply(lambda x: x['sampling_rate'])
+    test_df["resampled_array"] = test_df.apply(
+        lambda row: resample_audio(row["array"], row["sampling_rate"], target_sr=target_sr), axis=1
+    )
+    test_df["sampling_rate"] = target_sr
+    features = []
+    for idx, row in test_df.iterrows():
+        features.append(create_mel_spectrogram(row['resampled_array'], row['sampling_rate']))
+    # Convert features to a numpy array and add to the DataFrame
+    test_df['basic_melspect'] = features
+    # Keep only samples whose mel spectrogram has 71 time frames (shape[1] == 71)
+    test_df["shape"] = test_df['basic_melspect'].apply(lambda x: x.shape[1])
+    test_df = test_df[test_df["shape"]==71]
     # Start tracking emissions
     tracker.start()
     #--------------------------------------------------------------------------------------------
     # Make random predictions (placeholder for actual model inference)
+    with open("./train_models/xgboost_audio_model.pkl", "rb") as f:
+        loaded_model = pickle.load(f)
+    # Flatten Mel Spectrograms into 1D Features
+    test_df["flattened_mel"] = test_df["basic_melspect"].apply(lambda x: x.flatten())
+    # Convert to NumPy arrays
+    X = np.stack(test_df["flattened_mel"].values)  # Features
+    y = test_df["label"].values  # Labels (0: chainsaw, 1: rainforest)
+    dtest = xgboost.DMatrix(X, label=y)
+    # Make Predictions
+    y_pred_probs = loaded_model.predict(dtest)
+    y_pred = (y_pred_probs > 0.5).astype(int)  # Convert probabilities to binary labels
     #--------------------------------------------------------------------------------------------
     # YOUR MODEL INFERENCE STOPS HERE
     #--------------------------------------------------------------------------------------------
     emissions_data = tracker.stop_task()
     # Calculate accuracy
+    accuracy = accuracy_score(y, y_pred)
     # Prepare results dictionary
     results = {

tasks/utils/preprocess.py ADDED Viewed

	@@ -0,0 +1,42 @@

+import librosa
+import numpy as np
+# Function to resample the audio array
+def resample_audio(array, orig_sr, target_sr):
+    """
+    Return the audio samples at the target sampling rate.
+    Args:
+        array: Audio samples; converted with ``np.array`` before use.
+        orig_sr: Sampling rate the samples currently have.
+        target_sr: Sampling rate to convert to.
+    Returns:
+        np.ndarray: The samples as a NumPy array, passed through
+        ``librosa.resample`` only when ``orig_sr`` differs from ``target_sr``.
+    """
+    array = np.array(array)  # Ensure it's a numpy array
+    if orig_sr != target_sr:
+        # Only call the resampler when the rates actually differ.
+        array = librosa.resample(array, orig_sr=orig_sr, target_sr=target_sr)
+    return array
+def create_mel_spectrogram(waveform, sr, n_mels=128, n_fft=2048, hop_length=512):
+    """
+    Create a Mel spectrogram, in decibels, from a waveform.
+    Args:
+        waveform (np.ndarray): 1D NumPy array of the audio waveform.
+        sr (int): Sampling rate of the waveform.
+        n_mels (int): Number of Mel bands to generate.
+        n_fft (int): Length of the FFT window.
+        hop_length (int): Number of samples between successive frames.
+    Returns:
+        np.ndarray: 2D NumPy array of the Mel spectrogram converted to dB
+        (shape: [n_mels, time]).
+    """
+    # Create Mel spectrogram
+    mel_spectrogram = librosa.feature.melspectrogram(
+        y=waveform,
+        sr=sr,
+        n_fft=n_fft,
+        hop_length=hop_length,
+        n_mels=n_mels
+    )
+    # Convert power spectrogram (amplitude squared) to decibel (log scale),
+    # using the spectrogram's own maximum as the reference (ref=np.max)
+    mel_spectrogram_db = librosa.power_to_db(mel_spectrogram, ref=np.max)
+    # NOTE: no padding or truncation is applied here, so the result is returned
+    # with whatever number of time frames librosa produced for this waveform.
+    # Callers that need a fixed shape must enforce it themselves.
+    return mel_spectrogram_db

train_models/xgboost_audio_model.pkl ADDED Viewed

Binary file (416 kB). View file