Spaces:
Sleeping
Sleeping
xgboost api (#5)
Browse files
- add mel spectrogram feature (6d851876ab8f3b584824636afac3a94d77e3ce02)
- try first model (too long) (64fbdbfb86cc7f5ac0fd93d0650f035594991c55)
- basic mel spect + cnn model (aa8a37e3bd48b2c0ddba879f02f48054d0b8961d)
- some cleaning (d24b66319978f9a6d14ff29761e63610f0572c5d)
- implement XGBOOST (65f47b2dbf30c1215911b919585b8b419d93ca02)
- save trained XGBoost model as a pickle file for the API (9bb283c82081df1ce133f09639d80178743cb5e2)
- prepare API (69a1f9bdac532d7bf55ff179416a546a0700d51f)
- notebooks/template-audio.ipynb +0 -0
- requirements.txt +6 -1
- tasks/audio.py +46 -6
- tasks/utils/preprocess.py +42 -0
- train_models/xgboost_audio_model.pkl +0 -0
notebooks/template-audio.ipynb
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
requirements.txt
CHANGED
@@ -8,4 +8,9 @@ python-dotenv>=1.0.0
|
|
8 |
gradio>=4.0.0
|
9 |
requests>=2.31.0
|
10 |
librosa==0.10.2.post1
|
11 |
-
ipywidgets==8.1.5
|
|
|
|
|
|
|
|
|
|
|
|
8 |
gradio>=4.0.0
|
9 |
requests>=2.31.0
|
10 |
librosa==0.10.2.post1
|
11 |
+
ipywidgets==8.1.5
|
12 |
+
seaborn==0.13.2
|
13 |
+
tensorflow==2.17.0
|
14 |
+
torch==2.5.1
|
15 |
+
torchvision==0.20.1
|
16 |
+
xgboost==2.1.3
|
tasks/audio.py
CHANGED
@@ -2,11 +2,17 @@ from fastapi import APIRouter
|
|
2 |
from datetime import datetime
|
3 |
from datasets import load_dataset
|
4 |
from sklearn.metrics import accuracy_score
|
|
|
|
|
|
|
|
|
|
|
5 |
import random
|
6 |
import os
|
7 |
|
8 |
from .utils.evaluation import AudioEvaluationRequest
|
9 |
from .utils.emissions import tracker, clean_emissions_data, get_space_info
|
|
|
10 |
|
11 |
from dotenv import load_dotenv
|
12 |
load_dotenv()
|
@@ -41,8 +47,30 @@ async def evaluate_audio(request: AudioEvaluationRequest):
|
|
41 |
dataset = load_dataset(request.dataset_name,token=os.getenv("HF_TOKEN"))
|
42 |
|
43 |
# Split dataset
|
44 |
-
|
45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
|
47 |
# Start tracking emissions
|
48 |
tracker.start()
|
@@ -54,9 +82,21 @@ async def evaluate_audio(request: AudioEvaluationRequest):
|
|
54 |
#--------------------------------------------------------------------------------------------
|
55 |
|
56 |
# Make random predictions (placeholder for actual model inference)
|
57 |
-
|
58 |
-
|
59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
#--------------------------------------------------------------------------------------------
|
61 |
# YOUR MODEL INFERENCE STOPS HERE
|
62 |
#--------------------------------------------------------------------------------------------
|
@@ -65,7 +105,7 @@ async def evaluate_audio(request: AudioEvaluationRequest):
|
|
65 |
emissions_data = tracker.stop_task()
|
66 |
|
67 |
# Calculate accuracy
|
68 |
-
accuracy = accuracy_score(
|
69 |
|
70 |
# Prepare results dictionary
|
71 |
results = {
|
|
|
2 |
from datetime import datetime
|
3 |
from datasets import load_dataset
|
4 |
from sklearn.metrics import accuracy_score
|
5 |
+
import pandas as pd
|
6 |
+
import numpy as np
|
7 |
+
import pickle
|
8 |
+
import xgboost
|
9 |
+
|
10 |
import random
|
11 |
import os
|
12 |
|
13 |
from .utils.evaluation import AudioEvaluationRequest
|
14 |
from .utils.emissions import tracker, clean_emissions_data, get_space_info
|
15 |
+
from .utils.preprocess import resample_audio, create_mel_spectrogram
|
16 |
|
17 |
from dotenv import load_dotenv
|
18 |
load_dotenv()
|
|
|
47 |
dataset = load_dataset(request.dataset_name,token=os.getenv("HF_TOKEN"))
|
48 |
|
49 |
# Split dataset
|
50 |
+
train = dataset["train"]
|
51 |
+
test = dataset["test"]
|
52 |
+
|
53 |
+
#preprocess data: resample data to be on the same sampling rate
|
54 |
+
target_sr = 12000
|
55 |
+
test_df = pd.DataFrame(test)
|
56 |
+
test_df["array"] = test_df["audio"].apply(lambda x: x['array'])
|
57 |
+
test_df["sampling_rate"] = test_df["audio"].apply(lambda x: x['sampling_rate'])
|
58 |
+
test_df["resampled_array"] = test_df.apply(
|
59 |
+
lambda row: resample_audio(row["array"], row["sampling_rate"], target_sr=target_sr), axis=1
|
60 |
+
)
|
61 |
+
test_df["sampling_rate"] = target_sr
|
62 |
+
|
63 |
+
features = []
|
64 |
+
for idx, row in test_df.iterrows():
|
65 |
+
features.append(create_mel_spectrogram(row['resampled_array'], row['sampling_rate']))
|
66 |
+
|
67 |
+
# Store the per-sample mel spectrograms in a new DataFrame column
|
68 |
+
test_df['basic_melspect'] = features
|
69 |
+
|
70 |
+
# Filter on samples with the same mel spectrogram shape
|
71 |
+
test_df["shape"] = test_df['basic_melspect'].apply(lambda x: x.shape[1])
|
72 |
+
test_df = test_df[test_df["shape"]==71]
|
73 |
+
|
74 |
|
75 |
# Start tracking emissions
|
76 |
tracker.start()
|
|
|
82 |
#--------------------------------------------------------------------------------------------
|
83 |
|
84 |
# Make random predictions (placeholder for actual model inference)
|
85 |
+
with open("./train_models/xgboost_audio_model.pkl", "rb") as f:
|
86 |
+
loaded_model = pickle.load(f)
|
87 |
+
|
88 |
+
# Flatten Mel Spectrograms into 1D Features
|
89 |
+
test_df["flattened_mel"] = test_df["basic_melspect"].apply(lambda x: x.flatten())
|
90 |
+
|
91 |
+
# Convert to NumPy arrays
|
92 |
+
X = np.stack(test_df["flattened_mel"].values) # Features
|
93 |
+
y = test_df["label"].values # Labels (0: chainsaw, 1: rainforest)
|
94 |
+
|
95 |
+
dtest = xgboost.DMatrix(X, label=y)
|
96 |
+
# Make Predictions
|
97 |
+
y_pred_probs = loaded_model.predict(dtest)
|
98 |
+
y_pred = (y_pred_probs > 0.5).astype(int) # Convert probabilities to binary labels
|
99 |
+
|
100 |
#--------------------------------------------------------------------------------------------
|
101 |
# YOUR MODEL INFERENCE STOPS HERE
|
102 |
#--------------------------------------------------------------------------------------------
|
|
|
105 |
emissions_data = tracker.stop_task()
|
106 |
|
107 |
# Calculate accuracy
|
108 |
+
accuracy = accuracy_score(y, y_pred)
|
109 |
|
110 |
# Prepare results dictionary
|
111 |
results = {
|
tasks/utils/preprocess.py
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import librosa
|
2 |
+
import numpy as np
|
3 |
+
|
4 |
+
|
5 |
+
# Function to resample the audio array
|
6 |
+
def resample_audio(array, orig_sr, target_sr):
|
7 |
+
array = np.array(array) # Ensure it's a numpy array
|
8 |
+
if orig_sr != target_sr:
|
9 |
+
array = librosa.resample(array, orig_sr=orig_sr, target_sr=target_sr)
|
10 |
+
return array
|
11 |
+
|
12 |
+
|
13 |
+
def create_mel_spectrogram(waveform, sr, n_mels=128, n_fft=2048, hop_length=512):
|
14 |
+
"""
|
15 |
+
Create a Mel spectrogram from a waveform.
|
16 |
+
|
17 |
+
Args:
|
18 |
+
waveform (np.ndarray): 1D NumPy array of the audio waveform.
|
19 |
+
sr (int): Sampling rate of the waveform.
|
20 |
+
n_mels (int): Number of Mel bands to generate.
|
21 |
+
n_fft (int): Length of the FFT window.
|
22 |
+
hop_length (int): Number of samples between successive frames.
|
23 |
+
|
24 |
+
Returns:
|
25 |
+
np.ndarray: 2D NumPy array of the Mel spectrogram (shape: [n_mels, time]).
|
26 |
+
"""
|
27 |
+
# Create Mel spectrogram
|
28 |
+
mel_spectrogram = librosa.feature.melspectrogram(
|
29 |
+
y=waveform,
|
30 |
+
sr=sr,
|
31 |
+
n_fft=n_fft,
|
32 |
+
hop_length=hop_length,
|
33 |
+
n_mels=n_mels
|
34 |
+
)
|
35 |
+
|
36 |
+
# Convert power spectrogram (amplitude squared) to decibel (log scale)
|
37 |
+
mel_spectrogram_db = librosa.power_to_db(mel_spectrogram, ref=np.max)
|
38 |
+
|
39 |
+
# Ensure consistent length for each feature
|
40 |
+
#log_mel_spec = librosa.util.fix_length(log_mel_spec, 1300)
|
41 |
+
|
42 |
+
return mel_spectrogram_db
|
train_models/xgboost_audio_model.pkl
ADDED
Binary file (416 kB). View file
|
|