electroma committed
Commit 6dacae5 · verified · 1 Parent(s): 848403b

xgboost api (#5)


- add mel spectrogram feature (6d851876ab8f3b584824636afac3a94d77e3ce02)
- try first model (too long) (64fbdbfb86cc7f5ac0fd93d0650f035594991c55)
- basic mel spectrogram + CNN model (aa8a37e3bd48b2c0ddba879f02f48054d0b8961d)
- some cleaning (d24b66319978f9a6d14ff29761e63610f0572c5d)
- implement XGBoost (65f47b2dbf30c1215911b919585b8b419d93ca02)
- save the trained XGBoost model as a pickle file for the API (9bb283c82081df1ce133f09639d80178743cb5e2); see the training sketch after this list
- prepare the API (69a1f9bdac532d7bf55ff179416a546a0700d51f)
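This commit ships only the inference path; the pickled booster in train_models/xgboost_audio_model.pkl is produced elsewhere (the notebook diff below is too large to render). A minimal, hedged sketch of how such a file could be created, where the feature shapes, hyperparameters, and variable names are illustrative assumptions, not taken from this commit:

import pickle
import numpy as np
import xgboost

# Illustrative stand-in features: flattened 128x71 mel spectrograms (assumed shape)
X_train = np.random.rand(200, 128 * 71).astype(np.float32)
y_train = np.random.randint(0, 2, size=200)

dtrain = xgboost.DMatrix(X_train, label=y_train)
params = {"objective": "binary:logistic", "eval_metric": "logloss"}  # assumed settings
booster = xgboost.train(params, dtrain, num_boost_round=100)

# Persist the booster the same way the API in tasks/audio.py expects to load it
with open("train_models/xgboost_audio_model.pkl", "wb") as f:
    pickle.dump(booster, f)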

notebooks/template-audio.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
requirements.txt CHANGED
@@ -8,4 +8,9 @@ python-dotenv>=1.0.0
 gradio>=4.0.0
 requests>=2.31.0
 librosa==0.10.2.post1
-ipywidgets==8.1.5
+ipywidgets==8.1.5
+seaborn==0.13.2
+tensorflow==2.17.0
+torch==2.5.1
+torchvision==0.20.1
+xgboost==2.1.3
tasks/audio.py CHANGED
@@ -2,11 +2,17 @@ from fastapi import APIRouter
 from datetime import datetime
 from datasets import load_dataset
 from sklearn.metrics import accuracy_score
+import pandas as pd
+import numpy as np
+import pickle
+import xgboost
+
 import random
 import os
 
 from .utils.evaluation import AudioEvaluationRequest
 from .utils.emissions import tracker, clean_emissions_data, get_space_info
+from .utils.preprocess import resample_audio, create_mel_spectrogram
 
 from dotenv import load_dotenv
 load_dotenv()
@@ -41,8 +47,30 @@ async def evaluate_audio(request: AudioEvaluationRequest):
     dataset = load_dataset(request.dataset_name,token=os.getenv("HF_TOKEN"))
 
     # Split dataset
-    train_test = dataset["train"].train_test_split(test_size=request.test_size, seed=request.test_seed)
-    test_dataset = train_test["test"]
+    train = dataset["train"]
+    test = dataset["test"]
+
+    # Preprocess data: resample every clip to the same target sampling rate
+    target_sr = 12000
+    test_df = pd.DataFrame(test)
+    test_df["array"] = test_df["audio"].apply(lambda x: x["array"])
+    test_df["sampling_rate"] = test_df["audio"].apply(lambda x: x["sampling_rate"])
+    test_df["resampled_array"] = test_df.apply(
+        lambda row: resample_audio(row["array"], row["sampling_rate"], target_sr=target_sr), axis=1
+    )
+    test_df["sampling_rate"] = target_sr
+
+    features = []
+    for idx, row in test_df.iterrows():
+        features.append(create_mel_spectrogram(row["resampled_array"], row["sampling_rate"]))
+
+    # Add the mel spectrogram features to the DataFrame
+    test_df["basic_melspect"] = features
+
+    # Keep only samples whose mel spectrogram has the expected shape
+    test_df["shape"] = test_df["basic_melspect"].apply(lambda x: x.shape[1])
+    test_df = test_df[test_df["shape"] == 71]
+
 
     # Start tracking emissions
     tracker.start()
@@ -54,9 +82,21 @@ async def evaluate_audio(request: AudioEvaluationRequest):
     #--------------------------------------------------------------------------------------------
 
     # Make random predictions (placeholder for actual model inference)
-    true_labels = test_dataset["label"]
-    predictions = [random.randint(0, 1) for _ in range(len(true_labels))]
-
+    with open("./train_models/xgboost_audio_model.pkl", "rb") as f:
+        loaded_model = pickle.load(f)
+
+    # Flatten mel spectrograms into 1D feature vectors
+    test_df["flattened_mel"] = test_df["basic_melspect"].apply(lambda x: x.flatten())
+
+    # Convert to NumPy arrays
+    X = np.stack(test_df["flattened_mel"].values)  # Features
+    y = test_df["label"].values  # Labels (0: chainsaw, 1: rainforest)
+
+    dtest = xgboost.DMatrix(X, label=y)
+    # Make predictions
+    y_pred_probs = loaded_model.predict(dtest)
+    y_pred = (y_pred_probs > 0.5).astype(int)  # Convert probabilities to binary labels
+
     #--------------------------------------------------------------------------------------------
     # YOUR MODEL INFERENCE STOPS HERE
     #--------------------------------------------------------------------------------------------
@@ -65,7 +105,7 @@ async def evaluate_audio(request: AudioEvaluationRequest):
     emissions_data = tracker.stop_task()
 
     # Calculate accuracy
-    accuracy = accuracy_score(true_labels, predictions)
+    accuracy = accuracy_score(y, y_pred)
 
     # Prepare results dictionary
     results = {
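The test_df["shape"] == 71 filter above keeps only clips whose mel spectrogram has 71 time frames. With librosa's default center=True, the frame count is 1 + n_samples // hop_length, so 71 frames corresponds to roughly 3-second clips at the 12 kHz target rate and the hop_length of 512 used in create_mel_spectrogram. The 3-second clip length is an inference from these parameters, not something stated in the commit:

# Sanity check of the 71-frame cutoff (assumes ~3 s clips; hop_length=512 and
# target_sr=12000 come from this commit, center=True is librosa's default)
target_sr = 12000
hop_length = 512
clip_seconds = 3
n_samples = clip_seconds * target_sr      # 36000 samples
n_frames = 1 + n_samples // hop_length    # 1 + 70 = 71
print(n_frames)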
tasks/utils/preprocess.py ADDED
@@ -0,0 +1,42 @@
+import librosa
+import numpy as np
+
+
+# Function to resample the audio array
+def resample_audio(array, orig_sr, target_sr):
+    array = np.array(array)  # Ensure it's a numpy array
+    if orig_sr != target_sr:
+        array = librosa.resample(array, orig_sr=orig_sr, target_sr=target_sr)
+    return array
+
+
+def create_mel_spectrogram(waveform, sr, n_mels=128, n_fft=2048, hop_length=512):
+    """
+    Create a Mel spectrogram from a waveform.
+
+    Args:
+        waveform (np.ndarray): 1D NumPy array of the audio waveform.
+        sr (int): Sampling rate of the waveform.
+        n_mels (int): Number of Mel bands to generate.
+        n_fft (int): Length of the FFT window.
+        hop_length (int): Number of samples between successive frames.
+
+    Returns:
+        np.ndarray: 2D NumPy array of the Mel spectrogram (shape: [n_mels, time]).
+    """
+    # Create Mel spectrogram
+    mel_spectrogram = librosa.feature.melspectrogram(
+        y=waveform,
+        sr=sr,
+        n_fft=n_fft,
+        hop_length=hop_length,
+        n_mels=n_mels
+    )
+
+    # Convert power spectrogram (amplitude squared) to decibel (log scale)
+    mel_spectrogram_db = librosa.power_to_db(mel_spectrogram, ref=np.max)
+
+    # Ensure consistent length for each feature
+    #log_mel_spec = librosa.util.fix_length(log_mel_spec, 1300)
+
+    return mel_spectrogram_db
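A small usage sketch for the two helpers above, driven by a synthetic sine wave instead of dataset audio. The 44.1 kHz source rate and 3-second duration are assumptions for illustration, and the import assumes the repository root is on the Python path:

import numpy as np
from tasks.utils.preprocess import resample_audio, create_mel_spectrogram

# Synthetic 3-second, 440 Hz tone at an assumed 44.1 kHz source rate
orig_sr, target_sr, duration = 44100, 12000, 3.0
t = np.linspace(0, duration, int(orig_sr * duration), endpoint=False)
waveform = 0.5 * np.sin(2 * np.pi * 440 * t)

resampled = resample_audio(waveform, orig_sr, target_sr)
mel_db = create_mel_spectrogram(resampled, target_sr)

print(resampled.shape)  # (36000,)
print(mel_db.shape)     # (128, 71) with the default n_mels, n_fft and hop_length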
train_models/xgboost_audio_model.pkl ADDED
Binary file (416 kB).