-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
9 changed files
with
1,937 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,34 @@ | ||
# MacaW | ||
# MacaW | ||
|
||
## Rainforest Connection Species Sound Classification | ||
|
||
--- | ||
|
||
**NOTE** | ||
|
||
This repo is under continuous updates, so the Python scripts change continuously. Clone it if you need a stable snapshot. | ||
|
||
--- | ||
|
||
### Dataset | ||
|
||
The dataset has been provided with this kaggle competitions with guidelines to be followed. | ||
If you want to use this dataset, please refer to the Kaggle forums and adhere to the rules and regulations listed there. | ||
|
||
### Exploratory Data Analysis | ||
|
||
I have carried out the EDA part of this project using Tableau; the visualizations are available <a href="https://public.tableau.com/profile/thirumalai.kumar#!/vizhome/RFCX/RFCX-DASH">HERE</a>. | ||
|
||
For insights and reports from this ongoing EDA, keep in touch via my <a href="https://docs.google.com/document/d/1jBp9GeHqm9p7PYN3CgWjFSBo1R2oDh7d-kK-olyIUXk/edit">GOOGLE DOC</a>. | ||
|
||
|
||
### Models | ||
|
||
All my models have been constructed by referring to the <a href="https://arxiv.org/abs/1912.10211">PANN</a> research paper. | ||
|
||
|
||
|
||
|
||
- [x] Preprocessing Scripts | ||
- [x] Model Construction | ||
- [x] Training Pipeline |
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
from torch.utils.data import Dataset,DataLoader | ||
import pandas as pd | ||
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold | ||
import numpy as np | ||
import h5py | ||
import librosa | ||
import cv2 | ||
import torch | ||
from tqdm import tqdm | ||
import os | ||
from albumentations import Compose | ||
from albumentations.pytorch import ToTensorV2 | ||
|
||
# Column names for the 24-way multi-hot species label matrix.
label_cols = [f"species_{m}" for m in range(24)]
|
||
def h5read(path):
    """Load the 'pixels' dataset from an HDF5 file as a NumPy array.

    Args:
        path: Path to a .h5 file containing a 'pixels' dataset.

    Returns:
        np.ndarray with the stored spectrogram pixels.
    """
    # BUG FIX: use a context manager so the file handle is always closed;
    # the original leaked the open handle.
    with h5py.File(path, 'r') as hfile:
        return np.array(hfile.get('pixels'))
|
||
def label_gen():
    """Build a mapping recording_id -> 24-dim multi-hot species label vector.

    NOTE(review): relies on module-level globals `data` (a DataFrame with a
    `recording_id` column) and `group` (a groupby over recording ids) that
    are defined elsewhere in this module -- confirm before reuse.
    """
    label_dict = {}
    for rec_id in data.recording_id.unique().tolist():
        vec = np.zeros(24)
        # Mark every species annotated for this recording.
        for species in group.get_group(rec_id).species_id.unique():
            vec[species] = 1.
        label_dict[rec_id] = vec
    return label_dict
|
||
|
||
class AudioData(Dataset):
    """PyTorch Dataset yielding (spectrogram image, multi-hot label) pairs.

    Each record is loaded from `<root_dir>/<recording_id>.h5` via h5read().
    """

    def __init__(self, records, targets, root_dir, transforms=None):
        """
        Args:
            records: sequence of recording ids (file stems of the .h5 files).
            targets: sequence of label vectors aligned with `records`.
            root_dir: directory containing the .h5 spectrogram files.
            transforms: optional albumentations Compose applied to the image.
        """
        self.root_dir = root_dir
        self.targets = targets
        self.records = records
        self.transforms = transforms

    def __len__(self):
        return len(self.records)

    def __getitem__(self, idx):
        img_arr = h5read(os.path.join(self.root_dir, self.records[idx] + '.h5'))
        label = self.targets[idx]

        # Stored spectrograms are expected to be 3-channel images.
        assert img_arr.shape[2] == 3

        # BUG FIX: the original left `image` unbound when transforms was
        # None, raising NameError; fall back to the raw array in that case.
        if self.transforms is not None:
            image = self.transforms(image=img_arr)['image']
        else:
            image = img_arr

        return image, torch.Tensor(label)
|
||
|
||
if __name__ == "__main__":
    # Smoke test: inspect one known spectrogram's shape.
    # BUG FIX: guarded under __main__ so importing this module no longer
    # performs file I/O against a hard-coded absolute path.
    img = h5read("/home/lustbeast/AudioClass/Dataset/rfcx-species-audio-detection/h5/0a350d11c.h5")
    print(img.shape)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
import argparse | ||
import librosa | ||
import os | ||
import h5py | ||
from glob import glob | ||
import numpy as np | ||
from tqdm import tqdm | ||
from spectro import wave_to_spec | ||
import pandas as pd | ||
|
||
|
||
def check_and_create(path):
    """Ensure the directory `path` exists, creating intermediate dirs as needed.

    Prints a notice when the directory already exists (kept for parity with
    the original behaviour).
    """
    try:
        # EAFP: a single makedirs call instead of the original
        # isdir()+makedirs() pair, which was racy between check and create.
        os.makedirs(path)
    except FileExistsError:
        print("Path already exists")
|
||
|
||
def save_spec(path, offset=None, dur=None, save_format=".npy", save_path=None):
    """Convert audio files to mel-spectrograms and save them to disk.

    Args:
        path: list of audio file paths to process.
        offset: optional start time (seconds) passed to wave_to_spec.
        dur: optional duration (seconds) passed to wave_to_spec.
        save_format: ".npy" or ".h5"; anything else is silently skipped.
        save_path: output directory (created if missing).
    """
    check_and_create(save_path)
    for audio_path in tqdm(path):
        # os.path is portable, unlike the original's split on "/".
        filename = os.path.splitext(os.path.basename(audio_path))[0]
        mels = wave_to_spec(audio_path, offset=offset, duration=dur)
        out = os.path.join(save_path, filename + save_format)
        if save_format == '.npy':
            np.save(out, mels)
        elif save_format == '.h5':
            # Context manager guarantees the file is closed even on error.
            with h5py.File(out, 'w') as h5fil:
                h5fil.create_dataset('pixels', data=mels)
|
||
if __name__ == "__main__":
    # CLI entry point: read the annotation CSV and dump one spectrogram
    # per recording into save_path in the requested format.
    parser = argparse.ArgumentParser()
    parser.add_argument("--csv_path", help="The Path to the csv file.")
    parser.add_argument("--root_dir", help="The root dir of dataset.")
    parser.add_argument("--format", help="Data Format")
    parser.add_argument("--save_format", help="data format to save")
    parser.add_argument("--save_path", help="where to save")

    args = parser.parse_args()
    paths = args.csv_path
    save_as = args.save_format
    save_path = args.save_path
    root_dir = args.root_dir

    data = pd.read_csv(paths)
    check_and_create(save_path)
    for n in tqdm(range(len(data))):
        rid = data.recording_id.iloc[n]
        p = os.path.join(root_dir, rid + args.format)
        mel = wave_to_spec(p, offset=data.t_min.iloc[n], duration=data.duration.iloc[n])
        if save_as == ".h5":
            # BUG FIX: open explicitly in write mode; h5py >= 3 defaults to
            # read-only, which fails when the file does not yet exist.
            with h5py.File(os.path.join(save_path, rid + save_as), 'w') as h5fil:
                h5fil.create_dataset("pixels", data=mel)
        elif save_as == ".npy":
            # BUG FIX: write .npy files into save_path, matching the .h5
            # branch; the original wrote them into root_dir instead.
            np.save(os.path.join(save_path, rid + save_as), arr=mel)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
import torch.nn as nn | ||
import torch.nn.functional as F | ||
import torch | ||
from torchvision import models | ||
import numpy as np | ||
import timm | ||
import h5py | ||
from efficientnet_pytorch import EfficientNet | ||
|
||
|
||
|
||
class custom_effnet(nn.Module):
    """EfficientNet-B3 adapted to 1-channel input and 24 output classes."""

    def __init__(self):
        super(custom_effnet, self).__init__()
        self.model = EfficientNet.from_pretrained('efficientnet-b3')
        # Adapt the stem conv to single-channel input by averaging the
        # pretrained RGB filters (keeps the pretrained activation scale).
        self.model._conv_stem.in_channels = 1
        weight = self.model._conv_stem.weight.mean(1, keepdim=True)
        self.model._conv_stem.weight = torch.nn.Parameter(weight)
        # BUG FIX: efficientnet_pytorch exposes the classification head as
        # `_fc`, not `classifier`; the original raised AttributeError here.
        # (Also dropped the debug print of the whole model.)
        num_ftrs = self.model._fc.in_features
        self.model._fc = nn.Linear(num_ftrs, 24)

    def forward(self, x):
        return self.model(x)
|
||
class custom_resnet(nn.Module):
    """ResNet-18 backbone with its final FC layer replaced by a 24-way head."""

    def __init__(self):
        super(custom_resnet, self).__init__()
        backbone = models.resnet18(pretrained=True)
        # Swap the ImageNet head for a 24-class classifier.
        backbone.fc = nn.Linear(backbone.fc.in_features, 24)
        self.model = backbone

    def forward(self, x):
        return self.model(x)
|
||
class ConvMod(nn.Module):
    """Conv2d + BatchNorm2d block with an optional trailing ReLU."""

    def __init__(self, in_channels, out_channels, kernel_size, act=True, strides=1):
        super(ConvMod, self).__init__()
        self.conv = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=strides,
        )
        self.bn = nn.BatchNorm2d(out_channels)
        self.act = act

    def forward(self, x):
        out = self.bn(self.conv(x))
        # Apply the non-linearity only when requested at construction time.
        return F.relu(out) if self.act is True else out
|
||
class CNN_14(nn.Module):
    """VGG-style stack of paired ConvMod blocks ending in a 24-way head.

    Architecture inspired by the PANN CNN14 model (arXiv:1912.10211).
    """

    def __init__(self, in_channels, out_channels, kernel_size):
        super(CNN_14, self).__init__()
        self.conv1 = nn.Sequential(
            ConvMod(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size),
            ConvMod(64, 64, kernel_size=kernel_size),
        )
        self.conv2 = nn.Sequential(
            ConvMod(64, 128, kernel_size=kernel_size),
            ConvMod(128, 128, kernel_size=kernel_size),
        )
        self.conv3 = nn.Sequential(
            ConvMod(128, 256, kernel_size=kernel_size),
            ConvMod(256, 256, kernel_size=kernel_size),
        )
        self.conv4 = nn.Sequential(
            ConvMod(256, 512, kernel_size=kernel_size),
            ConvMod(512, 512, kernel_size=kernel_size),
        )
        self.pool = nn.AvgPool2d(kernel_size=(2, 2))
        # NOTE(review): self.linear is never used in forward(); kept so that
        # existing checkpoints' state_dicts still load unchanged.
        self.linear = nn.Linear(512, 512)
        self.classifier = nn.Linear(512, 24)

    def forward(self, x):
        # Three conv stages, each followed by 2x2 average pooling, then a
        # fourth stage collapsed by global average pooling.
        for stage in (self.conv1, self.conv2, self.conv3):
            x = self.pool(stage(x))
        x = self.conv4(x)
        x = F.adaptive_avg_pool2d(x, (1, 1))
        x = x.view(-1, 512)
        return self.classifier(x)
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
# Build one row per recording with a 24-column multi-hot species matrix.
df = pd.DataFrame.from_dict(label_gen(), orient='index').reset_index()
df.columns = ['recording_id'] + ['species_' + str(n) for n in range(24)]
label_cols = [f"species_{m}" for m in range(24)]

# Stratified 5-fold split that preserves the multi-label distribution.
mst = MultilabelStratifiedKFold(n_splits=5)
X = df.recording_id.values
y = df[label_cols].values
df['kfold'] = -1
for fold, (_, val_ind) in enumerate(mst.split(X, y)):
    df.loc[val_ind, 'kfold'] = fold

df.to_csv("RFCX_kfold.csv", index=False)
|
||
|
||
|
||
# Augment the fold CSV with the annotated time span per recording and the
# true audio length (in seconds) measured from each .flac file.
dur_sample = []
data = pd.read_csv("RFCX_kfold.csv")
org_data = pd.read_csv("/home/lustbeast/AudioClass/Dataset/rfcx-species-audio-detection/train_tp.csv")
# Collapse multiple annotations per recording into one [t_min, t_max] span.
org_data = org_data.groupby("recording_id").agg({'t_min': 'min', 't_max': 'max'}).reset_index()
org_data['duration'] = org_data['t_max'] - org_data["t_min"]
# Round up and pad by 5 s so crops fully cover the annotated span.
org_data['duration'] = np.ceil(org_data.duration.values) + 5
data = data.merge(org_data, on="recording_id", how='left')
data['dur_sample'] = -1
for i in tqdm(range(len(data))):
    song_name = os.path.join("/home/lustbeast/AudioClass/Dataset/rfcx-species-audio-detection/train",
                             data.loc[i, "recording_id"] + ".flac")
    song, sr = librosa.load(song_name, sr=None)
    # BUG FIX: chained indexing (data['dur_sample'].iloc[i] = ...) may write
    # to a temporary copy and silently drop the value; .loc writes in place.
    data.loc[i, 'dur_sample'] = song.shape[0] / sr
# BUG FIX: index=False keeps the CSV schema consistent with the first write
# (the original appended a spurious index column here).
data.to_csv("RFCX_kfold.csv", index=False)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
import h5py | ||
import librosa | ||
import cv2 | ||
import numpy as np | ||
|
||
class params:
    """Mel-spectrogram extraction settings shared by this module."""

    # Audio is resampled to this rate on load (Hz).
    sampling_rate = 48000
    # Number of mel frequency bands.
    mel_bins = 128
    # Lowest frequency (Hz) in the mel filterbank.
    fmin = 20
    # Nyquist frequency: highest representable frequency at sampling_rate.
    fmax = sampling_rate // 2
|
||
def mono_to_color(x, eps=1e-6):
    """Standardize an array and min-max rescale it to uint8 in [0, 255].

    Args:
        x: numpy float array (e.g. a dB mel-spectrogram).
        eps: numerical-stability constant; also the minimum dynamic range
            below which the output is all zeros.

    Returns:
        np.uint8 array with the same shape as `x`.
    """
    # Zero-mean standardization.
    x = x - x.mean()
    # BUG FIX: the original referenced the undefined name `X` (its np.stack
    # line was commented out, so the function always raised NameError) and
    # computed `x / std + eps` instead of `x / (std + eps)` due to
    # operator precedence.
    Xstd = x / (x.std() + eps)
    _min, _max = Xstd.min(), Xstd.max()
    norm_max, norm_min = _max, _min
    if (_max - _min) > eps:
        # Clip (a no-op here since the bounds come from the data itself),
        # then min-max scale into the uint8 range.
        V = Xstd
        V[V < norm_min] = norm_min
        V[V > norm_max] = norm_max
        V = 255 * (V - norm_min) / (norm_max - norm_min)
        V = V.astype(np.uint8)
    else:
        # Degenerate dynamic range (e.g. constant input): return zeros.
        V = np.zeros_like(Xstd, dtype=np.uint8)

    return V
|
||
def wave_to_spec(path, offset=None, duration=None):
    """Load an audio file and return its uint8-normalized log-mel spectrogram.

    Args:
        path: audio file path readable by librosa.
        offset: optional start time in seconds.
        duration: optional clip length in seconds.

    Returns:
        uint8 array from mono_to_color() with mel bands on the first axis.
    """
    audio, sr = librosa.load(
        path,
        sr=params.sampling_rate,
        offset=offset,
        duration=duration,
    )
    mel = librosa.feature.melspectrogram(
        y=audio,
        sr=sr,
        n_mels=params.mel_bins,
        fmin=params.fmin,
        fmax=params.fmax,
    )
    # Convert power to dB, then min-max scale to uint8 for image-style models.
    return mono_to_color(librosa.power_to_db(mel).astype(np.float32))
Oops, something went wrong.