Add files via upload
ThiruRJST authored Aug 31, 2021
1 parent 0e44682 commit be53bf5
Showing 9 changed files with 1,937 additions and 1 deletion.
369 changes: 369 additions & 0 deletions EDA.ipynb

Large diffs are not rendered by default.

35 changes: 34 additions & 1 deletion README.md
@@ -1 +1,34 @@
# MacaW

## Rainforest Connection Species Sound Classification

---

**NOTE**

This repo is being updated continuously, so the Python scripts change frequently. Clone it if you need a snapshot.

---

### Dataset

The dataset is provided through the Kaggle competition, along with guidelines that must be followed.
If you want to use this dataset, please refer to the Kaggle forums and adhere to the rules and regulations listed there.

### Exploratory Data Analysis

I carried out the EDA for this project using Tableau; the visualizations are available <a href="https://public.tableau.com/profile/thirumalai.kumar#!/vizhome/RFCX/RFCX-DASH">HERE</a>.

For insights and reports from the ongoing EDA, keep track of my <a href="https://docs.google.com/document/d/1jBp9GeHqm9p7PYN3CgWjFSBo1R2oDh7d-kK-olyIUXk/edit">GOOGLE DOC</a>.


### Models

All my models are constructed with reference to the <a href="https://arxiv.org/abs/1912.10211">PANN</a> research paper; a minimal end-to-end sketch of how the pieces fit together follows the checklist below.




- [x] Preprocessing Scripts
- [x] Model Construction
- [x] Training Pipeline
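
Below is a minimal sketch of how the preprocessing output, the fold CSV, and the model classes in this repo could be wired into a training loop. The h5 directory path, batch size, learning rate, and the `BCEWithLogitsLoss` choice are assumptions, not the final training script (which is still evolving).

```python
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from albumentations import Compose
from albumentations.pytorch import ToTensorV2

from dataset import AudioData, label_cols
from model import custom_resnet

folds = pd.read_csv("RFCX_kfold.csv")
train_df = folds[folds.kfold != 0].reset_index(drop=True)

train_ds = AudioData(records=train_df.recording_id.tolist(),
                     targets=train_df[label_cols].values,
                     root_dir="path/to/h5",            # directory produced by main.py
                     transforms=Compose([ToTensorV2()]))
loader = DataLoader(train_ds, batch_size=16, shuffle=True)

device = "cuda" if torch.cuda.is_available() else "cpu"
model = custom_resnet().to(device)
criterion = nn.BCEWithLogitsLoss()                     # 24-way multi-label targets
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

for epoch in range(5):
    for images, labels in loader:
        images, labels = images.float().to(device), labels.to(device)
        optimizer.zero_grad()
        loss = criterion(model(images), labels)
        loss.backward()
        optimizer.step()
```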
1,133 changes: 1,133 additions & 0 deletions RFCX_kfold.csv

Large diffs are not rendered by default.

64 changes: 64 additions & 0 deletions dataset.py
@@ -0,0 +1,64 @@
from torch.utils.data import Dataset,DataLoader
import pandas as pd
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold
import numpy as np
import h5py
import librosa
import cv2
import torch
from tqdm import tqdm
import os
from albumentations import Compose
from albumentations.pytorch import ToTensorV2

label_cols = ["species_"+str(m) for m in range(24)]

def h5read(path):
    # Read the spectrogram array stored under the 'pixels' key of an HDF5 file.
    with h5py.File(path, 'r') as hfile:
        return np.array(hfile.get('pixels'))

def label_gen(data):
    # Build a {recording_id: 24-dim multi-hot vector} mapping from the train_tp dataframe.
    label_dict = {}
    group = data.groupby('recording_id')
    files = data.recording_id.unique().tolist()
    for f in files:
        labels = np.zeros(24)
        sets = group.get_group(f)
        tmp = sets.species_id.unique()
        for i in tmp:
            labels[i] = 1.
        label_dict[f] = labels
    return label_dict


class AudioData(Dataset):

    def __init__(self, records, targets, root_dir, transforms=None):
        self.root_dir = root_dir
        self.targets = targets
        self.records = records
        self.transforms = transforms

    def __len__(self):
        return len(self.records)

    def __getitem__(self, idx):
        # Load the pre-computed spectrogram image and its multi-hot label vector.
        img_arr = h5read(os.path.join(self.root_dir, self.records[idx] + '.h5'))
        label = self.targets[idx]

        assert img_arr.shape[2] == 3
        if self.transforms is not None:
            image = self.transforms(image=img_arr)['image']
        else:
            # Fall back to a raw tensor when no albumentations pipeline is supplied.
            image = torch.from_numpy(img_arr)

        return image, torch.Tensor(label)


if __name__ == "__main__":
    # Quick sanity check on a single pre-computed spectrogram (local path).
    img = h5read("/home/lustbeast/AudioClass/Dataset/rfcx-species-audio-detection/h5/0a350d11c.h5")
    print(img.shape)
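    # Minimal DataLoader usage sketch (assumptions: RFCX_kfold.csv has the fold/label
    # layout produced by past.py, and the h5 root directory below is illustrative).
    folds = pd.read_csv("RFCX_kfold.csv")
    train_df = folds[folds.kfold != 0].reset_index(drop=True)
    tfms = Compose([ToTensorV2()])
    train_ds = AudioData(records=train_df.recording_id.tolist(),
                         targets=train_df[label_cols].values,
                         root_dir="/home/lustbeast/AudioClass/Dataset/rfcx-species-audio-detection/h5",
                         transforms=tfms)
    loader = DataLoader(train_ds, batch_size=16, shuffle=True, num_workers=2)
    images, labels = next(iter(loader))
    print(images.shape, labels.shape)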
60 changes: 60 additions & 0 deletions main.py
@@ -0,0 +1,60 @@
import argparse
import librosa
import os
import h5py
from glob import glob
import numpy as np
from tqdm import tqdm
from spectro import wave_to_spec
import pandas as pd


def check_and_create(path):
    if os.path.isdir(path):
        print("Path already exists")
    else:
        os.makedirs(path)


def save_spec(path, offset=None, dur=None, save_format=".npy", save_path=None):
    check_and_create(save_path)
    for i in tqdm(range(len(path))):
        filename = path[i].split("/")[-1].split('.')[0]
        mels = wave_to_spec(path[i], offset=offset, duration=dur)
        if save_format == '.npy':
            np.save(os.path.join(save_path, filename + save_format), mels)
        elif save_format == '.h5':
            h5fil = h5py.File(os.path.join(save_path, filename + save_format), 'w')
            h5fil.create_dataset('pixels', data=mels)
            h5fil.close()

if __name__ == "__main__":

    parser = argparse.ArgumentParser()
    parser.add_argument("--csv_path", help="The Path to the csv file.")
    parser.add_argument("--root_dir", help="The root dir of dataset.")
    parser.add_argument("--format", help="Data Format")
    parser.add_argument("--save_format", help="data format to save")
    parser.add_argument("--save_path", help="where to save")


    args = parser.parse_args()
    paths = args.csv_path
    save_as = args.save_format
    save_path = args.save_path
    root_dir = args.root_dir

    data = pd.read_csv(paths)
    check_and_create(save_path)
    for n in tqdm(range(len(data))):
        rid = data.recording_id.iloc[n]
        p = os.path.join(root_dir, rid + args.format)
        mel = wave_to_spec(p, offset=data.t_min.iloc[n], duration=data.duration.iloc[n])
        if save_as == ".h5":
            # Open in write mode so the dataset file is (re)created for each recording.
            h5fil = h5py.File(os.path.join(save_path, rid + save_as), 'w')
            h5fil.create_dataset("pixels", data=mel)
            h5fil.close()
        elif save_as == ".npy":
            # Save to the output directory, matching the .h5 branch.
            np.save(os.path.join(save_path, rid + save_as), arr=mel)
88 changes: 88 additions & 0 deletions model.py
@@ -0,0 +1,88 @@
import torch.nn as nn
import torch.nn.functional as F
import torch
from torchvision import models
import numpy as np
import timm
import h5py
from efficientnet_pytorch import EfficientNet



class custom_effnet(nn.Module):
    def __init__(self):
        super(custom_effnet, self).__init__()
        self.model = EfficientNet.from_pretrained('efficientnet-b3')
        # Adapt the stem to single-channel spectrogram input by averaging the RGB filters.
        self.model._conv_stem.in_channels = 1
        weight = self.model._conv_stem.weight.mean(1, keepdim=True)
        self.model._conv_stem.weight = torch.nn.Parameter(weight)
        # efficientnet_pytorch exposes the classification head as `_fc`, not `classifier`.
        num_ftrs = self.model._fc.in_features
        self.model._fc = nn.Linear(num_ftrs, 24)

    def forward(self, x):
        return self.model(x)

class custom_resnet(nn.Module):
    def __init__(self):
        super(custom_resnet, self).__init__()
        self.model = models.resnet18(pretrained=True)
        num_ftrs = self.model.fc.in_features
        self.model.fc = nn.Linear(num_ftrs, 24)

    def forward(self, x):
        return self.model(x)

class ConvMod(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, act=True, strides=1):
        super(ConvMod, self).__init__()
        self.conv = nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                              kernel_size=kernel_size, stride=strides)
        self.bn = nn.BatchNorm2d(out_channels)
        self.act = act

    def forward(self, x):
        if self.act is True:
            x = F.relu(self.bn(self.conv(x)))
        else:
            x = self.bn(self.conv(x))

        return x

class CNN_14(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size):
        super(CNN_14, self).__init__()
        self.conv1 = nn.Sequential(
            ConvMod(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size),
            ConvMod(64, 64, kernel_size=kernel_size)
        )
        self.conv2 = nn.Sequential(
            ConvMod(64, 128, kernel_size=kernel_size),
            ConvMod(128, 128, kernel_size=kernel_size)
        )
        self.conv3 = nn.Sequential(
            ConvMod(128, 256, kernel_size=kernel_size),
            ConvMod(256, 256, kernel_size=kernel_size)
        )
        self.conv4 = nn.Sequential(
            ConvMod(256, 512, kernel_size=kernel_size),
            ConvMod(512, 512, kernel_size=kernel_size)
        )
        self.pool = nn.AvgPool2d(kernel_size=(2, 2))
        self.linear = nn.Linear(512, 512)
        self.classifier = nn.Linear(512, 24)

    def forward(self, x):
        x = self.conv1(x)
        x = self.pool(x)
        x = self.conv2(x)
        x = self.pool(x)
        x = self.conv3(x)
        x = self.pool(x)
        x = self.conv4(x)
        # Global pooling to a 512-dim embedding, then the fully connected head.
        x = F.adaptive_avg_pool2d(x, (1, 1))
        x = x.view(-1, 512)
        x = F.relu(self.linear(x))
        x = self.classifier(x)
        return x
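
if __name__ == "__main__":
    # Quick shape check (assumed input: a batch of 1-channel, 128-mel spectrogram patches).
    net = CNN_14(in_channels=1, out_channels=64, kernel_size=3)
    dummy = torch.randn(2, 1, 128, 256)
    print(net(dummy).shape)  # expected: torch.Size([2, 24])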


30 changes: 30 additions & 0 deletions past.py
@@ -0,0 +1,30 @@
# Scratch script that builds the stratified k-fold split CSV used by the pipeline.
import os

import librosa
import numpy as np
import pandas as pd
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold
from tqdm import tqdm

from dataset import label_gen

# Assumption: label_gen is fed the train_tp.csv dataframe (the same file used below).
train_tp = pd.read_csv("/home/lustbeast/AudioClass/Dataset/rfcx-species-audio-detection/train_tp.csv")
df = pd.DataFrame.from_dict(label_gen(train_tp), orient='index').reset_index()
df.columns = ['recording_id'] + ['species_' + str(n) for n in range(24)]
label_cols = ["species_" + str(m) for m in range(24)]

mst = MultilabelStratifiedKFold(n_splits=5)
X = df.recording_id.values
y = df[label_cols].values
df['kfold'] = -1
for i, (train_ind, val_ind) in enumerate(mst.split(X, y)):
    df.loc[val_ind, 'kfold'] = i

df.to_csv("RFCX_kfold.csv", index=False)



# Append per-recording clip durations (in seconds) to the fold CSV.
data = pd.read_csv("RFCX_kfold.csv")
org_data = pd.read_csv("/home/lustbeast/AudioClass/Dataset/rfcx-species-audio-detection/train_tp.csv")
org_data = org_data.groupby("recording_id").agg({'t_min': lambda x: min(x), 't_max': lambda x: max(x)}).reset_index()
org_data['duration'] = org_data['t_max'] - org_data["t_min"]
org_data['duration'] = np.ceil(org_data.duration.values) + 5
data = data.merge(org_data, on="recording_id", how='left')
data['dur_sample'] = -1
for i in tqdm(range(len(data))):
    song_name = os.path.join("/home/lustbeast/AudioClass/Dataset/rfcx-species-audio-detection/train",
                             data.loc[i, "recording_id"] + ".flac")
    song, sr = librosa.load(song_name, sr=None)
    durs = song.shape[0] / sr
    # Assign with .loc to avoid pandas chained-assignment warnings.
    data.loc[i, 'dur_sample'] = durs
data.to_csv("RFCX_kfold.csv", index=False)
38 changes: 38 additions & 0 deletions spectro.py
@@ -0,0 +1,38 @@
import h5py
import librosa
import cv2
import numpy as np

class params:

    sampling_rate = 48000
    mel_bins = 128
    fmin = 20
    fmax = sampling_rate // 2

def mono_to_color(x, eps=1e-6):
    # Stack the mono spectrogram into 3 channels, standardize, then rescale to 0-255 uint8.
    X = np.stack([x, x, x], axis=-1)
    mean = X.mean()
    std = X.std()
    Xstd = (X - mean) / (std + eps)
    _min, _max = Xstd.min(), Xstd.max()
    norm_max, norm_min = _max, _min
    if (_max - _min) > eps:
        V = Xstd
        V[V < norm_min] = norm_min
        V[V > norm_max] = norm_max
        V = 255 * (V - norm_min) / (norm_max - norm_min)
        V = V.astype(np.uint8)
    else:
        V = np.zeros_like(Xstd, dtype=np.uint8)

    return V

def wave_to_spec(path, offset=None, duration=None):
    audio, sr = librosa.load(path, sr=params.sampling_rate, offset=offset, duration=duration)
    melspec = librosa.feature.melspectrogram(y=audio, sr=sr, n_mels=params.mel_bins,
                                             fmin=params.fmin, fmax=params.fmax)
    melspec = librosa.power_to_db(melspec).astype(np.float32)
    melspec = mono_to_color(melspec)
    return melspec
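
if __name__ == "__main__":
    # Minimal sketch: convert one recording to a 3-channel spectrogram image
    # (the file name is illustrative).
    spec = wave_to_spec("example.flac", offset=0.0, duration=10.0)
    print(spec.shape, spec.dtype)  # roughly (128, time_frames, 3), uint8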