-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
9 changed files
with
1,937 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,34 @@ | ||
# MacaW | ||
# MacaW | ||
|
||
## Rainforest Connection Species Sound Classification | ||
|
||
--- | ||
|
||
**NOTE** | ||
|
||
This repo is under continuous updates, so the Python scripts change continuously. Clone it if you need a stable snapshot. | ||
|
||
--- | ||
|
||
### Dataset | ||
|
||
The dataset has been provided with this kaggle competitions with guidelines to be followed. | ||
If you want to use this dataset, please refer to the Kaggle forums and adhere to the rules and regulations listed there. | ||
|
||
### Exploratory Data Analysis | ||
|
||
I have carried out the EDA part of this project using Tableau; the visualizations are available <a href="https://public.tableau.com/profile/thirumalai.kumar#!/vizhome/RFCX/RFCX-DASH">HERE</a>. | ||
|
||
For insights and reports from this ongoing EDA, keep in touch via my <a href="https://docs.google.com/document/d/1jBp9GeHqm9p7PYN3CgWjFSBo1R2oDh7d-kK-olyIUXk/edit">GOOGLE DOC</a>. | ||
|
||
|
||
### Models | ||
|
||
All my models have been constructed by referring to the <a href="https://arxiv.org/abs/1912.10211">PANN</a> research paper. | ||
|
||
|
||
|
||
|
||
- [x] Preprocessing Scripts | ||
- [x] Model Construction | ||
- [x] Training Pipeline |
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
from torch.utils.data import Dataset,DataLoader | ||
import pandas as pd | ||
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold | ||
import numpy as np | ||
import h5py | ||
import librosa | ||
import cv2 | ||
import torch | ||
from tqdm import tqdm | ||
import os | ||
from albumentations import Compose | ||
from albumentations.pytorch import ToTensorV2 | ||
|
||
# Column names for the 24-way multi-hot species label matrix.
label_cols = [f"species_{m}" for m in range(24)]
|
||
def h5read(path):
    """Load the 'pixels' dataset from an HDF5 file as a NumPy array.

    Args:
        path: Path to a .h5 file containing a 'pixels' dataset.

    Returns:
        np.ndarray with the stored spectrogram pixels.
    """
    # BUG FIX: use a context manager so the file handle is always closed;
    # the original leaked the open handle.
    with h5py.File(path, 'r') as hfile:
        return np.array(hfile.get('pixels'))
|
||
def label_gen():
    """Build a mapping recording_id -> 24-dim multi-hot species label vector.

    NOTE(review): relies on module-level globals `data` (a DataFrame with a
    `recording_id` column) and `group` (a groupby over recording ids) that
    are defined elsewhere in this module -- confirm before reuse.
    """
    label_dict = {}
    for rec_id in data.recording_id.unique().tolist():
        vec = np.zeros(24)
        # Mark every species annotated for this recording.
        for species in group.get_group(rec_id).species_id.unique():
            vec[species] = 1.
        label_dict[rec_id] = vec
    return label_dict
|
||
|
||
class AudioData(Dataset):
    """PyTorch Dataset yielding (spectrogram image, multi-hot label) pairs.

    Each record is loaded from `<root_dir>/<recording_id>.h5` via h5read().
    """

    def __init__(self, records, targets, root_dir, transforms=None):
        """
        Args:
            records: sequence of recording ids (file stems of the .h5 files).
            targets: sequence of label vectors aligned with `records`.
            root_dir: directory containing the .h5 spectrogram files.
            transforms: optional albumentations Compose applied to the image.
        """
        self.root_dir = root_dir
        self.targets = targets
        self.records = records
        self.transforms = transforms

    def __len__(self):
        return len(self.records)

    def __getitem__(self, idx):
        img_arr = h5read(os.path.join(self.root_dir, self.records[idx] + '.h5'))
        label = self.targets[idx]

        # Stored spectrograms are expected to be 3-channel images.
        assert img_arr.shape[2] == 3

        # BUG FIX: the original left `image` unbound when transforms was
        # None, raising NameError; fall back to the raw array in that case.
        if self.transforms is not None:
            image = self.transforms(image=img_arr)['image']
        else:
            image = img_arr

        return image, torch.Tensor(label)
|
||
|
||
if __name__ == "__main__":
    # Smoke test: inspect one known spectrogram's shape.
    # BUG FIX: guarded under __main__ so importing this module no longer
    # performs file I/O against a hard-coded absolute path.
    img = h5read("/home/lustbeast/AudioClass/Dataset/rfcx-species-audio-detection/h5/0a350d11c.h5")
    print(img.shape)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
import argparse | ||
import librosa | ||
import os | ||
import h5py | ||
from glob import glob | ||
import numpy as np | ||
from tqdm import tqdm | ||
from spectro import wave_to_spec | ||
import pandas as pd | ||
|
||
|
||
def check_and_create(path):
    """Ensure the directory `path` exists, creating intermediate dirs as needed.

    Prints a notice when the directory already exists (kept for parity with
    the original behaviour).
    """
    try:
        # EAFP: a single makedirs call instead of the original
        # isdir()+makedirs() pair, which was racy between check and create.
        os.makedirs(path)
    except FileExistsError:
        print("Path already exists")
|
||
|
||
def save_spec(path, offset=None, dur=None, save_format=".npy", save_path=None):
    """Convert audio files to mel-spectrograms and save them to disk.

    Args:
        path: list of audio file paths to process.
        offset: optional start time (seconds) passed to wave_to_spec.
        dur: optional duration (seconds) passed to wave_to_spec.
        save_format: ".npy" or ".h5"; anything else is silently skipped.
        save_path: output directory (created if missing).
    """
    check_and_create(save_path)
    for audio_path in tqdm(path):
        # os.path is portable, unlike the original's split on "/".
        filename = os.path.splitext(os.path.basename(audio_path))[0]
        mels = wave_to_spec(audio_path, offset=offset, duration=dur)
        out = os.path.join(save_path, filename + save_format)
        if save_format == '.npy':
            np.save(out, mels)
        elif save_format == '.h5':
            # Context manager guarantees the file is closed even on error.
            with h5py.File(out, 'w') as h5fil:
                h5fil.create_dataset('pixels', data=mels)
|
||
if __name__ == "__main__":
    # CLI entry point: read the annotation CSV and dump one spectrogram
    # per recording into save_path in the requested format.
    parser = argparse.ArgumentParser()
    parser.add_argument("--csv_path", help="The Path to the csv file.")
    parser.add_argument("--root_dir", help="The root dir of dataset.")
    parser.add_argument("--format", help="Data Format")
    parser.add_argument("--save_format", help="data format to save")
    parser.add_argument("--save_path", help="where to save")

    args = parser.parse_args()
    paths = args.csv_path
    save_as = args.save_format
    save_path = args.save_path
    root_dir = args.root_dir

    data = pd.read_csv(paths)
    check_and_create(save_path)
    for n in tqdm(range(len(data))):
        rid = data.recording_id.iloc[n]
        p = os.path.join(root_dir, rid + args.format)
        mel = wave_to_spec(p, offset=data.t_min.iloc[n], duration=data.duration.iloc[n])
        if save_as == ".h5":
            # BUG FIX: open explicitly in write mode; h5py >= 3 defaults to
            # read-only, which fails when the file does not yet exist.
            with h5py.File(os.path.join(save_path, rid + save_as), 'w') as h5fil:
                h5fil.create_dataset("pixels", data=mel)
        elif save_as == ".npy":
            # BUG FIX: write .npy files into save_path, matching the .h5
            # branch; the original wrote them into root_dir instead.
            np.save(os.path.join(save_path, rid + save_as), arr=mel)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
import torch.nn as nn | ||
import torch.nn.functional as F | ||
import torch | ||
from torchvision import models | ||
import numpy as np | ||
import timm | ||
import h5py | ||
from efficientnet_pytorch import EfficientNet | ||
|
||
|
||
|
||
class custom_effnet(nn.Module):
    """EfficientNet-B3 adapted to 1-channel input and 24 output classes."""

    def __init__(self):
        super(custom_effnet, self).__init__()
        self.model = EfficientNet.from_pretrained('efficientnet-b3')
        # Adapt the stem conv to single-channel input by averaging the
        # pretrained RGB filters (keeps the pretrained activation scale).
        self.model._conv_stem.in_channels = 1
        weight = self.model._conv_stem.weight.mean(1, keepdim=True)
        self.model._conv_stem.weight = torch.nn.Parameter(weight)
        # BUG FIX: efficientnet_pytorch exposes the classification head as
        # `_fc`, not `classifier`; the original raised AttributeError here.
        # (Also dropped the debug print of the whole model.)
        num_ftrs = self.model._fc.in_features
        self.model._fc = nn.Linear(num_ftrs, 24)

    def forward(self, x):
        return self.model(x)
|
||
class custom_resnet(nn.Module):
    """ResNet-18 backbone with its final FC layer replaced by a 24-way head."""

    def __init__(self):
        super(custom_resnet, self).__init__()
        backbone = models.resnet18(pretrained=True)
        # Swap the ImageNet head for a 24-class classifier.
        backbone.fc = nn.Linear(backbone.fc.in_features, 24)
        self.model = backbone

    def forward(self, x):
        return self.model(x)
|
||
class ConvMod(nn.Module):
    """Conv2d + BatchNorm2d block with an optional trailing ReLU."""

    def __init__(self, in_channels, out_channels, kernel_size, act=True, strides=1):
        super(ConvMod, self).__init__()
        self.conv = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=strides,
        )
        self.bn = nn.BatchNorm2d(out_channels)
        self.act = act

    def forward(self, x):
        out = self.bn(self.conv(x))
        # Apply the non-linearity only when requested at construction time.
        return F.relu(out) if self.act is True else out
|
||
class CNN_14(nn.Module):
    """VGG-style stack of paired ConvMod blocks ending in a 24-way head.

    Architecture inspired by the PANN CNN14 model (arXiv:1912.10211).
    """

    def __init__(self, in_channels, out_channels, kernel_size):
        super(CNN_14, self).__init__()
        self.conv1 = nn.Sequential(
            ConvMod(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size),
            ConvMod(64, 64, kernel_size=kernel_size),
        )
        self.conv2 = nn.Sequential(
            ConvMod(64, 128, kernel_size=kernel_size),
            ConvMod(128, 128, kernel_size=kernel_size),
        )
        self.conv3 = nn.Sequential(
            ConvMod(128, 256, kernel_size=kernel_size),
            ConvMod(256, 256, kernel_size=kernel_size),
        )
        self.conv4 = nn.Sequential(
            ConvMod(256, 512, kernel_size=kernel_size),
            ConvMod(512, 512, kernel_size=kernel_size),
        )
        self.pool = nn.AvgPool2d(kernel_size=(2, 2))
        # NOTE(review): self.linear is never used in forward(); kept so that
        # existing checkpoints' state_dicts still load unchanged.
        self.linear = nn.Linear(512, 512)
        self.classifier = nn.Linear(512, 24)

    def forward(self, x):
        # Three conv stages, each followed by 2x2 average pooling, then a
        # fourth stage collapsed by global average pooling.
        for stage in (self.conv1, self.conv2, self.conv3):
            x = self.pool(stage(x))
        x = self.conv4(x)
        x = F.adaptive_avg_pool2d(x, (1, 1))
        x = x.view(-1, 512)
        return self.classifier(x)
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
# Build one row per recording with a 24-column multi-hot species matrix.
df = pd.DataFrame.from_dict(label_gen(), orient='index').reset_index()
df.columns = ['recording_id'] + ['species_' + str(n) for n in range(24)]
label_cols = [f"species_{m}" for m in range(24)]

# Stratified 5-fold split that preserves the multi-label distribution.
mst = MultilabelStratifiedKFold(n_splits=5)
X = df.recording_id.values
y = df[label_cols].values
df['kfold'] = -1
for fold, (_, val_ind) in enumerate(mst.split(X, y)):
    df.loc[val_ind, 'kfold'] = fold

df.to_csv("RFCX_kfold.csv", index=False)
|
||
|
||
|
||
# Augment the fold CSV with the annotated time span per recording and the
# true audio length (in seconds) measured from each .flac file.
dur_sample = []
data = pd.read_csv("RFCX_kfold.csv")
org_data = pd.read_csv("/home/lustbeast/AudioClass/Dataset/rfcx-species-audio-detection/train_tp.csv")
# Collapse multiple annotations per recording into one [t_min, t_max] span.
org_data = org_data.groupby("recording_id").agg({'t_min': 'min', 't_max': 'max'}).reset_index()
org_data['duration'] = org_data['t_max'] - org_data["t_min"]
# Round up and pad by 5 s so crops fully cover the annotated span.
org_data['duration'] = np.ceil(org_data.duration.values) + 5
data = data.merge(org_data, on="recording_id", how='left')
data['dur_sample'] = -1
for i in tqdm(range(len(data))):
    song_name = os.path.join("/home/lustbeast/AudioClass/Dataset/rfcx-species-audio-detection/train",
                             data.loc[i, "recording_id"] + ".flac")
    song, sr = librosa.load(song_name, sr=None)
    # BUG FIX: chained indexing (data['dur_sample'].iloc[i] = ...) may write
    # to a temporary copy and silently drop the value; .loc writes in place.
    data.loc[i, 'dur_sample'] = song.shape[0] / sr
# BUG FIX: index=False keeps the CSV schema consistent with the first write
# (the original appended a spurious index column here).
data.to_csv("RFCX_kfold.csv", index=False)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
import h5py | ||
import librosa | ||
import cv2 | ||
import numpy as np | ||
|
||
class params:
    """Mel-spectrogram extraction settings shared by this module."""

    # Audio is resampled to this rate on load (Hz).
    sampling_rate = 48000
    # Number of mel frequency bands.
    mel_bins = 128
    # Lowest frequency (Hz) in the mel filterbank.
    fmin = 20
    # Nyquist frequency: highest representable frequency at sampling_rate.
    fmax = sampling_rate // 2
|
||
def mono_to_color(x, eps=1e-6):
    """Standardize an array and min-max rescale it to uint8 in [0, 255].

    Args:
        x: numpy float array (e.g. a dB mel-spectrogram).
        eps: numerical-stability constant; also the minimum dynamic range
            below which the output is all zeros.

    Returns:
        np.uint8 array with the same shape as `x`.
    """
    # Zero-mean standardization.
    x = x - x.mean()
    # BUG FIX: the original referenced the undefined name `X` (its np.stack
    # line was commented out, so the function always raised NameError) and
    # computed `x / std + eps` instead of `x / (std + eps)` due to
    # operator precedence.
    Xstd = x / (x.std() + eps)
    _min, _max = Xstd.min(), Xstd.max()
    norm_max, norm_min = _max, _min
    if (_max - _min) > eps:
        # Clip (a no-op here since the bounds come from the data itself),
        # then min-max scale into the uint8 range.
        V = Xstd
        V[V < norm_min] = norm_min
        V[V > norm_max] = norm_max
        V = 255 * (V - norm_min) / (norm_max - norm_min)
        V = V.astype(np.uint8)
    else:
        # Degenerate dynamic range (e.g. constant input): return zeros.
        V = np.zeros_like(Xstd, dtype=np.uint8)

    return V
|
||
def wave_to_spec(path, offset=None, duration=None):
    """Load an audio file and return its uint8-normalized log-mel spectrogram.

    Args:
        path: audio file path readable by librosa.
        offset: optional start time in seconds.
        duration: optional clip length in seconds.

    Returns:
        uint8 array from mono_to_color() with mel bands on the first axis.
    """
    audio, sr = librosa.load(
        path,
        sr=params.sampling_rate,
        offset=offset,
        duration=duration,
    )
    mel = librosa.feature.melspectrogram(
        y=audio,
        sr=sr,
        n_mels=params.mel_bins,
        fmin=params.fmin,
        fmax=params.fmax,
    )
    # Convert power to dB, then min-max scale to uint8 for image-style models.
    return mono_to_color(librosa.power_to_db(mel).astype(np.float32))
Oops, something went wrong.