-
Notifications
You must be signed in to change notification settings - Fork 43
/
Copy pathutils.py
122 lines (94 loc) · 3.8 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import glob
import os
import sys

import librosa
import matplotlib
import numpy as np

# Force matplotlib to not use any Xwindows backend. This has to run before
# pyplot (or anything that pulls it in, such as librosa.display) is imported.
matplotlib.use('Agg')
import librosa.display
import matplotlib.pyplot as plt
from matplotlib.pyplot import specgram
def load_sound_files(file_paths, sr=44100):
    """Load every audio file in *file_paths* and return the decoded signals.

    Args:
        file_paths: Iterable of paths, each handed to ``librosa.load``.
        sr: Sample rate forwarded to ``librosa.load``. Defaults to 44100,
            the value that used to be hard-coded here.

    Returns:
        A list with one entry per path, in input order. Each entry is the
        first element of the ``(signal, sample_rate)`` pair returned by
        ``librosa.load``; the returned sample rate is discarded.
    """
    return [librosa.load(fp, sr=sr)[0] for fp in file_paths]
def plot_waves(sound_names, raw_sounds):
    """Draw one waveform subplot per sound, stacked in a single column.

    Args:
        sound_names: Iterable of strings; each is title-cased with
            ``str.title()`` and used as the subplot title.
        raw_sounds: Iterable of audio signals, paired with *sound_names*
            via ``zip`` (so the shorter of the two decides how many
            subplots are drawn). Signals are plotted with sr=44100.

    Returns:
        None. The figure is handed to ``plt.show()``.
    """
    pairs = list(zip(sound_names, raw_sounds))
    # The grid used to be fixed at 10 rows, so an 11th sound made
    # plt.subplot raise. Keep 10 as the minimum so layouts for up to ten
    # sounds are unchanged, and grow only when more are supplied.
    n_rows = max(10, len(pairs))
    # Prefer the original waveplot; newer librosa releases removed it in
    # favour of waveshow, so fall back only when it is missing.
    draw_wave = getattr(librosa.display, 'waveplot', None) or librosa.display.waveshow

    plt.figure(figsize=(25, 90), dpi=900)
    for row, (name, samples) in enumerate(pairs, start=1):
        plt.subplot(n_rows, 1, row)
        draw_wave(np.array(samples), sr=44100)
        plt.title(name.title())
    plt.suptitle('Figure 1: Waveplot', x=0.5, y=0.915, fontsize=18)
    plt.show()
def plot_specgram(sound_names, raw_sounds):
    """Draw one matplotlib spectrogram subplot per sound in a single column.

    Args:
        sound_names: Iterable of strings; each is title-cased with
            ``str.title()`` and used as the subplot title.
        raw_sounds: Iterable of audio signals, paired with *sound_names*
            via ``zip``. Each is passed to ``specgram`` with Fs=44100.

    Returns:
        None. The figure is handed to ``plt.show()``.
    """
    pairs = list(zip(sound_names, raw_sounds))
    # The grid used to be fixed at 10 rows, so an 11th sound made
    # plt.subplot raise. Keep 10 as the minimum so layouts for up to ten
    # sounds are unchanged, and grow only when more are supplied.
    n_rows = max(10, len(pairs))

    plt.figure(figsize=(25, 90), dpi=900)
    for row, (name, samples) in enumerate(pairs, start=1):
        plt.subplot(n_rows, 1, row)
        specgram(np.array(samples), Fs=44100)
        plt.title(name.title())
    plt.suptitle('Figure 2: Spectrogram', x=0.5, y=0.915, fontsize=18)
    plt.show()
def plot_log_power_specgram(sound_names, raw_sounds):
    """Draw one log-power spectrogram subplot per sound in a single column.

    For every signal the squared magnitude of ``librosa.stft`` is converted
    to a log scale relative to its maximum and rendered with
    ``librosa.display.specshow`` (time on x, log frequency on y).

    Args:
        sound_names: Iterable of strings; each is title-cased with
            ``str.title()`` and used as the subplot title.
        raw_sounds: Iterable of audio signals, paired with *sound_names*
            via ``zip``.

    Returns:
        None. The figure is handed to ``plt.show()``.
    """
    pairs = list(zip(sound_names, raw_sounds))
    # The grid used to be fixed at 10 rows, so an 11th sound made
    # plt.subplot raise. Keep 10 as the minimum so layouts for up to ten
    # sounds are unchanged, and grow only when more are supplied.
    n_rows = max(10, len(pairs))
    # logamplitude is the call this module was written against; later
    # librosa releases dropped it in favour of power_to_db, so use the
    # replacement only when the legacy function is missing.
    legacy_log = getattr(librosa, 'logamplitude', None)

    plt.figure(figsize=(25, 90), dpi=900)
    for row, (name, samples) in enumerate(pairs, start=1):
        plt.subplot(n_rows, 1, row)
        power = np.abs(librosa.stft(samples)) ** 2
        if legacy_log is not None:
            log_power = legacy_log(power, ref_power=np.max)
        else:
            log_power = librosa.power_to_db(power, ref=np.max)
        librosa.display.specshow(log_power, x_axis='time', y_axis='log')
        plt.title(name.title())
    plt.suptitle('Figure 3: Log power spectrogram', x=0.5, y=0.915, fontsize=18)
    plt.show()
def specgram_frombuffer(raw_sound, dimx, dimy, fname='', fs=44100, show=False, dpi=900):
    """Render a spectrogram of *raw_sound* with hidden axes and save it.

    fs is the sampling rate of the audio signal. It has nothing to do with
    the number of plotted samples.

    Args:
        raw_sound: Audio signal; converted with ``np.array`` before plotting.
        dimx: Second component of the matplotlib ``figsize`` (the height).
        dimy: First component of the matplotlib ``figsize`` (the width).
        fname: Output path for ``plt.savefig``. When empty (or None) the
            image is written to ``'./data/unnamed.png'``.
        fs: Sampling rate passed to ``specgram`` as ``Fs``.
        show: When true, ``plt.show()`` is called after saving.
        dpi: Resolution used when creating the figure.

    Returns:
        None. The figure is always closed before returning.
    """
    # Plain truthiness instead of len() so that fname=None also selects the
    # default path rather than raising TypeError.
    filename = fname if fname else './data/unnamed.png'

    fig = plt.figure(figsize=(dimy, dimx), dpi=dpi)
    try:
        # The spectrogram is drawn in the top cell of a 3-row grid.
        plt.subplot(3, 1, 1)
        specgram(np.array(raw_sound), Fs=fs)
        plt.axis('off')
        plt.tight_layout()
        plt.savefig(filename, bbox_inches='tight', pad_inches=0)
        if show:
            plt.show()
    finally:
        # Close even when plotting or saving fails so figures do not pile
        # up in memory across repeated calls.
        plt.close(fig)
def extract_feature(file_name):
    """Compute five time-averaged librosa feature vectors for one audio file.

    The file is loaded with ``librosa.load`` (no ``sr`` is passed, so
    librosa's own default applies). Each feature matrix is transposed and
    averaged over axis 0, giving one mean value per feature row.

    Args:
        file_name: Path of the audio file to analyse.

    Returns:
        A tuple ``(mfccs, chroma, mel, contrast, tonnetz)`` of 1-D means:
        MFCCs (``n_mfcc=40``), chroma and spectral contrast (both computed
        from the STFT magnitude), mel spectrogram, and tonnetz of the
        harmonic component of the signal.
    """
    X, sample_rate = librosa.load(file_name)
    # Magnitude spectrogram shared by the chroma and contrast features.
    stft = np.abs(librosa.stft(X))
    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
    # Pass the signal by keyword: newer librosa releases reject it as a
    # positional argument, while y= is accepted by old and new versions.
    mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
    tonnetz = np.mean(
        librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T,
        axis=0,
    )
    return mfccs, chroma, mel, contrast, tonnetz
def parse_audio_files(parent_dir, sub_dirs, file_ext='*.wav'):
    """Extract features and integer labels for every matching audio file.

    Files are discovered with ``glob`` under ``parent_dir/sub_dir/file_ext``
    for each entry of *sub_dirs*. Each file is passed to ``extract_feature``
    and its five vectors are concatenated into one row.

    Args:
        parent_dir: Directory that contains the sub-directories.
        sub_dirs: Iterable of sub-directory names to scan.
        file_ext: Glob pattern for the files inside each sub-directory.

    Returns:
        A pair ``(features, labels)``. ``features`` is a 2-D float array
        with 193 columns and one row per file (shape ``(0, 193)`` when no
        file matches). ``labels`` is a 1-D integer array holding, for each
        file, the second ``'-'``-separated field of its base name.

    Raises:
        ValueError: If a feature row is not 193 wide, or a label field is
            not an integer.
        IndexError: If a file's base name contains no ``'-'``.
    """
    # Start from the empty (0, 193) block so the result keeps the expected
    # width and float dtype even when nothing matches; rows are collected in
    # a list and stacked once instead of re-copying the matrix per file.
    rows = [np.empty((0, 193))]
    labels = []
    for sub_dir in sub_dirs:
        for fn in glob.glob(os.path.join(parent_dir, sub_dir, file_ext)):
            mfccs, chroma, mel, contrast, tonnetz = extract_feature(fn)
            rows.append(np.hstack([mfccs, chroma, mel, contrast, tonnetz]))
            # Read the label from the file name itself rather than from a
            # fixed '/'-split position, which broke for nested parent
            # directories and for non-'/' path separators.
            labels.append(int(os.path.basename(fn).split('-')[1]))
    # Builtin int replaces the np.int alias that newer NumPy removed.
    return np.vstack(rows), np.array(labels, dtype=int)
def one_hot_encode(labels):
    """Return a one-hot matrix in which row *i* has a 1 in column ``labels[i]``.

    Args:
        labels: 1-D sequence or array of integer class labels.

    Returns:
        A float array of shape ``(len(labels), n_columns)``. ``n_columns``
        is the number of distinct labels when they are exactly
        ``0..k-1``, and otherwise large enough to index the largest label.
        Empty input yields an array of shape ``(0, 0)``.
    """
    labels = np.asarray(labels)
    n_labels = len(labels)
    if n_labels == 0:
        return np.zeros((0, 0))
    # Sizing by the unique count alone raised IndexError whenever a class
    # was missing (e.g. labels [1, 3]); make sure the largest label fits
    # while keeping the column index equal to the label value.
    n_columns = max(len(np.unique(labels)), int(labels.max()) + 1)
    encoded = np.zeros((n_labels, n_columns))
    encoded[np.arange(n_labels), labels] = 1
    return encoded
def printStuff(msg, arg):
    """ Write a carriage return followed by ``msg % arg`` to stdout, then flush.

    No newline is written, so on a terminal successive calls redraw the
    same line (handy for progress output).
    Example: msg = "Converting control data %d%%"
    """
    out = sys.stdout
    out.write('\r')
    out.write(msg % arg)
    out.flush()