-
Notifications
You must be signed in to change notification settings - Fork 23
/
Copy pathutils.py
115 lines (90 loc) · 3.74 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import numpy as np
import tensorflow as tf
from scipy.io import wavfile
def normalize(data):
    """
    removes the mean from a signal and scales it by its peak absolute value

    data: array-like of numeric samples
    returns: the samples converted with np.float32, minus np.average(data),
    divided by the largest absolute value of the centered signal
    an empty input is returned as an empty array, and a constant signal is
    returned as all zeros instead of being divided by a zero peak
    """
    samples = np.float32(data)
    #nothing to center or scale, and np.max would raise on an empty array
    if samples.size == 0:
        return samples
    centered = samples - np.average(data)
    peak = np.max(np.abs(centered))
    #a constant signal has zero peak after centering, dividing would give NaN
    if peak == 0:
        return centered
    return centered / peak
def make_batch(path):
    """
    builds one (inputs, targets) pair from a wav file

    path: wav file passed to scipy.io.wavfile.read
    returns (inputs, targets), with T the number of samples in the file
    inputs : 1 * (T - 1) * 1 array, the bin center value of samples 0 to T - 2
    targets: 1 * (T - 1) array, the bin index of samples 1 to T - 1
    """
    _, data = wavfile.read(path)
    #only use the 1st channel; mono files are read as 1-D and have no channel axis to index
    if data.ndim > 1:
        data = data[:, 0]
    data_ = normalize(data)
    bins, bins_center = mu_law_bins(256)
    #quantize every sample once, inputs and targets are shifted slices of the result
    quantized = np.digitize(data_, bins, right=False)
    inputs = bins_center[quantized[:-1]][None, :, None]
    #predict sample 1 to end using 0 to end-1
    targets = quantized[1:][None, :]
    return (inputs, targets)
def make_batch_padded(path, num_layers = 14):
    """
    builds one (inputs, targets) pair from a wav file, cropped to a multiple of
    2 ** num_layers samples, with the inputs zero-padded on the left

    path: wav file passed to scipy.io.wavfile.read
    num_layers: the crop and pad size is derived from base = 2 ** num_layers
    returns (inputs_padded, targets_padded), with W the largest multiple of base
    that is not greater than (number of samples - 1)
    inputs_padded : 1 * (W + base - 1) * 1 array, base - 1 zeros followed by bin center values
    targets_padded: 1 * W array of bin indices
    """
    _, data = wavfile.read(path)
    #only use the 1st channel; mono files are read as 1-D and have no channel axis to index
    if data.ndim > 1:
        data = data[:, 0]
    data_ = normalize(data)
    bins, bins_center = mu_law_bins(256)
    #quantize every sample once, inputs and targets are shifted slices of the result
    quantized = np.digitize(data_, bins, right=False)
    inputs = bins_center[quantized[:-1]][None, :, None]
    #predict sample 1 to end using 0 to end-1
    targets = quantized[1:][None, :]
    base = 2 ** num_layers
    width = inputs.shape[1]
    #crop the width to make it multiple of base
    width_cropped = (width // base) * base
    #pad only the front of the time axis, np.pad fills with zeros in 'constant' mode
    inputs_padded = np.pad(inputs[:, 0:width_cropped, :], ((0, 0), (base - 1, 0), (0, 0)), 'constant')
    targets_padded = targets[:, 0:width_cropped]
    return (inputs_padded, targets_padded)
def mu_law_bins(num_bins):
    """
    computes the mu-law quantization bins for num_bins bins
    returns (right bin edges, bin centers), both converted to the mu-law scale
    """
    def to_mu_law(values):
        #sign(x) * (num_bins ** |x| - 1) / (num_bins - 1)
        return np.sign(values) * ((num_bins ** np.abs(values) - 1) / (num_bins - 1))

    #num_bins + 1 evenly spaced edges over [-1, 1]
    edges = np.linspace(-1, 1, num_bins + 1)
    #centers sit half a bin width inside the outermost edges
    half_width = 1.0 / num_bins
    centers = np.linspace(-1 + half_width, 1 - half_width, num_bins)
    #drop the leftmost edge to keep only the right edge of each bin
    right_edges = edges[1:]
    #move the last edge from 1 to 1.1 so a sample equal to 1 is not assigned past the last bin
    right_edges[-1] += 0.1
    return (to_mu_law(right_edges), to_mu_law(centers))
def mu_law_bins_tf(num_bins):
    """
    tensorflow counterpart of the mu-law bin computation for num_bins bins
    returns (right bin edges, bin centers) as tensors, both converted to the mu-law scale
    """
    def to_mu_law(values):
        #sign(x) * (num_bins ** |x| - 1) / (num_bins - 1)
        magnitude = (num_bins ** tf.abs(values) - 1) / (num_bins - 1)
        return tf.multiply(tf.sign(values), magnitude)

    #num_bins + 1 evenly spaced edges over [-1, 1]
    edges = tf.linspace(-1.0, 1.0, num_bins + 1)
    #centers sit half a bin width inside the outermost edges
    half_width = 1.0 / num_bins
    centers = tf.linspace(-1.0 + half_width, 1.0 - half_width, num_bins)
    #keep the interior right edges and use 1.1 in place of the final edge,
    #so a sample equal to 1 is not assigned past the last bin
    right_edges = tf.concat([edges[1:-1], [1.1]], 0)
    return (to_mu_law(right_edges), to_mu_law(centers))
def random_samples(bins, dist):
    """
    draws one value from bins for every row of dist
    bins: the candidate values handed to np.random.choice
    dist: N rows of non-negative weights, one weight per entry of bins
          (each row is divided by its sum before sampling)
    returns: N * 1 float32 array of the drawn values
    """
    num_rows = dist.shape[0]
    drawn = np.empty((num_rows, 1), dtype=np.float32)
    for row, weights in enumerate(dist):
        #rescale the row so it sums to 1, as np.random.choice requires for p
        probabilities = weights / np.sum(weights)
        picked = np.random.choice(bins, p=probabilities)
        drawn[row, 0] = picked.astype(np.float32)
    return drawn
def random_bins(num_classes, dist):
    """
    draws one bin index for every row of dist
    num_classes: handed to np.random.choice as the population to draw from
    dist: N rows of non-negative weights, one weight per class
          (each row is divided by its sum before sampling)
    returns: N * 1 int32 array of the drawn bin indices
    """
    num_rows = dist.shape[0]
    chosen = np.empty((num_rows, 1), dtype=np.int32)
    for row, weights in enumerate(dist):
        #rescale the row so it sums to 1, as np.random.choice requires for p
        probabilities = weights / np.sum(weights)
        chosen[row, 0] = np.random.choice(num_classes, p=probabilities)
    return chosen