tf_nn.py
import json
import tqdm
from math import sqrt
import numpy as np
from scipy.sparse import load_npz
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.contrib import rnn
from tensorflow.examples.tutorials.mnist import input_data
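# NOTE: this script targets the TensorFlow 1.x API (tf.placeholder, tf.Session,
# tf.contrib.rnn, tensorflow.examples.tutorials.mnist) and will not run under TF 2.x as-is.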
def dnn(x, _num_features, n_classes):
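    """Fully connected feed-forward network: three hidden layers of 500 ReLU units
    each, followed by a linear output layer producing `n_classes` logits."""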
    # Define network architecture
    input_size = _num_features
    n_nodes_hl = [500] * 3  # hidden layer sizes
    activation_fn = [tf.nn.relu] * 3  # allows for a different activation fn per layer
    architecture = [input_size] + n_nodes_hl + [n_classes]  # concatenate lists
    # Define layer variables
    layers = []
    for i, n_nodes in enumerate(architecture):
        if i == 0:
            continue  # the input layer has no previous size to reference
        layers.append({
            'weights': tf.Variable(tf.random_normal(shape=[architecture[i - 1], n_nodes])),
            'biases': tf.Variable(tf.random_normal(shape=[n_nodes]))
        })
    # Define model for the feed-forward pass
    model = None
    for i, layer in enumerate(layers[:-1]):  # leave out the output layer (only input & hidden layers)
        _input = x if i == 0 else model
        model = activation_fn[i](tf.matmul(_input, layer['weights']) + layer['biases'])
    # Return output logits
    output_layer = layers[-1]
    return tf.matmul(model, output_layer['weights']) + output_layer['biases']
def rnn_lstm(x, _num_features, _n_classes, **kwargs):
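    """Single-layer LSTM run over `n_chunks` chunks of `chunk_size` features each,
    with a linear read-out of the last LSTM output to `_n_classes` logits."""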
    # Set parameter default
    rnn_size = kwargs.get('rnn_size', 128)
    # Reshape x into a list of `n_chunks` tensors of shape [batch, chunk_size]
    x = tf.transpose(x, [1, 0, 2])
    x = tf.reshape(x, shape=[-1, kwargs['chunk_size']])
    x = tf.split(x, kwargs['n_chunks'])
    # Create LSTM cell
    lstm_cell = rnn.BasicLSTMCell(rnn_size)
    outputs, states = rnn.static_rnn(lstm_cell, x, dtype=tf.float32)
    # Define layer variables
    weights = tf.Variable(tf.random_normal(shape=[rnn_size, _n_classes]))
    biases = tf.Variable(tf.random_normal(shape=[_n_classes]))
    # Return output logits computed from the last LSTM output
    return tf.matmul(outputs[-1], weights) + biases
def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
def maxpool2d(x):
    # 2x2 pooling window moved with a stride of 2 (halves each spatial dimension)
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
def cnn(x, _num_features, _n_classes, **kwargs):
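    """Two 5x5 convolution + 2x2 max-pool blocks (32 and 64 feature maps), a 1024-unit
    fully connected layer with dropout, and a linear output layer of `_n_classes` logits.
    Assumes the input is a flattened 28x28 single-channel image (e.g. MNIST)."""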
    # Set parameter default
    keep_rate = kwargs.get('keep_rate', 0.8)
    # Reshape x (from a flat vector to a 28x28x1 image)
    x = tf.reshape(x, shape=[-1, 28, 28, 1])
    # Define layer variables
    weights = {
        # 5x5 convolution, 1 input channel, 32 output features
        'W_conv_1': tf.Variable(tf.random_normal(shape=[5, 5, 1, 32])),
        # 5x5 convolution, 32 input channels, 64 output features
        'W_conv_2': tf.Variable(tf.random_normal(shape=[5, 5, 32, 64])),
        # Each pooling layer halves the image: 28x28 -> 14x14 -> 7x7
        # 7x7 image (after the second block), 64 features, 1024 nodes in the dense layer
        'W_fc': tf.Variable(tf.random_normal(shape=[7 * 7 * 64, 1024])),
        'out': tf.Variable(tf.random_normal(shape=[1024, _n_classes])),
    }
    biases = {
        'b_conv_1': tf.Variable(tf.random_normal(shape=[32])),
        'b_conv_2': tf.Variable(tf.random_normal(shape=[64])),
        'b_fc': tf.Variable(tf.random_normal(shape=[1024])),
        'out': tf.Variable(tf.random_normal(shape=[_n_classes])),
    }
    # Create convolution (with pooling) layers
    conv_1 = tf.nn.relu(conv2d(x, weights['W_conv_1']) + biases['b_conv_1'])
    conv_1 = maxpool2d(conv_1)
    conv_2 = tf.nn.relu(conv2d(conv_1, weights['W_conv_2']) + biases['b_conv_2'])
    conv_2 = maxpool2d(conv_2)
    # Create fully connected layer
    fc = tf.reshape(conv_2, [-1, 7 * 7 * 64])
    fc = tf.nn.relu(tf.matmul(fc, weights['W_fc']) + biases['b_fc'])
    # Apply dropout regularization on the fc layer
    fc = tf.nn.dropout(fc, keep_rate)
    # Return output logits
    return tf.matmul(fc, weights['out']) + biases['out']
def train_neural_network(_data, model_fn, n_epochs=10, batch_sz=100, do_chunks=False, unpack_sparse=False, **kwargs):
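    """Build the graph for `model_fn`, train it with Adam on softmax cross-entropy in
    mini-batches of `batch_sz` for `n_epochs`, and return (test accuracy, per-epoch losses).

    `do_chunks` reshapes each sample into n_chunks x chunk_size for the LSTM model;
    `unpack_sparse` converts scipy sparse batches to dense arrays before feeding them."""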
    num_samples, num_features = _data['train']['x'].shape
    n_classes = _data['train']['y'].shape[1]
    n_chunks, chunk_size = None, None
    if do_chunks:
        chunk_size = int(sqrt(num_features))
        n_chunks = num_features // chunk_size
        kwargs['chunk_size'] = chunk_size
        kwargs['n_chunks'] = n_chunks
        # chunk_size * n_chunks is likely to be < num_features, so we update it
        # and trim the feature matrices accordingly (drop a few feature columns)
        num_features = chunk_size * n_chunks
        _data['train']['x'] = _data['train']['x'][:, :num_features]
        _data['test']['x'] = _data['test']['x'][:, :num_features]
        # Create placeholder for x
        x = tf.placeholder('float', shape=[None, n_chunks, chunk_size])
    else:
        # Create placeholder for x
        x = tf.placeholder('float', shape=[None, num_features])
    # Create placeholder for y
    y = tf.placeholder('float')
    # Define tf operations
    prediction = model_fn(x, num_features, n_classes, **kwargs)
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y))
    optimiser = tf.train.AdamOptimizer().minimize(cost)
    # Define scoring function
    correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
    # Start tensorflow session
    with tf.Session() as session:
        session.run(tf.global_variables_initializer())
        losses = []
        # Train
        with tqdm.tqdm(total=n_epochs, unit='epoch') as pbar:
            for epoch in range(n_epochs):
                train_loss = 0
                # In batches
                for i in range(num_samples // batch_sz):
                    x_batch = _data['train']['x'][i * batch_sz:(i * batch_sz + batch_sz)]
                    y_batch = _data['train']['y'][i * batch_sz:(i * batch_sz + batch_sz)]
                    if unpack_sparse:
                        # The data is made of sparse matrices; we need to convert them to
                        # dense arrays before passing them to tf, hence `.toarray()`
                        x_batch = x_batch.toarray()
                        y_batch = y_batch.toarray()
                    # TODO: LSTM (RNN) model is buggy (chunking part during training)
                    if do_chunks:
                        # Reshape x batch
                        x_batch = x_batch.reshape((batch_sz, n_chunks, chunk_size))
                    _, c = session.run([optimiser, cost], feed_dict={x: x_batch, y: y_batch})
                    train_loss += c
                losses.append(train_loss)
                pbar.set_description('Epoch {0:>3} / {1} -> Loss: {2}'.format(epoch + 1, n_epochs, train_loss))
                pbar.update()
        # Limit the size of the test set to prevent out-of-GPU-VRAM errors
        _x_test = _data['test']['x'][:256]
        _y_test = _data['test']['y'][:256]
        if unpack_sparse:
            # The data is made of sparse matrices; we need to convert them to
            # dense arrays before passing them to tf, hence `.toarray()`
            _x_test = _x_test.toarray()
            _y_test = _y_test.toarray()
        if do_chunks:
            # Reshape the test features to chunks (labels keep their one-hot shape)
            _x_test = _x_test.reshape((-1, n_chunks, chunk_size))
        acc = accuracy.eval({x: _x_test, y: _y_test})
    return acc, losses
def get_lexicon_details(f_in):
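    """Read `granularity,size` from a one-line CSV file and return both as strings."""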
    with open(f_in, 'r') as f:
        content = f.read().strip()
    granularity, size = content.split(',')
    return granularity, size
def load_small_data():
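    """Load the lexicon and the pre-processed sparse train/test matrices for the small dataset."""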
    # Load lexicon
    with open('preprocessing/small_data/saved/lexicon.json', 'r', encoding='latin-1') as f:
        _lexicon = json.load(f)
    # Load sparse matrices
    _x_train = load_npz('preprocessing/small_data/saved/x_train.npz')
    _y_train = load_npz('preprocessing/small_data/saved/y_train.npz')
    _x_test = load_npz('preprocessing/small_data/saved/x_test.npz')
    _y_test = load_npz('preprocessing/small_data/saved/y_test.npz')
    return _lexicon, _x_train, _y_train, _x_test, _y_test
def load_large_data(test_size=0.1):
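    """Load the lexicon and the sparse feature/label matrices for the large dataset,
    splitting off the last `test_size` fraction of samples as the test set."""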
    # Filenames & lexicon details
    lexicon_details_filename = 'preprocessing/large_data/saved/lexicon-details.csv'
    lexicon_granularity, lexicon_size = get_lexicon_details(lexicon_details_filename)
    lexicon_filename = 'preprocessing/large_data/saved/lexicon-{}-{}.json'.format(lexicon_granularity, lexicon_size)
    features_filename = 'preprocessing/large_data/saved/processed-train-set-{}-{}-features.npz' \
        .format(lexicon_granularity, lexicon_size)
    labels_filename = 'preprocessing/large_data/saved/processed-train-set-{}-{}-labels.npz' \
        .format(lexicon_granularity, lexicon_size)
    # Start loading stuff
    print('Loaded lexicon details from `{}` -> granularity: {}\tsize: {}'
          .format(lexicon_details_filename, lexicon_granularity, lexicon_size))
    # Load lexicon
    print('-- loading lexicon from `{}`'.format(lexicon_filename))
    with open(lexicon_filename, 'r', encoding='latin-1') as f:
        _lexicon = json.load(f)
    # Load sparse matrices
    print('-- loading features from `{}`'.format(features_filename))
    features = load_npz(features_filename)
    print('-- loading labels from `{}`'.format(labels_filename))
    labels = load_npz(labels_filename)
    # Split train & test sets
    print('-- creating train & test sets...')
    testing_size = int(test_size * features.shape[0])
    _x_train = features[:-testing_size]
    _y_train = labels[:-testing_size]
    _x_test = features[-testing_size:]
    _y_test = labels[-testing_size:]
    return _lexicon, _x_train, _y_train, _x_test, _y_test
def load_mnist_data():
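    """Load the MNIST images and one-hot labels via the TF 1.x tutorial helper."""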
    mnist = input_data.read_data_sets(r'D:\tf_data', one_hot=True)
    _x_train = mnist.train.images
    _y_train = mnist.train.labels
    _x_test = mnist.test.images
    _y_test = mnist.test.labels
    return _x_train, _y_train, _x_test, _y_test
if __name__ == '__main__':
    # Set program parameters
    SMALL_DATA = False
    USE_MNIST = True
    MODELS = [
        {'fn': 'dnn', 'kwargs': {}},
        {'fn': 'rnn_lstm', 'kwargs': {'do_chunks': True, 'rnn_size': 128}},
        {'fn': 'cnn', 'kwargs': {'keep_rate': 0.8}},
    ]
    for k, model in enumerate(MODELS):
        # MNIST data does not use sparse matrices
        model['kwargs']['unpack_sparse'] = not USE_MNIST
        MODELS[k] = model
    # Load data
    print('Loading data...')
    if SMALL_DATA and not USE_MNIST:
        lexicon, x_train, y_train, x_test, y_test = load_small_data()
    elif USE_MNIST:
        x_train, y_train, x_test, y_test = load_mnist_data()
    else:
        # Computing the accuracy on the large-data test set with a test_size of 0.1 (10%) needs at
        # least 2GB of GPU RAM. I'm running this on a laptop with 1GB of GPU RAM, so I'm only using 1% for testing.
        lexicon, x_train, y_train, x_test, y_test = load_large_data(test_size=0.01)
    # Set training parameters
    data = {
        "train": {"x": x_train, "y": y_train},
        "test": {"x": x_test, "y": y_test},
    }
    # TODO: LSTM (RNN) model is buggy (chunking part during training)
    network_model = MODELS[2]  # (0 - DNN, 1 - LSTM (RNN), 2 - CNN)
    if USE_MNIST:
        num_epochs, batch_size = 10, 128
    else:
        # Be careful when setting the batch size.
        # A low batch size has a small memory footprint but training will take forever.
        # A very large batch size might lead to suboptimal accuracy due to fewer parameter updates
        # (remember we do backpropagation after each batch).
        #
        # For a problem (x_train) with 2461 features, a 10K batch size takes about 180MB of RAM;
        # a batch size of 100K takes about 1.75GB of RAM (and 1.75GB of GPU RAM).
        # This is because during training we must unpack each batch (sparse matrix) into a numpy array.
        num_epochs, batch_size = (100, 100) if SMALL_DATA else (10, 20000)
    # Train network
    print('\nTraining network `{}` ->\tx_train size: {} x {}\t\tEpochs: {}\t\tBatch size: {}\n'
          .format(network_model['fn'], x_train.shape[0], x_train.shape[1], num_epochs, batch_size))
    _accuracy, train_losses = train_neural_network(data,
                                                   eval(network_model['fn']),
                                                   num_epochs, batch_size,
                                                   **(network_model['kwargs']))
    # Print results
    print('Network accuracy:', _accuracy)
    # Plot train error (loss)
    plt.plot(np.arange(len(train_losses)) + 1, train_losses)
    plt.xlabel('Epoch')
    plt.ylabel('Train Error')
    plt.title('Train Error over Epochs')
    plt.show()