#!/usr/bin/env python
import numpy as np
import datetime as dt
import sys, os, pickle, time, math
import pandas as pd
import multiprocessing

# quiet TensorFlow logging and force CPU-only execution
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

from sklearn import metrics
from scipy import spatial
from scipy.stats import spearmanr
from scipy.cluster import hierarchy
from vis.visualization import visualize_activation
from vis.utils import utils
from keras import activations
from keras.models import Model, save_model, load_model
import tensorflow as tf
import keras.backend as K

from ml_functions import read_csv_files, normalize_multivariate_data, log, get_features
def brier_score_keras(obs, preds):
    return K.mean((preds - obs) ** 2)
def brier_skill_score_keras(obs, preds):
    climo = K.mean((obs - K.mean(obs)) ** 2)
    bs = brier_score_keras(obs, preds)
    return 1.0 - (bs / climo)
def auc(obs, preds):
    # TF1-style streaming AUC metric; element [1] is the update op
    auc = tf.metrics.auc(obs, preds)[1]
    #K.get_session().run(tf.local_variables_initializer())
    return auc
def compute_bss(obs, preds):
    bs = np.mean((preds - obs) ** 2)
    climo = np.mean((obs - np.mean(obs)) ** 2)
    return 1.0 - (bs / climo)
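# Illustrative sanity check for compute_bss (hypothetical values, not from the
# dataset): a perfect forecast scores BSS = 1, and always forecasting the
# observed base rate scores BSS = 0.
#   obs = np.array([0., 1., 1., 0.])
#   compute_bss(obs, obs)                     # -> 1.0
#   compute_bss(obs, np.full(4, obs.mean()))  # -> 0.0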
def make_labels():
    d = nn_params['report_window_space'][0]
    #labels = ((df['hail_rptdist'+twin] < d) & (df['hail_rptdist'+twin] > 0)) | \
    labels = ((df['hailone_rptdist'+twin] < d) & (df['hailone_rptdist'+twin] > 0)) | \
             ((df['wind_rptdist'+twin] < d) & (df['wind_rptdist'+twin] > 0)) | \
             ((df['torn_rptdist'+twin] < d) & (df['torn_rptdist'+twin] > 0))
    labels_wind    = ((df['wind_rptdist'+twin] < d) & (df['wind_rptdist'+twin] > 0))
    labels_hailone = ((df['hailone_rptdist'+twin] < d) & (df['hailone_rptdist'+twin] > 0))
    labels_torn    = ((df['torn_rptdist'+twin] < d) & (df['torn_rptdist'+twin] > 0))
    labels_sighail = ((df['sighail_rptdist'+twin] < d) & (df['sighail_rptdist'+twin] > 0))
    labels_sigwind = ((df['sigwind_rptdist'+twin] < d) & (df['sigwind_rptdist'+twin] > 0))

    # labels for multi-class neural network
    if multiclass:
        labels = np.array([ labels, labels_wind, labels_hailone, labels_torn, labels_sighail, labels_sigwind ]).T
    else:
        labels = np.array([ labels ]).T
    return labels
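# In the multiclass case make_labels() returns a boolean array of shape
# (n_samples, 6), columns ordered (any severe, wind, hail >= 1 in, tornado,
# sig. hail, sig. wind); hazard_idx below selects a column from this axis.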
def print_scores(fcst, obs):
    # BSS
    bss_val = compute_bss(obs, fcst)
    # ROC AUC
    auc = metrics.roc_auc_score(obs, fcst)
    return (bss_val, auc)
def permutation_importance(idx):
    from keras.models import load_model
    import tensorflow as tf

    dense_model = load_model(model_fname, custom_objects={'brier_score_keras': brier_score_keras, 'brier_skill_score_keras': brier_skill_score_keras, 'auc': auc})

    # make copy of data to use for shuffling in place
    this_norm_in_data = np.copy(norm_in_data)
    #this_norm_in_data[:,idx] = 0.0

    # shuffle the selected column in place
    # idx < 0 means do not shuffle (baseline scores); columns picked in earlier
    # passes (importance_list) are already shuffled in norm_in_data itself
    if idx >= 0 and idx not in importance_list:
        np.random.shuffle(this_norm_in_data[:,idx])

    # make predictions with shuffled data
    these_predictions = dense_model.predict(this_norm_in_data)

    # compute scores
    this_bss, this_auc = print_scores(these_predictions[:,hazard_idx], labels[:,hazard_idx])
    log((idx, fields_to_shuffle[idx] if idx >= 0 else 'baseline', this_bss, this_auc))
    return {'idx': idx, 'bss': this_bss, 'auc': this_auc, 'data': this_norm_in_data[:,idx]}
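# Note: the pool workers created below rely on fork semantics to inherit the
# module-level globals this function reads (norm_in_data, labels, model_fname,
# importance_list, hazard_idx); on spawn-based platforms these would need to
# be passed to the workers explicitly.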
### NEURAL NETWORK PARAMETERS ###
nn_params = { 'num_layers': 1, 'num_neurons': [ 1024 ], 'dropout': 0.1, 'lr': 0.001, 'num_epochs': 10, \
              'report_window_space':[ int(sys.argv[1]) ], 'report_window_time':[ int(sys.argv[2]) ] }
rf_params = { 'ntrees': 100, 'max_depth': 20, 'min_samples_split': 2 }
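# usage (assumed from the argv indexing above and the %dkm/_%dhr filename tags):
#   python neural_network_interp.py <report_window_space_km> <report_window_time_hr>
# e.g. arguments "120 2" verify reports within 120 km and 2 h of a forecast point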
years = [2011,2012,2013,2014,2015,2016] #k-fold cross validation for these years
#years = [ int(sys.argv[3]) ]
model = 'nn'
multiclass = True
dataset = 'NSC3km-12sec'
trained_models_dir = '/glade/work/sobash/NSC_objects/trained_models_paper'
sdate = dt.datetime(2011,4,1,0,0,0)
edate = dt.datetime(2011,4,30,0,0,0)
dateinc = dt.timedelta(days=1)
##################################
twin = "_%dhr"%nn_params['report_window_time'][0]
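# e.g. report_window_time = 2 gives twin = "_2hr", matching the report-distance
# column suffixes used in make_labels()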
features = get_features('basiclarge')
log('Number of features %d'%len(features))
log(nn_params)
log(rf_params)
log('Reading Data')
# read data and reassign data types to float32 to save memory
type_dict = {}
for f in features: type_dict[f] = 'float32'
df, numfcsts = read_csv_files(sdate, edate, dataset)   # note: type_dict is currently unused here
log('Begin predicting')
scaling_values = pickle.load(open('scaling_values_all_NSC3km-12sec.pk', 'rb'))
norm_in_data, scaling_values = normalize_multivariate_data(df[features].values.astype(np.float32), features, scaling_values=scaling_values)
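# (assumption: normalize_multivariate_data standardizes each feature using the
# per-feature mean/std stored in scaling_values, the same values reused in the
# commented partial-dependence block at the bottom of this script)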
# neural network uses normalized data
model_fname = '%s/neural_network_2011_%dkm%s_nn%d_drop%.1f_basicplus.h5'%(trained_models_dir,nn_params['report_window_space'][0],twin,nn_params['num_neurons'][0],nn_params['dropout'])
labels = make_labels()
# filter forecast data to only compute importance for selected points
forecast_hour_mask = (df['fhr'] >= 12)
norm_in_data = norm_in_data[forecast_hour_mask,:]
labels = labels[forecast_hour_mask,:]
print(norm_in_data.shape, labels.shape)
### COMPUTE permutation importance
fields_to_shuffle = features
log('running permutation importance in parallel')
hazard_idx = 0
nprocs = 6
nfeatures = len(fields_to_shuffle)
chunksize = int(math.ceil(nfeatures / float(nprocs)))
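# chunksize spreads the nfeatures shuffle jobs roughly evenly over the pool,
# e.g. 100 features across 6 processes gives chunks of ceil(100/6) = 17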
# implement multi-pass permutation importance
log('begin multi-pass permutation importance')
importance_list = []
for i in range(len(fields_to_shuffle)):
    bss_all, auc_all, idx_all = [], [], []
    shuffled_norm_in_data = norm_in_data.copy()

    # shuffle each column in parallel and return the AUC/BSS
    pool = multiprocessing.Pool(processes=nprocs)
    original_predictions = pool.map(permutation_importance, [-1], 1)
    data = pool.map(permutation_importance, range(nfeatures), chunksize)
    pool.close()

    # put values and shuffled columns into numpy arrays
    for d in data:
        bss_all.append(d['bss'])
        auc_all.append(d['auc'])
        idx_all.append(d['idx'])
        shuffled_norm_in_data[:,d['idx']] = d['data']
    bss_all, auc_all = np.array(bss_all), np.array(auc_all)

    # determine which feature is most important
    bss_loss_idx = np.argmax(original_predictions[0]['bss'] - bss_all)
    bss_loss_idx = idx_all[bss_loss_idx]

    # replace original data with shuffled column for most important feature
    norm_in_data[:,bss_loss_idx] = shuffled_norm_in_data[:,bss_loss_idx].copy()
    importance_list.append(bss_loss_idx)
    print(fields_to_shuffle[bss_loss_idx])
log('Finished')
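# importance_list now holds feature indices ordered from most to least
# important (by drop in BSS); because each pass leaves the winning column
# permanently shuffled, later passes measure importance conditional on the
# features already selected.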
#### COMPUTE partial dependence here
# partial dependence for SHR06/MUCAPE and prob of any severe weather
#idx = features.index('SHR06')
#idx2 = features.index('MUCAPE')
#for i in range(0,51,5):
#    for j in range(0,5001,500):
#        this_val = (i - scaling_values.loc['SHR06', "mean"]) / scaling_values.loc['SHR06', "std"]
#        this_val2 = (j - scaling_values.loc['MUCAPE', "mean"]) / scaling_values.loc['MUCAPE', "std"]
#        norm_in_data[:,idx] = this_val
#        norm_in_data[:,idx2] = this_val2
#        predictions = dense_model.predict(norm_in_data)
#        print(i, j, predictions[:,0].mean())
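# note: the commented blocks below assume dense_model has already been loaded,
# e.g. dense_model = load_model(model_fname, custom_objects={...}) as done in
# permutation_importance() above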
# create correlation plot and dendrogram
#import matplotlib.pyplot as plt
#fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 8))
#corr = spearmanr(norm_in_data).correlation
#corr_linkage = hierarchy.ward(corr)
#dendro = hierarchy.dendrogram(corr_linkage, labels=features, ax=ax1, leaf_rotation=90)
#dendro_idx = np.arange(0, len(dendro['ivl']))
#ax2.imshow(corr[dendro['leaves'], :][:, dendro['leaves']])
#ax2.set_xticks(dendro_idx)
#ax2.set_yticks(dendro_idx)
#ax2.set_xticklabels(dendro['ivl'], rotation='vertical')
#ax2.set_yticklabels(dendro['ivl'])
#fig.tight_layout()
#plt.savefig('test.png')

# pick one representative feature per correlated cluster
#from collections import defaultdict
#cluster_ids = hierarchy.fcluster(corr_linkage, 1, criterion='distance')
#cluster_id_to_feature_ids = defaultdict(list)
#for idx, cluster_id in enumerate(cluster_ids):
#    cluster_id_to_feature_ids[cluster_id].append(idx)
#selected_features = [v[0] for v in cluster_id_to_feature_ids.values()]
#print(cluster_id_to_feature_ids, selected_features)
# try activation maximization
#K.set_image_data_format('channels_first')
#layer_index = utils.find_layer_idx(dense_model, 'dense_2')
#for z in range(len(dense_model.layers)):
#    print(dense_model.layers[z])
#dense_model.layers[layer_index].activation = activations.linear
#dense_model = utils.apply_modifications(dense_model, custom_objects={'brier_score_keras': brier_score_keras, 'brier_skill_score_keras': brier_skill_score_keras, 'auc': auc})
#print(dense_model.input)
#dense_model.summary()
#visualization = visualize_activation(dense_model, layer_index, filter_indices=0, lp_norm_weight=None, tv_weight=None)
#sys.exit()