Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimization and refactoring #13

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
## Grad-CAM implementation in Keras ##

---
#### My more recent version of repository: https://github.com/eclique/keras-gradcam.
---

Gradient class activation maps are a visualization technique for deep learning networks.

See the paper: https://arxiv.org/pdf/1610.02391v1.pdf
Expand Down
211 changes: 118 additions & 93 deletions grad-cam.py
Original file line number Diff line number Diff line change
@@ -1,72 +1,60 @@
from keras.applications.vgg16 import (
VGG16, preprocess_input, decode_predictions)
from keras.applications.vgg16 import VGG16, preprocess_input, decode_predictions
from keras.preprocessing import image
from keras.layers.core import Lambda
from keras.models import Sequential
from tensorflow.python.framework import ops
from matplotlib import pyplot as plt
import keras.backend as K
import tensorflow as tf
import numpy as np
import keras
import sys
import cv2

def target_category_loss(x, category_index, nb_classes):
return tf.multiply(x, K.one_hot([category_index], nb_classes))

def target_category_loss_output_shape(input_shape):
return input_shape
def build_model():
"""Function returning keras model instance.

Model can be
- Trained here
- Loaded with load_model
- Loaded from keras.applications
"""
return VGG16(weights='imagenet')

def normalize(x):
# utility function to normalize a tensor by its L2 norm
return x / (K.sqrt(K.mean(K.square(x))) + 1e-5)

def load_image(path):
img_path = sys.argv[1]
img = image.load_img(img_path, target_size=(224, 224))
x = image.img_to_array(img)
x = np.expand_dims(x, axis=0)
x = preprocess_input(x)
return x

def register_gradient():
if "GuidedBackProp" not in ops._gradient_registry._registry:
@ops.RegisterGradient("GuidedBackProp")
def build_guided_model():
"""Function returning modified model.

Changes gradient function for all ReLu activations
according to Guided Backpropagation.
"""
if 'GuidedBackProp' not in ops._gradient_registry._registry:
@ops.RegisterGradient('GuidedBackProp')
def _GuidedBackProp(op, grad):
dtype = op.inputs[0].dtype
return grad * tf.cast(grad > 0., dtype) * \
tf.cast(op.inputs[0] > 0., dtype)

def compile_saliency_function(model, activation_layer='block5_conv3'):
input_img = model.input
layer_dict = dict([(layer.name, layer) for layer in model.layers[1:]])
layer_output = layer_dict[activation_layer].output
max_output = K.max(layer_output, axis=3)
saliency = K.gradients(K.sum(max_output), input_img)[0]
return K.function([input_img, K.learning_phase()], [saliency])
tf.cast(op.inputs[0] > 0., dtype)

def modify_backprop(model, name):
g = tf.get_default_graph()
with g.gradient_override_map({'Relu': name}):
with g.gradient_override_map({'Relu': 'GuidedBackProp'}):
new_model = build_model()
return new_model

# get layers that have an activation
layer_dict = [layer for layer in model.layers[1:]
if hasattr(layer, 'activation')]

# replace relu activation
for layer in layer_dict:
if layer.activation == keras.activations.relu:
layer.activation = tf.nn.relu
def load_image(path, preprocess=True):
"""Function to load and preprocess image."""
x = image.load_img(path, target_size=(224, 224))
if preprocess:
x = image.img_to_array(x)
x = np.expand_dims(x, axis=0)
x = preprocess_input(x)
return x

# re-instanciate a new model
new_model = VGG16(weights='imagenet')
return new_model

def deprocess_image(x):
'''
"""
Same normalization as in:
https://github.com/fchollet/keras/blob/master/examples/conv_filter_visualization.py
'''
"""
x = x.copy()
if np.ndim(x) > 3:
x = np.squeeze(x)
# normalize tensor: center on 0., ensure std is 0.1
Expand All @@ -85,59 +73,96 @@ def deprocess_image(x):
x = np.clip(x, 0, 255).astype('uint8')
return x

def grad_cam(input_model, image, category_index, layer_name):
model = Sequential()
model.add(input_model)

nb_classes = 1000
target_layer = lambda x: target_category_loss(x, category_index, nb_classes)
model.add(Lambda(target_layer,
output_shape = target_category_loss_output_shape))

loss = K.sum(model.layers[-1].output)
conv_output = [l for l in model.layers[0].layers if l.name is layer_name][0].output
grads = normalize(K.gradients(loss, conv_output)[0])
gradient_function = K.function([model.layers[0].input], [conv_output, grads])

output, grads_val = gradient_function([image])
output, grads_val = output[0, :], grads_val[0, :, :, :]

weights = np.mean(grads_val, axis = (0, 1))
cam = np.ones(output.shape[0 : 2], dtype = np.float32)
def normalize(x):
"""Utility function to normalize a tensor by its L2 norm"""
return (x + 1e-10) / (K.sqrt(K.mean(K.square(x))) + 1e-10)

for i, w in enumerate(weights):
cam += w * output[:, :, i]

cam = cv2.resize(cam, (224, 224))
cam = np.maximum(cam, 0)
heatmap = cam / np.max(cam)
def guided_backprop(model, img, activation_layer):
"""Compute gradients of conv. activation w.r.t. the input image.

#Return to BGR [0..255] from the preprocessed image
image = image[0, :]
image -= np.min(image)
image = np.minimum(image, 255)

cam = cv2.applyColorMap(np.uint8(255*heatmap), cv2.COLORMAP_JET)
cam = np.float32(cam) + np.float32(image)
cam = 255 * cam / np.max(cam)
return np.uint8(cam), heatmap
If model is modified properly this will result in Guided Backpropagation
method for visualizing input saliency.
See https://arxiv.org/abs/1412.6806 """
input_img = model.input
layer_output = model.get_layer(activation_layer).output
grads = K.gradients(layer_output, input_img)[0]
gradient_fn = K.function([input_img, K.learning_phase()], [grads])
grads_val = gradient_fn([img, 0])[0]
return grads_val

preprocessed_input = load_image(sys.argv[1])

model = VGG16(weights='imagenet')
def grad_cam(input_model, img, category_index, activation_layer):
"""GradCAM method for visualizing input saliency."""
loss = input_model.output[0, category_index]
layer_output = input_model.get_layer(activation_layer).output
grads = normalize(K.gradients(loss, layer_output)[0])
gradient_fn = K.function([input_model.input, K.learning_phase()], [layer_output, grads])

predictions = model.predict(preprocessed_input)
top_1 = decode_predictions(predictions)[0][0]
print('Predicted class:')
print('%s (%s) with probability %.2f' % (top_1[1], top_1[0], top_1[2]))
conv_output, grads_val = gradient_fn([img, 0])
conv_output, grads_val = conv_output[0], grads_val[0]

predicted_class = np.argmax(predictions)
cam, heatmap = grad_cam(model, preprocessed_input, predicted_class, "block5_conv3")
cv2.imwrite("gradcam.jpg", cam)
weights = np.mean(grads_val, axis=(0, 1))
cam = np.dot(conv_output, weights)

register_gradient()
guided_model = modify_backprop(model, 'GuidedBackProp')
saliency_fn = compile_saliency_function(guided_model)
saliency = saliency_fn([preprocessed_input, 0])
gradcam = saliency[0] * heatmap[..., np.newaxis]
cv2.imwrite("guided_gradcam.jpg", deprocess_image(gradcam))
cam = cv2.resize(cam, (224, 224), cv2.INTER_LINEAR)
cam = np.maximum(cam, 0)
cam = cam / cam.max()
return cam


def compute_saliency(model, guided_model, layer_name, img_path, cls=-1, visualize=True, save=True):
preprocessed_input = load_image(img_path)

predictions = model.predict(preprocessed_input)
top_n = 5
top = decode_predictions(predictions, top=top_n)[0]
classes = np.argsort(predictions[0])[-top_n:][::-1]
print('Model prediction:')
for c, p in zip(classes, top):
print('\t({}) {:20s}\twith probability {:.3f}'.format(c, p[1],p[2]))
if cls == -1:
cls = np.argmax(predictions)
nb_classes = 1000
class_name = decode_predictions(np.eye(1, nb_classes, cls))[0][0][1]
print("Computing saliency for '{}'".format(class_name))

gradcam = grad_cam(model, preprocessed_input, cls, activation_layer=layer_name)
gb = guided_backprop(guided_model, img=preprocessed_input, activation_layer=layer_name)
guided_gradcam = gb * gradcam[..., np.newaxis]

if save:
jetcam = cv2.applyColorMap(np.uint8(255 * gradcam), cv2.COLORMAP_JET)
jetcam = (np.float32(jetcam) + np.float32(cv2.imread(sys.argv[1]))) / 2

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would put the arguments of np.float32( cv2.imread (...) ) loading of the image as np.float32(image.load_img(sys.argv[1], target_size=(224, 224))) instead.

Otherwise, it would break if the image has a different size when being loaded by the load_image function.
In best case scenario, the size of the image needs to be taken out as a separate parameter - e.g. top of the file.

cv2.imwrite('gradcam.jpg', np.uint8(jetcam))
cv2.imwrite('guided_backprop.jpg', deprocess_image(gb[0]))
cv2.imwrite('guided_gradcam.jpg', deprocess_image(guided_gradcam[0]))

if visualize:
plt.figure(figsize=(15, 6))
plt.subplot(131)
plt.title('GradCAM')
plt.axis('off')
plt.imshow(load_image(img_path, preprocess=False))
plt.imshow(gradcam, cmap='jet', alpha=0.5)

plt.subplot(132)
plt.title('Guided Backprop')
plt.axis('off')
plt.imshow(np.flip(deprocess_image(gb[0]), -1))

plt.subplot(133)
plt.title('Guided GradCAM')
plt.axis('off')
plt.imshow(np.flip(deprocess_image(guided_gradcam[0]), -1))
plt.show()

return gradcam, gb, grad_cam


if __name__ == '__main__':
model = build_model()
guided_model = build_guided_model()
gradcam, gb, grad_cam = compute_saliency(model, guided_model, layer_name='block5_conv3',
img_path=sys.argv[1], cls=-1)