first commit

kuhung · Dec 8, 2017 · ab9daeb · ab9daeb
commit ab9daeb
Show file tree

Hide file tree

Showing 26 changed files with 1,783 additions and 0 deletions.
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2016 Andrey Rykov
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
@@ -0,0 +1,18 @@
+[![license](https://img.shields.io/github/license/mashape/apistatus.svg)](LICENSE)
+
+## A port of SSD: Single Shot MultiBox Detector to the Keras framework.
+Refer to [arXiv paper](http://arxiv.org/abs/1512.02325).
+
+- For a forward pass with the 300x300 model, see the examples in `SSD.ipynb`.
+- For the training procedure for the 300x300 model, see the examples in `SSD_training.ipynb`.
+- In addition, the `testing_utils` folder contains a useful script for testing `SSD` on video or camera input.
+
+---
+- Weights are ported from the original models and are available [here](https://mega.nz/#F!7RowVLCL!q3cEVRK9jyOSB9el3SssIA). You need `weights_SSD300.hdf5`; `weights_300x300_old.hdf5` is for the old version of the architecture, which uses a 3x3 convolution for `pool6`.
+
+
+- Mirror of the weights for users in China: [Evernote link](https://app.yinxiang.com/shard/s51/nl/10565191/1944fa71-d815-46b3-ac3b-56ca58ca5b47?title=weights_SSD300.hdf5)
+
+
+This code was tested with `Keras` v1.2.2, `Tensorflow` v1.0.0, `OpenCV` v3.1.0-dev
+
diff --git a/SSD.ipynb b/SSD.ipynb
diff --git a/SSD_crop.py b/SSD_crop.py
@@ -0,0 +1,101 @@
+import cv2
+import keras
+from keras.applications.imagenet_utils import preprocess_input
+from keras.backend.tensorflow_backend import set_session
+from keras.models import Model
+from keras.preprocessing import image
+import matplotlib.pyplot as plt
+import numpy as np
+from scipy.misc import imread
+import tensorflow as tf
+
+import sys
+
+from ssd import SSD300
+from ssd_utils import BBoxUtility
+
+# Matplotlib defaults used by this script: 8x8 figures, nearest-neighbour
+# interpolation when images are displayed.
+plt.rcParams.update({
+    'figure.figsize': (8, 8),
+    'image.interpolation': 'nearest',
+})
+
+# Print numpy floats without scientific notation.
+np.set_printoptions(suppress=True)
+
+# Build a TensorFlow session whose per-process GPU memory fraction is 0.8
+# and register it as the session Keras uses.
+config = tf.ConfigProto()
+config.gpu_options.per_process_gpu_memory_fraction = 0.8
+set_session(tf.Session(config=config))
+
+# Class names in label order; label k (1-based) maps to voc_classes[k - 1].
+voc_classes = ('Aeroplane Bicycle Bird Boat Bottle '
+               'Bus Car Cat Chair Cow Diningtable '
+               'Dog Horse Motorbike Person Pottedplant '
+               'Sheep Sofa Train Tvmonitor').split()
+# One extra slot on top of the named classes
+# (presumably the background class -- TODO confirm against ssd.py).
+NUM_CLASSES = 1 + len(voc_classes)
+
+# Build the 300x300 SSD network, load the weights by layer name from the
+# current working directory, and create the matching box decoder.
+input_shape = (300, 300, 3)
+model = SSD300(input_shape, num_classes=NUM_CLASSES)
+model.load_weights('weights_SSD300.hdf5', by_name=True)
+bbox_util = BBoxUtility(NUM_CLASSES)
+
+from PIL import Image
+
+def get_rectangle(img_file,img_name,target_file,target_label):
+    """Crop detections of one class out of a JPEG and save the crop.
+
+    Reads ``<img_file>/<img_name>.jpg``, runs the module-level ``model`` on a
+    300x300 resized copy, decodes the predictions with ``bbox_util`` and, for
+    every detection with confidence >= 0.6 whose class name equals
+    ``target_label``, crops that box from the full-size image and writes it
+    to ``<target_file>/<img_name>.jpg``.
+
+    Args:
+        img_file: Directory that contains the input image.
+        img_name: Image file name without the ``.jpg`` extension.
+        target_file: Existing directory the crop is written into.
+        target_label: Class name to keep; compared for exact equality with
+            the entries of ``voc_classes`` (e.g. ``"Person"``).
+
+    Returns:
+        None. The only effect is the file written to ``target_file``.
+
+    NOTE(review): every matching detection is saved under the same file
+    name, so when several boxes match only the last one written survives.
+    This mirrors the previous behaviour; confirm it is what callers want.
+    """
+    img_path = '{}/{}.jpg'.format(img_file,img_name)
+
+    # The network consumes a batch of one 300x300 image.
+    resized = image.img_to_array(image.load_img(img_path, target_size=(300, 300)))
+    inputs = preprocess_input(np.array([resized]))
+
+    preds = model.predict(inputs, batch_size=1, verbose=1)
+    detections = bbox_util.detection_out(preds)[0]
+    # Guard against an empty result for this image; the column indexing
+    # below needs a 2-D array (assumes the decoder may hand back an empty
+    # sequence when nothing is detected -- TODO confirm in ssd_utils).
+    if len(detections) == 0:
+        return
+    detections = np.asarray(detections)
+
+    # Row layout used here: [label, confidence, xmin, ymin, xmax, ymax],
+    # with the box corners expressed as fractions of the image size.
+    matches = [row for row in detections
+               if row[1] >= 0.6
+               and voc_classes[int(row[0]) - 1] == target_label]
+    if not matches:
+        return
+
+    # Open the full-size image once, take its dimensions from PIL instead of
+    # decoding it again, and make sure the file handle is released.
+    with Image.open(img_path) as im:
+        width, height = im.size
+        for row in matches:
+            box = (int(round(row[2] * width)),
+                   int(round(row[3] * height)),
+                   int(round(row[4] * width)),
+                   int(round(row[5] * height)))
+            im.crop(box).save('{}/{}.jpg'.format(target_file,img_name))
+
+import os
+import sys
+from tqdm import tqdm
+
+# Command line: <script> TARGET_LABEL IMG_DIR OUT_DIR
+#   TARGET_LABEL  class name handed to get_rectangle
+#   IMG_DIR       directory scanned for *.jpg files
+#   OUT_DIR       directory the crops are written to (created if missing)
+if len(sys.argv) < 4:
+    sys.exit('usage: {} TARGET_LABEL IMG_DIR OUT_DIR'.format(sys.argv[0]))
+target_label=sys.argv[1]
+img_file=sys.argv[2]
+target_file=sys.argv[3]
+
+# makedirs also creates missing parent directories.
+if not os.path.isdir(target_file):
+    os.makedirs(target_file)
+
+files = os.listdir(img_file)
+for file_name in tqdm(files):
+    # get_rectangle re-appends '.jpg' to the stem, so accept exactly that
+    # extension instead of any name that merely contains the text "jpg".
+    img_name, extension = os.path.splitext(file_name)
+    if extension == '.jpg':
+        get_rectangle(img_file,img_name,target_file,target_label)
+
diff --git a/SSD_crop.py~ b/SSD_crop.py~
@@ -0,0 +1,101 @@
+import cv2
+import keras
+from keras.applications.imagenet_utils import preprocess_input
+from keras.backend.tensorflow_backend import set_session
+from keras.models import Model
+from keras.preprocessing import image
+import matplotlib.pyplot as plt
+import numpy as np
+from scipy.misc import imread
+import tensorflow as tf
+
+import sys
+
+from ssd import SSD300
+from ssd_utils import BBoxUtility
+
+# Plot defaults: nearest-neighbour interpolation and 8x8 figures.
+plt.rcParams['image.interpolation'] = 'nearest'
+plt.rcParams['figure.figsize'] = (8, 8)
+
+# Suppress scientific notation when numpy arrays are printed.
+np.set_printoptions(suppress=True)
+
+# Session with per_process_gpu_memory_fraction set to 0.8, installed as the
+# Keras backend session.
+config = tf.ConfigProto(
+    gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.8))
+set_session(tf.Session(config=config))
+
+# Class names; a 1-based label k is looked up as voc_classes[k - 1].
+voc_classes = [
+    'Aeroplane', 'Bicycle', 'Bird', 'Boat',
+    'Bottle', 'Bus', 'Car', 'Cat',
+    'Chair', 'Cow', 'Diningtable', 'Dog',
+    'Horse', 'Motorbike', 'Person', 'Pottedplant',
+    'Sheep', 'Sofa', 'Train', 'Tvmonitor',
+]
+# Number of named classes plus one
+# (presumably for background -- TODO confirm against ssd.py).
+NUM_CLASSES = len(voc_classes) + 1
+
+# Network, weights (loaded by layer name, path relative to the working
+# directory) and the prediction decoder.
+input_shape = (300, 300, 3)
+model = SSD300(input_shape, num_classes=NUM_CLASSES)
+model.load_weights('weights_SSD300.hdf5', by_name=True)
+bbox_util = BBoxUtility(NUM_CLASSES)
+
+from PIL import Image
+
+def get_rectangle(img_file,img_name,target_file,target_label):
+    """Save a crop of each confident detection whose class is ``target_label``.
+
+    The image ``<img_file>/<img_name>.jpg`` is resized to 300x300, passed
+    through the module-level ``model`` and decoded with ``bbox_util``.
+    Detections with confidence >= 0.6 and a class name equal to
+    ``target_label`` are cropped from the full-size image and written to
+    ``<target_file>/<img_name>.jpg``; later matches overwrite earlier ones
+    because the output name does not change.
+    """
+    img_path = '{}/{}.jpg'.format(img_file,img_name)
+    im = Image.open(img_path)
+
+    # Batch of one: network input plus the full-size pixels used for scaling.
+    net_input = image.img_to_array(image.load_img(img_path, target_size=(300, 300)))
+    images = [imread(img_path)]
+    inputs = preprocess_input(np.array([net_input.copy()]))
+
+    preds = model.predict(inputs, batch_size=1, verbose=1)
+    results = bbox_util.detection_out(preds)
+
+    for idx, full_img in enumerate(images):
+        detections = results[idx]
+        # Columns read below: 0 label, 1 confidence, 2-5 xmin/ymin/xmax/ymax.
+        scores = detections[:, 1]
+        keep = [row for row, score in enumerate(scores) if score >= 0.6]
+
+        labels = detections[:, 0][keep].tolist()
+        left = detections[:, 2][keep]
+        top = detections[:, 3][keep]
+        right = detections[:, 4][keep]
+        bottom = detections[:, 5][keep]
+
+        height = full_img.shape[0]
+        width = full_img.shape[1]
+
+        for label, x0, y0, x1, y1 in zip(labels, left, top, right, bottom):
+            # Box corners are scaled by the full-size image dimensions.
+            xmin = int(round(x0 * width))
+            ymin = int(round(y0 * height))
+            xmax = int(round(x1 * width))
+            ymax = int(round(y1 * height))
+
+            if voc_classes[int(label) - 1] != target_label:
+                continue
+            region = im.crop((xmin, ymin, xmax, ymax))
+            region.save('{}/{}.jpg'.format(target_file,img_name))
+
+import os
+import sys
+from tqdm import *
+
+# Positional arguments: class label, input directory, output directory.
+target_label, img_file, target_file = sys.argv[1], sys.argv[2], sys.argv[3]
+
+# Create the output directory on first use.
+if not os.path.exists(target_file):
+    os.mkdir(target_file)
+
+# Process every directory entry whose name contains "jpg"; the last four
+# characters of the name are dropped to obtain the image name.
+files = os.listdir(img_file)
+for entry in tqdm(files):
+    if 'jpg' not in entry:
+        continue
+    img_name = entry[:-4]
+    get_rectangle(img_file,img_name,target_file,target_label)
+
diff --git a/pics/boys.jpg b/pics/boys.jpg
diff --git a/pics/car_cat.jpg b/pics/car_cat.jpg
diff --git a/pics/car_cat2.jpg b/pics/car_cat2.jpg
diff --git a/pics/cat.jpg b/pics/cat.jpg
diff --git a/pics/fish-bike.jpg b/pics/fish-bike.jpg