first commit

deepaktalwardt · Apr 3, 2018 · bd72721 · bd72721
1 parent 2383b8e
commit bd72721
Show file tree

Hide file tree

Showing 11 changed files with 1,546 additions and 1 deletion.
diff --git a/README.md b/README.md
@@ -1,2 +1,23 @@
 # keras-yolo3
-A Keras implementation of YOLOv3 (Tensorflow backend)
+
+[![license](https://img.shields.io/github/license/mashape/apistatus.svg)](LICENSE)
+
+## Introduction
+
+A Keras implementation of YOLOv3 (Tensorflow backend) inspired by [allanzelener/YAD2K](https://github.com/allanzelener/YAD2K).
+
+Training is not supported.
+
+---
+
+## Quick Start
+
+- Download YOLOv3 weights from [YOLO website](http://pjreddie.com/darknet/yolo/).
+- Convert the Darknet YOLO model to a Keras model.
+- Run YOLO detection.
+
+```
+wget https://pjreddie.com/media/files/yolov3.weights
+python convert.py yolov3.cfg yolov3.weights model_data/yolo.h5
+python yolo.py
+```
diff --git a/convert.py b/convert.py
@@ -0,0 +1,242 @@
+#! /usr/bin/env python
+"""
+Reads Darknet config and weights and creates Keras model with TF backend.
+
+"""
+
+import argparse
+import configparser
+import io
+import os
+from collections import defaultdict
+
+import numpy as np
+from keras import backend as K
+from keras.layers import (Conv2D, Input, Add, UpSampling2D, Concatenate)
+from keras.layers.advanced_activations import LeakyReLU
+from keras.layers.normalization import BatchNormalization
+from keras.models import Model
+from keras.regularizers import l2
+from keras.utils.vis_utils import plot_model as plot
+
+
+# Command-line interface: three required positional paths (Darknet cfg,
+# Darknet weights, output Keras model) plus an optional flag that also
+# saves a plot of the generated model.
+parser = argparse.ArgumentParser(description='Darknet To Keras Converter.')
+parser.add_argument('config_path', help='Path to Darknet cfg file.')
+parser.add_argument('weights_path', help='Path to Darknet weights file.')
+parser.add_argument('output_path', help='Path to output Keras model file.')
+parser.add_argument(
+    '-p',
+    '--plot_model',
+    help='Plot generated Keras model and save as image.',
+    action='store_true')
+
+
+def unique_config_sections(config_file):
+    """Return the config text with every section header made unique.
+
+    Darknet cfg files repeat section names (e.g. many ``[convolutional]``
+    headers), which configparser rejects. Each header line gets a running
+    per-name counter appended (``[convolutional_0]``, ``[convolutional_1]``,
+    ...); all other lines are passed through unchanged.
+
+    Args:
+        config_file: Path of the Darknet cfg file to read.
+
+    Returns:
+        An ``io.StringIO`` positioned at the start of the rewritten text.
+    """
+    seen = defaultdict(int)
+    rewritten = []
+    with open(config_file) as fin:
+        for raw_line in fin:
+            if raw_line.startswith('['):
+                name = raw_line.strip().strip('[]')
+                numbered = name + '_' + str(seen[name])
+                seen[name] += 1
+                raw_line = raw_line.replace(name, numbered)
+            rewritten.append(raw_line)
+    # A StringIO built from initial text already starts at position 0.
+    return io.StringIO(''.join(rewritten))
+
+# %%
+def _read_float32(weights_file, shape):
+    """Read a float32 array of the given shape from the open weights file.
+
+    Returns:
+        A ``(array, n_values)`` tuple, where ``n_values`` is the number of
+        floats consumed from the file.
+
+    Raises:
+        ValueError: If the file ends before ``n_values`` floats are read.
+    """
+    n_values = int(np.prod(shape))
+    raw = weights_file.read(n_values * 4)
+    if len(raw) != n_values * 4:
+        raise ValueError(
+            'Weights file ended early: expected {} bytes, got {}'.format(
+                n_values * 4, len(raw)))
+    return np.ndarray(shape=shape, dtype='float32', buffer=raw), n_values
+
+
+def _main(args):
+    """Convert a Darknet cfg/weights pair into a saved Keras model.
+
+    Walks the cfg sections in order, building the matching Keras layers and
+    filling convolution / batch-norm weights from the Darknet weights file.
+    The inputs of every ``yolo`` section become the model outputs. The model
+    is saved to ``output_path`` and, when requested, plotted next to it.
+
+    Args:
+        args: Parsed command-line namespace providing ``config_path``,
+            ``weights_path``, ``output_path`` and ``plot_model``.
+
+    Raises:
+        AssertionError: If a path does not have the expected extension.
+        ValueError: If the cfg contains an unsupported section or
+            activation, or the weights file is shorter than the cfg needs.
+    """
+    config_path = os.path.expanduser(args.config_path)
+    weights_path = os.path.expanduser(args.weights_path)
+    assert config_path.endswith('.cfg'), '{} is not a .cfg file'.format(
+        config_path)
+    assert weights_path.endswith(
+        '.weights'), '{} is not a .weights file'.format(weights_path)
+
+    output_path = os.path.expanduser(args.output_path)
+    assert output_path.endswith(
+        '.h5'), 'output path {} is not a .h5 file'.format(output_path)
+    output_root = os.path.splitext(output_path)[0]
+
+    # Load weights and config.
+    print('Loading weights.')
+    # The context manager closes the weights file even if conversion fails.
+    with open(weights_path, 'rb') as weights_file:
+        # The first 20 bytes are read as five int32 header values.
+        weights_header = np.ndarray(
+            shape=(5, ), dtype='int32', buffer=weights_file.read(20))
+        print('Weights Header: ', weights_header)
+
+        print('Parsing Darknet config.')
+        unique_config_file = unique_config_sections(config_path)
+        cfg_parser = configparser.ConfigParser()
+        cfg_parser.read_file(unique_config_file)
+
+        print('Creating Keras model.')
+        net_cfg = cfg_parser['net_0']
+        image_height = int(net_cfg['height'])
+        image_width = int(net_cfg['width'])
+        input_layer = Input(shape=(image_height, image_width, 3))
+        prev_layer = input_layer
+        all_layers = []
+
+        # Fall back to 5e-4 when the net section does not specify a decay.
+        weight_decay = float(net_cfg.get('decay', fallback=5e-4))
+        count = 0  # number of float32 weights consumed so far
+        out_index = []  # indices into all_layers that feed a yolo section
+        for section in cfg_parser.sections():
+            print('Parsing section {}'.format(section))
+            section_cfg = cfg_parser[section]
+            if section.startswith('convolutional'):
+                filters = int(section_cfg['filters'])
+                size = int(section_cfg['size'])
+                stride = int(section_cfg['stride'])
+                pad = int(section_cfg['pad'])
+                activation = section_cfg['activation']
+                # Honour the option's value: batch_normalize=0 means off.
+                batch_normalize = int(
+                    section_cfg.get('batch_normalize', fallback=0)) != 0
+
+                # padding='same' is equivalent to Darknet pad=1
+                # NOTE(review): for stride > 1 Keras 'same' padding may not
+                # match Darknet's padding exactly -- confirm outputs agree.
+                padding = 'same' if pad == 1 else 'valid'
+
+                # Setting weights.
+                # Darknet serializes convolutional weights as:
+                # [bias/beta, [gamma, mean, variance], conv_weights]
+                prev_layer_shape = K.int_shape(prev_layer)
+
+                # TODO: This assumes channel last dim_ordering.
+                weights_shape = (size, size, prev_layer_shape[-1], filters)
+                darknet_w_shape = (filters, weights_shape[2], size, size)
+
+                print('conv2d', 'bn'
+                      if batch_normalize else '  ', activation, weights_shape)
+
+                conv_bias, n_read = _read_float32(weights_file, (filters, ))
+                count += n_read
+
+                if batch_normalize:
+                    bn_weights, n_read = _read_float32(
+                        weights_file, (3, filters))
+                    count += n_read
+
+                    # TODO: Keras BatchNormalization mistakenly refers to
+                    # var as std.
+                    bn_weight_list = [
+                        bn_weights[0],  # scale gamma
+                        conv_bias,  # shift beta
+                        bn_weights[1],  # running mean
+                        bn_weights[2]  # running var
+                    ]
+
+                conv_weights, n_read = _read_float32(
+                    weights_file, darknet_w_shape)
+                count += n_read
+
+                # DarkNet conv_weights are serialized Caffe-style:
+                # (out_dim, in_dim, height, width)
+                # We would like to set these to Tensorflow order:
+                # (height, width, in_dim, out_dim)
+                # TODO: Add check for Theano dim ordering.
+                conv_weights = np.transpose(conv_weights, [2, 3, 1, 0])
+                # With batch norm the bias is carried by the BN beta instead.
+                conv_weights = [conv_weights] if batch_normalize else [
+                    conv_weights, conv_bias
+                ]
+
+                # Handle activation.
+                act_fn = None
+                if activation == 'leaky':
+                    pass  # Add advanced activation later.
+                elif activation != 'linear':
+                    raise ValueError(
+                        'Unknown activation function `{}` in section {}'.format(
+                            activation, section))
+
+                # Create Conv2D layer
+                conv_layer = (Conv2D(
+                    filters, (size, size),
+                    strides=(stride, stride),
+                    kernel_regularizer=l2(weight_decay),
+                    use_bias=not batch_normalize,
+                    weights=conv_weights,
+                    activation=act_fn,
+                    padding=padding))(prev_layer)
+
+                if batch_normalize:
+                    conv_layer = (BatchNormalization(
+                        weights=bn_weight_list))(conv_layer)
+                prev_layer = conv_layer
+
+                if activation == 'linear':
+                    all_layers.append(prev_layer)
+                elif activation == 'leaky':
+                    act_layer = LeakyReLU(alpha=0.1)(prev_layer)
+                    prev_layer = act_layer
+                    all_layers.append(act_layer)
+
+            elif section.startswith('route'):
+                ids = [int(i) for i in section_cfg['layers'].split(',')]
+                layers = [all_layers[i] for i in ids]
+                if len(layers) > 1:
+                    print('Concatenating route layers:', layers)
+                    concatenate_layer = Concatenate()(layers)
+                    all_layers.append(concatenate_layer)
+                    prev_layer = concatenate_layer
+                else:
+                    skip_layer = layers[0]  # only one layer to route
+                    all_layers.append(skip_layer)
+                    prev_layer = skip_layer
+
+            elif section.startswith('shortcut'):
+                index = int(section_cfg['from'])
+                activation = section_cfg['activation']
+                assert activation == 'linear', 'Only linear activation supported.'
+                all_layers.append(Add()([all_layers[index], prev_layer]))
+                prev_layer = all_layers[-1]
+
+            elif section.startswith('upsample'):
+                stride = int(section_cfg['stride'])
+                assert stride == 2, 'Only stride=2 supported.'
+                all_layers.append(UpSampling2D(stride)(prev_layer))
+                prev_layer = all_layers[-1]
+
+            # TODO: Further implementation needed.
+            elif section.startswith('yolo'):
+                # Record the layer feeding this yolo section as a model
+                # output; a None placeholder keeps layer indices aligned.
+                out_index.append(len(all_layers) - 1)
+                all_layers.append(None)
+                prev_layer = all_layers[-1]
+
+            elif section.startswith('net'):
+                pass
+
+            else:
+                raise ValueError(
+                    'Unsupported section header type: {}'.format(section))
+
+        # Whatever is left in the file was not consumed by any layer.
+        # Integer division keeps the reported counts as whole numbers.
+        remaining_weights = len(weights_file.read()) // 4
+
+    # Create and save model.
+    model = Model(
+        inputs=input_layer, outputs=[all_layers[i] for i in out_index])
+    # summary() prints the table itself; wrapping it in print() only adds
+    # a stray "None" line.
+    model.summary()
+    model.save(output_path)
+    print('Saved Keras model to {}'.format(output_path))
+    # Check to see if all weights have been read.
+    print('Read {} of {} from Darknet weights.'.format(count, count +
+                                                       remaining_weights))
+    if remaining_weights > 0:
+        print('Warning: {} unused weights'.format(remaining_weights))
+
+    if args.plot_model:
+        plot(model, to_file='{}.png'.format(output_root), show_shapes=True)
+        print('Saved model plot to {}.png'.format(output_root))
+
+
+# Run the converter only when executed as a script, not on import.
+if __name__ == '__main__':
+    _main(parser.parse_args())
diff --git a/font/FiraMono-Medium.otf b/font/FiraMono-Medium.otf
diff --git a/font/SIL Open Font License.txt b/font/SIL Open Font License.txt
@@ -0,0 +1,45 @@
+Copyright (c) 2014, Mozilla Foundation https://mozilla.org/ with Reserved Font Name Fira Mono.
+
+Copyright (c) 2014, Telefonica S.A.
+
+This Font Software is licensed under the SIL Open Font License, Version 1.1.
+This license is copied below, and is also available with a FAQ at: http://scripts.sil.org/OFL
+
+-----------------------------------------------------------
+SIL OPEN FONT LICENSE Version 1.1 - 26 February 2007
+-----------------------------------------------------------
+
+PREAMBLE
+The goals of the Open Font License (OFL) are to stimulate worldwide development of collaborative font projects, to support the font creation efforts of academic and linguistic communities, and to provide a free and open framework in which fonts may be shared and improved in partnership with others.
+
+The OFL allows the licensed fonts to be used, studied, modified and redistributed freely as long as they are not sold by themselves. The fonts, including any derivative works, can be bundled, embedded, redistributed and/or sold with any software provided that any reserved names are not used by derivative works. The fonts and derivatives, however, cannot be released under any other type of license. The requirement for fonts to remain under this license does not apply to any document created using the fonts or their derivatives.
+
+DEFINITIONS
+"Font Software" refers to the set of files released by the Copyright Holder(s) under this license and clearly marked as such. This may include source files, build scripts and documentation.
+
+"Reserved Font Name" refers to any names specified as such after the copyright statement(s).
+
+"Original Version" refers to the collection of Font Software components as distributed by the Copyright Holder(s).
+
+"Modified Version" refers to any derivative made by adding to, deleting, or substituting -- in part or in whole -- any of the components of the Original Version, by changing formats or by porting the Font Software to a new environment.
+
+"Author" refers to any designer, engineer, programmer, technical writer or other person who contributed to the Font Software.
+
+PERMISSION & CONDITIONS
+Permission is hereby granted, free of charge, to any person obtaining a copy of the Font Software, to use, study, copy, merge, embed, modify, redistribute, and sell modified and unmodified copies of the Font Software, subject to the following conditions:
+
+1) Neither the Font Software nor any of its individual components, in Original or Modified Versions, may be sold by itself.
+
+2) Original or Modified Versions of the Font Software may be bundled, redistributed and/or sold with any software, provided that each copy contains the above copyright notice and this license. These can be included either as stand-alone text files, human-readable headers or in the appropriate machine-readable metadata fields within text or binary files as long as those fields can be easily viewed by the user.
+
+3) No Modified Version of the Font Software may use the Reserved Font Name(s) unless explicit written permission is granted by the corresponding Copyright Holder. This restriction only applies to the primary font name as presented to the users.
+
+4) The name(s) of the Copyright Holder(s) or the Author(s) of the Font Software shall not be used to promote, endorse or advertise any Modified Version, except to acknowledge the contribution(s) of the Copyright Holder(s) and the Author(s) or with their explicit written permission.
+
+5) The Font Software, modified or unmodified, in part or in whole, must be distributed entirely under this license, and must not be distributed under any other license. The requirement for fonts to remain under this license does not apply to any document created using the Font Software.
+
+TERMINATION
+This license becomes null and void if any of the above conditions are not met.
+
+DISCLAIMER
+THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM OTHER DEALINGS IN THE FONT SOFTWARE.
diff --git a/model_data/coco_classes.txt b/model_data/coco_classes.txt
@@ -0,0 +1,80 @@
+person
+bicycle
+car
+motorbike
+aeroplane
+bus
+train
+truck
+boat
+traffic light
+fire hydrant
+stop sign
+parking meter
+bench
+bird
+cat
+dog
+horse
+sheep
+cow
+elephant
+bear
+zebra
+giraffe
+backpack
+umbrella
+handbag
+tie
+suitcase
+frisbee
+skis
+snowboard
+sports ball
+kite
+baseball bat
+baseball glove
+skateboard
+surfboard
+tennis racket
+bottle
+wine glass
+cup
+fork
+knife
+spoon
+bowl
+banana
+apple
+sandwich
+orange
+broccoli
+carrot
+hot dog
+pizza
+donut
+cake
+chair
+sofa
+pottedplant
+bed
+diningtable
+toilet
+tvmonitor
+laptop
+mouse
+remote
+keyboard
+cell phone
+microwave
+oven
+toaster
+sink
+refrigerator
+book
+clock
+vase
+scissors
+teddy bear
+hair drier
+toothbrush
diff --git a/model_data/yolo_anchors.txt b/model_data/yolo_anchors.txt
@@ -0,0 +1 @@
+10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326