From 8c061d60a4eaed31b900855b11b68b5efa4ff819 Mon Sep 17 00:00:00 2001 From: Jeong Hansol Date: Wed, 2 Sep 2020 14:23:37 +0900 Subject: [PATCH 1/3] Fix error when using multi-gpu --- keras_retinanet/bin/evaluate.py | 2 +- keras_retinanet/bin/train.py | 2 +- keras_retinanet/utils/gpu.py | 41 ++++++++++++++++++--------------- 3 files changed, 24 insertions(+), 21 deletions(-) diff --git a/keras_retinanet/bin/evaluate.py b/keras_retinanet/bin/evaluate.py index fa0304150..d0a7f88d4 100755 --- a/keras_retinanet/bin/evaluate.py +++ b/keras_retinanet/bin/evaluate.py @@ -99,7 +99,7 @@ def parse_args(args): parser.add_argument('model', help='Path to RetinaNet model.') parser.add_argument('--convert-model', help='Convert the model to an inference model (ie. the input is a training model).', action='store_true') parser.add_argument('--backbone', help='The backbone of the model.', default='resnet50') - parser.add_argument('--gpu', help='Id of the GPU to use (as reported by nvidia-smi).', type=int) + parser.add_argument('--gpu', help='Id of the GPU to use (as reported by nvidia-smi).') parser.add_argument('--score-threshold', help='Threshold on score to filter detections with (defaults to 0.05).', default=0.05, type=float) parser.add_argument('--iou-threshold', help='IoU Threshold to count for a positive detection (defaults to 0.5).', default=0.5, type=float) parser.add_argument('--max-detections', help='Max Detections per image (defaults to 100).', default=100, type=int) diff --git a/keras_retinanet/bin/train.py b/keras_retinanet/bin/train.py index b267d0d19..2ec4792fa 100755 --- a/keras_retinanet/bin/train.py +++ b/keras_retinanet/bin/train.py @@ -426,7 +426,7 @@ def csv_list(string): group.add_argument('--no-weights', help='Don\'t initialize the model with any weights.', dest='imagenet_weights', action='store_const', const=False) parser.add_argument('--backbone', help='Backbone model used by retinanet.', default='resnet50', type=str) parser.add_argument('--batch-size', help='Size of the batches.', default=1, type=int) - parser.add_argument('--gpu', help='Id of the GPU to use (as reported by nvidia-smi).', type=int) + parser.add_argument('--gpu', help='Id of the GPU to use (as reported by nvidia-smi).') parser.add_argument('--multi-gpu', help='Number of GPUs to use for parallel processing.', type=int, default=0) parser.add_argument('--multi-gpu-force', help='Extra flag needed to enable (experimental) multi-gpu support.', action='store_true') parser.add_argument('--initial-epoch', help='Epoch from which to begin the train, useful if resuming from snapshot.', type=int, default=0) diff --git a/keras_retinanet/utils/gpu.py b/keras_retinanet/utils/gpu.py index 7c15e7c93..2de59644b 100644 --- a/keras_retinanet/utils/gpu.py +++ b/keras_retinanet/utils/gpu.py @@ -18,23 +18,26 @@ def setup_gpu(gpu_id): - if gpu_id == 'cpu' or gpu_id == -1: + try: + visible_gpu_indice = [int(id) for id in gpu_id.split(',')] + available_gpus = tf.config.list_physical_devices('GPU') + visible_gpus = [gpu for idx, gpu in enumerate(available_gpus) if idx in visible_gpu_indice] + + if visible_gpus: + try: + # Currently, memory growth needs to be the same across GPUs. + for gpu in available_gpus: + tf.config.experimental.set_memory_growth(gpu, True) + + # Use only the selcted gpu. + tf.config.set_visible_devices(visible_gpus, 'GPU') + except RuntimeError as e: + # Visible devices must be set before GPUs have been initialized. + print(e) + + logical_gpus = tf.config.list_logical_devices('GPU') + print(len(available_gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs") + else: + tf.config.set_visible_devices([], 'GPU') + except ValueError: tf.config.set_visible_devices([], 'GPU') - return - - gpus = tf.config.list_physical_devices('GPU') - if gpus: - # Restrict TensorFlow to only use the first GPU. - try: - # Currently, memory growth needs to be the same across GPUs. - for gpu in gpus: - tf.config.experimental.set_memory_growth(gpu, True) - - # Use only the selcted gpu. - tf.config.set_visible_devices(gpus[int(gpu_id)], 'GPU') - except RuntimeError as e: - # Visible devices must be set before GPUs have been initialized. - print(e) - - logical_gpus = tf.config.list_logical_devices('GPU') - print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs") From 95155f28ccb0a5a7de2f3cdcd3cbbb456fe9b6a2 Mon Sep 17 00:00:00 2001 From: Jeong Hansol Date: Thu, 1 Oct 2020 19:48:42 +0900 Subject: [PATCH 2/3] indice(s) Co-authored-by: Hans Gaiser --- keras_retinanet/utils/gpu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/keras_retinanet/utils/gpu.py b/keras_retinanet/utils/gpu.py index 2de59644b..fe50adc10 100644 --- a/keras_retinanet/utils/gpu.py +++ b/keras_retinanet/utils/gpu.py @@ -19,7 +19,7 @@ def setup_gpu(gpu_id): try: - visible_gpu_indice = [int(id) for id in gpu_id.split(',')] + visible_gpu_indices = [int(id) for id in gpu_id.split(',')] available_gpus = tf.config.list_physical_devices('GPU') visible_gpus = [gpu for idx, gpu in enumerate(available_gpus) if idx in visible_gpu_indice] From c05643dd54a60a436e67fa7354ff54d6adfb330b Mon Sep 17 00:00:00 2001 From: Jeong Hansol Date: Thu, 1 Oct 2020 19:49:10 +0900 Subject: [PATCH 3/3] indice(s) Co-authored-by: Hans Gaiser --- keras_retinanet/utils/gpu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/keras_retinanet/utils/gpu.py b/keras_retinanet/utils/gpu.py index fe50adc10..067f30b44 100644 --- a/keras_retinanet/utils/gpu.py +++ b/keras_retinanet/utils/gpu.py @@ -21,7 +21,7 @@ def setup_gpu(gpu_id): try: visible_gpu_indices = [int(id) for id in gpu_id.split(',')] available_gpus = tf.config.list_physical_devices('GPU') - visible_gpus = [gpu for idx, gpu in enumerate(available_gpus) if idx in visible_gpu_indice] + visible_gpus = [gpu for idx, gpu in enumerate(available_gpus) if idx in visible_gpu_indices] if visible_gpus: try: