From d23da204620162f2e4b04a9a7d0145051a8b13cc Mon Sep 17 00:00:00 2001 From: Sharpiless <1691608003@qq.com> Date: Mon, 18 Apr 2022 10:26:59 +0800 Subject: [PATCH] first commit --- CITATION.cff | 8 + LICENSE | 203 ++ MANIFEST.in | 6 + README.md | 75 + README_zh-CN.md | 14 + .../_base_/datasets/cityscapes_detection.py | 56 + .../_base_/datasets/cityscapes_instance.py | 56 + configs/_base_/datasets/coco_detection.py | 49 + .../_base_/datasets/coco_detection_pix2seq.py | 68 + .../_base_/datasets/coco_detection_vqvae.py | 69 + configs/_base_/datasets/coco_instance.py | 49 + .../_base_/datasets/coco_instance_semantic.py | 54 + configs/_base_/datasets/coco_panoptic.py | 59 + configs/_base_/datasets/deepfashion.py | 53 + configs/_base_/datasets/lvis_v0.5_instance.py | 24 + configs/_base_/datasets/lvis_v1_instance.py | 24 + configs/_base_/datasets/voc0712.py | 55 + configs/_base_/datasets/voc0712_pix2seq.py | 73 + configs/_base_/datasets/wider_face.py | 63 + configs/_base_/default_runtime.py | 16 + .../models/cascade_mask_rcnn_r50_fpn.py | 196 ++ configs/_base_/models/cascade_rcnn_r50_fpn.py | 179 + configs/_base_/models/fast_rcnn_r50_fpn.py | 62 + .../_base_/models/faster_rcnn_r50_caffe_c4.py | 114 + .../models/faster_rcnn_r50_caffe_dc5.py | 105 + configs/_base_/models/faster_rcnn_r50_fpn.py | 108 + .../_base_/models/mask_rcnn_r50_caffe_c4.py | 125 + configs/_base_/models/mask_rcnn_r50_fpn.py | 120 + configs/_base_/models/retinanet_r50_fpn.py | 60 + configs/_base_/models/rpn_r50_caffe_c4.py | 58 + configs/_base_/models/rpn_r50_fpn.py | 58 + configs/_base_/models/ssd300.py | 56 + configs/_base_/schedules/schedule_1x.py | 11 + configs/_base_/schedules/schedule_20e.py | 11 + configs/_base_/schedules/schedule_2x.py | 11 + configs/albu_example/README.md | 19 + .../mask_rcnn_r50_fpn_albu_1x_coco.py | 73 + configs/atss/README.md | 21 + configs/atss/atss_r101_fpn_1x_coco.py | 6 + configs/atss/atss_r50_fpn_1x_coco.py | 62 + configs/atss/metafile.yml | 60 + 
configs/autoassign/README.md | 25 + .../autoassign_r50_fpn_8x2_1x_coco.py | 85 + configs/autoassign/metafile.yml | 33 + configs/carafe/README.md | 32 + .../faster_rcnn_r50_fpn_carafe_1x_coco.py | 50 + .../mask_rcnn_r50_fpn_carafe_1x_coco.py | 60 + configs/cascade_rcnn/README.md | 69 + ...ascade_mask_rcnn_r101_caffe_fpn_1x_coco.py | 7 + ...ask_rcnn_r101_caffe_fpn_mstrain_3x_coco.py | 7 + .../cascade_mask_rcnn_r101_fpn_1x_coco.py | 6 + .../cascade_mask_rcnn_r101_fpn_20e_coco.py | 6 + ...cade_mask_rcnn_r101_fpn_mstrain_3x_coco.py | 6 + ...cascade_mask_rcnn_r50_caffe_fpn_1x_coco.py | 41 + ...mask_rcnn_r50_caffe_fpn_mstrain_3x_coco.py | 49 + .../cascade_mask_rcnn_r50_fpn_1x_coco.py | 5 + .../cascade_mask_rcnn_r50_fpn_20e_coco.py | 5 + ...scade_mask_rcnn_r50_fpn_mstrain_3x_coco.py | 4 + ...ascade_mask_rcnn_x101_32x4d_fpn_1x_coco.py | 14 + ...scade_mask_rcnn_x101_32x4d_fpn_20e_coco.py | 14 + ...ask_rcnn_x101_32x4d_fpn_mstrain_3x_coco.py | 14 + ...ask_rcnn_x101_32x8d_fpn_mstrain_3x_coco.py | 60 + ...ascade_mask_rcnn_x101_64x4d_fpn_1x_coco.py | 14 + ...scade_mask_rcnn_x101_64x4d_fpn_20e_coco.py | 14 + ...ask_rcnn_x101_64x4d_fpn_mstrain_3x_coco.py | 14 + .../cascade_rcnn_r101_caffe_fpn_1x_coco.py | 7 + .../cascade_rcnn_r101_fpn_1x_coco.py | 6 + .../cascade_rcnn_r101_fpn_20e_coco.py | 6 + .../cascade_rcnn_r50_caffe_fpn_1x_coco.py | 42 + .../cascade_rcnn_r50_fpn_1x_coco.py | 5 + .../cascade_rcnn_r50_fpn_20e_coco.py | 4 + .../cascade_rcnn_x101_32x4d_fpn_1x_coco.py | 14 + .../cascade_rcnn_x101_32x4d_fpn_20e_coco.py | 14 + .../cascade_rcnn_x101_64x4d_fpn_1x_coco.py | 15 + .../cascade_rcnn_x101_64x4d_fpn_20e_coco.py | 15 + configs/cascade_rcnn/metafile.yml | 525 +++ configs/cascade_rpn/README.md | 29 + .../crpn_fast_rcnn_r50_caffe_fpn_1x_coco.py | 77 + .../crpn_faster_rcnn_r50_caffe_fpn_1x_coco.py | 92 + .../cascade_rpn/crpn_r50_caffe_fpn_1x_coco.py | 77 + configs/centernet/README.md | 30 + .../centernet/centernet_resnet18_140e_coco.py | 3 + 
.../centernet_resnet18_dcnv2_140e_coco.py | 122 + configs/centernet/metafile.yml | 46 + configs/centripetalnet/README.md | 26 + ...lnet_hourglass104_mstest_16x6_210e_coco.py | 105 + configs/centripetalnet/metafile.yml | 39 + configs/cityscapes/README.md | 33 + .../faster_rcnn_r50_fpn_1x_cityscapes.py | 39 + .../mask_rcnn_r50_fpn_1x_cityscapes.py | 46 + configs/common/lsj_100e_coco_instance.py | 90 + .../common/mstrain-poly_3x_coco_instance.py | 80 + configs/common/mstrain_3x_coco.py | 76 + configs/common/mstrain_3x_coco_instance.py | 76 + configs/cornernet/README.md | 33 + ...rnet_hourglass104_mstest_10x5_210e_coco.py | 105 + ...rnet_hourglass104_mstest_32x3_210e_coco.py | 105 + ...ernet_hourglass104_mstest_8x6_210e_coco.py | 105 + configs/cornernet/metafile.yml | 83 + configs/dcn/README.md | 52 + ..._mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py | 5 + ...e_mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py | 5 + ...rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco.py | 5 + ...scade_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py | 5 + ...ascade_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py | 5 + ...aster_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py | 5 + ...faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py | 5 + .../dcn/faster_rcnn_r50_fpn_dpool_1x_coco.py | 12 + ...aster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py | 5 + ...cnn_r50_fpn_mdconv_c3-c5_group4_1x_coco.py | 5 + .../dcn/faster_rcnn_r50_fpn_mdpool_1x_coco.py | 12 + ...rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco.py | 16 + .../mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py | 5 + .../mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py | 5 + .../mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py | 5 + configs/dcn/metafile.yml | 335 ++ configs/deepfashion/README.md | 56 + .../mask_rcnn_r50_fpn_15e_deepfashion.py | 10 + configs/deformable_detr/README.md | 31 + .../deformable_detr_r50_16x2_50e_coco.py | 172 + ...eformable_detr_refine_r50_16x2_50e_coco.py | 2 + ..._detr_twostage_refine_r50_16x2_50e_coco.py | 2 + configs/deformable_detr/metafile.yml | 56 + configs/detectors/README.md | 59 + 
.../detectors/cascade_rcnn_r50_rfp_1x_coco.py | 28 + .../detectors/cascade_rcnn_r50_sac_1x_coco.py | 12 + .../detectors_cascade_rcnn_r50_1x_coco.py | 32 + .../detectors/detectors_htc_r101_20e_coco.py | 28 + .../detectors/detectors_htc_r50_1x_coco.py | 28 + configs/detectors/htc_r50_rfp_1x_coco.py | 24 + configs/detectors/htc_r50_sac_1x_coco.py | 8 + configs/detectors/metafile.yml | 114 + configs/detr/README.md | 27 + configs/detr/detr_r50_8x2_150e_coco.py | 150 + configs/detr/detr_r50_8x2_150e_coco_merge.py | 152 + configs/detr/metafile.yml | 33 + configs/double_heads/README.md | 22 + .../dh_faster_rcnn_r50_fpn_1x_coco.py | 23 + configs/double_heads/metafile.yml | 41 + configs/dynamic_rcnn/README.md | 20 + .../dynamic_rcnn_r50_fpn_1x_coco.py | 28 + configs/dynamic_rcnn/metafile.yml | 35 + configs/empirical_attention/README.md | 23 + ...ter_rcnn_r50_fpn_attention_0010_1x_coco.py | 13 + ...rcnn_r50_fpn_attention_0010_dcn_1x_coco.py | 16 + ...ter_rcnn_r50_fpn_attention_1111_1x_coco.py | 13 + ...rcnn_r50_fpn_attention_1111_dcn_1x_coco.py | 16 + configs/empirical_attention/metafile.yml | 103 + configs/fast_rcnn/README.md | 16 + .../fast_rcnn_r101_caffe_fpn_1x_coco.py | 7 + .../fast_rcnn/fast_rcnn_r101_fpn_1x_coco.py | 6 + .../fast_rcnn/fast_rcnn_r101_fpn_2x_coco.py | 6 + .../fast_rcnn_r50_caffe_fpn_1x_coco.py | 48 + .../fast_rcnn/fast_rcnn_r50_fpn_1x_coco.py | 52 + .../fast_rcnn/fast_rcnn_r50_fpn_2x_coco.py | 5 + configs/faster_rcnn/README.md | 67 + .../faster_rcnn_r101_caffe_fpn_1x_coco.py | 7 + ...ter_rcnn_r101_caffe_fpn_mstrain_3x_coco.py | 49 + .../faster_rcnn_r101_fpn_1x_coco.py | 6 + .../faster_rcnn_r101_fpn_2x_coco.py | 6 + .../faster_rcnn_r101_fpn_mstrain_3x_coco.py | 7 + .../faster_rcnn_r50_caffe_c4_1x_coco.py | 39 + .../faster_rcnn_r50_caffe_dc5_1x_coco.py | 37 + ...ster_rcnn_r50_caffe_dc5_mstrain_1x_coco.py | 42 + ...ster_rcnn_r50_caffe_dc5_mstrain_3x_coco.py | 4 + .../faster_rcnn_r50_caffe_fpn_1x_coco.py | 41 + .../faster_rcnn_r50_caffe_fpn_90k_coco.py | 15 
+ ..._fpn_mstrain_1x_coco-person-bicycle-car.py | 9 + ...nn_r50_caffe_fpn_mstrain_1x_coco-person.py | 9 + ...ster_rcnn_r50_caffe_fpn_mstrain_1x_coco.py | 46 + ...ster_rcnn_r50_caffe_fpn_mstrain_2x_coco.py | 4 + ...ster_rcnn_r50_caffe_fpn_mstrain_3x_coco.py | 47 + ...ter_rcnn_r50_caffe_fpn_mstrain_90k_coco.py | 15 + .../faster_rcnn_r50_fpn_1x_coco.py | 5 + .../faster_rcnn_r50_fpn_2x_coco.py | 5 + ...faster_rcnn_r50_fpn_bounded_iou_1x_coco.py | 6 + .../faster_rcnn_r50_fpn_ciou_1x_coco.py | 6 + .../faster_rcnn_r50_fpn_giou_1x_coco.py | 6 + .../faster_rcnn_r50_fpn_iou_1x_coco.py | 6 + .../faster_rcnn_r50_fpn_mstrain_3x_coco.py | 3 + .../faster_rcnn_r50_fpn_ohem_1x_coco.py | 2 + .../faster_rcnn_r50_fpn_soft_nms_1x_coco.py | 12 + .../faster_rcnn_x101_32x4d_fpn_1x_coco.py | 14 + .../faster_rcnn_x101_32x4d_fpn_2x_coco.py | 14 + ...ter_rcnn_x101_32x4d_fpn_mstrain_3x_coco.py | 16 + ...ter_rcnn_x101_32x8d_fpn_mstrain_3x_coco.py | 62 + .../faster_rcnn_x101_64x4d_fpn_1x_coco.py | 14 + .../faster_rcnn_x101_64x4d_fpn_2x_coco.py | 14 + ...ter_rcnn_x101_64x4d_fpn_mstrain_3x_coco.py | 16 + configs/faster_rcnn/metafile.yml | 383 +++ configs/fcos/README.md | 35 + ...nreg-giou_r50_caffe_fpn_gn-head_1x_coco.py | 54 + ...-giou_r50_caffe_fpn_gn-head_dcn_1x_coco.py | 56 + ...os_center_r50_caffe_fpn_gn-head_1x_coco.py | 2 + .../fcos_r101_caffe_fpn_gn-head_1x_coco.py | 7 + ...ffe_fpn_gn-head_mstrain_640-800_2x_coco.py | 47 + .../fcos_r50_caffe_fpn_gn-head_1x_coco.py | 106 + .../fcos_r50_caffe_fpn_gn-head_4x4_1x_coco.py | 4 + ...ffe_fpn_gn-head_mstrain_640-800_2x_coco.py | 39 + ...x4d_fpn_gn-head_mstrain_640-800_2x_coco.py | 60 + configs/fcos/metafile.yml | 146 + configs/foveabox/README.md | 41 + ...ovea_align_r101_fpn_gn-head_4x4_2x_coco.py | 12 + ...fpn_gn-head_mstrain_640-800_4x4_2x_coco.py | 29 + ...fovea_align_r50_fpn_gn-head_4x4_2x_coco.py | 10 + ...fpn_gn-head_mstrain_640-800_4x4_2x_coco.py | 25 + .../foveabox/fovea_r101_fpn_4x4_1x_coco.py | 6 + 
.../foveabox/fovea_r101_fpn_4x4_2x_coco.py | 6 + configs/foveabox/fovea_r50_fpn_4x4_1x_coco.py | 52 + configs/foveabox/fovea_r50_fpn_4x4_2x_coco.py | 4 + configs/foveabox/metafile.yml | 172 + configs/fp16/README.md | 24 + .../fp16/faster_rcnn_r50_fpn_fp16_1x_coco.py | 3 + .../fp16/mask_rcnn_r50_fpn_fp16_1x_coco.py | 3 + ...k_rcnn_r50_fpn_fp16_dconv_c3-c5_1x_coco.py | 7 + ..._rcnn_r50_fpn_fp16_mdconv_c3-c5_1x_coco.py | 7 + configs/fp16/metafile.yml | 113 + .../fp16/retinanet_r50_fpn_fp16_1x_coco.py | 3 + configs/fpg/README.md | 30 + ...er_rcnn_r50_fpg-chn128_crop640_50e_coco.py | 9 + .../faster_rcnn_r50_fpg_crop640_50e_coco.py | 48 + .../faster_rcnn_r50_fpn_crop640_50e_coco.py | 68 + ...sk_rcnn_r50_fpg-chn128_crop640_50e_coco.py | 10 + .../fpg/mask_rcnn_r50_fpg_crop640_50e_coco.py | 48 + .../fpg/mask_rcnn_r50_fpn_crop640_50e_coco.py | 74 + configs/fpg/metafile.yml | 104 + ...tinanet_r50_fpg-chn128_crop640_50e_coco.py | 5 + .../fpg/retinanet_r50_fpg_crop640_50e_coco.py | 53 + configs/free_anchor/README.md | 27 + configs/free_anchor/metafile.yml | 79 + .../retinanet_free_anchor_r101_fpn_1x_coco.py | 6 + .../retinanet_free_anchor_r50_fpn_1x_coco.py | 22 + ...anet_free_anchor_x101_32x4d_fpn_1x_coco.py | 13 + configs/fsaf/README.md | 45 + configs/fsaf/fsaf_r101_fpn_1x_coco.py | 6 + configs/fsaf/fsaf_r50_fpn_1x_coco.py | 48 + configs/fsaf/fsaf_x101_64x4d_fpn_1x_coco.py | 14 + configs/fsaf/metafile.yml | 80 + configs/gcnet/README.md | 59 + ..._x101_32x4d_fpn_syncbn-backbone_1x_coco.py | 4 + ...fpn_syncbn-backbone_dconv_c3-c5_1x_coco.py | 4 + ...kbone_dconv_c3-c5_r16_gcb_c3-c5_1x_coco.py | 11 + ...ckbone_dconv_c3-c5_r4_gcb_c3-c5_1x_coco.py | 11 + ...n_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py | 11 + ...pn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py | 11 + ...ask_rcnn_r101_fpn_r16_gcb_c3-c5_1x_coco.py | 8 + ...mask_rcnn_r101_fpn_r4_gcb_c3-c5_1x_coco.py | 8 + ...k_rcnn_r101_fpn_syncbn-backbone_1x_coco.py | 4 + ...n_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py | 11 + 
...pn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py | 11 + ...mask_rcnn_r50_fpn_r16_gcb_c3-c5_1x_coco.py | 8 + .../mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco.py | 8 + ...sk_rcnn_r50_fpn_syncbn-backbone_1x_coco.py | 4 + ...n_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py | 11 + ...pn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py | 11 + ..._x101_32x4d_fpn_syncbn-backbone_1x_coco.py | 4 + ...n_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py | 11 + ...pn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py | 11 + configs/gcnet/metafile.yml | 440 +++ configs/gfl/README.md | 32 + ...fl_r101_fpn_dconv_c3-c5_mstrain_2x_coco.py | 15 + configs/gfl/gfl_r101_fpn_mstrain_2x_coco.py | 13 + configs/gfl/gfl_r50_fpn_1x_coco.py | 57 + configs/gfl/gfl_r50_fpn_mstrain_2x_coco.py | 22 + ...1_32x4d_fpn_dconv_c4-c5_mstrain_2x_coco.py | 18 + .../gfl/gfl_x101_32x4d_fpn_mstrain_2x_coco.py | 16 + configs/gfl/metafile.yml | 134 + configs/ghm/README.md | 23 + configs/ghm/metafile.yml | 101 + configs/ghm/retinanet_ghm_r101_fpn_1x_coco.py | 6 + configs/ghm/retinanet_ghm_r50_fpn_1x_coco.py | 19 + .../retinanet_ghm_x101_32x4d_fpn_1x_coco.py | 14 + .../retinanet_ghm_x101_64x4d_fpn_1x_coco.py | 14 + configs/gn+ws/README.md | 44 + .../faster_rcnn_r101_fpn_gn_ws-all_1x_coco.py | 6 + .../faster_rcnn_r50_fpn_gn_ws-all_1x_coco.py | 16 + ...r_rcnn_x101_32x4d_fpn_gn_ws-all_1x_coco.py | 18 + ...er_rcnn_x50_32x4d_fpn_gn_ws-all_1x_coco.py | 18 + ..._rcnn_r101_fpn_gn_ws-all_20_23_24e_coco.py | 4 + .../mask_rcnn_r101_fpn_gn_ws-all_2x_coco.py | 6 + ...k_rcnn_r50_fpn_gn_ws-all_20_23_24e_coco.py | 4 + .../mask_rcnn_r50_fpn_gn_ws-all_2x_coco.py | 20 + ...x101_32x4d_fpn_gn_ws-all_20_23_24e_coco.py | 4 + ...k_rcnn_x101_32x4d_fpn_gn_ws-all_2x_coco.py | 19 + ..._x50_32x4d_fpn_gn_ws-all_20_23_24e_coco.py | 4 + ...sk_rcnn_x50_32x4d_fpn_gn_ws-all_2x_coco.py | 19 + configs/gn+ws/metafile.yml | 263 ++ configs/gn/README.md | 31 + .../gn/mask_rcnn_r101_fpn_gn-all_2x_coco.py | 7 + .../gn/mask_rcnn_r101_fpn_gn-all_3x_coco.py | 5 + 
.../gn/mask_rcnn_r50_fpn_gn-all_2x_coco.py | 49 + .../gn/mask_rcnn_r50_fpn_gn-all_3x_coco.py | 5 + ...ask_rcnn_r50_fpn_gn-all_contrib_2x_coco.py | 17 + ...ask_rcnn_r50_fpn_gn-all_contrib_3x_coco.py | 5 + configs/gn/metafile.yml | 162 + configs/grid_rcnn/README.md | 35 + .../grid_rcnn_r101_fpn_gn-head_2x_coco.py | 7 + .../grid_rcnn_r50_fpn_gn-head_1x_coco.py | 11 + .../grid_rcnn_r50_fpn_gn-head_2x_coco.py | 131 + ...rid_rcnn_x101_32x4d_fpn_gn-head_2x_coco.py | 24 + ...rid_rcnn_x101_64x4d_fpn_gn-head_2x_coco.py | 13 + configs/grid_rcnn/metafile.yml | 101 + configs/groie/README.md | 62 + .../faster_rcnn_r50_fpn_groie_1x_coco.py | 25 + ...grid_rcnn_r50_fpn_gn-head_groie_1x_coco.py | 45 + ...cbn-backbone_r4_gcb_c3-c5_groie_1x_coco.py | 45 + .../groie/mask_rcnn_r50_fpn_groie_1x_coco.py | 45 + ...cbn-backbone_r4_gcb_c3-c5_groie_1x_coco.py | 45 + configs/groie/metafile.yml | 93 + configs/guided_anchoring/README.md | 49 + .../ga_fast_r50_caffe_fpn_1x_coco.py | 65 + .../ga_faster_r101_caffe_fpn_1x_coco.py | 7 + .../ga_faster_r50_caffe_fpn_1x_coco.py | 65 + .../ga_faster_r50_fpn_1x_coco.py | 65 + .../ga_faster_x101_32x4d_fpn_1x_coco.py | 14 + .../ga_faster_x101_64x4d_fpn_1x_coco.py | 14 + .../ga_retinanet_r101_caffe_fpn_1x_coco.py | 7 + .../ga_retinanet_r101_caffe_fpn_mstrain_2x.py | 169 + .../ga_retinanet_r50_caffe_fpn_1x_coco.py | 62 + .../ga_retinanet_r50_fpn_1x_coco.py | 62 + .../ga_retinanet_x101_32x4d_fpn_1x_coco.py | 14 + .../ga_retinanet_x101_64x4d_fpn_1x_coco.py | 14 + .../ga_rpn_r101_caffe_fpn_1x_coco.py | 8 + .../ga_rpn_r50_caffe_fpn_1x_coco.py | 58 + .../ga_rpn_r50_fpn_1x_coco.py | 58 + .../ga_rpn_x101_32x4d_fpn_1x_coco.py | 14 + .../ga_rpn_x101_64x4d_fpn_1x_coco.py | 14 + configs/guided_anchoring/metafile.yml | 246 ++ configs/hrnet/README.md | 88 + ...cascade_mask_rcnn_hrnetv2p_w18_20e_coco.py | 11 + ...cascade_mask_rcnn_hrnetv2p_w32_20e_coco.py | 40 + ...cascade_mask_rcnn_hrnetv2p_w40_20e_coco.py | 12 + .../cascade_rcnn_hrnetv2p_w18_20e_coco.py | 11 + 
.../cascade_rcnn_hrnetv2p_w32_20e_coco.py | 40 + .../cascade_rcnn_hrnetv2p_w40_20e_coco.py | 12 + .../hrnet/faster_rcnn_hrnetv2p_w18_1x_coco.py | 11 + .../hrnet/faster_rcnn_hrnetv2p_w18_2x_coco.py | 5 + .../hrnet/faster_rcnn_hrnetv2p_w32_1x_coco.py | 37 + .../hrnet/faster_rcnn_hrnetv2p_w32_2x_coco.py | 4 + .../hrnet/faster_rcnn_hrnetv2p_w40_1x_coco.py | 11 + .../hrnet/faster_rcnn_hrnetv2p_w40_2x_coco.py | 4 + .../fcos_hrnetv2p_w18_gn-head_4x4_1x_coco.py | 10 + .../fcos_hrnetv2p_w18_gn-head_4x4_2x_coco.py | 4 + ...w18_gn-head_mstrain_640-800_4x4_2x_coco.py | 10 + .../fcos_hrnetv2p_w32_gn-head_4x4_1x_coco.py | 70 + .../fcos_hrnetv2p_w32_gn-head_4x4_2x_coco.py | 4 + ...w32_gn-head_mstrain_640-800_4x4_2x_coco.py | 39 + ...w40_gn-head_mstrain_640-800_4x4_2x_coco.py | 11 + configs/hrnet/htc_hrnetv2p_w18_20e_coco.py | 10 + configs/hrnet/htc_hrnetv2p_w32_20e_coco.py | 37 + configs/hrnet/htc_hrnetv2p_w40_20e_coco.py | 11 + configs/hrnet/htc_hrnetv2p_w40_28e_coco.py | 4 + .../hrnet/htc_x101_64x4d_fpn_16x1_28e_coco.py | 4 + .../hrnet/mask_rcnn_hrnetv2p_w18_1x_coco.py | 10 + .../hrnet/mask_rcnn_hrnetv2p_w18_2x_coco.py | 4 + .../hrnet/mask_rcnn_hrnetv2p_w32_1x_coco.py | 37 + .../hrnet/mask_rcnn_hrnetv2p_w32_2x_coco.py | 4 + .../hrnet/mask_rcnn_hrnetv2p_w40_1x_coco.py | 11 + .../hrnet/mask_rcnn_hrnetv2p_w40_2x_coco.py | 4 + configs/hrnet/metafile.yml | 604 ++++ configs/htc/README.md | 57 + configs/htc/htc_r101_fpn_20e_coco.py | 9 + configs/htc/htc_r50_fpn_1x_coco.py | 56 + configs/htc/htc_r50_fpn_20e_coco.py | 4 + .../htc_without_semantic_r50_fpn_1x_coco.py | 236 ++ .../htc/htc_x101_32x4d_fpn_16x1_20e_coco.py | 19 + .../htc/htc_x101_64x4d_fpn_16x1_20e_coco.py | 19 + ...nv_c3-c5_mstrain_400_1400_16x1_20e_coco.py | 43 + configs/htc/metafile.yml | 165 + configs/instaboost/README.md | 44 + ...e_mask_rcnn_r101_fpn_instaboost_4x_coco.py | 7 + ...de_mask_rcnn_r50_fpn_instaboost_4x_coco.py | 28 + ..._rcnn_x101_64x4d_fpn_instaboost_4x_coco.py | 14 + 
.../mask_rcnn_r101_fpn_instaboost_4x_coco.py | 6 + .../mask_rcnn_r50_fpn_instaboost_4x_coco.py | 28 + ..._rcnn_x101_64x4d_fpn_instaboost_4x_coco.py | 14 + configs/instaboost/metafile.yml | 99 + configs/ld/README.md | 31 + .../ld/ld_r101_gflv1_r101dcn_fpn_coco_2x.py | 44 + configs/ld/ld_r18_gflv1_r101_fpn_coco_1x.py | 62 + configs/ld/ld_r34_gflv1_r101_fpn_coco_1x.py | 19 + configs/ld/ld_r50_gflv1_r101_fpn_coco_1x.py | 19 + configs/ld/metafile.yml | 72 + configs/legacy_1.x/README.md | 53 + .../cascade_mask_rcnn_r50_fpn_1x_coco_v1.py | 79 + .../faster_rcnn_r50_fpn_1x_coco_v1.py | 38 + .../mask_rcnn_r50_fpn_1x_coco_v1.py | 34 + .../retinanet_r50_caffe_fpn_1x_coco_v1.py | 41 + .../retinanet_r50_fpn_1x_coco_v1.py | 17 + configs/legacy_1.x/ssd300_coco_v1.py | 79 + configs/libra_rcnn/README.md | 41 + .../libra_fast_rcnn_r50_fpn_1x_coco.py | 50 + .../libra_faster_rcnn_r101_fpn_1x_coco.py | 6 + .../libra_faster_rcnn_r50_fpn_1x_coco.py | 41 + ...ibra_faster_rcnn_x101_64x4d_fpn_1x_coco.py | 14 + .../libra_retinanet_r50_fpn_1x_coco.py | 26 + configs/libra_rcnn/metafile.yml | 99 + configs/lvis/README.md | 44 + ..._r101_fpn_sample1e-3_mstrain_1x_lvis_v1.py | 6 + ...101_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py | 6 + ...n_r50_fpn_sample1e-3_mstrain_1x_lvis_v1.py | 31 + ...r50_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py | 31 + ...32x4d_fpn_sample1e-3_mstrain_1x_lvis_v1.py | 14 + ...x4d_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py | 14 + ...64x4d_fpn_sample1e-3_mstrain_1x_lvis_v1.py | 14 + ...x4d_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py | 14 + configs/mask_rcnn/README.md | 48 + .../mask_rcnn_r101_caffe_fpn_1x_coco.py | 7 + ...cnn_r101_caffe_fpn_mstrain-poly_3x_coco.py | 55 + .../mask_rcnn/mask_rcnn_r101_fpn_1x_coco.py | 6 + .../mask_rcnn/mask_rcnn_r101_fpn_2x_coco.py | 6 + ...mask_rcnn_r101_fpn_mstrain-poly_3x_coco.py | 10 + .../mask_rcnn_r50_caffe_c4_1x_coco.py | 39 + .../mask_rcnn_r50_caffe_fpn_1x_coco.py | 40 + ...rcnn_r50_caffe_fpn_mstrain-poly_1x_coco.py | 49 + 
...rcnn_r50_caffe_fpn_mstrain-poly_2x_coco.py | 4 + ...rcnn_r50_caffe_fpn_mstrain-poly_3x_coco.py | 4 + ...mask_rcnn_r50_caffe_fpn_mstrain_1x_coco.py | 45 + ...mask_rcnn_r50_caffe_fpn_poly_1x_coco_v1.py | 61 + .../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py | 5 + .../mask_rcnn/mask_rcnn_r50_fpn_2x_coco.py | 5 + .../mask_rcnn_r50_fpn_mstrain-poly_3x_coco.py | 4 + .../mask_rcnn_r50_fpn_poly_1x_coco.py | 23 + .../mask_rcnn_x101_32x4d_fpn_1x_coco.py | 14 + .../mask_rcnn_x101_32x4d_fpn_2x_coco.py | 14 + ...cnn_x101_32x4d_fpn_mstrain-poly_3x_coco.py | 18 + .../mask_rcnn_x101_32x8d_fpn_1x_coco.py | 65 + ...cnn_x101_32x8d_fpn_mstrain-poly_1x_coco.py | 60 + ...cnn_x101_32x8d_fpn_mstrain-poly_3x_coco.py | 85 + .../mask_rcnn_x101_64x4d_fpn_1x_coco.py | 14 + .../mask_rcnn_x101_64x4d_fpn_2x_coco.py | 14 + ...cnn_x101_64x4d_fpn_mstrain-poly_3x_coco.py | 18 + configs/mask_rcnn/metafile.yml | 419 +++ configs/ms_rcnn/README.md | 26 + configs/ms_rcnn/metafile.yml | 159 + .../ms_rcnn/ms_rcnn_r101_caffe_fpn_1x_coco.py | 7 + .../ms_rcnn/ms_rcnn_r101_caffe_fpn_2x_coco.py | 4 + .../ms_rcnn/ms_rcnn_r50_caffe_fpn_1x_coco.py | 16 + .../ms_rcnn/ms_rcnn_r50_caffe_fpn_2x_coco.py | 4 + configs/ms_rcnn/ms_rcnn_r50_fpn_1x_coco.py | 16 + .../ms_rcnn/ms_rcnn_x101_32x4d_fpn_1x_coco.py | 14 + .../ms_rcnn/ms_rcnn_x101_64x4d_fpn_1x_coco.py | 14 + .../ms_rcnn/ms_rcnn_x101_64x4d_fpn_2x_coco.py | 4 + configs/nas_fcos/README.md | 25 + configs/nas_fcos/metafile.yml | 44 + ...shead_r50_caffe_fpn_gn-head_4x4_1x_coco.py | 100 + ...shead_r50_caffe_fpn_gn-head_4x4_1x_coco.py | 99 + configs/nas_fpn/README.md | 26 + configs/nas_fpn/metafile.yml | 59 + .../retinanet_r50_fpn_crop640_50e_coco.py | 80 + .../retinanet_r50_nasfpn_crop640_50e_coco.py | 79 + configs/paa/README.md | 35 + configs/paa/metafile.yml | 104 + configs/paa/paa_r101_fpn_1x_coco.py | 6 + configs/paa/paa_r101_fpn_2x_coco.py | 3 + configs/paa/paa_r101_fpn_mstrain_3x_coco.py | 6 + configs/paa/paa_r50_fpn_1.5x_coco.py | 3 + configs/paa/paa_r50_fpn_1x_coco.py 
| 70 + configs/paa/paa_r50_fpn_2x_coco.py | 3 + configs/paa/paa_r50_fpn_mstrain_3x_coco.py | 20 + configs/pafpn/README.md | 26 + .../pafpn/faster_rcnn_r50_pafpn_1x_coco.py | 8 + configs/pafpn/metafile.yml | 38 + configs/panoptic_fpn/README.md | 50 + configs/panoptic_fpn/metafile.yml | 70 + .../panoptic_fpn_r101_fpn_1x_coco.py | 6 + .../panoptic_fpn_r101_fpn_mstrain_3x_coco.py | 6 + .../panoptic_fpn_r50_fpn_1x_coco.py | 33 + .../panoptic_fpn_r50_fpn_mstrain_3x_coco.py | 61 + configs/pascal_voc/README.md | 23 + .../faster_rcnn_r50_fpn_1x_voc0712.py | 14 + .../faster_rcnn_r50_fpn_1x_voc0712_cocofmt.py | 75 + .../retinanet_r50_fpn_1x_voc0712.py | 14 + configs/pascal_voc/ssd300_voc0712.py | 69 + configs/pascal_voc/ssd512_voc0712.py | 52 + configs/pisa/README.md | 40 + configs/pisa/metafile.yml | 110 + .../pisa/pisa_faster_rcnn_r50_fpn_1x_coco.py | 30 + ...pisa_faster_rcnn_x101_32x4d_fpn_1x_coco.py | 30 + .../pisa/pisa_mask_rcnn_r50_fpn_1x_coco.py | 30 + .../pisa_mask_rcnn_x101_32x4d_fpn_1x_coco.py | 30 + .../pisa/pisa_retinanet_r50_fpn_1x_coco.py | 7 + .../pisa_retinanet_x101_32x4d_fpn_1x_coco.py | 7 + configs/pisa/pisa_ssd300_coco.py | 8 + configs/pisa/pisa_ssd512_coco.py | 8 + configs/pix2seq/README.md | 21 + configs/pix2seq/metafile.yml | 33 + configs/pix2seq/pix2seq_r50_8x4_300e_coco.py | 156 + configs/point_rend/README.md | 23 + configs/point_rend/metafile.yml | 54 + ...oint_rend_r50_caffe_fpn_mstrain_1x_coco.py | 44 + ...oint_rend_r50_caffe_fpn_mstrain_3x_coco.py | 4 + configs/pvt/README.md | 43 + configs/pvt/metafile.yml | 136 + configs/pvt/retinanet_pvt-l_fpn_1x_coco.py | 7 + configs/pvt/retinanet_pvt-m_fpn_1x_coco.py | 6 + configs/pvt/retinanet_pvt-s_fpn_1x_coco.py | 6 + configs/pvt/retinanet_pvt-t_fpn_1x_coco.py | 16 + configs/pvt/retinanet_pvtv2-b0_fpn_1x_coco.py | 17 + configs/pvt/retinanet_pvtv2-b1_fpn_1x_coco.py | 7 + configs/pvt/retinanet_pvtv2-b2_fpn_1x_coco.py | 8 + configs/pvt/retinanet_pvtv2-b3_fpn_1x_coco.py | 8 + 
configs/pvt/retinanet_pvtv2-b4_fpn_1x_coco.py | 13 + configs/pvt/retinanet_pvtv2-b5_fpn_1x_coco.py | 14 + configs/queryinst/README.md | 26 + configs/queryinst/metafile.yml | 100 + ..._proposals_crop_mstrain_480-800_3x_coco.py | 7 + ...ryinst_r101_fpn_mstrain_480-800_3x_coco.py | 7 + .../queryinst/queryinst_r50_fpn_1x_coco.py | 138 + ..._proposals_crop_mstrain_480-800_3x_coco.py | 54 + ...eryinst_r50_fpn_mstrain_480-800_3x_coco.py | 23 + configs/regnet/README.md | 110 + ..._rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco.py | 17 + ..._rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py | 63 + ..._rcnn_regnetx-400MF_fpn_mstrain_3x_coco.py | 17 + ...sk_rcnn_regnetx-4GF_fpn_mstrain_3x_coco.py | 17 + ..._rcnn_regnetx-800MF_fpn_mstrain_3x_coco.py | 17 + ..._rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco.py | 17 + .../faster_rcnn_regnetx-3.2GF_fpn_1x_coco.py | 57 + .../faster_rcnn_regnetx-3.2GF_fpn_2x_coco.py | 3 + ..._rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py | 61 + ..._rcnn_regnetx-400MF_fpn_mstrain_3x_coco.py | 17 + ...er_rcnn_regnetx-4GF_fpn_mstrain_3x_coco.py | 17 + ..._rcnn_regnetx-800MF_fpn_mstrain_3x_coco.py | 17 + ..._regnetx-1.6GF_fpn_mstrain-poly_3x_coco.py | 26 + .../mask_rcnn_regnetx-12GF_fpn_1x_coco.py | 17 + .../mask_rcnn_regnetx-3.2GF_fpn_1x_coco.py | 58 + ..._regnetx-3.2GF_fpn_mdconv_c3-c5_1x_coco.py | 7 + ..._rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py | 66 + ..._regnetx-400MF_fpn_mstrain-poly_3x_coco.py | 26 + .../mask_rcnn_regnetx-4GF_fpn_1x_coco.py | 17 + ...nn_regnetx-4GF_fpn_mstrain-poly_3x_coco.py | 26 + .../mask_rcnn_regnetx-6.4GF_fpn_1x_coco.py | 17 + ..._regnetx-800MF_fpn_mstrain-poly_3x_coco.py | 26 + .../mask_rcnn_regnetx-8GF_fpn_1x_coco.py | 17 + configs/regnet/metafile.yml | 437 +++ .../retinanet_regnetx-1.6GF_fpn_1x_coco.py | 17 + .../retinanet_regnetx-3.2GF_fpn_1x_coco.py | 59 + .../retinanet_regnetx-800MF_fpn_1x_coco.py | 17 + configs/reppoints/README.md | 54 + ...50_grid_center_fpn_gn-neck+head_1x_coco.py | 2 + .../bbox_r50_grid_fpn_gn-neck+head_1x_coco.py | 13 + 
configs/reppoints/metafile.yml | 181 + configs/reppoints/reppoints.png | Bin 0 -> 1198109 bytes ...nts_minmax_r50_fpn_gn-neck+head_1x_coco.py | 2 + ...01_fpn_dconv_c3-c5_gn-neck+head_2x_coco.py | 8 + ...ts_moment_r101_fpn_gn-neck+head_2x_coco.py | 6 + .../reppoints_moment_r50_fpn_1x_coco.py | 67 + ...nts_moment_r50_fpn_gn-neck+head_1x_coco.py | 4 + ...nts_moment_r50_fpn_gn-neck+head_2x_coco.py | 3 + ...01_fpn_dconv_c3-c5_gn-neck+head_2x_coco.py | 16 + ...ial_minmax_r50_fpn_gn-neck+head_1x_coco.py | 2 + configs/res2net/README.md | 65 + .../cascade_mask_rcnn_r2_101_fpn_20e_coco.py | 10 + .../cascade_rcnn_r2_101_fpn_20e_coco.py | 10 + .../res2net/faster_rcnn_r2_101_fpn_2x_coco.py | 10 + configs/res2net/htc_r2_101_fpn_20e_coco.py | 13 + .../res2net/mask_rcnn_r2_101_fpn_2x_coco.py | 10 + configs/res2net/metafile.yml | 94 + configs/resnest/README.md | 44 + ...pn_syncbn-backbone+head_mstrain_1x_coco.py | 7 + ...pn_syncbn-backbone+head_mstrain_1x_coco.py | 118 + ...cbn-backbone+head_mstrain-range_1x_coco.py | 7 + ...cbn-backbone+head_mstrain-range_1x_coco.py | 116 + ...cbn-backbone+head_mstrain-range_1x_coco.py | 7 + ...cbn-backbone+head_mstrain-range_1x_coco.py | 62 + ...pn_syncbn-backbone+head_mstrain_1x_coco.py | 7 + ...pn_syncbn-backbone+head_mstrain_1x_coco.py | 64 + configs/resnest/metafile.yml | 136 + configs/retinanet/README.md | 40 + configs/retinanet/metafile.yml | 261 ++ .../retinanet_r101_caffe_fpn_1x_coco.py | 7 + ...etinanet_r101_caffe_fpn_mstrain_3x_coco.py | 7 + .../retinanet/retinanet_r101_fpn_1x_coco.py | 6 + .../retinanet/retinanet_r101_fpn_2x_coco.py | 6 + ...inanet_r101_fpn_mstrain_640-800_3x_coco.py | 6 + .../retinanet_r50_caffe_fpn_1x_coco.py | 41 + ...retinanet_r50_caffe_fpn_mstrain_1x_coco.py | 46 + ...retinanet_r50_caffe_fpn_mstrain_2x_coco.py | 4 + ...retinanet_r50_caffe_fpn_mstrain_3x_coco.py | 4 + .../retinanet/retinanet_r50_fpn_1x_coco.py | 7 + .../retinanet/retinanet_r50_fpn_2x_coco.py | 4 + .../retinanet/retinanet_r50_fpn_90k_coco.py | 15 + 
...tinanet_r50_fpn_mstrain_640-800_3x_coco.py | 5 + .../retinanet_x101_32x4d_fpn_1x_coco.py | 14 + .../retinanet_x101_32x4d_fpn_2x_coco.py | 14 + .../retinanet_x101_64x4d_fpn_1x_coco.py | 14 + .../retinanet_x101_64x4d_fpn_2x_coco.py | 14 + ..._x101_64x4d_fpn_mstrain_640-800_3x_coco.py | 8 + configs/rpn/README.md | 29 + configs/rpn/rpn_r101_caffe_fpn_1x_coco.py | 7 + configs/rpn/rpn_r101_fpn_1x_coco.py | 6 + configs/rpn/rpn_r101_fpn_2x_coco.py | 6 + configs/rpn/rpn_r50_caffe_c4_1x_coco.py | 38 + configs/rpn/rpn_r50_caffe_fpn_1x_coco.py | 41 + configs/rpn/rpn_r50_fpn_1x_coco.py | 18 + configs/rpn/rpn_r50_fpn_2x_coco.py | 5 + configs/rpn/rpn_x101_32x4d_fpn_1x_coco.py | 14 + configs/rpn/rpn_x101_32x4d_fpn_2x_coco.py | 14 + configs/rpn/rpn_x101_64x4d_fpn_1x_coco.py | 14 + configs/rpn/rpn_x101_64x4d_fpn_2x_coco.py | 14 + configs/sabl/README.md | 37 + configs/sabl/metafile.yml | 140 + .../sabl_cascade_rcnn_r101_fpn_1x_coco.py | 90 + .../sabl/sabl_cascade_rcnn_r50_fpn_1x_coco.py | 86 + .../sabl/sabl_faster_rcnn_r101_fpn_1x_coco.py | 38 + .../sabl/sabl_faster_rcnn_r50_fpn_1x_coco.py | 34 + .../sabl/sabl_retinanet_r101_fpn_1x_coco.py | 54 + .../sabl_retinanet_r101_fpn_gn_1x_coco.py | 56 + ...etinanet_r101_fpn_gn_2x_ms_480_960_coco.py | 73 + ...etinanet_r101_fpn_gn_2x_ms_640_800_coco.py | 73 + .../sabl/sabl_retinanet_r50_fpn_1x_coco.py | 50 + .../sabl/sabl_retinanet_r50_fpn_gn_1x_coco.py | 52 + configs/scnet/README.md | 51 + configs/scnet/metafile.yml | 116 + configs/scnet/scnet_r101_fpn_20e_coco.py | 6 + configs/scnet/scnet_r50_fpn_1x_coco.py | 136 + configs/scnet/scnet_r50_fpn_20e_coco.py | 4 + .../scnet/scnet_x101_64x4d_fpn_20e_coco.py | 15 + .../scnet_x101_64x4d_fpn_8x1_20e_coco.py | 3 + configs/scratch/README.md | 25 + ...ter_rcnn_r50_fpn_gn-all_scratch_6x_coco.py | 24 + ...ask_rcnn_r50_fpn_gn-all_scratch_6x_coco.py | 25 + configs/scratch/metafile.yml | 48 + configs/seesaw_loss/README.md | 39 + ...n_random_seesaw_loss_mstrain_2x_lvis_v1.py | 132 + 
...saw_loss_normed_mask_mstrain_2x_lvis_v1.py | 5 + ...mple1e-3_seesaw_loss_mstrain_2x_lvis_v1.py | 98 + ...saw_loss_normed_mask_mstrain_2x_lvis_v1.py | 5 + ...n_random_seesaw_loss_mstrain_2x_lvis_v1.py | 6 + ...saw_loss_normed_mask_mstrain_2x_lvis_v1.py | 6 + ...mple1e-3_seesaw_loss_mstrain_2x_lvis_v1.py | 6 + ...saw_loss_normed_mask_mstrain_2x_lvis_v1.py | 6 + ...n_random_seesaw_loss_mstrain_2x_lvis_v1.py | 75 + ...saw_loss_normed_mask_mstrain_2x_lvis_v1.py | 5 + ...mple1e-3_seesaw_loss_mstrain_2x_lvis_v1.py | 41 + ...saw_loss_normed_mask_mstrain_2x_lvis_v1.py | 5 + configs/selfsup_pretrain/README.md | 95 + ...sk_rcnn_r50_fpn_mocov2-pretrain_1x_coco.py | 13 + ...rcnn_r50_fpn_mocov2-pretrain_ms-2x_coco.py | 32 + ...mask_rcnn_r50_fpn_swav-pretrain_1x_coco.py | 13 + ...k_rcnn_r50_fpn_swav-pretrain_ms-2x_coco.py | 32 + configs/solo/README.md | 42 + .../decoupled_solo_light_r50_fpn_3x_coco.py | 63 + .../solo/decoupled_solo_r50_fpn_1x_coco.py | 28 + .../solo/decoupled_solo_r50_fpn_3x_coco.py | 25 + configs/solo/metafile.yml | 115 + configs/solo/solo_r50_fpn_1x_coco.py | 53 + configs/solo/solo_r50_fpn_3x_coco.py | 28 + configs/sparse_rcnn/README.md | 28 + configs/sparse_rcnn/metafile.yml | 80 + ..._proposals_crop_mstrain_480-800_3x_coco.py | 7 + ...e_rcnn_r101_fpn_mstrain_480-800_3x_coco.py | 7 + .../sparse_rcnn_r50_fpn_1x_coco.py | 95 + ..._proposals_crop_mstrain_480-800_3x_coco.py | 52 + ...se_rcnn_r50_fpn_mstrain_480-800_3x_coco.py | 23 + configs/ssd/README.md | 52 + configs/ssd/metafile.yml | 78 + configs/ssd/ssd300_coco.py | 66 + configs/ssd/ssd512_coco.py | 79 + .../ssdlite_mobilenetv2_scratch_600e_coco.py | 145 + configs/strong_baselines/README.md | 18 + ..._fpn_syncbn-all_rpn-2conv_lsj_100e_coco.py | 80 + ...syncbn-all_rpn-2conv_lsj_100e_fp16_coco.py | 2 + ..._fpn_syncbn-all_rpn-2conv_lsj_400e_coco.py | 6 + ..._fpn_syncbn-all_rpn-2conv_lsj_100e_coco.py | 22 + ...syncbn-all_rpn-2conv_lsj_100e_fp16_coco.py | 3 + ...0_fpn_syncbn-all_rpn-2conv_lsj_50e_coco.py | 5 + 
configs/swin/README.md | 25 + ...n_swin-s-p4-w7_fpn_fp16_ms-crop-3x_coco.py | 6 + .../mask_rcnn_swin-t-p4-w7_fpn_1x_coco.py | 42 + ...n_swin-t-p4-w7_fpn_fp16_ms-crop-3x_coco.py | 3 + ...k_rcnn_swin-t-p4-w7_fpn_ms-crop-3x_coco.py | 91 + configs/swin/metafile.yml | 85 + configs/tridentnet/README.md | 28 + configs/tridentnet/metafile.yml | 55 + .../tridentnet_r50_caffe_1x_coco.py | 55 + .../tridentnet_r50_caffe_mstrain_1x_coco.py | 22 + .../tridentnet_r50_caffe_mstrain_3x_coco.py | 4 + configs/vfnet/README.md | 43 + configs/vfnet/metafile.yml | 116 + configs/vfnet/vfnet_r101_fpn_1x_coco.py | 6 + configs/vfnet/vfnet_r101_fpn_2x_coco.py | 8 + ...t_r101_fpn_mdconv_c3-c5_mstrain_2x_coco.py | 15 + .../vfnet/vfnet_r101_fpn_mstrain_2x_coco.py | 6 + ...r2_101_fpn_mdconv_c3-c5_mstrain_2x_coco.py | 18 + .../vfnet/vfnet_r2_101_fpn_mstrain_2x_coco.py | 16 + configs/vfnet/vfnet_r50_fpn_1x_coco.py | 107 + ...et_r50_fpn_mdconv_c3-c5_mstrain_2x_coco.py | 6 + .../vfnet/vfnet_r50_fpn_mstrain_2x_coco.py | 39 + ..._32x4d_fpn_mdconv_c3-c5_mstrain_2x_coco.py | 17 + .../vfnet_x101_32x4d_fpn_mstrain_2x_coco.py | 15 + ..._64x4d_fpn_mdconv_c3-c5_mstrain_2x_coco.py | 17 + .../vfnet_x101_64x4d_fpn_mstrain_2x_coco.py | 15 + configs/wider_face/README.md | 43 + configs/wider_face/ssd300_wider_face.py | 18 + configs/yolact/README.md | 71 + configs/yolact/metafile.yml | 78 + configs/yolact/yolact_r101_1x8_coco.py | 7 + configs/yolact/yolact_r50_1x8_coco.py | 160 + configs/yolact/yolact_r50_8x8_coco.py | 11 + configs/yolo/README.md | 45 + configs/yolo/metafile.yml | 124 + configs/yolo/yolov3_d53_320_273e_coco.py | 42 + .../yolov3_d53_fp16_mstrain-608_273e_coco.py | 3 + .../yolo/yolov3_d53_mstrain-416_273e_coco.py | 42 + .../yolo/yolov3_d53_mstrain-608_273e_coco.py | 127 + .../yolo/yolov3_mobilenetv2_320_300e_coco.py | 53 + ...olov3_mobilenetv2_mstrain-416_300e_coco.py | 137 + configs/yolof/README.md | 25 + configs/yolof/metafile.yml | 32 + configs/yolof/yolof_r50_c5_8x8_1x_coco.py | 105 + 
.../yolof/yolof_r50_c5_8x8_iter-1x_coco.py | 14 + configs/yolox/README.md | 25 + configs/yolox/metafile.yml | 33 + configs/yolox/yolox_l_8x8_300e_coco.py | 8 + configs/yolox/yolox_m_8x8_300e_coco.py | 8 + configs/yolox/yolox_nano_8x8_300e_coco.py | 11 + configs/yolox/yolox_s_8x8_300e_coco.py | 142 + configs/yolox/yolox_tiny_8x8_300e_coco.py | 78 + configs/yolox/yolox_x_8x8_300e_coco.py | 8 + demo/MMDet_Tutorial.ipynb | 1656 +++++++++ demo/create_result_gif.py | 163 + demo/demo.jpg | Bin 0 -> 259865 bytes demo/demo.mp4 | Bin 0 -> 297688 bytes demo/image_demo.py | 50 + demo/inference_demo.ipynb | 100 + demo/video_demo.py | 61 + demo/webcam_demo.py | 47 + docker/Dockerfile | 24 + docker/serve/Dockerfile | 49 + docker/serve/config.properties | 5 + docker/serve/entrypoint.sh | 12 + docs/1_exist_data_model.md | 602 ++++ docs/2_new_data_model.md | 263 ++ docs/3_exist_data_new_model.md | 275 ++ docs/Makefile | 20 + docs/_static/css/readthedocs.css | 6 + docs/_static/image/mmdet-logo.png | Bin 0 -> 32181 bytes docs/api.rst | 108 + docs/changelog.md | 1201 +++++++ docs/compatibility.md | 143 + docs/conf.py | 178 + docs/conventions.md | 78 + docs/faq.md | 90 + docs/get_started.md | 246 ++ docs/index.rst | 57 + docs/make.bat | 35 + docs/model_zoo.md | 341 ++ docs/projects.md | 58 + docs/robustness_benchmarking.md | 110 + docs/stat.py | 64 + docs/switch_language.md | 3 + docs/tutorials/config.md | 544 +++ docs/tutorials/customize_dataset.md | 542 +++ docs/tutorials/customize_losses.md | 126 + docs/tutorials/customize_models.md | 363 ++ docs/tutorials/customize_runtime.md | 323 ++ docs/tutorials/data_pipeline.md | 199 ++ docs/tutorials/finetune.md | 89 + docs/tutorials/index.rst | 14 + docs/tutorials/init_cfg.md | 160 + docs/tutorials/onnx2tensorrt.md | 104 + docs/tutorials/pytorch2onnx.md | 332 ++ docs/useful_tools.md | 492 +++ docs_zh-CN/1_exist_data_model.md | 569 ++++ docs_zh-CN/2_new_data_model.md | 264 ++ docs_zh-CN/3_exist_data_new_model.md | 274 ++ docs_zh-CN/Makefile | 
20 + docs_zh-CN/_static/css/readthedocs.css | 6 + docs_zh-CN/_static/image/mmdet-logo.png | Bin 0 -> 32181 bytes docs_zh-CN/api.rst | 108 + docs_zh-CN/compatibility.md | 144 + docs_zh-CN/conf.py | 180 + docs_zh-CN/conventions.md | 74 + docs_zh-CN/faq.md | 91 + docs_zh-CN/get_started.md | 247 ++ docs_zh-CN/index.rst | 54 + docs_zh-CN/make.bat | 35 + docs_zh-CN/model_zoo.md | 335 ++ docs_zh-CN/projects.md | 48 + docs_zh-CN/robustness_benchmarking.md | 109 + docs_zh-CN/stat.py | 64 + docs_zh-CN/switch_language.md | 3 + docs_zh-CN/tutorials/config.md | 522 +++ docs_zh-CN/tutorials/customize_dataset.md | 457 +++ docs_zh-CN/tutorials/customize_losses.md | 125 + docs_zh-CN/tutorials/customize_models.md | 1 + docs_zh-CN/tutorials/customize_runtime.md | 1 + docs_zh-CN/tutorials/data_pipeline.md | 190 ++ docs_zh-CN/tutorials/finetune.md | 1 + docs_zh-CN/tutorials/index.rst | 12 + docs_zh-CN/tutorials/onnx2tensorrt.md | 1 + docs_zh-CN/tutorials/pytorch2onnx.md | 1 + docs_zh-CN/useful_tools.md | 1 + mmdet/__init__.py | 29 + mmdet/apis/__init__.py | 12 + mmdet/apis/inference.py | 241 ++ mmdet/apis/test.py | 190 ++ mmdet/apis/train.py | 213 ++ mmdet/core/__init__.py | 9 + mmdet/core/anchor/__init__.py | 14 + mmdet/core/anchor/anchor_generator.py | 866 +++++ mmdet/core/anchor/builder.py | 19 + mmdet/core/anchor/point_generator.py | 263 ++ mmdet/core/anchor/utils.py | 72 + mmdet/core/bbox/__init__.py | 28 + mmdet/core/bbox/assigners/__init__.py | 19 + .../bbox/assigners/approx_max_iou_assigner.py | 146 + mmdet/core/bbox/assigners/assign_result.py | 205 ++ mmdet/core/bbox/assigners/atss_assigner.py | 179 + mmdet/core/bbox/assigners/base_assigner.py | 10 + .../bbox/assigners/center_region_assigner.py | 336 ++ mmdet/core/bbox/assigners/grid_assigner.py | 156 + .../core/bbox/assigners/hungarian_assigner.py | 146 + mmdet/core/bbox/assigners/max_iou_assigner.py | 213 ++ mmdet/core/bbox/assigners/point_assigner.py | 134 + mmdet/core/bbox/assigners/region_assigner.py | 222 ++ 
mmdet/core/bbox/assigners/sim_ota_assigner.py | 254 ++ mmdet/core/bbox/assigners/uniform_assigner.py | 135 + mmdet/core/bbox/builder.py | 21 + mmdet/core/bbox/coder/__init__.py | 15 + mmdet/core/bbox/coder/base_bbox_coder.py | 18 + mmdet/core/bbox/coder/bucketing_bbox_coder.py | 351 ++ .../core/bbox/coder/delta_xywh_bbox_coder.py | 392 +++ .../bbox/coder/distance_point_bbox_coder.py | 62 + .../coder/legacy_delta_xywh_bbox_coder.py | 216 ++ mmdet/core/bbox/coder/pseudo_bbox_coder.py | 19 + mmdet/core/bbox/coder/tblr_bbox_coder.py | 206 ++ mmdet/core/bbox/coder/yolo_bbox_coder.py | 83 + mmdet/core/bbox/demodata.py | 42 + mmdet/core/bbox/iou_calculators/__init__.py | 5 + mmdet/core/bbox/iou_calculators/builder.py | 9 + .../bbox/iou_calculators/iou2d_calculator.py | 261 ++ mmdet/core/bbox/match_costs/__init__.py | 8 + mmdet/core/bbox/match_costs/builder.py | 9 + mmdet/core/bbox/match_costs/match_cost.py | 185 ++ mmdet/core/bbox/samplers/__init__.py | 16 + mmdet/core/bbox/samplers/base_sampler.py | 102 + mmdet/core/bbox/samplers/combined_sampler.py | 21 + .../samplers/instance_balanced_pos_sampler.py | 56 + .../bbox/samplers/iou_balanced_neg_sampler.py | 158 + mmdet/core/bbox/samplers/ohem_sampler.py | 108 + mmdet/core/bbox/samplers/pseudo_sampler.py | 42 + mmdet/core/bbox/samplers/random_sampler.py | 82 + mmdet/core/bbox/samplers/sampling_result.py | 153 + mmdet/core/bbox/samplers/score_hlr_sampler.py | 265 ++ mmdet/core/bbox/transforms.py | 254 ++ mmdet/core/data_structures/__init__.py | 5 + mmdet/core/data_structures/general_data.py | 316 ++ mmdet/core/data_structures/instance_data.py | 188 ++ mmdet/core/evaluation/__init__.py | 16 + mmdet/core/evaluation/bbox_overlaps.py | 65 + mmdet/core/evaluation/class_names.py | 117 + mmdet/core/evaluation/eval_hooks.py | 61 + mmdet/core/evaluation/mean_ap.py | 509 +++ mmdet/core/evaluation/recall.py | 197 ++ mmdet/core/export/__init__.py | 12 + mmdet/core/export/model_wrappers.py | 183 + mmdet/core/export/onnx_helper.py | 223 
++ mmdet/core/export/pytorch2onnx.py | 159 + mmdet/core/hook/__init__.py | 14 + mmdet/core/hook/checkloss_hook.py | 24 + mmdet/core/hook/ema.py | 130 + mmdet/core/hook/pix2seq_lrupdater_hook.py | 30 + mmdet/core/hook/sync_norm_hook.py | 50 + mmdet/core/hook/sync_random_size_hook.py | 72 + mmdet/core/hook/yolox_lrupdater_hook.py | 67 + mmdet/core/hook/yolox_mode_switch_hook.py | 38 + mmdet/core/mask/__init__.py | 9 + mmdet/core/mask/mask_target.py | 127 + mmdet/core/mask/structures.py | 1071 ++++++ mmdet/core/mask/utils.py | 64 + mmdet/core/post_processing/__init__.py | 10 + mmdet/core/post_processing/bbox_nms.py | 171 + mmdet/core/post_processing/matrix_nms.py | 121 + mmdet/core/post_processing/merge_augs.py | 154 + mmdet/core/utils/__init__.py | 13 + mmdet/core/utils/dist_utils.py | 151 + mmdet/core/utils/misc.py | 208 ++ mmdet/core/visualization/__init__.py | 5 + mmdet/core/visualization/image.py | 309 ++ mmdet/datasets/__init__.py | 28 + mmdet/datasets/api_wrappers/__init__.py | 4 + mmdet/datasets/api_wrappers/coco_api.py | 47 + mmdet/datasets/builder.py | 185 ++ mmdet/datasets/cityscapes.py | 335 ++ mmdet/datasets/coco.py | 578 ++++ mmdet/datasets/coco_panoptic.py | 544 +++ mmdet/datasets/coco_pix2seq.py | 114 + mmdet/datasets/custom.py | 368 ++ mmdet/datasets/dataset_wrappers.py | 394 +++ mmdet/datasets/deepfashion.py | 11 + mmdet/datasets/lvis.py | 738 +++++ mmdet/datasets/pipelines/__init__.py | 28 + mmdet/datasets/pipelines/auto_augment.py | 894 +++++ mmdet/datasets/pipelines/compose.py | 52 + mmdet/datasets/pipelines/formating.py | 365 ++ mmdet/datasets/pipelines/instaboost.py | 118 + mmdet/datasets/pipelines/loading.py | 573 ++++ mmdet/datasets/pipelines/test_time_aug.py | 121 + mmdet/datasets/pipelines/transforms.py | 2945 +++++++++++++++++ mmdet/datasets/samplers/__init__.py | 9 + .../datasets/samplers/distributed_sampler.py | 40 + mmdet/datasets/samplers/group_sampler.py | 148 + mmdet/datasets/samplers/infinite_sampler.py | 171 + 
mmdet/datasets/utils.py | 168 + mmdet/datasets/voc.py | 105 + mmdet/datasets/wider_face.py | 52 + mmdet/datasets/xml_style.py | 178 + mmdet/models/__init__.py | 19 + mmdet/models/backbones/__init__.py | 24 + mmdet/models/backbones/csp_darknet.py | 284 ++ mmdet/models/backbones/darknet.py | 213 ++ mmdet/models/backbones/detectors_resnet.py | 353 ++ mmdet/models/backbones/detectors_resnext.py | 123 + mmdet/models/backbones/hourglass.py | 222 ++ mmdet/models/backbones/hrnet.py | 589 ++++ mmdet/models/backbones/mobilenet_v2.py | 197 ++ mmdet/models/backbones/pvt.py | 593 ++++ mmdet/models/backbones/regnet.py | 356 ++ mmdet/models/backbones/res2net.py | 327 ++ mmdet/models/backbones/resnest.py | 322 ++ mmdet/models/backbones/resnet.py | 671 ++++ mmdet/models/backbones/resnext.py | 154 + mmdet/models/backbones/ssd_vgg.py | 128 + mmdet/models/backbones/swin.py | 763 +++++ mmdet/models/backbones/trident_resnet.py | 298 ++ mmdet/models/builder.py | 59 + mmdet/models/dense_heads/__init__.py | 52 + mmdet/models/dense_heads/anchor_free_head.py | 350 ++ mmdet/models/dense_heads/anchor_head.py | 542 +++ mmdet/models/dense_heads/atss_head.py | 492 +++ mmdet/models/dense_heads/autoassign_head.py | 524 +++ mmdet/models/dense_heads/base_dense_head.py | 517 +++ mmdet/models/dense_heads/base_mask_head.py | 116 + mmdet/models/dense_heads/cascade_rpn_head.py | 801 +++++ mmdet/models/dense_heads/centernet_head.py | 412 +++ mmdet/models/dense_heads/centripetal_head.py | 427 +++ mmdet/models/dense_heads/corner_head.py | 1083 ++++++ .../dense_heads/deformable_detr_head.py | 318 ++ mmdet/models/dense_heads/dense_test_mixins.py | 206 ++ mmdet/models/dense_heads/detr_head.py | 822 +++++ .../models/dense_heads/embedding_rpn_head.py | 116 + mmdet/models/dense_heads/fcos_head.py | 453 +++ mmdet/models/dense_heads/fovea_head.py | 385 +++ .../dense_heads/free_anchor_retina_head.py | 271 ++ mmdet/models/dense_heads/fsaf_head.py | 433 +++ mmdet/models/dense_heads/ga_retina_head.py | 113 + 
mmdet/models/dense_heads/ga_rpn_head.py | 177 + mmdet/models/dense_heads/gfl_head.py | 648 ++++ .../models/dense_heads/guided_anchor_head.py | 868 +++++ mmdet/models/dense_heads/ld_head.py | 261 ++ mmdet/models/dense_heads/nasfcos_head.py | 80 + mmdet/models/dense_heads/paa_head.py | 756 +++++ .../models/dense_heads/pisa_retinanet_head.py | 155 + mmdet/models/dense_heads/pisa_ssd_head.py | 140 + mmdet/models/dense_heads/pix2seq_head.py | 430 +++ mmdet/models/dense_heads/reppoints_head.py | 764 +++++ mmdet/models/dense_heads/retina_head.py | 115 + mmdet/models/dense_heads/retina_sepbn_head.py | 118 + mmdet/models/dense_heads/rpn_head.py | 266 ++ mmdet/models/dense_heads/sabl_retina_head.py | 630 ++++ mmdet/models/dense_heads/solo_head.py | 1177 +++++++ mmdet/models/dense_heads/ssd_head.py | 357 ++ mmdet/models/dense_heads/vfnet_head.py | 740 +++++ mmdet/models/dense_heads/yolact_head.py | 1018 ++++++ mmdet/models/dense_heads/yolo_head.py | 619 ++++ mmdet/models/dense_heads/yolof_head.py | 416 +++ mmdet/models/dense_heads/yolox_head.py | 482 +++ mmdet/models/detectors/__init__.py | 51 + mmdet/models/detectors/atss.py | 19 + mmdet/models/detectors/autoassign.py | 19 + mmdet/models/detectors/base.py | 349 ++ mmdet/models/detectors/cascade_rcnn.py | 49 + mmdet/models/detectors/centernet.py | 111 + mmdet/models/detectors/cornernet.py | 97 + mmdet/models/detectors/deformable_detr.py | 10 + mmdet/models/detectors/detr.py | 116 + mmdet/models/detectors/fast_rcnn.py | 55 + mmdet/models/detectors/faster_rcnn.py | 27 + mmdet/models/detectors/fcos.py | 19 + mmdet/models/detectors/fovea.py | 19 + mmdet/models/detectors/fsaf.py | 19 + mmdet/models/detectors/gfl.py | 18 + mmdet/models/detectors/grid_rcnn.py | 32 + mmdet/models/detectors/htc.py | 16 + mmdet/models/detectors/kd_one_stage.py | 101 + mmdet/models/detectors/mask_rcnn.py | 27 + mmdet/models/detectors/mask_scoring_rcnn.py | 30 + mmdet/models/detectors/nasfcos.py | 22 + mmdet/models/detectors/paa.py | 19 + 
mmdet/models/detectors/panoptic_fpn.py | 34 + .../detectors/panoptic_two_stage_segmentor.py | 203 ++ mmdet/models/detectors/pix2seq.py | 45 + mmdet/models/detectors/point_rend.py | 32 + mmdet/models/detectors/queryinst.py | 27 + mmdet/models/detectors/reppoints_detector.py | 24 + mmdet/models/detectors/retinanet.py | 19 + mmdet/models/detectors/rpn.py | 159 + mmdet/models/detectors/scnet.py | 11 + mmdet/models/detectors/single_stage.py | 171 + .../detectors/single_stage_instance_seg.py | 363 ++ mmdet/models/detectors/solo.py | 29 + mmdet/models/detectors/sparse_rcnn.py | 111 + mmdet/models/detectors/trident_faster_rcnn.py | 70 + mmdet/models/detectors/two_stage.py | 211 ++ mmdet/models/detectors/vfnet.py | 20 + mmdet/models/detectors/yolact.py | 120 + mmdet/models/detectors/yolo.py | 42 + mmdet/models/detectors/yolof.py | 19 + mmdet/models/detectors/yolox.py | 20 + mmdet/models/losses/__init__.py | 32 + mmdet/models/losses/accuracy.py | 79 + mmdet/models/losses/ae_loss.py | 103 + mmdet/models/losses/balanced_l1_loss.py | 124 + mmdet/models/losses/cross_entropy_loss.py | 251 ++ mmdet/models/losses/dice_loss.py | 123 + mmdet/models/losses/focal_loss.py | 182 + mmdet/models/losses/gaussian_focal_loss.py | 92 + mmdet/models/losses/gfocal_loss.py | 189 ++ mmdet/models/losses/ghm_loss.py | 213 ++ mmdet/models/losses/iou_loss.py | 474 +++ mmdet/models/losses/kd_loss.py | 88 + mmdet/models/losses/mse_loss.py | 57 + mmdet/models/losses/pisa_loss.py | 184 + mmdet/models/losses/seesaw_loss.py | 262 ++ mmdet/models/losses/smooth_l1_loss.py | 146 + mmdet/models/losses/utils.py | 101 + mmdet/models/losses/varifocal_loss.py | 134 + mmdet/models/necks/__init__.py | 22 + mmdet/models/necks/bfp.py | 102 + mmdet/models/necks/channel_mapper.py | 100 + mmdet/models/necks/ct_resnet_neck.py | 94 + mmdet/models/necks/dilated_encoder.py | 108 + mmdet/models/necks/fpg.py | 406 +++ mmdet/models/necks/fpn.py | 203 ++ mmdet/models/necks/fpn_carafe.py | 275 ++ mmdet/models/necks/hrfpn.py | 100 
+ mmdet/models/necks/nas_fpn.py | 158 + mmdet/models/necks/nasfcos_fpn.py | 169 + mmdet/models/necks/pafpn.py | 158 + mmdet/models/necks/rfp.py | 135 + mmdet/models/necks/ssd_neck.py | 129 + mmdet/models/necks/yolo_neck.py | 140 + mmdet/models/necks/yolox_pafpn.py | 156 + mmdet/models/plugins/__init__.py | 4 + mmdet/models/plugins/dropblock.py | 85 + mmdet/models/roi_heads/__init__.py | 37 + mmdet/models/roi_heads/base_roi_head.py | 103 + mmdet/models/roi_heads/bbox_heads/__init__.py | 14 + .../models/roi_heads/bbox_heads/bbox_head.py | 593 ++++ .../roi_heads/bbox_heads/convfc_bbox_head.py | 228 ++ mmdet/models/roi_heads/bbox_heads/dii_head.py | 425 +++ .../roi_heads/bbox_heads/double_bbox_head.py | 178 + .../models/roi_heads/bbox_heads/sabl_head.py | 584 ++++ .../roi_heads/bbox_heads/scnet_bbox_head.py | 77 + mmdet/models/roi_heads/cascade_roi_head.py | 631 ++++ mmdet/models/roi_heads/double_roi_head.py | 34 + mmdet/models/roi_heads/dynamic_roi_head.py | 155 + mmdet/models/roi_heads/grid_roi_head.py | 170 + mmdet/models/roi_heads/htc_roi_head.py | 628 ++++ mmdet/models/roi_heads/mask_heads/__init__.py | 20 + .../roi_heads/mask_heads/coarse_mask_head.py | 100 + .../roi_heads/mask_heads/dynamic_mask_head.py | 146 + .../roi_heads/mask_heads/fcn_mask_head.py | 412 +++ .../mask_heads/feature_relay_head.py | 53 + .../mask_heads/fused_semantic_head.py | 117 + .../mask_heads/global_context_head.py | 101 + .../models/roi_heads/mask_heads/grid_head.py | 363 ++ .../roi_heads/mask_heads/htc_mask_head.py | 39 + .../roi_heads/mask_heads/mask_point_head.py | 306 ++ .../roi_heads/mask_heads/maskiou_head.py | 183 + .../roi_heads/mask_heads/scnet_mask_head.py | 28 + .../mask_heads/scnet_semantic_head.py | 28 + .../models/roi_heads/mask_scoring_roi_head.py | 113 + mmdet/models/roi_heads/pisa_roi_head.py | 160 + mmdet/models/roi_heads/point_rend_roi_head.py | 393 +++ .../roi_heads/roi_extractors/__init__.py | 6 + .../roi_extractors/base_roi_extractor.py | 88 + 
.../roi_extractors/generic_roi_extractor.py | 84 + .../single_level_roi_extractor.py | 115 + mmdet/models/roi_heads/scnet_roi_head.py | 605 ++++ .../models/roi_heads/shared_heads/__init__.py | 4 + .../roi_heads/shared_heads/res_layer.py | 80 + mmdet/models/roi_heads/sparse_roi_head.py | 424 +++ mmdet/models/roi_heads/standard_roi_head.py | 397 +++ mmdet/models/roi_heads/test_mixins.py | 311 ++ mmdet/models/roi_heads/trident_roi_head.py | 120 + mmdet/models/seg_heads/__init__.py | 3 + mmdet/models/seg_heads/base_semantic_head.py | 86 + mmdet/models/seg_heads/panoptic_fpn_head.py | 155 + .../panoptic_fusion_heads/__init__.py | 4 + .../base_panoptic_fusion_head.py | 48 + .../heuristic_fusion_head.py | 126 + mmdet/models/utils/__init__.py | 31 + mmdet/models/utils/brick_wrappers.py | 50 + mmdet/models/utils/builder.py | 47 + mmdet/models/utils/ckpt_convert.py | 137 + mmdet/models/utils/conv_upsample.py | 67 + mmdet/models/utils/csp_layer.py | 150 + mmdet/models/utils/gaussian_target.py | 268 ++ mmdet/models/utils/inverted_residual.py | 124 + mmdet/models/utils/make_divisible.py | 28 + mmdet/models/utils/misc.py | 42 + mmdet/models/utils/normed_predictor.py | 88 + mmdet/models/utils/positional_encoding.py | 163 + mmdet/models/utils/res_layer.py | 190 ++ mmdet/models/utils/se_layer.py | 58 + mmdet/models/utils/transformer.py | 2036 ++++++++++++ mmdet/models/utils/transformer_old.py | 2104 ++++++++++++ mmdet/utils/__init__.py | 5 + mmdet/utils/collect_env.py | 17 + mmdet/utils/contextmanagers.py | 122 + mmdet/utils/logger.py | 20 + mmdet/utils/profiling.py | 40 + mmdet/utils/util_mixins.py | 105 + mmdet/utils/util_random.py | 34 + mmdet/version.py | 19 + model-index.yml | 62 + pytest.ini | 7 + requirements.txt | 4 + requirements/build.txt | 3 + requirements/docs.txt | 7 + requirements/mminstall.txt | 1 + requirements/optional.txt | 4 + requirements/readthedocs.txt | 3 + requirements/runtime.txt | 6 + requirements/tests.txt | 14 + resources/000000212559.jpg | Bin 0 -> 
173895 bytes resources/000000255664.jpg | Bin 0 -> 128199 bytes resources/000000289393.jpg | Bin 0 -> 106371 bytes resources/007114.jpg | Bin 0 -> 72599 bytes resources/007351.jpg | Bin 0 -> 77145 bytes resources/008322.jpg | Bin 0 -> 61937 bytes resources/coco_test_12510.jpg | Bin 0 -> 183096 bytes resources/corruptions_sev_3.png | Bin 0 -> 1401893 bytes resources/data_pipeline.png | Bin 0 -> 84111 bytes resources/loss_curve.png | Bin 0 -> 37484 bytes resources/mmdet-logo.png | Bin 0 -> 32836 bytes resources/qq_group_qrcode.jpg | Bin 0 -> 207584 bytes resources/vqvae-framework.png | Bin 0 -> 220943 bytes resources/zhihu_qrcode.jpg | Bin 0 -> 397245 bytes setup.cfg | 18 + setup.py | 217 ++ .../test_datasets/test_coco_dataset.py | 58 + tests/test_data/test_datasets/test_common.py | 364 ++ .../test_datasets/test_custom_dataset.py | 139 + .../test_datasets/test_dataset_wrapper.py | 142 + .../test_datasets/test_panoptic_dataset.py | 307 ++ .../test_datasets/test_xml_dataset.py | 23 + .../test_pipelines/test_formatting.py | 24 + .../test_data/test_pipelines/test_loading.py | 91 + .../test_data/test_pipelines/test_sampler.py | 329 ++ .../test_pipelines/test_transform/__init__.py | 4 + .../test_transform/test_img_augment.py | 174 + .../test_transform/test_models_aug_test.py | 131 + .../test_transform/test_rotate.py | 172 + .../test_transform/test_shear.py | 164 + .../test_transform/test_transform.py | 967 ++++++ .../test_transform/test_translate.py | 516 +++ .../test_pipelines/test_transform/utils.py | 78 + tests/test_data/test_utils.py | 80 + tests/test_downstream/test_mmtrack.py | 230 ++ tests/test_metrics/test_box_overlap.py | 134 + tests/test_metrics/test_losses.py | 241 ++ tests/test_metrics/test_mean_ap.py | 87 + tests/test_metrics/test_recall.py | 46 + tests/test_models/test_backbones/__init__.py | 4 + .../test_backbones/test_csp_darknet.py | 116 + .../test_backbones/test_detectors_resnet.py | 47 + .../test_backbones/test_hourglass.py | 49 + 
.../test_models/test_backbones/test_hrnet.py | 111 + .../test_backbones/test_mobilenet_v2.py | 173 + tests/test_models/test_backbones/test_pvt.py | 103 + .../test_models/test_backbones/test_regnet.py | 58 + .../test_models/test_backbones/test_renext.py | 105 + .../test_backbones/test_res2net.py | 62 + .../test_backbones/test_resnest.py | 47 + .../test_models/test_backbones/test_resnet.py | 632 ++++ tests/test_models/test_backbones/test_swin.py | 82 + .../test_backbones/test_trident_resnet.py | 180 + tests/test_models/test_backbones/utils.py | 32 + .../test_dense_heads/test_anchor_head.py | 70 + .../test_dense_heads/test_atss_head.py | 77 + .../test_dense_heads/test_autoassign_head.py | 91 + .../test_dense_heads/test_centernet_head.py | 107 + .../test_dense_heads/test_corner_head.py | 167 + .../test_dense_heads/test_dense_heads_attr.py | 44 + .../test_dense_heads/test_detr_head.py | 104 + .../test_dense_heads/test_fcos_head.py | 64 + .../test_dense_heads/test_fsaf_head.py | 82 + .../test_dense_heads/test_ga_anchor_head.py | 91 + .../test_dense_heads/test_gfl_head.py | 74 + .../test_dense_heads/test_ld_head.py | 121 + .../test_dense_heads/test_paa_head.py | 135 + .../test_dense_heads/test_pisa_head.py | 245 ++ .../test_dense_heads/test_sabl_retina_head.py | 76 + .../test_dense_heads/test_solo_head.py | 284 ++ .../test_dense_heads/test_vfnet_head.py | 63 + .../test_dense_heads/test_yolact_head.py | 137 + .../test_dense_heads/test_yolof_head.py | 76 + .../test_dense_heads/test_yolox_head.py | 72 + tests/test_models/test_forward.py | 672 ++++ tests/test_models/test_loss.py | 215 ++ tests/test_models/test_loss_compatibility.py | 201 ++ tests/test_models/test_necks.py | 406 +++ tests/test_models/test_plugins.py | 29 + tests/test_models/test_roi_heads/__init__.py | 4 + .../test_roi_heads/test_bbox_head.py | 251 ++ .../test_roi_heads/test_mask_head.py | 96 + .../test_roi_heads/test_roi_extractor.py | 114 + .../test_roi_heads/test_sabl_bbox_head.py | 77 + 
tests/test_models/test_roi_heads/utils.py | 38 + .../test_utils/test_brick_wrappers.py | 93 + .../test_utils/test_conv_upsample.py | 24 + .../test_utils/test_inverted_residual.py | 76 + .../test_models/test_utils/test_model_misc.py | 27 + .../test_utils/test_position_encoding.py | 39 + tests/test_models/test_utils/test_se_layer.py | 24 + .../test_utils/test_transformer.py | 569 ++++ tests/test_onnx/__init__.py | 4 + tests/test_onnx/test_head.py | 453 +++ tests/test_onnx/test_neck.py | 163 + tests/test_onnx/utils.py | 137 + tests/test_runtime/async_benchmark.py | 102 + tests/test_runtime/test_async.py | 83 + tests/test_runtime/test_config.py | 371 +++ tests/test_runtime/test_eval_hook.py | 252 ++ tests/test_runtime/test_fp16.py | 301 ++ tests/test_utils/test_anchor.py | 767 +++++ tests/test_utils/test_assigner.py | 498 +++ tests/test_utils/test_coder.py | 127 + tests/test_utils/test_general_data.py | 591 ++++ tests/test_utils/test_hook.py | 325 ++ tests/test_utils/test_masks.py | 689 ++++ tests/test_utils/test_misc.py | 162 + tests/test_utils/test_nms.py | 75 + tests/test_utils/test_version.py | 16 + tests/test_utils/test_visualization.py | 127 + tools/analysis_tools/analyze_logs.py | 180 + tools/analysis_tools/analyze_results.py | 199 ++ tools/analysis_tools/benchmark.py | 187 ++ tools/analysis_tools/coco_error_analysis.py | 339 ++ tools/analysis_tools/confusion_matrix.py | 261 ++ tools/analysis_tools/eval_metric.py | 80 + tools/analysis_tools/get_flops.py | 97 + tools/analysis_tools/optimize_anchors.py | 370 +++ tools/analysis_tools/robustness_eval.py | 251 ++ tools/analysis_tools/test_robustness.py | 387 +++ tools/dataset_converters/cityscapes.py | 152 + tools/dataset_converters/images2coco.py | 101 + tools/dataset_converters/pascal_voc.py | 237 ++ tools/deployment/mmdet2torchserve.py | 110 + tools/deployment/mmdet_handler.py | 71 + tools/deployment/onnx2tensorrt.py | 254 ++ tools/deployment/pytorch2onnx.py | 345 ++ tools/deployment/test.py | 143 + 
tools/deployment/test_torchserver.py | 74 + tools/dist_test.sh | 10 + tools/dist_train.sh | 8 + tools/misc/browse_dataset.py | 107 + tools/misc/print_config.py | 51 + tools/model_converters/detectron2pytorch.py | 83 + tools/model_converters/publish_model.py | 43 + tools/model_converters/regnet2mmdet.py | 90 + tools/model_converters/selfsup2mmdet.py | 42 + .../model_converters/upgrade_model_version.py | 210 ++ tools/model_converters/upgrade_ssd_version.py | 58 + tools/slurm_test.sh | 24 + tools/slurm_train.sh | 24 + tools/test.py | 234 ++ tools/train.py | 185 ++ 1273 files changed, 133851 insertions(+) create mode 100644 CITATION.cff create mode 100644 LICENSE create mode 100644 MANIFEST.in create mode 100644 README.md create mode 100644 README_zh-CN.md create mode 100644 configs/_base_/datasets/cityscapes_detection.py create mode 100644 configs/_base_/datasets/cityscapes_instance.py create mode 100644 configs/_base_/datasets/coco_detection.py create mode 100644 configs/_base_/datasets/coco_detection_pix2seq.py create mode 100644 configs/_base_/datasets/coco_detection_vqvae.py create mode 100644 configs/_base_/datasets/coco_instance.py create mode 100644 configs/_base_/datasets/coco_instance_semantic.py create mode 100644 configs/_base_/datasets/coco_panoptic.py create mode 100644 configs/_base_/datasets/deepfashion.py create mode 100644 configs/_base_/datasets/lvis_v0.5_instance.py create mode 100644 configs/_base_/datasets/lvis_v1_instance.py create mode 100644 configs/_base_/datasets/voc0712.py create mode 100644 configs/_base_/datasets/voc0712_pix2seq.py create mode 100644 configs/_base_/datasets/wider_face.py create mode 100644 configs/_base_/default_runtime.py create mode 100644 configs/_base_/models/cascade_mask_rcnn_r50_fpn.py create mode 100644 configs/_base_/models/cascade_rcnn_r50_fpn.py create mode 100644 configs/_base_/models/fast_rcnn_r50_fpn.py create mode 100644 configs/_base_/models/faster_rcnn_r50_caffe_c4.py create mode 100644 
configs/_base_/models/faster_rcnn_r50_caffe_dc5.py create mode 100644 configs/_base_/models/faster_rcnn_r50_fpn.py create mode 100644 configs/_base_/models/mask_rcnn_r50_caffe_c4.py create mode 100644 configs/_base_/models/mask_rcnn_r50_fpn.py create mode 100644 configs/_base_/models/retinanet_r50_fpn.py create mode 100644 configs/_base_/models/rpn_r50_caffe_c4.py create mode 100644 configs/_base_/models/rpn_r50_fpn.py create mode 100644 configs/_base_/models/ssd300.py create mode 100644 configs/_base_/schedules/schedule_1x.py create mode 100644 configs/_base_/schedules/schedule_20e.py create mode 100644 configs/_base_/schedules/schedule_2x.py create mode 100644 configs/albu_example/README.md create mode 100644 configs/albu_example/mask_rcnn_r50_fpn_albu_1x_coco.py create mode 100644 configs/atss/README.md create mode 100644 configs/atss/atss_r101_fpn_1x_coco.py create mode 100644 configs/atss/atss_r50_fpn_1x_coco.py create mode 100644 configs/atss/metafile.yml create mode 100644 configs/autoassign/README.md create mode 100644 configs/autoassign/autoassign_r50_fpn_8x2_1x_coco.py create mode 100644 configs/autoassign/metafile.yml create mode 100644 configs/carafe/README.md create mode 100644 configs/carafe/faster_rcnn_r50_fpn_carafe_1x_coco.py create mode 100644 configs/carafe/mask_rcnn_r50_fpn_carafe_1x_coco.py create mode 100644 configs/cascade_rcnn/README.md create mode 100644 configs/cascade_rcnn/cascade_mask_rcnn_r101_caffe_fpn_1x_coco.py create mode 100644 configs/cascade_rcnn/cascade_mask_rcnn_r101_caffe_fpn_mstrain_3x_coco.py create mode 100644 configs/cascade_rcnn/cascade_mask_rcnn_r101_fpn_1x_coco.py create mode 100644 configs/cascade_rcnn/cascade_mask_rcnn_r101_fpn_20e_coco.py create mode 100644 configs/cascade_rcnn/cascade_mask_rcnn_r101_fpn_mstrain_3x_coco.py create mode 100644 configs/cascade_rcnn/cascade_mask_rcnn_r50_caffe_fpn_1x_coco.py create mode 100644 configs/cascade_rcnn/cascade_mask_rcnn_r50_caffe_fpn_mstrain_3x_coco.py create mode 100644 
configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py create mode 100644 configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_20e_coco.py create mode 100644 configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_mstrain_3x_coco.py create mode 100644 configs/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco.py create mode 100644 configs/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_20e_coco.py create mode 100644 configs/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_mstrain_3x_coco.py create mode 100644 configs/cascade_rcnn/cascade_mask_rcnn_x101_32x8d_fpn_mstrain_3x_coco.py create mode 100644 configs/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_1x_coco.py create mode 100644 configs/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_20e_coco.py create mode 100644 configs/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_mstrain_3x_coco.py create mode 100644 configs/cascade_rcnn/cascade_rcnn_r101_caffe_fpn_1x_coco.py create mode 100644 configs/cascade_rcnn/cascade_rcnn_r101_fpn_1x_coco.py create mode 100644 configs/cascade_rcnn/cascade_rcnn_r101_fpn_20e_coco.py create mode 100644 configs/cascade_rcnn/cascade_rcnn_r50_caffe_fpn_1x_coco.py create mode 100644 configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.py create mode 100644 configs/cascade_rcnn/cascade_rcnn_r50_fpn_20e_coco.py create mode 100644 configs/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_1x_coco.py create mode 100644 configs/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_20e_coco.py create mode 100644 configs/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_1x_coco.py create mode 100644 configs/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py create mode 100644 configs/cascade_rcnn/metafile.yml create mode 100644 configs/cascade_rpn/README.md create mode 100644 configs/cascade_rpn/crpn_fast_rcnn_r50_caffe_fpn_1x_coco.py create mode 100644 configs/cascade_rpn/crpn_faster_rcnn_r50_caffe_fpn_1x_coco.py create mode 100644 configs/cascade_rpn/crpn_r50_caffe_fpn_1x_coco.py create mode 100644 configs/centernet/README.md create mode 100644 
configs/centernet/centernet_resnet18_140e_coco.py create mode 100644 configs/centernet/centernet_resnet18_dcnv2_140e_coco.py create mode 100644 configs/centernet/metafile.yml create mode 100644 configs/centripetalnet/README.md create mode 100644 configs/centripetalnet/centripetalnet_hourglass104_mstest_16x6_210e_coco.py create mode 100644 configs/centripetalnet/metafile.yml create mode 100644 configs/cityscapes/README.md create mode 100644 configs/cityscapes/faster_rcnn_r50_fpn_1x_cityscapes.py create mode 100644 configs/cityscapes/mask_rcnn_r50_fpn_1x_cityscapes.py create mode 100644 configs/common/lsj_100e_coco_instance.py create mode 100644 configs/common/mstrain-poly_3x_coco_instance.py create mode 100644 configs/common/mstrain_3x_coco.py create mode 100644 configs/common/mstrain_3x_coco_instance.py create mode 100644 configs/cornernet/README.md create mode 100644 configs/cornernet/cornernet_hourglass104_mstest_10x5_210e_coco.py create mode 100644 configs/cornernet/cornernet_hourglass104_mstest_32x3_210e_coco.py create mode 100644 configs/cornernet/cornernet_hourglass104_mstest_8x6_210e_coco.py create mode 100644 configs/cornernet/metafile.yml create mode 100644 configs/dcn/README.md create mode 100644 configs/dcn/cascade_mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py create mode 100644 configs/dcn/cascade_mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py create mode 100644 configs/dcn/cascade_mask_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco.py create mode 100644 configs/dcn/cascade_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py create mode 100644 configs/dcn/cascade_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py create mode 100644 configs/dcn/faster_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py create mode 100644 configs/dcn/faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py create mode 100644 configs/dcn/faster_rcnn_r50_fpn_dpool_1x_coco.py create mode 100644 configs/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py create mode 100644 configs/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco.py create mode 100644 
configs/dcn/faster_rcnn_r50_fpn_mdpool_1x_coco.py create mode 100644 configs/dcn/faster_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco.py create mode 100644 configs/dcn/mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py create mode 100644 configs/dcn/mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py create mode 100644 configs/dcn/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py create mode 100644 configs/dcn/metafile.yml create mode 100644 configs/deepfashion/README.md create mode 100644 configs/deepfashion/mask_rcnn_r50_fpn_15e_deepfashion.py create mode 100644 configs/deformable_detr/README.md create mode 100644 configs/deformable_detr/deformable_detr_r50_16x2_50e_coco.py create mode 100644 configs/deformable_detr/deformable_detr_refine_r50_16x2_50e_coco.py create mode 100644 configs/deformable_detr/deformable_detr_twostage_refine_r50_16x2_50e_coco.py create mode 100644 configs/deformable_detr/metafile.yml create mode 100644 configs/detectors/README.md create mode 100644 configs/detectors/cascade_rcnn_r50_rfp_1x_coco.py create mode 100644 configs/detectors/cascade_rcnn_r50_sac_1x_coco.py create mode 100644 configs/detectors/detectors_cascade_rcnn_r50_1x_coco.py create mode 100644 configs/detectors/detectors_htc_r101_20e_coco.py create mode 100644 configs/detectors/detectors_htc_r50_1x_coco.py create mode 100644 configs/detectors/htc_r50_rfp_1x_coco.py create mode 100644 configs/detectors/htc_r50_sac_1x_coco.py create mode 100644 configs/detectors/metafile.yml create mode 100644 configs/detr/README.md create mode 100644 configs/detr/detr_r50_8x2_150e_coco.py create mode 100644 configs/detr/detr_r50_8x2_150e_coco_merge.py create mode 100644 configs/detr/metafile.yml create mode 100644 configs/double_heads/README.md create mode 100644 configs/double_heads/dh_faster_rcnn_r50_fpn_1x_coco.py create mode 100644 configs/double_heads/metafile.yml create mode 100644 configs/dynamic_rcnn/README.md create mode 100644 configs/dynamic_rcnn/dynamic_rcnn_r50_fpn_1x_coco.py create mode 100644 
configs/dynamic_rcnn/metafile.yml create mode 100644 configs/empirical_attention/README.md create mode 100644 configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_1x_coco.py create mode 100644 configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_dcn_1x_coco.py create mode 100644 configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_1x_coco.py create mode 100644 configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_dcn_1x_coco.py create mode 100644 configs/empirical_attention/metafile.yml create mode 100644 configs/fast_rcnn/README.md create mode 100644 configs/fast_rcnn/fast_rcnn_r101_caffe_fpn_1x_coco.py create mode 100644 configs/fast_rcnn/fast_rcnn_r101_fpn_1x_coco.py create mode 100644 configs/fast_rcnn/fast_rcnn_r101_fpn_2x_coco.py create mode 100644 configs/fast_rcnn/fast_rcnn_r50_caffe_fpn_1x_coco.py create mode 100644 configs/fast_rcnn/fast_rcnn_r50_fpn_1x_coco.py create mode 100644 configs/fast_rcnn/fast_rcnn_r50_fpn_2x_coco.py create mode 100644 configs/faster_rcnn/README.md create mode 100644 configs/faster_rcnn/faster_rcnn_r101_caffe_fpn_1x_coco.py create mode 100644 configs/faster_rcnn/faster_rcnn_r101_caffe_fpn_mstrain_3x_coco.py create mode 100644 configs/faster_rcnn/faster_rcnn_r101_fpn_1x_coco.py create mode 100644 configs/faster_rcnn/faster_rcnn_r101_fpn_2x_coco.py create mode 100644 configs/faster_rcnn/faster_rcnn_r101_fpn_mstrain_3x_coco.py create mode 100644 configs/faster_rcnn/faster_rcnn_r50_caffe_c4_1x_coco.py create mode 100644 configs/faster_rcnn/faster_rcnn_r50_caffe_dc5_1x_coco.py create mode 100644 configs/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_1x_coco.py create mode 100644 configs/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_3x_coco.py create mode 100644 configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_1x_coco.py create mode 100644 configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_90k_coco.py create mode 100644 configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_1x_coco-person-bicycle-car.py 
create mode 100644 configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_1x_coco-person.py create mode 100644 configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_1x_coco.py create mode 100644 configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_2x_coco.py create mode 100644 configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco.py create mode 100644 configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_90k_coco.py create mode 100644 configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py create mode 100644 configs/faster_rcnn/faster_rcnn_r50_fpn_2x_coco.py create mode 100644 configs/faster_rcnn/faster_rcnn_r50_fpn_bounded_iou_1x_coco.py create mode 100644 configs/faster_rcnn/faster_rcnn_r50_fpn_ciou_1x_coco.py create mode 100644 configs/faster_rcnn/faster_rcnn_r50_fpn_giou_1x_coco.py create mode 100644 configs/faster_rcnn/faster_rcnn_r50_fpn_iou_1x_coco.py create mode 100644 configs/faster_rcnn/faster_rcnn_r50_fpn_mstrain_3x_coco.py create mode 100644 configs/faster_rcnn/faster_rcnn_r50_fpn_ohem_1x_coco.py create mode 100644 configs/faster_rcnn/faster_rcnn_r50_fpn_soft_nms_1x_coco.py create mode 100644 configs/faster_rcnn/faster_rcnn_x101_32x4d_fpn_1x_coco.py create mode 100644 configs/faster_rcnn/faster_rcnn_x101_32x4d_fpn_2x_coco.py create mode 100644 configs/faster_rcnn/faster_rcnn_x101_32x4d_fpn_mstrain_3x_coco.py create mode 100644 configs/faster_rcnn/faster_rcnn_x101_32x8d_fpn_mstrain_3x_coco.py create mode 100644 configs/faster_rcnn/faster_rcnn_x101_64x4d_fpn_1x_coco.py create mode 100644 configs/faster_rcnn/faster_rcnn_x101_64x4d_fpn_2x_coco.py create mode 100644 configs/faster_rcnn/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco.py create mode 100644 configs/faster_rcnn/metafile.yml create mode 100644 configs/fcos/README.md create mode 100644 configs/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco.py create mode 100644 configs/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_dcn_1x_coco.py create mode 100644 
configs/fcos/fcos_center_r50_caffe_fpn_gn-head_1x_coco.py create mode 100644 configs/fcos/fcos_r101_caffe_fpn_gn-head_1x_coco.py create mode 100644 configs/fcos/fcos_r101_caffe_fpn_gn-head_mstrain_640-800_2x_coco.py create mode 100644 configs/fcos/fcos_r50_caffe_fpn_gn-head_1x_coco.py create mode 100644 configs/fcos/fcos_r50_caffe_fpn_gn-head_4x4_1x_coco.py create mode 100644 configs/fcos/fcos_r50_caffe_fpn_gn-head_mstrain_640-800_2x_coco.py create mode 100644 configs/fcos/fcos_x101_64x4d_fpn_gn-head_mstrain_640-800_2x_coco.py create mode 100644 configs/fcos/metafile.yml create mode 100644 configs/foveabox/README.md create mode 100644 configs/foveabox/fovea_align_r101_fpn_gn-head_4x4_2x_coco.py create mode 100644 configs/foveabox/fovea_align_r101_fpn_gn-head_mstrain_640-800_4x4_2x_coco.py create mode 100644 configs/foveabox/fovea_align_r50_fpn_gn-head_4x4_2x_coco.py create mode 100644 configs/foveabox/fovea_align_r50_fpn_gn-head_mstrain_640-800_4x4_2x_coco.py create mode 100644 configs/foveabox/fovea_r101_fpn_4x4_1x_coco.py create mode 100644 configs/foveabox/fovea_r101_fpn_4x4_2x_coco.py create mode 100644 configs/foveabox/fovea_r50_fpn_4x4_1x_coco.py create mode 100644 configs/foveabox/fovea_r50_fpn_4x4_2x_coco.py create mode 100644 configs/foveabox/metafile.yml create mode 100644 configs/fp16/README.md create mode 100644 configs/fp16/faster_rcnn_r50_fpn_fp16_1x_coco.py create mode 100644 configs/fp16/mask_rcnn_r50_fpn_fp16_1x_coco.py create mode 100644 configs/fp16/mask_rcnn_r50_fpn_fp16_dconv_c3-c5_1x_coco.py create mode 100644 configs/fp16/mask_rcnn_r50_fpn_fp16_mdconv_c3-c5_1x_coco.py create mode 100644 configs/fp16/metafile.yml create mode 100644 configs/fp16/retinanet_r50_fpn_fp16_1x_coco.py create mode 100644 configs/fpg/README.md create mode 100644 configs/fpg/faster_rcnn_r50_fpg-chn128_crop640_50e_coco.py create mode 100644 configs/fpg/faster_rcnn_r50_fpg_crop640_50e_coco.py create mode 100644 configs/fpg/faster_rcnn_r50_fpn_crop640_50e_coco.py create 
mode 100644 configs/fpg/mask_rcnn_r50_fpg-chn128_crop640_50e_coco.py create mode 100644 configs/fpg/mask_rcnn_r50_fpg_crop640_50e_coco.py create mode 100644 configs/fpg/mask_rcnn_r50_fpn_crop640_50e_coco.py create mode 100644 configs/fpg/metafile.yml create mode 100644 configs/fpg/retinanet_r50_fpg-chn128_crop640_50e_coco.py create mode 100644 configs/fpg/retinanet_r50_fpg_crop640_50e_coco.py create mode 100644 configs/free_anchor/README.md create mode 100644 configs/free_anchor/metafile.yml create mode 100644 configs/free_anchor/retinanet_free_anchor_r101_fpn_1x_coco.py create mode 100644 configs/free_anchor/retinanet_free_anchor_r50_fpn_1x_coco.py create mode 100644 configs/free_anchor/retinanet_free_anchor_x101_32x4d_fpn_1x_coco.py create mode 100644 configs/fsaf/README.md create mode 100644 configs/fsaf/fsaf_r101_fpn_1x_coco.py create mode 100644 configs/fsaf/fsaf_r50_fpn_1x_coco.py create mode 100644 configs/fsaf/fsaf_x101_64x4d_fpn_1x_coco.py create mode 100644 configs/fsaf/metafile.yml create mode 100644 configs/gcnet/README.md create mode 100644 configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco.py create mode 100644 configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_1x_coco.py create mode 100644 configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r16_gcb_c3-c5_1x_coco.py create mode 100644 configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r4_gcb_c3-c5_1x_coco.py create mode 100644 configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py create mode 100644 configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py create mode 100644 configs/gcnet/mask_rcnn_r101_fpn_r16_gcb_c3-c5_1x_coco.py create mode 100644 configs/gcnet/mask_rcnn_r101_fpn_r4_gcb_c3-c5_1x_coco.py create mode 100644 configs/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_1x_coco.py create mode 100644 
configs/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py create mode 100644 configs/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py create mode 100644 configs/gcnet/mask_rcnn_r50_fpn_r16_gcb_c3-c5_1x_coco.py create mode 100644 configs/gcnet/mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco.py create mode 100644 configs/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_1x_coco.py create mode 100644 configs/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py create mode 100644 configs/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py create mode 100644 configs/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco.py create mode 100644 configs/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py create mode 100644 configs/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py create mode 100644 configs/gcnet/metafile.yml create mode 100644 configs/gfl/README.md create mode 100644 configs/gfl/gfl_r101_fpn_dconv_c3-c5_mstrain_2x_coco.py create mode 100644 configs/gfl/gfl_r101_fpn_mstrain_2x_coco.py create mode 100644 configs/gfl/gfl_r50_fpn_1x_coco.py create mode 100644 configs/gfl/gfl_r50_fpn_mstrain_2x_coco.py create mode 100644 configs/gfl/gfl_x101_32x4d_fpn_dconv_c4-c5_mstrain_2x_coco.py create mode 100644 configs/gfl/gfl_x101_32x4d_fpn_mstrain_2x_coco.py create mode 100644 configs/gfl/metafile.yml create mode 100644 configs/ghm/README.md create mode 100644 configs/ghm/metafile.yml create mode 100644 configs/ghm/retinanet_ghm_r101_fpn_1x_coco.py create mode 100644 configs/ghm/retinanet_ghm_r50_fpn_1x_coco.py create mode 100644 configs/ghm/retinanet_ghm_x101_32x4d_fpn_1x_coco.py create mode 100644 configs/ghm/retinanet_ghm_x101_64x4d_fpn_1x_coco.py create mode 100644 configs/gn+ws/README.md create mode 100644 configs/gn+ws/faster_rcnn_r101_fpn_gn_ws-all_1x_coco.py create mode 100644 configs/gn+ws/faster_rcnn_r50_fpn_gn_ws-all_1x_coco.py create mode 100644 
configs/gn+ws/faster_rcnn_x101_32x4d_fpn_gn_ws-all_1x_coco.py create mode 100644 configs/gn+ws/faster_rcnn_x50_32x4d_fpn_gn_ws-all_1x_coco.py create mode 100644 configs/gn+ws/mask_rcnn_r101_fpn_gn_ws-all_20_23_24e_coco.py create mode 100644 configs/gn+ws/mask_rcnn_r101_fpn_gn_ws-all_2x_coco.py create mode 100644 configs/gn+ws/mask_rcnn_r50_fpn_gn_ws-all_20_23_24e_coco.py create mode 100644 configs/gn+ws/mask_rcnn_r50_fpn_gn_ws-all_2x_coco.py create mode 100644 configs/gn+ws/mask_rcnn_x101_32x4d_fpn_gn_ws-all_20_23_24e_coco.py create mode 100644 configs/gn+ws/mask_rcnn_x101_32x4d_fpn_gn_ws-all_2x_coco.py create mode 100644 configs/gn+ws/mask_rcnn_x50_32x4d_fpn_gn_ws-all_20_23_24e_coco.py create mode 100644 configs/gn+ws/mask_rcnn_x50_32x4d_fpn_gn_ws-all_2x_coco.py create mode 100644 configs/gn+ws/metafile.yml create mode 100644 configs/gn/README.md create mode 100644 configs/gn/mask_rcnn_r101_fpn_gn-all_2x_coco.py create mode 100644 configs/gn/mask_rcnn_r101_fpn_gn-all_3x_coco.py create mode 100644 configs/gn/mask_rcnn_r50_fpn_gn-all_2x_coco.py create mode 100644 configs/gn/mask_rcnn_r50_fpn_gn-all_3x_coco.py create mode 100644 configs/gn/mask_rcnn_r50_fpn_gn-all_contrib_2x_coco.py create mode 100644 configs/gn/mask_rcnn_r50_fpn_gn-all_contrib_3x_coco.py create mode 100644 configs/gn/metafile.yml create mode 100644 configs/grid_rcnn/README.md create mode 100644 configs/grid_rcnn/grid_rcnn_r101_fpn_gn-head_2x_coco.py create mode 100644 configs/grid_rcnn/grid_rcnn_r50_fpn_gn-head_1x_coco.py create mode 100644 configs/grid_rcnn/grid_rcnn_r50_fpn_gn-head_2x_coco.py create mode 100644 configs/grid_rcnn/grid_rcnn_x101_32x4d_fpn_gn-head_2x_coco.py create mode 100644 configs/grid_rcnn/grid_rcnn_x101_64x4d_fpn_gn-head_2x_coco.py create mode 100644 configs/grid_rcnn/metafile.yml create mode 100644 configs/groie/README.md create mode 100644 configs/groie/faster_rcnn_r50_fpn_groie_1x_coco.py create mode 100644 configs/groie/grid_rcnn_r50_fpn_gn-head_groie_1x_coco.py create mode 
100644 configs/groie/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco.py create mode 100644 configs/groie/mask_rcnn_r50_fpn_groie_1x_coco.py create mode 100644 configs/groie/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco.py create mode 100644 configs/groie/metafile.yml create mode 100644 configs/guided_anchoring/README.md create mode 100644 configs/guided_anchoring/ga_fast_r50_caffe_fpn_1x_coco.py create mode 100644 configs/guided_anchoring/ga_faster_r101_caffe_fpn_1x_coco.py create mode 100644 configs/guided_anchoring/ga_faster_r50_caffe_fpn_1x_coco.py create mode 100644 configs/guided_anchoring/ga_faster_r50_fpn_1x_coco.py create mode 100644 configs/guided_anchoring/ga_faster_x101_32x4d_fpn_1x_coco.py create mode 100644 configs/guided_anchoring/ga_faster_x101_64x4d_fpn_1x_coco.py create mode 100644 configs/guided_anchoring/ga_retinanet_r101_caffe_fpn_1x_coco.py create mode 100644 configs/guided_anchoring/ga_retinanet_r101_caffe_fpn_mstrain_2x.py create mode 100644 configs/guided_anchoring/ga_retinanet_r50_caffe_fpn_1x_coco.py create mode 100644 configs/guided_anchoring/ga_retinanet_r50_fpn_1x_coco.py create mode 100644 configs/guided_anchoring/ga_retinanet_x101_32x4d_fpn_1x_coco.py create mode 100644 configs/guided_anchoring/ga_retinanet_x101_64x4d_fpn_1x_coco.py create mode 100644 configs/guided_anchoring/ga_rpn_r101_caffe_fpn_1x_coco.py create mode 100644 configs/guided_anchoring/ga_rpn_r50_caffe_fpn_1x_coco.py create mode 100644 configs/guided_anchoring/ga_rpn_r50_fpn_1x_coco.py create mode 100644 configs/guided_anchoring/ga_rpn_x101_32x4d_fpn_1x_coco.py create mode 100644 configs/guided_anchoring/ga_rpn_x101_64x4d_fpn_1x_coco.py create mode 100644 configs/guided_anchoring/metafile.yml create mode 100644 configs/hrnet/README.md create mode 100644 configs/hrnet/cascade_mask_rcnn_hrnetv2p_w18_20e_coco.py create mode 100644 configs/hrnet/cascade_mask_rcnn_hrnetv2p_w32_20e_coco.py create mode 100644 
configs/hrnet/cascade_mask_rcnn_hrnetv2p_w40_20e_coco.py create mode 100644 configs/hrnet/cascade_rcnn_hrnetv2p_w18_20e_coco.py create mode 100644 configs/hrnet/cascade_rcnn_hrnetv2p_w32_20e_coco.py create mode 100644 configs/hrnet/cascade_rcnn_hrnetv2p_w40_20e_coco.py create mode 100644 configs/hrnet/faster_rcnn_hrnetv2p_w18_1x_coco.py create mode 100644 configs/hrnet/faster_rcnn_hrnetv2p_w18_2x_coco.py create mode 100644 configs/hrnet/faster_rcnn_hrnetv2p_w32_1x_coco.py create mode 100644 configs/hrnet/faster_rcnn_hrnetv2p_w32_2x_coco.py create mode 100644 configs/hrnet/faster_rcnn_hrnetv2p_w40_1x_coco.py create mode 100644 configs/hrnet/faster_rcnn_hrnetv2p_w40_2x_coco.py create mode 100644 configs/hrnet/fcos_hrnetv2p_w18_gn-head_4x4_1x_coco.py create mode 100644 configs/hrnet/fcos_hrnetv2p_w18_gn-head_4x4_2x_coco.py create mode 100644 configs/hrnet/fcos_hrnetv2p_w18_gn-head_mstrain_640-800_4x4_2x_coco.py create mode 100644 configs/hrnet/fcos_hrnetv2p_w32_gn-head_4x4_1x_coco.py create mode 100644 configs/hrnet/fcos_hrnetv2p_w32_gn-head_4x4_2x_coco.py create mode 100644 configs/hrnet/fcos_hrnetv2p_w32_gn-head_mstrain_640-800_4x4_2x_coco.py create mode 100644 configs/hrnet/fcos_hrnetv2p_w40_gn-head_mstrain_640-800_4x4_2x_coco.py create mode 100644 configs/hrnet/htc_hrnetv2p_w18_20e_coco.py create mode 100644 configs/hrnet/htc_hrnetv2p_w32_20e_coco.py create mode 100644 configs/hrnet/htc_hrnetv2p_w40_20e_coco.py create mode 100644 configs/hrnet/htc_hrnetv2p_w40_28e_coco.py create mode 100644 configs/hrnet/htc_x101_64x4d_fpn_16x1_28e_coco.py create mode 100644 configs/hrnet/mask_rcnn_hrnetv2p_w18_1x_coco.py create mode 100644 configs/hrnet/mask_rcnn_hrnetv2p_w18_2x_coco.py create mode 100644 configs/hrnet/mask_rcnn_hrnetv2p_w32_1x_coco.py create mode 100644 configs/hrnet/mask_rcnn_hrnetv2p_w32_2x_coco.py create mode 100644 configs/hrnet/mask_rcnn_hrnetv2p_w40_1x_coco.py create mode 100644 configs/hrnet/mask_rcnn_hrnetv2p_w40_2x_coco.py create mode 100644 
configs/hrnet/metafile.yml create mode 100644 configs/htc/README.md create mode 100644 configs/htc/htc_r101_fpn_20e_coco.py create mode 100644 configs/htc/htc_r50_fpn_1x_coco.py create mode 100644 configs/htc/htc_r50_fpn_20e_coco.py create mode 100644 configs/htc/htc_without_semantic_r50_fpn_1x_coco.py create mode 100644 configs/htc/htc_x101_32x4d_fpn_16x1_20e_coco.py create mode 100644 configs/htc/htc_x101_64x4d_fpn_16x1_20e_coco.py create mode 100644 configs/htc/htc_x101_64x4d_fpn_dconv_c3-c5_mstrain_400_1400_16x1_20e_coco.py create mode 100644 configs/htc/metafile.yml create mode 100644 configs/instaboost/README.md create mode 100644 configs/instaboost/cascade_mask_rcnn_r101_fpn_instaboost_4x_coco.py create mode 100644 configs/instaboost/cascade_mask_rcnn_r50_fpn_instaboost_4x_coco.py create mode 100644 configs/instaboost/cascade_mask_rcnn_x101_64x4d_fpn_instaboost_4x_coco.py create mode 100644 configs/instaboost/mask_rcnn_r101_fpn_instaboost_4x_coco.py create mode 100644 configs/instaboost/mask_rcnn_r50_fpn_instaboost_4x_coco.py create mode 100644 configs/instaboost/mask_rcnn_x101_64x4d_fpn_instaboost_4x_coco.py create mode 100644 configs/instaboost/metafile.yml create mode 100644 configs/ld/README.md create mode 100644 configs/ld/ld_r101_gflv1_r101dcn_fpn_coco_2x.py create mode 100644 configs/ld/ld_r18_gflv1_r101_fpn_coco_1x.py create mode 100644 configs/ld/ld_r34_gflv1_r101_fpn_coco_1x.py create mode 100644 configs/ld/ld_r50_gflv1_r101_fpn_coco_1x.py create mode 100644 configs/ld/metafile.yml create mode 100644 configs/legacy_1.x/README.md create mode 100644 configs/legacy_1.x/cascade_mask_rcnn_r50_fpn_1x_coco_v1.py create mode 100644 configs/legacy_1.x/faster_rcnn_r50_fpn_1x_coco_v1.py create mode 100644 configs/legacy_1.x/mask_rcnn_r50_fpn_1x_coco_v1.py create mode 100644 configs/legacy_1.x/retinanet_r50_caffe_fpn_1x_coco_v1.py create mode 100644 configs/legacy_1.x/retinanet_r50_fpn_1x_coco_v1.py create mode 100644 configs/legacy_1.x/ssd300_coco_v1.py 
create mode 100644 configs/libra_rcnn/README.md create mode 100644 configs/libra_rcnn/libra_fast_rcnn_r50_fpn_1x_coco.py create mode 100644 configs/libra_rcnn/libra_faster_rcnn_r101_fpn_1x_coco.py create mode 100644 configs/libra_rcnn/libra_faster_rcnn_r50_fpn_1x_coco.py create mode 100644 configs/libra_rcnn/libra_faster_rcnn_x101_64x4d_fpn_1x_coco.py create mode 100644 configs/libra_rcnn/libra_retinanet_r50_fpn_1x_coco.py create mode 100644 configs/libra_rcnn/metafile.yml create mode 100644 configs/lvis/README.md create mode 100644 configs/lvis/mask_rcnn_r101_fpn_sample1e-3_mstrain_1x_lvis_v1.py create mode 100644 configs/lvis/mask_rcnn_r101_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py create mode 100644 configs/lvis/mask_rcnn_r50_fpn_sample1e-3_mstrain_1x_lvis_v1.py create mode 100644 configs/lvis/mask_rcnn_r50_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py create mode 100644 configs/lvis/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_1x_lvis_v1.py create mode 100644 configs/lvis/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py create mode 100644 configs/lvis/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_1x_lvis_v1.py create mode 100644 configs/lvis/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py create mode 100644 configs/mask_rcnn/README.md create mode 100644 configs/mask_rcnn/mask_rcnn_r101_caffe_fpn_1x_coco.py create mode 100644 configs/mask_rcnn/mask_rcnn_r101_caffe_fpn_mstrain-poly_3x_coco.py create mode 100644 configs/mask_rcnn/mask_rcnn_r101_fpn_1x_coco.py create mode 100644 configs/mask_rcnn/mask_rcnn_r101_fpn_2x_coco.py create mode 100644 configs/mask_rcnn/mask_rcnn_r101_fpn_mstrain-poly_3x_coco.py create mode 100644 configs/mask_rcnn/mask_rcnn_r50_caffe_c4_1x_coco.py create mode 100644 configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_1x_coco.py create mode 100644 configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_1x_coco.py create mode 100644 configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_2x_coco.py create mode 100644 
configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco.py create mode 100644 configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain_1x_coco.py create mode 100644 configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_poly_1x_coco_v1.py create mode 100644 configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py create mode 100644 configs/mask_rcnn/mask_rcnn_r50_fpn_2x_coco.py create mode 100644 configs/mask_rcnn/mask_rcnn_r50_fpn_mstrain-poly_3x_coco.py create mode 100644 configs/mask_rcnn/mask_rcnn_r50_fpn_poly_1x_coco.py create mode 100644 configs/mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco.py create mode 100644 configs/mask_rcnn/mask_rcnn_x101_32x4d_fpn_2x_coco.py create mode 100644 configs/mask_rcnn/mask_rcnn_x101_32x4d_fpn_mstrain-poly_3x_coco.py create mode 100644 configs/mask_rcnn/mask_rcnn_x101_32x8d_fpn_1x_coco.py create mode 100644 configs/mask_rcnn/mask_rcnn_x101_32x8d_fpn_mstrain-poly_1x_coco.py create mode 100644 configs/mask_rcnn/mask_rcnn_x101_32x8d_fpn_mstrain-poly_3x_coco.py create mode 100644 configs/mask_rcnn/mask_rcnn_x101_64x4d_fpn_1x_coco.py create mode 100644 configs/mask_rcnn/mask_rcnn_x101_64x4d_fpn_2x_coco.py create mode 100644 configs/mask_rcnn/mask_rcnn_x101_64x4d_fpn_mstrain-poly_3x_coco.py create mode 100644 configs/mask_rcnn/metafile.yml create mode 100644 configs/ms_rcnn/README.md create mode 100644 configs/ms_rcnn/metafile.yml create mode 100644 configs/ms_rcnn/ms_rcnn_r101_caffe_fpn_1x_coco.py create mode 100644 configs/ms_rcnn/ms_rcnn_r101_caffe_fpn_2x_coco.py create mode 100644 configs/ms_rcnn/ms_rcnn_r50_caffe_fpn_1x_coco.py create mode 100644 configs/ms_rcnn/ms_rcnn_r50_caffe_fpn_2x_coco.py create mode 100644 configs/ms_rcnn/ms_rcnn_r50_fpn_1x_coco.py create mode 100644 configs/ms_rcnn/ms_rcnn_x101_32x4d_fpn_1x_coco.py create mode 100644 configs/ms_rcnn/ms_rcnn_x101_64x4d_fpn_1x_coco.py create mode 100644 configs/ms_rcnn/ms_rcnn_x101_64x4d_fpn_2x_coco.py create mode 100644 configs/nas_fcos/README.md create mode 100644 configs/nas_fcos/metafile.yml 
create mode 100644 configs/nas_fcos/nas_fcos_fcoshead_r50_caffe_fpn_gn-head_4x4_1x_coco.py create mode 100644 configs/nas_fcos/nas_fcos_nashead_r50_caffe_fpn_gn-head_4x4_1x_coco.py create mode 100644 configs/nas_fpn/README.md create mode 100644 configs/nas_fpn/metafile.yml create mode 100644 configs/nas_fpn/retinanet_r50_fpn_crop640_50e_coco.py create mode 100644 configs/nas_fpn/retinanet_r50_nasfpn_crop640_50e_coco.py create mode 100644 configs/paa/README.md create mode 100644 configs/paa/metafile.yml create mode 100644 configs/paa/paa_r101_fpn_1x_coco.py create mode 100644 configs/paa/paa_r101_fpn_2x_coco.py create mode 100644 configs/paa/paa_r101_fpn_mstrain_3x_coco.py create mode 100644 configs/paa/paa_r50_fpn_1.5x_coco.py create mode 100644 configs/paa/paa_r50_fpn_1x_coco.py create mode 100644 configs/paa/paa_r50_fpn_2x_coco.py create mode 100644 configs/paa/paa_r50_fpn_mstrain_3x_coco.py create mode 100644 configs/pafpn/README.md create mode 100644 configs/pafpn/faster_rcnn_r50_pafpn_1x_coco.py create mode 100644 configs/pafpn/metafile.yml create mode 100644 configs/panoptic_fpn/README.md create mode 100644 configs/panoptic_fpn/metafile.yml create mode 100644 configs/panoptic_fpn/panoptic_fpn_r101_fpn_1x_coco.py create mode 100644 configs/panoptic_fpn/panoptic_fpn_r101_fpn_mstrain_3x_coco.py create mode 100644 configs/panoptic_fpn/panoptic_fpn_r50_fpn_1x_coco.py create mode 100644 configs/panoptic_fpn/panoptic_fpn_r50_fpn_mstrain_3x_coco.py create mode 100644 configs/pascal_voc/README.md create mode 100644 configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712.py create mode 100644 configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712_cocofmt.py create mode 100644 configs/pascal_voc/retinanet_r50_fpn_1x_voc0712.py create mode 100644 configs/pascal_voc/ssd300_voc0712.py create mode 100644 configs/pascal_voc/ssd512_voc0712.py create mode 100644 configs/pisa/README.md create mode 100644 configs/pisa/metafile.yml create mode 100644 
configs/pisa/pisa_faster_rcnn_r50_fpn_1x_coco.py create mode 100644 configs/pisa/pisa_faster_rcnn_x101_32x4d_fpn_1x_coco.py create mode 100644 configs/pisa/pisa_mask_rcnn_r50_fpn_1x_coco.py create mode 100644 configs/pisa/pisa_mask_rcnn_x101_32x4d_fpn_1x_coco.py create mode 100644 configs/pisa/pisa_retinanet_r50_fpn_1x_coco.py create mode 100644 configs/pisa/pisa_retinanet_x101_32x4d_fpn_1x_coco.py create mode 100644 configs/pisa/pisa_ssd300_coco.py create mode 100644 configs/pisa/pisa_ssd512_coco.py create mode 100644 configs/pix2seq/README.md create mode 100644 configs/pix2seq/metafile.yml create mode 100644 configs/pix2seq/pix2seq_r50_8x4_300e_coco.py create mode 100644 configs/point_rend/README.md create mode 100644 configs/point_rend/metafile.yml create mode 100644 configs/point_rend/point_rend_r50_caffe_fpn_mstrain_1x_coco.py create mode 100644 configs/point_rend/point_rend_r50_caffe_fpn_mstrain_3x_coco.py create mode 100644 configs/pvt/README.md create mode 100644 configs/pvt/metafile.yml create mode 100644 configs/pvt/retinanet_pvt-l_fpn_1x_coco.py create mode 100644 configs/pvt/retinanet_pvt-m_fpn_1x_coco.py create mode 100644 configs/pvt/retinanet_pvt-s_fpn_1x_coco.py create mode 100644 configs/pvt/retinanet_pvt-t_fpn_1x_coco.py create mode 100644 configs/pvt/retinanet_pvtv2-b0_fpn_1x_coco.py create mode 100644 configs/pvt/retinanet_pvtv2-b1_fpn_1x_coco.py create mode 100644 configs/pvt/retinanet_pvtv2-b2_fpn_1x_coco.py create mode 100644 configs/pvt/retinanet_pvtv2-b3_fpn_1x_coco.py create mode 100644 configs/pvt/retinanet_pvtv2-b4_fpn_1x_coco.py create mode 100644 configs/pvt/retinanet_pvtv2-b5_fpn_1x_coco.py create mode 100644 configs/queryinst/README.md create mode 100644 configs/queryinst/metafile.yml create mode 100644 configs/queryinst/queryinst_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco.py create mode 100644 configs/queryinst/queryinst_r101_fpn_mstrain_480-800_3x_coco.py create mode 100644 configs/queryinst/queryinst_r50_fpn_1x_coco.py 
create mode 100644 configs/queryinst/queryinst_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco.py create mode 100644 configs/queryinst/queryinst_r50_fpn_mstrain_480-800_3x_coco.py create mode 100644 configs/regnet/README.md create mode 100644 configs/regnet/cascade_mask_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco.py create mode 100644 configs/regnet/cascade_mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py create mode 100644 configs/regnet/cascade_mask_rcnn_regnetx-400MF_fpn_mstrain_3x_coco.py create mode 100644 configs/regnet/cascade_mask_rcnn_regnetx-4GF_fpn_mstrain_3x_coco.py create mode 100644 configs/regnet/cascade_mask_rcnn_regnetx-800MF_fpn_mstrain_3x_coco.py create mode 100644 configs/regnet/faster_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco.py create mode 100644 configs/regnet/faster_rcnn_regnetx-3.2GF_fpn_1x_coco.py create mode 100644 configs/regnet/faster_rcnn_regnetx-3.2GF_fpn_2x_coco.py create mode 100644 configs/regnet/faster_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py create mode 100644 configs/regnet/faster_rcnn_regnetx-400MF_fpn_mstrain_3x_coco.py create mode 100644 configs/regnet/faster_rcnn_regnetx-4GF_fpn_mstrain_3x_coco.py create mode 100644 configs/regnet/faster_rcnn_regnetx-800MF_fpn_mstrain_3x_coco.py create mode 100644 configs/regnet/mask_rcnn_regnetx-1.6GF_fpn_mstrain-poly_3x_coco.py create mode 100644 configs/regnet/mask_rcnn_regnetx-12GF_fpn_1x_coco.py create mode 100644 configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_1x_coco.py create mode 100644 configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_mdconv_c3-c5_1x_coco.py create mode 100644 configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py create mode 100644 configs/regnet/mask_rcnn_regnetx-400MF_fpn_mstrain-poly_3x_coco.py create mode 100644 configs/regnet/mask_rcnn_regnetx-4GF_fpn_1x_coco.py create mode 100644 configs/regnet/mask_rcnn_regnetx-4GF_fpn_mstrain-poly_3x_coco.py create mode 100644 configs/regnet/mask_rcnn_regnetx-6.4GF_fpn_1x_coco.py create mode 100644 
configs/regnet/mask_rcnn_regnetx-800MF_fpn_mstrain-poly_3x_coco.py create mode 100644 configs/regnet/mask_rcnn_regnetx-8GF_fpn_1x_coco.py create mode 100644 configs/regnet/metafile.yml create mode 100644 configs/regnet/retinanet_regnetx-1.6GF_fpn_1x_coco.py create mode 100644 configs/regnet/retinanet_regnetx-3.2GF_fpn_1x_coco.py create mode 100644 configs/regnet/retinanet_regnetx-800MF_fpn_1x_coco.py create mode 100644 configs/reppoints/README.md create mode 100644 configs/reppoints/bbox_r50_grid_center_fpn_gn-neck+head_1x_coco.py create mode 100644 configs/reppoints/bbox_r50_grid_fpn_gn-neck+head_1x_coco.py create mode 100644 configs/reppoints/metafile.yml create mode 100644 configs/reppoints/reppoints.png create mode 100644 configs/reppoints/reppoints_minmax_r50_fpn_gn-neck+head_1x_coco.py create mode 100644 configs/reppoints/reppoints_moment_r101_fpn_dconv_c3-c5_gn-neck+head_2x_coco.py create mode 100644 configs/reppoints/reppoints_moment_r101_fpn_gn-neck+head_2x_coco.py create mode 100644 configs/reppoints/reppoints_moment_r50_fpn_1x_coco.py create mode 100644 configs/reppoints/reppoints_moment_r50_fpn_gn-neck+head_1x_coco.py create mode 100644 configs/reppoints/reppoints_moment_r50_fpn_gn-neck+head_2x_coco.py create mode 100644 configs/reppoints/reppoints_moment_x101_fpn_dconv_c3-c5_gn-neck+head_2x_coco.py create mode 100644 configs/reppoints/reppoints_partial_minmax_r50_fpn_gn-neck+head_1x_coco.py create mode 100644 configs/res2net/README.md create mode 100644 configs/res2net/cascade_mask_rcnn_r2_101_fpn_20e_coco.py create mode 100644 configs/res2net/cascade_rcnn_r2_101_fpn_20e_coco.py create mode 100644 configs/res2net/faster_rcnn_r2_101_fpn_2x_coco.py create mode 100644 configs/res2net/htc_r2_101_fpn_20e_coco.py create mode 100644 configs/res2net/mask_rcnn_r2_101_fpn_2x_coco.py create mode 100644 configs/res2net/metafile.yml create mode 100644 configs/resnest/README.md create mode 100644 
configs/resnest/cascade_mask_rcnn_s101_fpn_syncbn-backbone+head_mstrain_1x_coco.py create mode 100644 configs/resnest/cascade_mask_rcnn_s50_fpn_syncbn-backbone+head_mstrain_1x_coco.py create mode 100644 configs/resnest/cascade_rcnn_s101_fpn_syncbn-backbone+head_mstrain-range_1x_coco.py create mode 100644 configs/resnest/cascade_rcnn_s50_fpn_syncbn-backbone+head_mstrain-range_1x_coco.py create mode 100644 configs/resnest/faster_rcnn_s101_fpn_syncbn-backbone+head_mstrain-range_1x_coco.py create mode 100644 configs/resnest/faster_rcnn_s50_fpn_syncbn-backbone+head_mstrain-range_1x_coco.py create mode 100644 configs/resnest/mask_rcnn_s101_fpn_syncbn-backbone+head_mstrain_1x_coco.py create mode 100644 configs/resnest/mask_rcnn_s50_fpn_syncbn-backbone+head_mstrain_1x_coco.py create mode 100644 configs/resnest/metafile.yml create mode 100644 configs/retinanet/README.md create mode 100644 configs/retinanet/metafile.yml create mode 100644 configs/retinanet/retinanet_r101_caffe_fpn_1x_coco.py create mode 100644 configs/retinanet/retinanet_r101_caffe_fpn_mstrain_3x_coco.py create mode 100644 configs/retinanet/retinanet_r101_fpn_1x_coco.py create mode 100644 configs/retinanet/retinanet_r101_fpn_2x_coco.py create mode 100644 configs/retinanet/retinanet_r101_fpn_mstrain_640-800_3x_coco.py create mode 100644 configs/retinanet/retinanet_r50_caffe_fpn_1x_coco.py create mode 100644 configs/retinanet/retinanet_r50_caffe_fpn_mstrain_1x_coco.py create mode 100644 configs/retinanet/retinanet_r50_caffe_fpn_mstrain_2x_coco.py create mode 100644 configs/retinanet/retinanet_r50_caffe_fpn_mstrain_3x_coco.py create mode 100644 configs/retinanet/retinanet_r50_fpn_1x_coco.py create mode 100644 configs/retinanet/retinanet_r50_fpn_2x_coco.py create mode 100644 configs/retinanet/retinanet_r50_fpn_90k_coco.py create mode 100644 configs/retinanet/retinanet_r50_fpn_mstrain_640-800_3x_coco.py create mode 100644 configs/retinanet/retinanet_x101_32x4d_fpn_1x_coco.py create mode 100644 
configs/retinanet/retinanet_x101_32x4d_fpn_2x_coco.py create mode 100644 configs/retinanet/retinanet_x101_64x4d_fpn_1x_coco.py create mode 100644 configs/retinanet/retinanet_x101_64x4d_fpn_2x_coco.py create mode 100644 configs/retinanet/retinanet_x101_64x4d_fpn_mstrain_640-800_3x_coco.py create mode 100644 configs/rpn/README.md create mode 100644 configs/rpn/rpn_r101_caffe_fpn_1x_coco.py create mode 100644 configs/rpn/rpn_r101_fpn_1x_coco.py create mode 100644 configs/rpn/rpn_r101_fpn_2x_coco.py create mode 100644 configs/rpn/rpn_r50_caffe_c4_1x_coco.py create mode 100644 configs/rpn/rpn_r50_caffe_fpn_1x_coco.py create mode 100644 configs/rpn/rpn_r50_fpn_1x_coco.py create mode 100644 configs/rpn/rpn_r50_fpn_2x_coco.py create mode 100644 configs/rpn/rpn_x101_32x4d_fpn_1x_coco.py create mode 100644 configs/rpn/rpn_x101_32x4d_fpn_2x_coco.py create mode 100644 configs/rpn/rpn_x101_64x4d_fpn_1x_coco.py create mode 100644 configs/rpn/rpn_x101_64x4d_fpn_2x_coco.py create mode 100644 configs/sabl/README.md create mode 100644 configs/sabl/metafile.yml create mode 100644 configs/sabl/sabl_cascade_rcnn_r101_fpn_1x_coco.py create mode 100644 configs/sabl/sabl_cascade_rcnn_r50_fpn_1x_coco.py create mode 100644 configs/sabl/sabl_faster_rcnn_r101_fpn_1x_coco.py create mode 100644 configs/sabl/sabl_faster_rcnn_r50_fpn_1x_coco.py create mode 100644 configs/sabl/sabl_retinanet_r101_fpn_1x_coco.py create mode 100644 configs/sabl/sabl_retinanet_r101_fpn_gn_1x_coco.py create mode 100644 configs/sabl/sabl_retinanet_r101_fpn_gn_2x_ms_480_960_coco.py create mode 100644 configs/sabl/sabl_retinanet_r101_fpn_gn_2x_ms_640_800_coco.py create mode 100644 configs/sabl/sabl_retinanet_r50_fpn_1x_coco.py create mode 100644 configs/sabl/sabl_retinanet_r50_fpn_gn_1x_coco.py create mode 100644 configs/scnet/README.md create mode 100644 configs/scnet/metafile.yml create mode 100644 configs/scnet/scnet_r101_fpn_20e_coco.py create mode 100644 configs/scnet/scnet_r50_fpn_1x_coco.py create mode 100644 
configs/scnet/scnet_r50_fpn_20e_coco.py create mode 100644 configs/scnet/scnet_x101_64x4d_fpn_20e_coco.py create mode 100644 configs/scnet/scnet_x101_64x4d_fpn_8x1_20e_coco.py create mode 100644 configs/scratch/README.md create mode 100644 configs/scratch/faster_rcnn_r50_fpn_gn-all_scratch_6x_coco.py create mode 100644 configs/scratch/mask_rcnn_r50_fpn_gn-all_scratch_6x_coco.py create mode 100644 configs/scratch/metafile.yml create mode 100644 configs/seesaw_loss/README.md create mode 100644 configs/seesaw_loss/cascade_mask_rcnn_r101_fpn_random_seesaw_loss_mstrain_2x_lvis_v1.py create mode 100644 configs/seesaw_loss/cascade_mask_rcnn_r101_fpn_random_seesaw_loss_normed_mask_mstrain_2x_lvis_v1.py create mode 100644 configs/seesaw_loss/cascade_mask_rcnn_r101_fpn_sample1e-3_seesaw_loss_mstrain_2x_lvis_v1.py create mode 100644 configs/seesaw_loss/cascade_mask_rcnn_r101_fpn_sample1e-3_seesaw_loss_normed_mask_mstrain_2x_lvis_v1.py create mode 100644 configs/seesaw_loss/mask_rcnn_r101_fpn_random_seesaw_loss_mstrain_2x_lvis_v1.py create mode 100644 configs/seesaw_loss/mask_rcnn_r101_fpn_random_seesaw_loss_normed_mask_mstrain_2x_lvis_v1.py create mode 100644 configs/seesaw_loss/mask_rcnn_r101_fpn_sample1e-3_seesaw_loss_mstrain_2x_lvis_v1.py create mode 100644 configs/seesaw_loss/mask_rcnn_r101_fpn_sample1e-3_seesaw_loss_normed_mask_mstrain_2x_lvis_v1.py create mode 100644 configs/seesaw_loss/mask_rcnn_r50_fpn_random_seesaw_loss_mstrain_2x_lvis_v1.py create mode 100644 configs/seesaw_loss/mask_rcnn_r50_fpn_random_seesaw_loss_normed_mask_mstrain_2x_lvis_v1.py create mode 100644 configs/seesaw_loss/mask_rcnn_r50_fpn_sample1e-3_seesaw_loss_mstrain_2x_lvis_v1.py create mode 100644 configs/seesaw_loss/mask_rcnn_r50_fpn_sample1e-3_seesaw_loss_normed_mask_mstrain_2x_lvis_v1.py create mode 100644 configs/selfsup_pretrain/README.md create mode 100644 configs/selfsup_pretrain/mask_rcnn_r50_fpn_mocov2-pretrain_1x_coco.py create mode 100644 
configs/selfsup_pretrain/mask_rcnn_r50_fpn_mocov2-pretrain_ms-2x_coco.py create mode 100644 configs/selfsup_pretrain/mask_rcnn_r50_fpn_swav-pretrain_1x_coco.py create mode 100644 configs/selfsup_pretrain/mask_rcnn_r50_fpn_swav-pretrain_ms-2x_coco.py create mode 100644 configs/solo/README.md create mode 100644 configs/solo/decoupled_solo_light_r50_fpn_3x_coco.py create mode 100644 configs/solo/decoupled_solo_r50_fpn_1x_coco.py create mode 100644 configs/solo/decoupled_solo_r50_fpn_3x_coco.py create mode 100644 configs/solo/metafile.yml create mode 100644 configs/solo/solo_r50_fpn_1x_coco.py create mode 100644 configs/solo/solo_r50_fpn_3x_coco.py create mode 100644 configs/sparse_rcnn/README.md create mode 100644 configs/sparse_rcnn/metafile.yml create mode 100644 configs/sparse_rcnn/sparse_rcnn_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco.py create mode 100644 configs/sparse_rcnn/sparse_rcnn_r101_fpn_mstrain_480-800_3x_coco.py create mode 100644 configs/sparse_rcnn/sparse_rcnn_r50_fpn_1x_coco.py create mode 100644 configs/sparse_rcnn/sparse_rcnn_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco.py create mode 100644 configs/sparse_rcnn/sparse_rcnn_r50_fpn_mstrain_480-800_3x_coco.py create mode 100644 configs/ssd/README.md create mode 100644 configs/ssd/metafile.yml create mode 100644 configs/ssd/ssd300_coco.py create mode 100644 configs/ssd/ssd512_coco.py create mode 100644 configs/ssd/ssdlite_mobilenetv2_scratch_600e_coco.py create mode 100644 configs/strong_baselines/README.md create mode 100644 configs/strong_baselines/mask_rcnn_r50_caffe_fpn_syncbn-all_rpn-2conv_lsj_100e_coco.py create mode 100644 configs/strong_baselines/mask_rcnn_r50_caffe_fpn_syncbn-all_rpn-2conv_lsj_100e_fp16_coco.py create mode 100644 configs/strong_baselines/mask_rcnn_r50_caffe_fpn_syncbn-all_rpn-2conv_lsj_400e_coco.py create mode 100644 configs/strong_baselines/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_lsj_100e_coco.py create mode 100644 
configs/strong_baselines/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_lsj_100e_fp16_coco.py create mode 100644 configs/strong_baselines/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_lsj_50e_coco.py create mode 100644 configs/swin/README.md create mode 100644 configs/swin/mask_rcnn_swin-s-p4-w7_fpn_fp16_ms-crop-3x_coco.py create mode 100644 configs/swin/mask_rcnn_swin-t-p4-w7_fpn_1x_coco.py create mode 100644 configs/swin/mask_rcnn_swin-t-p4-w7_fpn_fp16_ms-crop-3x_coco.py create mode 100644 configs/swin/mask_rcnn_swin-t-p4-w7_fpn_ms-crop-3x_coco.py create mode 100644 configs/swin/metafile.yml create mode 100644 configs/tridentnet/README.md create mode 100644 configs/tridentnet/metafile.yml create mode 100644 configs/tridentnet/tridentnet_r50_caffe_1x_coco.py create mode 100644 configs/tridentnet/tridentnet_r50_caffe_mstrain_1x_coco.py create mode 100644 configs/tridentnet/tridentnet_r50_caffe_mstrain_3x_coco.py create mode 100644 configs/vfnet/README.md create mode 100644 configs/vfnet/metafile.yml create mode 100644 configs/vfnet/vfnet_r101_fpn_1x_coco.py create mode 100644 configs/vfnet/vfnet_r101_fpn_2x_coco.py create mode 100644 configs/vfnet/vfnet_r101_fpn_mdconv_c3-c5_mstrain_2x_coco.py create mode 100644 configs/vfnet/vfnet_r101_fpn_mstrain_2x_coco.py create mode 100644 configs/vfnet/vfnet_r2_101_fpn_mdconv_c3-c5_mstrain_2x_coco.py create mode 100644 configs/vfnet/vfnet_r2_101_fpn_mstrain_2x_coco.py create mode 100644 configs/vfnet/vfnet_r50_fpn_1x_coco.py create mode 100644 configs/vfnet/vfnet_r50_fpn_mdconv_c3-c5_mstrain_2x_coco.py create mode 100644 configs/vfnet/vfnet_r50_fpn_mstrain_2x_coco.py create mode 100644 configs/vfnet/vfnet_x101_32x4d_fpn_mdconv_c3-c5_mstrain_2x_coco.py create mode 100644 configs/vfnet/vfnet_x101_32x4d_fpn_mstrain_2x_coco.py create mode 100644 configs/vfnet/vfnet_x101_64x4d_fpn_mdconv_c3-c5_mstrain_2x_coco.py create mode 100644 configs/vfnet/vfnet_x101_64x4d_fpn_mstrain_2x_coco.py create mode 100644 configs/wider_face/README.md create mode 
100644 configs/wider_face/ssd300_wider_face.py create mode 100644 configs/yolact/README.md create mode 100644 configs/yolact/metafile.yml create mode 100644 configs/yolact/yolact_r101_1x8_coco.py create mode 100644 configs/yolact/yolact_r50_1x8_coco.py create mode 100644 configs/yolact/yolact_r50_8x8_coco.py create mode 100644 configs/yolo/README.md create mode 100644 configs/yolo/metafile.yml create mode 100644 configs/yolo/yolov3_d53_320_273e_coco.py create mode 100644 configs/yolo/yolov3_d53_fp16_mstrain-608_273e_coco.py create mode 100644 configs/yolo/yolov3_d53_mstrain-416_273e_coco.py create mode 100644 configs/yolo/yolov3_d53_mstrain-608_273e_coco.py create mode 100644 configs/yolo/yolov3_mobilenetv2_320_300e_coco.py create mode 100644 configs/yolo/yolov3_mobilenetv2_mstrain-416_300e_coco.py create mode 100644 configs/yolof/README.md create mode 100644 configs/yolof/metafile.yml create mode 100644 configs/yolof/yolof_r50_c5_8x8_1x_coco.py create mode 100644 configs/yolof/yolof_r50_c5_8x8_iter-1x_coco.py create mode 100644 configs/yolox/README.md create mode 100644 configs/yolox/metafile.yml create mode 100644 configs/yolox/yolox_l_8x8_300e_coco.py create mode 100644 configs/yolox/yolox_m_8x8_300e_coco.py create mode 100644 configs/yolox/yolox_nano_8x8_300e_coco.py create mode 100644 configs/yolox/yolox_s_8x8_300e_coco.py create mode 100644 configs/yolox/yolox_tiny_8x8_300e_coco.py create mode 100644 configs/yolox/yolox_x_8x8_300e_coco.py create mode 100644 demo/MMDet_Tutorial.ipynb create mode 100644 demo/create_result_gif.py create mode 100644 demo/demo.jpg create mode 100644 demo/demo.mp4 create mode 100644 demo/image_demo.py create mode 100644 demo/inference_demo.ipynb create mode 100644 demo/video_demo.py create mode 100644 demo/webcam_demo.py create mode 100644 docker/Dockerfile create mode 100644 docker/serve/Dockerfile create mode 100644 docker/serve/config.properties create mode 100644 docker/serve/entrypoint.sh create mode 100644 
docs/1_exist_data_model.md create mode 100644 docs/2_new_data_model.md create mode 100644 docs/3_exist_data_new_model.md create mode 100644 docs/Makefile create mode 100644 docs/_static/css/readthedocs.css create mode 100644 docs/_static/image/mmdet-logo.png create mode 100644 docs/api.rst create mode 100644 docs/changelog.md create mode 100644 docs/compatibility.md create mode 100644 docs/conf.py create mode 100644 docs/conventions.md create mode 100644 docs/faq.md create mode 100644 docs/get_started.md create mode 100644 docs/index.rst create mode 100644 docs/make.bat create mode 100644 docs/model_zoo.md create mode 100644 docs/projects.md create mode 100644 docs/robustness_benchmarking.md create mode 100644 docs/stat.py create mode 100644 docs/switch_language.md create mode 100644 docs/tutorials/config.md create mode 100644 docs/tutorials/customize_dataset.md create mode 100644 docs/tutorials/customize_losses.md create mode 100644 docs/tutorials/customize_models.md create mode 100644 docs/tutorials/customize_runtime.md create mode 100644 docs/tutorials/data_pipeline.md create mode 100644 docs/tutorials/finetune.md create mode 100644 docs/tutorials/index.rst create mode 100644 docs/tutorials/init_cfg.md create mode 100644 docs/tutorials/onnx2tensorrt.md create mode 100644 docs/tutorials/pytorch2onnx.md create mode 100644 docs/useful_tools.md create mode 100644 docs_zh-CN/1_exist_data_model.md create mode 100644 docs_zh-CN/2_new_data_model.md create mode 100644 docs_zh-CN/3_exist_data_new_model.md create mode 100644 docs_zh-CN/Makefile create mode 100644 docs_zh-CN/_static/css/readthedocs.css create mode 100644 docs_zh-CN/_static/image/mmdet-logo.png create mode 100644 docs_zh-CN/api.rst create mode 100644 docs_zh-CN/compatibility.md create mode 100644 docs_zh-CN/conf.py create mode 100644 docs_zh-CN/conventions.md create mode 100644 docs_zh-CN/faq.md create mode 100644 docs_zh-CN/get_started.md create mode 100644 docs_zh-CN/index.rst create mode 100644 
docs_zh-CN/make.bat create mode 100644 docs_zh-CN/model_zoo.md create mode 100644 docs_zh-CN/projects.md create mode 100644 docs_zh-CN/robustness_benchmarking.md create mode 100644 docs_zh-CN/stat.py create mode 100644 docs_zh-CN/switch_language.md create mode 100644 docs_zh-CN/tutorials/config.md create mode 100644 docs_zh-CN/tutorials/customize_dataset.md create mode 100644 docs_zh-CN/tutorials/customize_losses.md create mode 100644 docs_zh-CN/tutorials/customize_models.md create mode 100644 docs_zh-CN/tutorials/customize_runtime.md create mode 100644 docs_zh-CN/tutorials/data_pipeline.md create mode 100644 docs_zh-CN/tutorials/finetune.md create mode 100644 docs_zh-CN/tutorials/index.rst create mode 100644 docs_zh-CN/tutorials/onnx2tensorrt.md create mode 100644 docs_zh-CN/tutorials/pytorch2onnx.md create mode 100644 docs_zh-CN/useful_tools.md create mode 100644 mmdet/__init__.py create mode 100644 mmdet/apis/__init__.py create mode 100644 mmdet/apis/inference.py create mode 100644 mmdet/apis/test.py create mode 100644 mmdet/apis/train.py create mode 100644 mmdet/core/__init__.py create mode 100644 mmdet/core/anchor/__init__.py create mode 100644 mmdet/core/anchor/anchor_generator.py create mode 100644 mmdet/core/anchor/builder.py create mode 100644 mmdet/core/anchor/point_generator.py create mode 100644 mmdet/core/anchor/utils.py create mode 100644 mmdet/core/bbox/__init__.py create mode 100644 mmdet/core/bbox/assigners/__init__.py create mode 100644 mmdet/core/bbox/assigners/approx_max_iou_assigner.py create mode 100644 mmdet/core/bbox/assigners/assign_result.py create mode 100644 mmdet/core/bbox/assigners/atss_assigner.py create mode 100644 mmdet/core/bbox/assigners/base_assigner.py create mode 100644 mmdet/core/bbox/assigners/center_region_assigner.py create mode 100644 mmdet/core/bbox/assigners/grid_assigner.py create mode 100644 mmdet/core/bbox/assigners/hungarian_assigner.py create mode 100644 mmdet/core/bbox/assigners/max_iou_assigner.py create mode 
100644 mmdet/core/bbox/assigners/point_assigner.py create mode 100644 mmdet/core/bbox/assigners/region_assigner.py create mode 100644 mmdet/core/bbox/assigners/sim_ota_assigner.py create mode 100644 mmdet/core/bbox/assigners/uniform_assigner.py create mode 100644 mmdet/core/bbox/builder.py create mode 100644 mmdet/core/bbox/coder/__init__.py create mode 100644 mmdet/core/bbox/coder/base_bbox_coder.py create mode 100644 mmdet/core/bbox/coder/bucketing_bbox_coder.py create mode 100644 mmdet/core/bbox/coder/delta_xywh_bbox_coder.py create mode 100644 mmdet/core/bbox/coder/distance_point_bbox_coder.py create mode 100644 mmdet/core/bbox/coder/legacy_delta_xywh_bbox_coder.py create mode 100644 mmdet/core/bbox/coder/pseudo_bbox_coder.py create mode 100644 mmdet/core/bbox/coder/tblr_bbox_coder.py create mode 100644 mmdet/core/bbox/coder/yolo_bbox_coder.py create mode 100644 mmdet/core/bbox/demodata.py create mode 100644 mmdet/core/bbox/iou_calculators/__init__.py create mode 100644 mmdet/core/bbox/iou_calculators/builder.py create mode 100644 mmdet/core/bbox/iou_calculators/iou2d_calculator.py create mode 100644 mmdet/core/bbox/match_costs/__init__.py create mode 100644 mmdet/core/bbox/match_costs/builder.py create mode 100644 mmdet/core/bbox/match_costs/match_cost.py create mode 100644 mmdet/core/bbox/samplers/__init__.py create mode 100644 mmdet/core/bbox/samplers/base_sampler.py create mode 100644 mmdet/core/bbox/samplers/combined_sampler.py create mode 100644 mmdet/core/bbox/samplers/instance_balanced_pos_sampler.py create mode 100644 mmdet/core/bbox/samplers/iou_balanced_neg_sampler.py create mode 100644 mmdet/core/bbox/samplers/ohem_sampler.py create mode 100644 mmdet/core/bbox/samplers/pseudo_sampler.py create mode 100644 mmdet/core/bbox/samplers/random_sampler.py create mode 100644 mmdet/core/bbox/samplers/sampling_result.py create mode 100644 mmdet/core/bbox/samplers/score_hlr_sampler.py create mode 100644 mmdet/core/bbox/transforms.py create mode 100644 
mmdet/core/data_structures/__init__.py create mode 100644 mmdet/core/data_structures/general_data.py create mode 100644 mmdet/core/data_structures/instance_data.py create mode 100644 mmdet/core/evaluation/__init__.py create mode 100644 mmdet/core/evaluation/bbox_overlaps.py create mode 100644 mmdet/core/evaluation/class_names.py create mode 100644 mmdet/core/evaluation/eval_hooks.py create mode 100644 mmdet/core/evaluation/mean_ap.py create mode 100644 mmdet/core/evaluation/recall.py create mode 100644 mmdet/core/export/__init__.py create mode 100644 mmdet/core/export/model_wrappers.py create mode 100644 mmdet/core/export/onnx_helper.py create mode 100644 mmdet/core/export/pytorch2onnx.py create mode 100644 mmdet/core/hook/__init__.py create mode 100644 mmdet/core/hook/checkloss_hook.py create mode 100644 mmdet/core/hook/ema.py create mode 100644 mmdet/core/hook/pix2seq_lrupdater_hook.py create mode 100644 mmdet/core/hook/sync_norm_hook.py create mode 100644 mmdet/core/hook/sync_random_size_hook.py create mode 100644 mmdet/core/hook/yolox_lrupdater_hook.py create mode 100644 mmdet/core/hook/yolox_mode_switch_hook.py create mode 100644 mmdet/core/mask/__init__.py create mode 100644 mmdet/core/mask/mask_target.py create mode 100644 mmdet/core/mask/structures.py create mode 100644 mmdet/core/mask/utils.py create mode 100644 mmdet/core/post_processing/__init__.py create mode 100644 mmdet/core/post_processing/bbox_nms.py create mode 100644 mmdet/core/post_processing/matrix_nms.py create mode 100644 mmdet/core/post_processing/merge_augs.py create mode 100644 mmdet/core/utils/__init__.py create mode 100644 mmdet/core/utils/dist_utils.py create mode 100644 mmdet/core/utils/misc.py create mode 100644 mmdet/core/visualization/__init__.py create mode 100644 mmdet/core/visualization/image.py create mode 100644 mmdet/datasets/__init__.py create mode 100644 mmdet/datasets/api_wrappers/__init__.py create mode 100644 mmdet/datasets/api_wrappers/coco_api.py create mode 100644 
mmdet/datasets/builder.py create mode 100644 mmdet/datasets/cityscapes.py create mode 100644 mmdet/datasets/coco.py create mode 100644 mmdet/datasets/coco_panoptic.py create mode 100644 mmdet/datasets/coco_pix2seq.py create mode 100644 mmdet/datasets/custom.py create mode 100644 mmdet/datasets/dataset_wrappers.py create mode 100644 mmdet/datasets/deepfashion.py create mode 100644 mmdet/datasets/lvis.py create mode 100644 mmdet/datasets/pipelines/__init__.py create mode 100644 mmdet/datasets/pipelines/auto_augment.py create mode 100644 mmdet/datasets/pipelines/compose.py create mode 100644 mmdet/datasets/pipelines/formating.py create mode 100644 mmdet/datasets/pipelines/instaboost.py create mode 100644 mmdet/datasets/pipelines/loading.py create mode 100644 mmdet/datasets/pipelines/test_time_aug.py create mode 100644 mmdet/datasets/pipelines/transforms.py create mode 100644 mmdet/datasets/samplers/__init__.py create mode 100644 mmdet/datasets/samplers/distributed_sampler.py create mode 100644 mmdet/datasets/samplers/group_sampler.py create mode 100644 mmdet/datasets/samplers/infinite_sampler.py create mode 100644 mmdet/datasets/utils.py create mode 100644 mmdet/datasets/voc.py create mode 100644 mmdet/datasets/wider_face.py create mode 100644 mmdet/datasets/xml_style.py create mode 100644 mmdet/models/__init__.py create mode 100644 mmdet/models/backbones/__init__.py create mode 100644 mmdet/models/backbones/csp_darknet.py create mode 100644 mmdet/models/backbones/darknet.py create mode 100644 mmdet/models/backbones/detectors_resnet.py create mode 100644 mmdet/models/backbones/detectors_resnext.py create mode 100644 mmdet/models/backbones/hourglass.py create mode 100644 mmdet/models/backbones/hrnet.py create mode 100644 mmdet/models/backbones/mobilenet_v2.py create mode 100644 mmdet/models/backbones/pvt.py create mode 100644 mmdet/models/backbones/regnet.py create mode 100644 mmdet/models/backbones/res2net.py create mode 100644 mmdet/models/backbones/resnest.py create 
mode 100644 mmdet/models/backbones/resnet.py create mode 100644 mmdet/models/backbones/resnext.py create mode 100644 mmdet/models/backbones/ssd_vgg.py create mode 100644 mmdet/models/backbones/swin.py create mode 100644 mmdet/models/backbones/trident_resnet.py create mode 100644 mmdet/models/builder.py create mode 100644 mmdet/models/dense_heads/__init__.py create mode 100644 mmdet/models/dense_heads/anchor_free_head.py create mode 100644 mmdet/models/dense_heads/anchor_head.py create mode 100644 mmdet/models/dense_heads/atss_head.py create mode 100644 mmdet/models/dense_heads/autoassign_head.py create mode 100644 mmdet/models/dense_heads/base_dense_head.py create mode 100644 mmdet/models/dense_heads/base_mask_head.py create mode 100644 mmdet/models/dense_heads/cascade_rpn_head.py create mode 100644 mmdet/models/dense_heads/centernet_head.py create mode 100644 mmdet/models/dense_heads/centripetal_head.py create mode 100644 mmdet/models/dense_heads/corner_head.py create mode 100644 mmdet/models/dense_heads/deformable_detr_head.py create mode 100644 mmdet/models/dense_heads/dense_test_mixins.py create mode 100644 mmdet/models/dense_heads/detr_head.py create mode 100644 mmdet/models/dense_heads/embedding_rpn_head.py create mode 100644 mmdet/models/dense_heads/fcos_head.py create mode 100644 mmdet/models/dense_heads/fovea_head.py create mode 100644 mmdet/models/dense_heads/free_anchor_retina_head.py create mode 100644 mmdet/models/dense_heads/fsaf_head.py create mode 100644 mmdet/models/dense_heads/ga_retina_head.py create mode 100644 mmdet/models/dense_heads/ga_rpn_head.py create mode 100644 mmdet/models/dense_heads/gfl_head.py create mode 100644 mmdet/models/dense_heads/guided_anchor_head.py create mode 100644 mmdet/models/dense_heads/ld_head.py create mode 100644 mmdet/models/dense_heads/nasfcos_head.py create mode 100644 mmdet/models/dense_heads/paa_head.py create mode 100644 mmdet/models/dense_heads/pisa_retinanet_head.py create mode 100644 
mmdet/models/dense_heads/pisa_ssd_head.py create mode 100644 mmdet/models/dense_heads/pix2seq_head.py create mode 100644 mmdet/models/dense_heads/reppoints_head.py create mode 100644 mmdet/models/dense_heads/retina_head.py create mode 100644 mmdet/models/dense_heads/retina_sepbn_head.py create mode 100644 mmdet/models/dense_heads/rpn_head.py create mode 100644 mmdet/models/dense_heads/sabl_retina_head.py create mode 100644 mmdet/models/dense_heads/solo_head.py create mode 100644 mmdet/models/dense_heads/ssd_head.py create mode 100644 mmdet/models/dense_heads/vfnet_head.py create mode 100644 mmdet/models/dense_heads/yolact_head.py create mode 100644 mmdet/models/dense_heads/yolo_head.py create mode 100644 mmdet/models/dense_heads/yolof_head.py create mode 100644 mmdet/models/dense_heads/yolox_head.py create mode 100644 mmdet/models/detectors/__init__.py create mode 100644 mmdet/models/detectors/atss.py create mode 100644 mmdet/models/detectors/autoassign.py create mode 100644 mmdet/models/detectors/base.py create mode 100644 mmdet/models/detectors/cascade_rcnn.py create mode 100644 mmdet/models/detectors/centernet.py create mode 100644 mmdet/models/detectors/cornernet.py create mode 100644 mmdet/models/detectors/deformable_detr.py create mode 100644 mmdet/models/detectors/detr.py create mode 100644 mmdet/models/detectors/fast_rcnn.py create mode 100644 mmdet/models/detectors/faster_rcnn.py create mode 100644 mmdet/models/detectors/fcos.py create mode 100644 mmdet/models/detectors/fovea.py create mode 100644 mmdet/models/detectors/fsaf.py create mode 100644 mmdet/models/detectors/gfl.py create mode 100644 mmdet/models/detectors/grid_rcnn.py create mode 100644 mmdet/models/detectors/htc.py create mode 100644 mmdet/models/detectors/kd_one_stage.py create mode 100644 mmdet/models/detectors/mask_rcnn.py create mode 100644 mmdet/models/detectors/mask_scoring_rcnn.py create mode 100644 mmdet/models/detectors/nasfcos.py create mode 100644 mmdet/models/detectors/paa.py 
create mode 100644 mmdet/models/detectors/panoptic_fpn.py create mode 100644 mmdet/models/detectors/panoptic_two_stage_segmentor.py create mode 100644 mmdet/models/detectors/pix2seq.py create mode 100644 mmdet/models/detectors/point_rend.py create mode 100644 mmdet/models/detectors/queryinst.py create mode 100644 mmdet/models/detectors/reppoints_detector.py create mode 100644 mmdet/models/detectors/retinanet.py create mode 100644 mmdet/models/detectors/rpn.py create mode 100644 mmdet/models/detectors/scnet.py create mode 100644 mmdet/models/detectors/single_stage.py create mode 100644 mmdet/models/detectors/single_stage_instance_seg.py create mode 100644 mmdet/models/detectors/solo.py create mode 100644 mmdet/models/detectors/sparse_rcnn.py create mode 100644 mmdet/models/detectors/trident_faster_rcnn.py create mode 100644 mmdet/models/detectors/two_stage.py create mode 100644 mmdet/models/detectors/vfnet.py create mode 100644 mmdet/models/detectors/yolact.py create mode 100644 mmdet/models/detectors/yolo.py create mode 100644 mmdet/models/detectors/yolof.py create mode 100644 mmdet/models/detectors/yolox.py create mode 100644 mmdet/models/losses/__init__.py create mode 100644 mmdet/models/losses/accuracy.py create mode 100644 mmdet/models/losses/ae_loss.py create mode 100644 mmdet/models/losses/balanced_l1_loss.py create mode 100644 mmdet/models/losses/cross_entropy_loss.py create mode 100644 mmdet/models/losses/dice_loss.py create mode 100644 mmdet/models/losses/focal_loss.py create mode 100644 mmdet/models/losses/gaussian_focal_loss.py create mode 100644 mmdet/models/losses/gfocal_loss.py create mode 100644 mmdet/models/losses/ghm_loss.py create mode 100644 mmdet/models/losses/iou_loss.py create mode 100644 mmdet/models/losses/kd_loss.py create mode 100644 mmdet/models/losses/mse_loss.py create mode 100644 mmdet/models/losses/pisa_loss.py create mode 100644 mmdet/models/losses/seesaw_loss.py create mode 100644 mmdet/models/losses/smooth_l1_loss.py create mode 
100644 mmdet/models/losses/utils.py create mode 100644 mmdet/models/losses/varifocal_loss.py create mode 100644 mmdet/models/necks/__init__.py create mode 100644 mmdet/models/necks/bfp.py create mode 100644 mmdet/models/necks/channel_mapper.py create mode 100644 mmdet/models/necks/ct_resnet_neck.py create mode 100644 mmdet/models/necks/dilated_encoder.py create mode 100644 mmdet/models/necks/fpg.py create mode 100644 mmdet/models/necks/fpn.py create mode 100644 mmdet/models/necks/fpn_carafe.py create mode 100644 mmdet/models/necks/hrfpn.py create mode 100644 mmdet/models/necks/nas_fpn.py create mode 100644 mmdet/models/necks/nasfcos_fpn.py create mode 100644 mmdet/models/necks/pafpn.py create mode 100644 mmdet/models/necks/rfp.py create mode 100644 mmdet/models/necks/ssd_neck.py create mode 100644 mmdet/models/necks/yolo_neck.py create mode 100644 mmdet/models/necks/yolox_pafpn.py create mode 100644 mmdet/models/plugins/__init__.py create mode 100644 mmdet/models/plugins/dropblock.py create mode 100644 mmdet/models/roi_heads/__init__.py create mode 100644 mmdet/models/roi_heads/base_roi_head.py create mode 100644 mmdet/models/roi_heads/bbox_heads/__init__.py create mode 100644 mmdet/models/roi_heads/bbox_heads/bbox_head.py create mode 100644 mmdet/models/roi_heads/bbox_heads/convfc_bbox_head.py create mode 100644 mmdet/models/roi_heads/bbox_heads/dii_head.py create mode 100644 mmdet/models/roi_heads/bbox_heads/double_bbox_head.py create mode 100644 mmdet/models/roi_heads/bbox_heads/sabl_head.py create mode 100644 mmdet/models/roi_heads/bbox_heads/scnet_bbox_head.py create mode 100644 mmdet/models/roi_heads/cascade_roi_head.py create mode 100644 mmdet/models/roi_heads/double_roi_head.py create mode 100644 mmdet/models/roi_heads/dynamic_roi_head.py create mode 100644 mmdet/models/roi_heads/grid_roi_head.py create mode 100644 mmdet/models/roi_heads/htc_roi_head.py create mode 100644 mmdet/models/roi_heads/mask_heads/__init__.py create mode 100644 
mmdet/models/roi_heads/mask_heads/coarse_mask_head.py create mode 100644 mmdet/models/roi_heads/mask_heads/dynamic_mask_head.py create mode 100644 mmdet/models/roi_heads/mask_heads/fcn_mask_head.py create mode 100644 mmdet/models/roi_heads/mask_heads/feature_relay_head.py create mode 100644 mmdet/models/roi_heads/mask_heads/fused_semantic_head.py create mode 100644 mmdet/models/roi_heads/mask_heads/global_context_head.py create mode 100644 mmdet/models/roi_heads/mask_heads/grid_head.py create mode 100644 mmdet/models/roi_heads/mask_heads/htc_mask_head.py create mode 100644 mmdet/models/roi_heads/mask_heads/mask_point_head.py create mode 100644 mmdet/models/roi_heads/mask_heads/maskiou_head.py create mode 100644 mmdet/models/roi_heads/mask_heads/scnet_mask_head.py create mode 100644 mmdet/models/roi_heads/mask_heads/scnet_semantic_head.py create mode 100644 mmdet/models/roi_heads/mask_scoring_roi_head.py create mode 100644 mmdet/models/roi_heads/pisa_roi_head.py create mode 100644 mmdet/models/roi_heads/point_rend_roi_head.py create mode 100644 mmdet/models/roi_heads/roi_extractors/__init__.py create mode 100644 mmdet/models/roi_heads/roi_extractors/base_roi_extractor.py create mode 100644 mmdet/models/roi_heads/roi_extractors/generic_roi_extractor.py create mode 100644 mmdet/models/roi_heads/roi_extractors/single_level_roi_extractor.py create mode 100644 mmdet/models/roi_heads/scnet_roi_head.py create mode 100644 mmdet/models/roi_heads/shared_heads/__init__.py create mode 100644 mmdet/models/roi_heads/shared_heads/res_layer.py create mode 100644 mmdet/models/roi_heads/sparse_roi_head.py create mode 100644 mmdet/models/roi_heads/standard_roi_head.py create mode 100644 mmdet/models/roi_heads/test_mixins.py create mode 100644 mmdet/models/roi_heads/trident_roi_head.py create mode 100644 mmdet/models/seg_heads/__init__.py create mode 100644 mmdet/models/seg_heads/base_semantic_head.py create mode 100644 mmdet/models/seg_heads/panoptic_fpn_head.py create mode 100644 
mmdet/models/seg_heads/panoptic_fusion_heads/__init__.py create mode 100644 mmdet/models/seg_heads/panoptic_fusion_heads/base_panoptic_fusion_head.py create mode 100644 mmdet/models/seg_heads/panoptic_fusion_heads/heuristic_fusion_head.py create mode 100644 mmdet/models/utils/__init__.py create mode 100644 mmdet/models/utils/brick_wrappers.py create mode 100644 mmdet/models/utils/builder.py create mode 100644 mmdet/models/utils/ckpt_convert.py create mode 100644 mmdet/models/utils/conv_upsample.py create mode 100644 mmdet/models/utils/csp_layer.py create mode 100644 mmdet/models/utils/gaussian_target.py create mode 100644 mmdet/models/utils/inverted_residual.py create mode 100644 mmdet/models/utils/make_divisible.py create mode 100644 mmdet/models/utils/misc.py create mode 100644 mmdet/models/utils/normed_predictor.py create mode 100644 mmdet/models/utils/positional_encoding.py create mode 100644 mmdet/models/utils/res_layer.py create mode 100644 mmdet/models/utils/se_layer.py create mode 100644 mmdet/models/utils/transformer.py create mode 100644 mmdet/models/utils/transformer_old.py create mode 100644 mmdet/utils/__init__.py create mode 100644 mmdet/utils/collect_env.py create mode 100644 mmdet/utils/contextmanagers.py create mode 100644 mmdet/utils/logger.py create mode 100644 mmdet/utils/profiling.py create mode 100644 mmdet/utils/util_mixins.py create mode 100644 mmdet/utils/util_random.py create mode 100644 mmdet/version.py create mode 100644 model-index.yml create mode 100644 pytest.ini create mode 100644 requirements.txt create mode 100644 requirements/build.txt create mode 100644 requirements/docs.txt create mode 100644 requirements/mminstall.txt create mode 100644 requirements/optional.txt create mode 100644 requirements/readthedocs.txt create mode 100644 requirements/runtime.txt create mode 100644 requirements/tests.txt create mode 100644 resources/000000212559.jpg create mode 100644 resources/000000255664.jpg create mode 100644 
resources/000000289393.jpg create mode 100644 resources/007114.jpg create mode 100644 resources/007351.jpg create mode 100644 resources/008322.jpg create mode 100644 resources/coco_test_12510.jpg create mode 100644 resources/corruptions_sev_3.png create mode 100644 resources/data_pipeline.png create mode 100644 resources/loss_curve.png create mode 100644 resources/mmdet-logo.png create mode 100644 resources/qq_group_qrcode.jpg create mode 100644 resources/vqvae-framework.png create mode 100644 resources/zhihu_qrcode.jpg create mode 100644 setup.cfg create mode 100644 setup.py create mode 100644 tests/test_data/test_datasets/test_coco_dataset.py create mode 100644 tests/test_data/test_datasets/test_common.py create mode 100644 tests/test_data/test_datasets/test_custom_dataset.py create mode 100644 tests/test_data/test_datasets/test_dataset_wrapper.py create mode 100644 tests/test_data/test_datasets/test_panoptic_dataset.py create mode 100644 tests/test_data/test_datasets/test_xml_dataset.py create mode 100644 tests/test_data/test_pipelines/test_formatting.py create mode 100644 tests/test_data/test_pipelines/test_loading.py create mode 100644 tests/test_data/test_pipelines/test_sampler.py create mode 100644 tests/test_data/test_pipelines/test_transform/__init__.py create mode 100644 tests/test_data/test_pipelines/test_transform/test_img_augment.py create mode 100644 tests/test_data/test_pipelines/test_transform/test_models_aug_test.py create mode 100644 tests/test_data/test_pipelines/test_transform/test_rotate.py create mode 100644 tests/test_data/test_pipelines/test_transform/test_shear.py create mode 100644 tests/test_data/test_pipelines/test_transform/test_transform.py create mode 100644 tests/test_data/test_pipelines/test_transform/test_translate.py create mode 100644 tests/test_data/test_pipelines/test_transform/utils.py create mode 100644 tests/test_data/test_utils.py create mode 100644 tests/test_downstream/test_mmtrack.py create mode 100644 
tests/test_metrics/test_box_overlap.py create mode 100644 tests/test_metrics/test_losses.py create mode 100644 tests/test_metrics/test_mean_ap.py create mode 100644 tests/test_metrics/test_recall.py create mode 100644 tests/test_models/test_backbones/__init__.py create mode 100644 tests/test_models/test_backbones/test_csp_darknet.py create mode 100644 tests/test_models/test_backbones/test_detectors_resnet.py create mode 100644 tests/test_models/test_backbones/test_hourglass.py create mode 100644 tests/test_models/test_backbones/test_hrnet.py create mode 100644 tests/test_models/test_backbones/test_mobilenet_v2.py create mode 100644 tests/test_models/test_backbones/test_pvt.py create mode 100644 tests/test_models/test_backbones/test_regnet.py create mode 100644 tests/test_models/test_backbones/test_renext.py create mode 100644 tests/test_models/test_backbones/test_res2net.py create mode 100644 tests/test_models/test_backbones/test_resnest.py create mode 100644 tests/test_models/test_backbones/test_resnet.py create mode 100644 tests/test_models/test_backbones/test_swin.py create mode 100644 tests/test_models/test_backbones/test_trident_resnet.py create mode 100644 tests/test_models/test_backbones/utils.py create mode 100644 tests/test_models/test_dense_heads/test_anchor_head.py create mode 100644 tests/test_models/test_dense_heads/test_atss_head.py create mode 100644 tests/test_models/test_dense_heads/test_autoassign_head.py create mode 100644 tests/test_models/test_dense_heads/test_centernet_head.py create mode 100644 tests/test_models/test_dense_heads/test_corner_head.py create mode 100644 tests/test_models/test_dense_heads/test_dense_heads_attr.py create mode 100644 tests/test_models/test_dense_heads/test_detr_head.py create mode 100644 tests/test_models/test_dense_heads/test_fcos_head.py create mode 100644 tests/test_models/test_dense_heads/test_fsaf_head.py create mode 100644 tests/test_models/test_dense_heads/test_ga_anchor_head.py create mode 100644 
tests/test_models/test_dense_heads/test_gfl_head.py create mode 100644 tests/test_models/test_dense_heads/test_ld_head.py create mode 100644 tests/test_models/test_dense_heads/test_paa_head.py create mode 100644 tests/test_models/test_dense_heads/test_pisa_head.py create mode 100644 tests/test_models/test_dense_heads/test_sabl_retina_head.py create mode 100644 tests/test_models/test_dense_heads/test_solo_head.py create mode 100644 tests/test_models/test_dense_heads/test_vfnet_head.py create mode 100644 tests/test_models/test_dense_heads/test_yolact_head.py create mode 100644 tests/test_models/test_dense_heads/test_yolof_head.py create mode 100644 tests/test_models/test_dense_heads/test_yolox_head.py create mode 100644 tests/test_models/test_forward.py create mode 100644 tests/test_models/test_loss.py create mode 100644 tests/test_models/test_loss_compatibility.py create mode 100644 tests/test_models/test_necks.py create mode 100644 tests/test_models/test_plugins.py create mode 100644 tests/test_models/test_roi_heads/__init__.py create mode 100644 tests/test_models/test_roi_heads/test_bbox_head.py create mode 100644 tests/test_models/test_roi_heads/test_mask_head.py create mode 100644 tests/test_models/test_roi_heads/test_roi_extractor.py create mode 100644 tests/test_models/test_roi_heads/test_sabl_bbox_head.py create mode 100644 tests/test_models/test_roi_heads/utils.py create mode 100644 tests/test_models/test_utils/test_brick_wrappers.py create mode 100644 tests/test_models/test_utils/test_conv_upsample.py create mode 100644 tests/test_models/test_utils/test_inverted_residual.py create mode 100644 tests/test_models/test_utils/test_model_misc.py create mode 100644 tests/test_models/test_utils/test_position_encoding.py create mode 100644 tests/test_models/test_utils/test_se_layer.py create mode 100644 tests/test_models/test_utils/test_transformer.py create mode 100644 tests/test_onnx/__init__.py create mode 100644 tests/test_onnx/test_head.py create mode 100644 
tests/test_onnx/test_neck.py create mode 100644 tests/test_onnx/utils.py create mode 100644 tests/test_runtime/async_benchmark.py create mode 100644 tests/test_runtime/test_async.py create mode 100644 tests/test_runtime/test_config.py create mode 100644 tests/test_runtime/test_eval_hook.py create mode 100644 tests/test_runtime/test_fp16.py create mode 100644 tests/test_utils/test_anchor.py create mode 100644 tests/test_utils/test_assigner.py create mode 100644 tests/test_utils/test_coder.py create mode 100644 tests/test_utils/test_general_data.py create mode 100644 tests/test_utils/test_hook.py create mode 100644 tests/test_utils/test_masks.py create mode 100644 tests/test_utils/test_misc.py create mode 100644 tests/test_utils/test_nms.py create mode 100644 tests/test_utils/test_version.py create mode 100644 tests/test_utils/test_visualization.py create mode 100644 tools/analysis_tools/analyze_logs.py create mode 100644 tools/analysis_tools/analyze_results.py create mode 100644 tools/analysis_tools/benchmark.py create mode 100644 tools/analysis_tools/coco_error_analysis.py create mode 100644 tools/analysis_tools/confusion_matrix.py create mode 100644 tools/analysis_tools/eval_metric.py create mode 100644 tools/analysis_tools/get_flops.py create mode 100644 tools/analysis_tools/optimize_anchors.py create mode 100644 tools/analysis_tools/robustness_eval.py create mode 100644 tools/analysis_tools/test_robustness.py create mode 100644 tools/dataset_converters/cityscapes.py create mode 100644 tools/dataset_converters/images2coco.py create mode 100644 tools/dataset_converters/pascal_voc.py create mode 100644 tools/deployment/mmdet2torchserve.py create mode 100644 tools/deployment/mmdet_handler.py create mode 100644 tools/deployment/onnx2tensorrt.py create mode 100644 tools/deployment/pytorch2onnx.py create mode 100644 tools/deployment/test.py create mode 100644 tools/deployment/test_torchserver.py create mode 100644 tools/dist_test.sh create mode 100644 
tools/dist_train.sh create mode 100644 tools/misc/browse_dataset.py create mode 100644 tools/misc/print_config.py create mode 100644 tools/model_converters/detectron2pytorch.py create mode 100644 tools/model_converters/publish_model.py create mode 100644 tools/model_converters/regnet2mmdet.py create mode 100644 tools/model_converters/selfsup2mmdet.py create mode 100644 tools/model_converters/upgrade_model_version.py create mode 100644 tools/model_converters/upgrade_ssd_version.py create mode 100644 tools/slurm_test.sh create mode 100644 tools/slurm_train.sh create mode 100644 tools/test.py create mode 100644 tools/train.py diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 0000000..aac9313 --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,8 @@ +cff-version: 1.2.0 +message: "If you use this software, please cite it as below." +authors: + - name: "MMDetection Contributors" +title: "OpenMMLab Detection Toolbox and Benchmark" +date-released: 2018-08-22 +url: "https://github.com/open-mmlab/mmdetection" +license: Apache-2.0 diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..1bfc23e --- /dev/null +++ b/LICENSE @@ -0,0 +1,203 @@ +Copyright 2018-2023 OpenMMLab. All rights reserved. + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright 2018-2023 OpenMMLab. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..6300b22 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,6 @@ +include requirements/*.txt +include mmdet/VERSION +include mmdet/.mim/model-index.yml +include mmdet/.mim/demo/*/* +recursive-include mmdet/.mim/configs *.py *.yml +recursive-include mmdet/.mim/tools *.sh *.py diff --git a/README.md b/README.md new file mode 100644 index 0000000..78044b0 --- /dev/null +++ b/README.md @@ -0,0 +1,75 @@ +## Introduction + +This is an unofficial replication of "Pix2seq: A Language Modeling Framework for Object Detection" with pretrained model on mmdetection. + +## License + +This project is released under the [Apache 2.0 license](LICENSE). + +## Installation + +Please refer to [get_started.md](docs/get_started.md) for installation. + +## Train & Evaluation + +Train by running (about 10 days with 8*V100 32GB) +```bash +python -m torch.distributed.launch --nproc_per_node=8 --master_port=5003 \ + tools/train.py configs/pix2seq/pix2seq_r50_8x4_50e_coco.py --work-dir pix2seq-output --gpus 8 --launcher pytorch +``` + +or + +Download [pretrained pix2seq weights](https://drive.google.com/file/d/1Ku8ZORiLtMs66uleS3aXId7pxlJrTK9d/view?usp=sharing). 
+ +Evaluate with a single GPU: +```bash +python tools/test.py configs/pix2seq/pix2seq_r50_8x4_300_coco.py \ + weights/checkpoints.pth --work-dir pix2seq-output --eval bbox --show-dir pix2seq-vis +``` + +Evaluate with 8 GPUs: +```bash +python -m torch.distributed.launch --nproc_per_node=8 --master_port=5003 \ + tools/test.py configs/pix2seq/pix2seq_r50_8x4_300_coco.py weights/checkpoints.pth \ + --work-dir pix2seq-output --eval bbox --launcher pytorch +``` + +| Method | Backbone | Epoch | Batch Size | AP | AP50 | AP75 | +| :-----: | :------: | :----:| :---------:| :---:| :---: | :---: | +| Ours | R50 | 300 | 32 | 36.4 | 52.8 | 38.5 | +| Paper | R50 | 300 | 128 | 43.0 | 61.0 | 45.6 | + + +## Visualization + +![](https://github.com/Sharpiless/mmdet-Pix2Seq/blob/main/resources/007114.jpg) + +![](https://github.com/Sharpiless/mmdet-Pix2Seq/blob/main/resources/007351.jpg) + +![](https://github.com/Sharpiless/mmdet-Pix2Seq/blob/main/resources/008322.jpg) + +![](https://github.com/Sharpiless/mmdet-Pix2Seq/blob/main/resources/000000289393.jpg) + +![](https://github.com/Sharpiless/mmdet-Pix2Seq/blob/main/resources/000000212559.jpg) + +![](https://github.com/Sharpiless/mmdet-Pix2Seq/blob/main/resources/000000255664.jpg) + +## TO-DO + +- [x] random shuffle targets +- [x] training from scratch +- [x] drop class token +- [x] stochastic depth +- [x] large scale jittering +- [ ] support for custom dataset +- [x] two independent augmentations for each image +- [x] FrozenBatchNorm2d in backbones +- [x] auto-augment +- [x] nucleus sampling + +## Acknowledgement + +[https://github.com/gaopengcuhk/Pretrained-Pix2Seq](https://github.com/gaopengcuhk/Pretrained-Pix2Seq) + +[https://github.com/open-mmlab/mmdetection](https://github.com/open-mmlab/mmdetection) diff --git a/README_zh-CN.md b/README_zh-CN.md new file mode 100644 index 0000000..043e812 --- /dev/null +++ b/README_zh-CN.md @@ -0,0 +1,14 @@ +$$ +\begin{array}{l} +P(z_{n} \geq z_{n^{\prime}} ; \forall n^{\prime} \neq n 
\mid\{\pi_{n^{\prime}}\}_{n^{\prime}=1}^{N})\\ +=\int \prod_{n^{\prime} \neq n} e^{-e^{-(z_{n}-\pi_{n^{\prime}})}} \cdot e^{-(z_{n}-\pi_{n})-e^{-(z_{n}-\pi_{n})}} d z_{n}\\ +=\int e^{-\sum_{n^{\prime} \neq n} e^{-(z_{n}-\pi_{n^{\prime}})}-(z_{n}-\pi_{n})-e^{-(z_{n}-\pi_{n})}} d z_{n}\\ +=\int e^{-\sum_{n^{\prime}=1}^{N} e^{-(z_{n}-\pi_{n^{\prime}})}-(z_{n}-\pi_{n})} d z_{n}\\ +=\int e^{-(\sum_{n^{\prime}=1}^{N} e^{\pi_{n^{\prime}}}) e^{-z_{n}}-z_{n}+\pi_{n}} d z_{n}\\ +=\int e^{-e^{-z_{n}+\ln (\sum_{n^{\prime}=1}^{N} e^{\pi_{n^{\prime}}})}-z_{n}+\pi_{n}} d z_{n}\\ +=\int e^{-e^{-(z_{n}-\ln (\sum_{n^{\prime}=1}^{N} e^{\pi_{n^{\prime}}}))}-(z_{n}-\ln (\sum_{n^{\prime}=1}^{N} e^{\pi_{n^{\prime}}}))-\ln (\sum_{n^{\prime}=1}^{N} e^{\pi_{n^{\prime}}})+\pi_{n}} d z_{n}\\ +=e^{-\ln (\sum_{n^{\prime}=1}^{N} e^{\pi_{n^{\prime}}})+\pi_{n}} \int e^{-e^{-(z_{n}-\ln (\sum_{n^{\prime}=1}^{N} e^{\pi_{n^{\prime}}}))}-(z_{n}-\ln (\sum_{n^{\prime}=1}^{N} e^{\pi_{n^{\prime}}}))} d z_{n}\\ +=\frac{e^{\pi_{n}}}{\sum_{n^{\prime}=1}^{N} e^{\pi_{n^{\prime}}}} \int e^{-e^{-(z_{n}-\ln (\sum_{n^{\prime}=1}^{N} e^{\pi_{n^{\prime}}}))}-(z_{n}-\ln (\sum_{n^{\prime}=1}^{N} e^{\pi_{n^{\prime}}}))} d z_{n}\\ +=\frac{e^{\pi_{n}}}{\sum_{n^{\prime}=1}^{N} e^{\pi_{n^{\prime}}}} \int e^{-(z_{n}-\ln (\sum_{n^{\prime}=1}^{N} e^{\pi_{n^{\prime}}}))-e^{-(z_{n}-\ln (\sum_{n^{\prime}=1}^{N} e^{\pi_{n^{\prime}}}))}} d z_{n} +\end{array} +$$ \ No newline at end of file diff --git a/configs/_base_/datasets/cityscapes_detection.py b/configs/_base_/datasets/cityscapes_detection.py new file mode 100644 index 0000000..e341b59 --- /dev/null +++ b/configs/_base_/datasets/cityscapes_detection.py @@ -0,0 +1,56 @@ +# dataset settings +dataset_type = 'CityscapesDataset' +data_root = 'data/cityscapes/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', img_scale=[(2048, 800), (2048, 1024)], keep_ratio=True), + 
dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 1024), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=1, + workers_per_gpu=2, + train=dict( + type='RepeatDataset', + times=8, + dataset=dict( + type=dataset_type, + ann_file=data_root + + 'annotations/instancesonly_filtered_gtFine_train.json', + img_prefix=data_root + 'leftImg8bit/train/', + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + ann_file=data_root + + 'annotations/instancesonly_filtered_gtFine_val.json', + img_prefix=data_root + 'leftImg8bit/val/', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + ann_file=data_root + + 'annotations/instancesonly_filtered_gtFine_test.json', + img_prefix=data_root + 'leftImg8bit/test/', + pipeline=test_pipeline)) +evaluation = dict(interval=1, metric='bbox') diff --git a/configs/_base_/datasets/cityscapes_instance.py b/configs/_base_/datasets/cityscapes_instance.py new file mode 100644 index 0000000..4e3c34e --- /dev/null +++ b/configs/_base_/datasets/cityscapes_instance.py @@ -0,0 +1,56 @@ +# dataset settings +dataset_type = 'CityscapesDataset' +data_root = 'data/cityscapes/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict( + type='Resize', img_scale=[(2048, 800), (2048, 1024)], keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + 
dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 1024), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=1, + workers_per_gpu=2, + train=dict( + type='RepeatDataset', + times=8, + dataset=dict( + type=dataset_type, + ann_file=data_root + + 'annotations/instancesonly_filtered_gtFine_train.json', + img_prefix=data_root + 'leftImg8bit/train/', + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + ann_file=data_root + + 'annotations/instancesonly_filtered_gtFine_val.json', + img_prefix=data_root + 'leftImg8bit/val/', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + ann_file=data_root + + 'annotations/instancesonly_filtered_gtFine_test.json', + img_prefix=data_root + 'leftImg8bit/test/', + pipeline=test_pipeline)) +evaluation = dict(metric=['bbox', 'segm']) diff --git a/configs/_base_/datasets/coco_detection.py b/configs/_base_/datasets/coco_detection.py new file mode 100644 index 0000000..149f590 --- /dev/null +++ b/configs/_base_/datasets/coco_detection.py @@ -0,0 +1,49 @@ +# dataset settings +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + 
dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline)) +evaluation = dict(interval=1, metric='bbox') diff --git a/configs/_base_/datasets/coco_detection_pix2seq.py b/configs/_base_/datasets/coco_detection_pix2seq.py new file mode 100644 index 0000000..32035d4 --- /dev/null +++ b/configs/_base_/datasets/coco_detection_pix2seq.py @@ -0,0 +1,68 @@ +# dataset settings +dataset_type = 'Pix2seqCocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile', to_float32=True), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='PhotoMetricDistortion', + brightness_delta=32, + contrast_range=(0.5, 1.5), + saturation_range=(0.5, 1.5), + hue_delta=18), + dict(type='RandomFlip', flip_ratio=0.5), + dict( + type='LargeScaleJitter', + desired_size=1333, + ratio_range=(0.3, 2.0), + keep_ratio=True, + allow_negative_crop=True), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', 
size_divisor=1), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']) +] +# test_pipeline, NOTE the Pad's size_divisor is different from the default +# setting (size_divisor=32). While there is little effect on the performance +# whether we use the default setting or use size_divisor=1. +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 1333), + flip=False, + transforms=[ + dict( + type='LargeScaleJitter', + desired_size=1333, + ratio_range=(1.0, 1.0), + keep_ratio=True, + allow_negative_crop=True), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=1), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline) +) + diff --git a/configs/_base_/datasets/coco_detection_vqvae.py b/configs/_base_/datasets/coco_detection_vqvae.py new file mode 100644 index 0000000..33bfa84 --- /dev/null +++ b/configs/_base_/datasets/coco_detection_vqvae.py @@ -0,0 +1,69 @@ +# dataset settings +dataset_type = 'VQVAECocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile', to_float32=True), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='PhotoMetricDistortion', + brightness_delta=32, + contrast_range=(0.5, 1.5), + saturation_range=(0.5, 1.5), + hue_delta=18), + 
dict(type='RandomFlip', flip_ratio=0.5), + dict( + type='LargeScaleJitter', + desired_size=1333, + ratio_range=(0.3, 2.0), + keep_ratio=True, + allow_negative_crop=True), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=1), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']) +] +# test_pipeline, NOTE the Pad's size_divisor is different from the default +# setting (size_divisor=32). While there is little effect on the performance +# whether we use the default setting or use size_divisor=1. +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 1333), + flip=False, + transforms=[ + dict( + type='LargeScaleJitter', + desired_size=1333, + ratio_range=(1.0, 1.0), + keep_ratio=True, + allow_negative_crop=True), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=1), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline) +) + diff --git a/configs/_base_/datasets/coco_instance.py b/configs/_base_/datasets/coco_instance.py new file mode 100644 index 0000000..9901a85 --- /dev/null +++ b/configs/_base_/datasets/coco_instance.py @@ -0,0 +1,49 @@ +# dataset settings +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 
+train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline)) +evaluation = dict(metric=['bbox', 'segm']) diff --git a/configs/_base_/datasets/coco_instance_semantic.py b/configs/_base_/datasets/coco_instance_semantic.py new file mode 100644 index 0000000..6c8bf07 --- /dev/null +++ b/configs/_base_/datasets/coco_instance_semantic.py @@ -0,0 +1,54 @@ +# dataset settings +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='LoadAnnotations', with_bbox=True, with_mask=True, with_seg=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + 
dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='SegRescale', scale_factor=1 / 8), + dict(type='DefaultFormatBundle'), + dict( + type='Collect', + keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + seg_prefix=data_root + 'stuffthingmaps/train2017/', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline)) +evaluation = dict(metric=['bbox', 'segm']) diff --git a/configs/_base_/datasets/coco_panoptic.py b/configs/_base_/datasets/coco_panoptic.py new file mode 100644 index 0000000..dbade7c --- /dev/null +++ b/configs/_base_/datasets/coco_panoptic.py @@ -0,0 +1,59 @@ +# dataset settings +dataset_type = 'CocoPanopticDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='LoadPanopticAnnotations', + with_bbox=True, + with_mask=True, + with_seg=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + 
dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='SegRescale', scale_factor=1 / 4), + dict(type='DefaultFormatBundle'), + dict( + type='Collect', + keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type=dataset_type, + ann_file=data_root + 'annotations/panoptic_train2017.json', + img_prefix=data_root + 'train2017/', + seg_prefix=data_root + 'annotations/panoptic_train2017/', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + ann_file=data_root + 'annotations/panoptic_val2017.json', + img_prefix=data_root + 'val2017/', + seg_prefix=data_root + 'annotations/panoptic_val2017/', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/panoptic_val2017.json', + img_prefix=data_root + 'val2017/', + seg_prefix=data_root + 'annotations/panoptic_val2017/', + pipeline=test_pipeline)) +evaluation = dict(interval=1, metric=['PQ']) diff --git a/configs/_base_/datasets/deepfashion.py b/configs/_base_/datasets/deepfashion.py new file mode 100644 index 0000000..308b4b2 --- /dev/null +++ b/configs/_base_/datasets/deepfashion.py @@ -0,0 +1,53 @@ +# dataset settings +dataset_type = 'DeepFashionDataset' +data_root = 'data/DeepFashion/In-shop/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(750, 1101), keep_ratio=True), + 
dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(750, 1101), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=1, + train=dict( + type=dataset_type, + ann_file=data_root + 'annotations/DeepFashion_segmentation_query.json', + img_prefix=data_root + 'Img/', + pipeline=train_pipeline, + data_root=data_root), + val=dict( + type=dataset_type, + ann_file=data_root + 'annotations/DeepFashion_segmentation_query.json', + img_prefix=data_root + 'Img/', + pipeline=test_pipeline, + data_root=data_root), + test=dict( + type=dataset_type, + ann_file=data_root + + 'annotations/DeepFashion_segmentation_gallery.json', + img_prefix=data_root + 'Img/', + pipeline=test_pipeline, + data_root=data_root)) +evaluation = dict(interval=5, metric=['bbox', 'segm']) diff --git a/configs/_base_/datasets/lvis_v0.5_instance.py b/configs/_base_/datasets/lvis_v0.5_instance.py new file mode 100644 index 0000000..207e005 --- /dev/null +++ b/configs/_base_/datasets/lvis_v0.5_instance.py @@ -0,0 +1,24 @@ +# dataset settings +_base_ = 'coco_instance.py' +dataset_type = 'LVISV05Dataset' +data_root = 'data/lvis_v0.5/' +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + _delete_=True, + type='ClassBalancedDataset', + oversample_thr=1e-3, + dataset=dict( + type=dataset_type, + ann_file=data_root + 'annotations/lvis_v0.5_train.json', + img_prefix=data_root + 'train2017/')), + val=dict( + type=dataset_type, + ann_file=data_root + 
'annotations/lvis_v0.5_val.json', + img_prefix=data_root + 'val2017/'), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/lvis_v0.5_val.json', + img_prefix=data_root + 'val2017/')) +evaluation = dict(metric=['bbox', 'segm']) diff --git a/configs/_base_/datasets/lvis_v1_instance.py b/configs/_base_/datasets/lvis_v1_instance.py new file mode 100644 index 0000000..be791ed --- /dev/null +++ b/configs/_base_/datasets/lvis_v1_instance.py @@ -0,0 +1,24 @@ +# dataset settings +_base_ = 'coco_instance.py' +dataset_type = 'LVISV1Dataset' +data_root = 'data/lvis_v1/' +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + _delete_=True, + type='ClassBalancedDataset', + oversample_thr=1e-3, + dataset=dict( + type=dataset_type, + ann_file=data_root + 'annotations/lvis_v1_train.json', + img_prefix=data_root)), + val=dict( + type=dataset_type, + ann_file=data_root + 'annotations/lvis_v1_val.json', + img_prefix=data_root), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/lvis_v1_val.json', + img_prefix=data_root)) +evaluation = dict(metric=['bbox', 'segm']) diff --git a/configs/_base_/datasets/voc0712.py b/configs/_base_/datasets/voc0712.py new file mode 100644 index 0000000..ae09acd --- /dev/null +++ b/configs/_base_/datasets/voc0712.py @@ -0,0 +1,55 @@ +# dataset settings +dataset_type = 'VOCDataset' +data_root = 'data/VOCdevkit/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1000, 600), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1000, 600), + 
flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type='RepeatDataset', + times=3, + dataset=dict( + type=dataset_type, + ann_file=[ + data_root + 'VOC2007/ImageSets/Main/trainval.txt', + data_root + 'VOC2012/ImageSets/Main/trainval.txt' + ], + img_prefix=[data_root + 'VOC2007/', data_root + 'VOC2012/'], + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt', + img_prefix=data_root + 'VOC2007/', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt', + img_prefix=data_root + 'VOC2007/', + pipeline=test_pipeline)) +evaluation = dict(interval=1, metric='mAP') diff --git a/configs/_base_/datasets/voc0712_pix2seq.py b/configs/_base_/datasets/voc0712_pix2seq.py new file mode 100644 index 0000000..264ec14 --- /dev/null +++ b/configs/_base_/datasets/voc0712_pix2seq.py @@ -0,0 +1,73 @@ +# dataset settings +dataset_type = 'VOCDataset' +data_root = 'data/VOCdevkit/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile', to_float32=True), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='PhotoMetricDistortion', + brightness_delta=32, + contrast_range=(0.5, 1.5), + saturation_range=(0.5, 1.5), + hue_delta=18), + dict(type='RandomFlip', flip_ratio=0.5), + dict( + type='LargeScaleJitter', + desired_size=1333, + ratio_range=(0.3, 2.0), + keep_ratio=True, + allow_negative_crop=True), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=1), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']) +] +# 
test_pipeline, NOTE the Pad's size_divisor is different from the default +# setting (size_divisor=32), although it makes little difference to performance +# whether we use the default setting or size_divisor=1. +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 1333), + flip=False, + transforms=[ + dict( + type='LargeScaleJitter', + desired_size=1333, + ratio_range=(1.0, 1.0), + keep_ratio=True, + allow_negative_crop=True), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=1), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type='RepeatDataset', + times=3, + dataset=dict( + type=dataset_type, + ann_file=[ + data_root + 'VOC2007/ImageSets/Main/trainval.txt', + data_root + 'VOC2012/ImageSets/Main/trainval.txt' + ], + img_prefix=[data_root + 'VOC2007/', data_root + 'VOC2012/'], + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt', + img_prefix=data_root + 'VOC2007/', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt', + img_prefix=data_root + 'VOC2007/', + pipeline=test_pipeline)) +evaluation = dict(interval=1, metric='mAP') diff --git a/configs/_base_/datasets/wider_face.py b/configs/_base_/datasets/wider_face.py new file mode 100644 index 0000000..d1d649b --- /dev/null +++ b/configs/_base_/datasets/wider_face.py @@ -0,0 +1,63 @@ +# dataset settings +dataset_type = 'WIDERFaceDataset' +data_root = 'data/WIDERFace/' +img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile', to_float32=True), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='PhotoMetricDistortion', + brightness_delta=32, + contrast_range=(0.5, 1.5), + saturation_range=(0.5, 1.5), + 
hue_delta=18), + dict( + type='Expand', + mean=img_norm_cfg['mean'], + to_rgb=img_norm_cfg['to_rgb'], + ratio_range=(1, 4)), + dict( + type='MinIoURandomCrop', + min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), + min_crop_size=0.3), + dict(type='Resize', img_scale=(300, 300), keep_ratio=False), + dict(type='Normalize', **img_norm_cfg), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(300, 300), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=False), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=60, + workers_per_gpu=2, + train=dict( + type='RepeatDataset', + times=2, + dataset=dict( + type=dataset_type, + ann_file=data_root + 'train.txt', + img_prefix=data_root + 'WIDER_train/', + min_size=17, + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + ann_file=data_root + 'val.txt', + img_prefix=data_root + 'WIDER_val/', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + ann_file=data_root + 'val.txt', + img_prefix=data_root + 'WIDER_val/', + pipeline=test_pipeline)) diff --git a/configs/_base_/default_runtime.py b/configs/_base_/default_runtime.py new file mode 100644 index 0000000..55097c5 --- /dev/null +++ b/configs/_base_/default_runtime.py @@ -0,0 +1,16 @@ +checkpoint_config = dict(interval=1) +# yapf:disable +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + # dict(type='TensorboardLoggerHook') + ]) +# yapf:enable +custom_hooks = [dict(type='NumClassCheckHook')] + +dist_params = dict(backend='nccl') +log_level = 'INFO' +load_from = None +resume_from = None +workflow = [('train', 1)] diff --git a/configs/_base_/models/cascade_mask_rcnn_r50_fpn.py b/configs/_base_/models/cascade_mask_rcnn_r50_fpn.py new 
file mode 100644 index 0000000..2902cca --- /dev/null +++ b/configs/_base_/models/cascade_mask_rcnn_r50_fpn.py @@ -0,0 +1,196 @@ +# model settings +model = dict( + type='CascadeRCNN', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5), + rpn_head=dict( + type='RPNHead', + in_channels=256, + feat_channels=256, + anchor_generator=dict( + type='AnchorGenerator', + scales=[8], + ratios=[0.5, 1.0, 2.0], + strides=[4, 8, 16, 32, 64]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0]), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), + roi_head=dict( + type='CascadeRoIHead', + num_stages=3, + stage_loss_weights=[1, 0.5, 0.25], + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + bbox_head=[ + dict( + type='Shared2FCBBoxHead', + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2]), + reg_class_agnostic=True, + loss_cls=dict( + type='CrossEntropyLoss', + use_sigmoid=False, + loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, + loss_weight=1.0)), + dict( + type='Shared2FCBBoxHead', + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.05, 0.05, 0.1, 0.1]), + reg_class_agnostic=True, + 
loss_cls=dict( + type='CrossEntropyLoss', + use_sigmoid=False, + loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, + loss_weight=1.0)), + dict( + type='Shared2FCBBoxHead', + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.033, 0.033, 0.067, 0.067]), + reg_class_agnostic=True, + loss_cls=dict( + type='CrossEntropyLoss', + use_sigmoid=False, + loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) + ], + mask_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + mask_head=dict( + type='FCNMaskHead', + num_convs=4, + in_channels=256, + conv_out_channels=256, + num_classes=80, + loss_mask=dict( + type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), + # model training and testing settings + train_cfg=dict( + rpn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + match_low_quality=True, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + allowed_border=0, + pos_weight=-1, + debug=False), + rpn_proposal=dict( + nms_pre=2000, + max_per_img=2000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0), + rcnn=[ + dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0.5, + match_low_quality=False, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True), + mask_size=28, + pos_weight=-1, + debug=False), + dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.6, + neg_iou_thr=0.6, + min_pos_iou=0.6, + match_low_quality=False, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + 
num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True), + mask_size=28, + pos_weight=-1, + debug=False), + dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.7, + min_pos_iou=0.7, + match_low_quality=False, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True), + mask_size=28, + pos_weight=-1, + debug=False) + ]), + test_cfg=dict( + rpn=dict( + nms_pre=1000, + max_per_img=1000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0), + rcnn=dict( + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.5), + max_per_img=100, + mask_thr_binary=0.5))) diff --git a/configs/_base_/models/cascade_rcnn_r50_fpn.py b/configs/_base_/models/cascade_rcnn_r50_fpn.py new file mode 100644 index 0000000..42f74ae --- /dev/null +++ b/configs/_base_/models/cascade_rcnn_r50_fpn.py @@ -0,0 +1,179 @@ +# model settings +model = dict( + type='CascadeRCNN', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5), + rpn_head=dict( + type='RPNHead', + in_channels=256, + feat_channels=256, + anchor_generator=dict( + type='AnchorGenerator', + scales=[8], + ratios=[0.5, 1.0, 2.0], + strides=[4, 8, 16, 32, 64]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0]), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), + roi_head=dict( + type='CascadeRoIHead', + num_stages=3, + stage_loss_weights=[1, 0.5, 0.25], + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + 
roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + bbox_head=[ + dict( + type='Shared2FCBBoxHead', + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2]), + reg_class_agnostic=True, + loss_cls=dict( + type='CrossEntropyLoss', + use_sigmoid=False, + loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, + loss_weight=1.0)), + dict( + type='Shared2FCBBoxHead', + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.05, 0.05, 0.1, 0.1]), + reg_class_agnostic=True, + loss_cls=dict( + type='CrossEntropyLoss', + use_sigmoid=False, + loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, + loss_weight=1.0)), + dict( + type='Shared2FCBBoxHead', + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.033, 0.033, 0.067, 0.067]), + reg_class_agnostic=True, + loss_cls=dict( + type='CrossEntropyLoss', + use_sigmoid=False, + loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) + ]), + # model training and testing settings + train_cfg=dict( + rpn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + match_low_quality=True, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + allowed_border=0, + pos_weight=-1, + debug=False), + rpn_proposal=dict( + nms_pre=2000, + max_per_img=2000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0), + rcnn=[ + dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.5, + 
min_pos_iou=0.5, + match_low_quality=False, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True), + pos_weight=-1, + debug=False), + dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.6, + neg_iou_thr=0.6, + min_pos_iou=0.6, + match_low_quality=False, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True), + pos_weight=-1, + debug=False), + dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.7, + min_pos_iou=0.7, + match_low_quality=False, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True), + pos_weight=-1, + debug=False) + ]), + test_cfg=dict( + rpn=dict( + nms_pre=1000, + max_per_img=1000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0), + rcnn=dict( + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.5), + max_per_img=100))) diff --git a/configs/_base_/models/fast_rcnn_r50_fpn.py b/configs/_base_/models/fast_rcnn_r50_fpn.py new file mode 100644 index 0000000..9982fe0 --- /dev/null +++ b/configs/_base_/models/fast_rcnn_r50_fpn.py @@ -0,0 +1,62 @@ +# model settings +model = dict( + type='FastRCNN', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5), + roi_head=dict( + type='StandardRoIHead', + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + bbox_head=dict( + type='Shared2FCBBoxHead', + in_channels=256, + 
fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2]), + reg_class_agnostic=False, + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='L1Loss', loss_weight=1.0))), + # model training and testing settings + train_cfg=dict( + rcnn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0.5, + match_low_quality=False, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True), + pos_weight=-1, + debug=False)), + test_cfg=dict( + rcnn=dict( + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.5), + max_per_img=100))) diff --git a/configs/_base_/models/faster_rcnn_r50_caffe_c4.py b/configs/_base_/models/faster_rcnn_r50_caffe_c4.py new file mode 100644 index 0000000..51b5db4 --- /dev/null +++ b/configs/_base_/models/faster_rcnn_r50_caffe_c4.py @@ -0,0 +1,114 @@ +# model settings +norm_cfg = dict(type='BN', requires_grad=False) +model = dict( + type='FasterRCNN', + backbone=dict( + type='ResNet', + depth=50, + num_stages=3, + strides=(1, 2, 2), + dilations=(1, 1, 1), + out_indices=(2, ), + frozen_stages=1, + norm_cfg=norm_cfg, + norm_eval=True, + style='caffe', + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://detectron2/resnet50_caffe')), + rpn_head=dict( + type='RPNHead', + in_channels=1024, + feat_channels=1024, + anchor_generator=dict( + type='AnchorGenerator', + scales=[2, 4, 8, 16, 32], + ratios=[0.5, 1.0, 2.0], + strides=[16]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0]), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type='L1Loss', loss_weight=1.0)), + roi_head=dict( + type='StandardRoIHead', + shared_head=dict( + type='ResLayer', + 
depth=50, + stage=3, + stride=2, + dilation=1, + style='caffe', + norm_cfg=norm_cfg, + norm_eval=True), + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), + out_channels=1024, + featmap_strides=[16]), + bbox_head=dict( + type='BBoxHead', + with_avg_pool=True, + roi_feat_size=7, + in_channels=2048, + num_classes=80, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2]), + reg_class_agnostic=False, + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='L1Loss', loss_weight=1.0))), + # model training and testing settings + train_cfg=dict( + rpn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + match_low_quality=True, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + allowed_border=0, + pos_weight=-1, + debug=False), + rpn_proposal=dict( + nms_pre=12000, + max_per_img=2000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0), + rcnn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0.5, + match_low_quality=False, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True), + pos_weight=-1, + debug=False)), + test_cfg=dict( + rpn=dict( + nms_pre=6000, + max_per_img=1000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0), + rcnn=dict( + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.5), + max_per_img=100))) diff --git a/configs/_base_/models/faster_rcnn_r50_caffe_dc5.py b/configs/_base_/models/faster_rcnn_r50_caffe_dc5.py new file mode 100644 index 0000000..a377a6f --- /dev/null +++ b/configs/_base_/models/faster_rcnn_r50_caffe_dc5.py @@ -0,0 +1,105 @@ +# model settings +norm_cfg = 
dict(type='BN', requires_grad=False) +model = dict( + type='FasterRCNN', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + strides=(1, 2, 2, 1), + dilations=(1, 1, 1, 2), + out_indices=(3, ), + frozen_stages=1, + norm_cfg=norm_cfg, + norm_eval=True, + style='caffe', + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://detectron2/resnet50_caffe')), + rpn_head=dict( + type='RPNHead', + in_channels=2048, + feat_channels=2048, + anchor_generator=dict( + type='AnchorGenerator', + scales=[2, 4, 8, 16, 32], + ratios=[0.5, 1.0, 2.0], + strides=[16]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0]), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type='L1Loss', loss_weight=1.0)), + roi_head=dict( + type='StandardRoIHead', + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), + out_channels=2048, + featmap_strides=[16]), + bbox_head=dict( + type='Shared2FCBBoxHead', + in_channels=2048, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2]), + reg_class_agnostic=False, + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='L1Loss', loss_weight=1.0))), + # model training and testing settings + train_cfg=dict( + rpn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + match_low_quality=True, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + allowed_border=0, + pos_weight=-1, + debug=False), + rpn_proposal=dict( + nms_pre=12000, + max_per_img=2000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0), + rcnn=dict( + assigner=dict( + 
type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0.5, + match_low_quality=False, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True), + pos_weight=-1, + debug=False)), + test_cfg=dict( + rpn=dict( + nms=dict(type='nms', iou_threshold=0.7), + nms_pre=6000, + max_per_img=1000, + min_bbox_size=0), + rcnn=dict( + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.5), + max_per_img=100))) diff --git a/configs/_base_/models/faster_rcnn_r50_fpn.py b/configs/_base_/models/faster_rcnn_r50_fpn.py new file mode 100644 index 0000000..1ef8e7b --- /dev/null +++ b/configs/_base_/models/faster_rcnn_r50_fpn.py @@ -0,0 +1,108 @@ +# model settings +model = dict( + type='FasterRCNN', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5), + rpn_head=dict( + type='RPNHead', + in_channels=256, + feat_channels=256, + anchor_generator=dict( + type='AnchorGenerator', + scales=[8], + ratios=[0.5, 1.0, 2.0], + strides=[4, 8, 16, 32, 64]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0]), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type='L1Loss', loss_weight=1.0)), + roi_head=dict( + type='StandardRoIHead', + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + bbox_head=dict( + type='Shared2FCBBoxHead', + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', 
+ target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2]), + reg_class_agnostic=False, + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='L1Loss', loss_weight=1.0))), + # model training and testing settings + train_cfg=dict( + rpn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + match_low_quality=True, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + allowed_border=-1, + pos_weight=-1, + debug=False), + rpn_proposal=dict( + nms_pre=2000, + max_per_img=1000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0), + rcnn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0.5, + match_low_quality=False, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True), + pos_weight=-1, + debug=False)), + test_cfg=dict( + rpn=dict( + nms_pre=1000, + max_per_img=1000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0), + rcnn=dict( + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.5), + max_per_img=100) + # soft-nms is also supported for rcnn testing + # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05) + )) diff --git a/configs/_base_/models/mask_rcnn_r50_caffe_c4.py b/configs/_base_/models/mask_rcnn_r50_caffe_c4.py new file mode 100644 index 0000000..122202e --- /dev/null +++ b/configs/_base_/models/mask_rcnn_r50_caffe_c4.py @@ -0,0 +1,125 @@ +# model settings +norm_cfg = dict(type='BN', requires_grad=False) +model = dict( + type='MaskRCNN', + backbone=dict( + type='ResNet', + depth=50, + num_stages=3, + strides=(1, 2, 2), + dilations=(1, 1, 1), + out_indices=(2, ), + frozen_stages=1, + norm_cfg=norm_cfg, + norm_eval=True, + style='caffe', + init_cfg=dict( + type='Pretrained', + 
checkpoint='open-mmlab://detectron2/resnet50_caffe')), + rpn_head=dict( + type='RPNHead', + in_channels=1024, + feat_channels=1024, + anchor_generator=dict( + type='AnchorGenerator', + scales=[2, 4, 8, 16, 32], + ratios=[0.5, 1.0, 2.0], + strides=[16]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0]), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type='L1Loss', loss_weight=1.0)), + roi_head=dict( + type='StandardRoIHead', + shared_head=dict( + type='ResLayer', + depth=50, + stage=3, + stride=2, + dilation=1, + style='caffe', + norm_cfg=norm_cfg, + norm_eval=True), + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), + out_channels=1024, + featmap_strides=[16]), + bbox_head=dict( + type='BBoxHead', + with_avg_pool=True, + roi_feat_size=7, + in_channels=2048, + num_classes=80, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2]), + reg_class_agnostic=False, + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='L1Loss', loss_weight=1.0)), + mask_roi_extractor=None, + mask_head=dict( + type='FCNMaskHead', + num_convs=0, + in_channels=2048, + conv_out_channels=256, + num_classes=80, + loss_mask=dict( + type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), + # model training and testing settings + train_cfg=dict( + rpn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + match_low_quality=True, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + allowed_border=0, + pos_weight=-1, + debug=False), + rpn_proposal=dict( + nms_pre=12000, + max_per_img=2000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0), + 
rcnn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0.5, + match_low_quality=False, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True), + mask_size=14, + pos_weight=-1, + debug=False)), + test_cfg=dict( + rpn=dict( + nms_pre=6000, + nms=dict(type='nms', iou_threshold=0.7), + max_per_img=1000, + min_bbox_size=0), + rcnn=dict( + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.5), + max_per_img=100, + mask_thr_binary=0.5))) diff --git a/configs/_base_/models/mask_rcnn_r50_fpn.py b/configs/_base_/models/mask_rcnn_r50_fpn.py new file mode 100644 index 0000000..d903e55 --- /dev/null +++ b/configs/_base_/models/mask_rcnn_r50_fpn.py @@ -0,0 +1,120 @@ +# model settings +model = dict( + type='MaskRCNN', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5), + rpn_head=dict( + type='RPNHead', + in_channels=256, + feat_channels=256, + anchor_generator=dict( + type='AnchorGenerator', + scales=[8], + ratios=[0.5, 1.0, 2.0], + strides=[4, 8, 16, 32, 64]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0]), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type='L1Loss', loss_weight=1.0)), + roi_head=dict( + type='StandardRoIHead', + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + bbox_head=dict( + type='Shared2FCBBoxHead', + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + 
num_classes=80, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2]), + reg_class_agnostic=False, + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='L1Loss', loss_weight=1.0)), + mask_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + mask_head=dict( + type='FCNMaskHead', + num_convs=4, + in_channels=256, + conv_out_channels=256, + num_classes=80, + loss_mask=dict( + type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), + # model training and testing settings + train_cfg=dict( + rpn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + match_low_quality=True, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + allowed_border=-1, + pos_weight=-1, + debug=False), + rpn_proposal=dict( + nms_pre=2000, + max_per_img=1000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0), + rcnn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0.5, + match_low_quality=True, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True), + mask_size=28, + pos_weight=-1, + debug=False)), + test_cfg=dict( + rpn=dict( + nms_pre=1000, + max_per_img=1000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0), + rcnn=dict( + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.5), + max_per_img=100, + mask_thr_binary=0.5))) diff --git a/configs/_base_/models/retinanet_r50_fpn.py b/configs/_base_/models/retinanet_r50_fpn.py new file mode 100644 index 0000000..56e43fa --- /dev/null +++ b/configs/_base_/models/retinanet_r50_fpn.py @@ -0,0 +1,60 @@ +# model settings 
+model = dict( + type='RetinaNet', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + start_level=1, + add_extra_convs='on_input', + num_outs=5), + bbox_head=dict( + type='RetinaHead', + num_classes=80, + in_channels=256, + stacked_convs=4, + feat_channels=256, + anchor_generator=dict( + type='AnchorGenerator', + octave_base_scale=4, + scales_per_octave=3, + ratios=[0.5, 1.0, 2.0], + strides=[8, 16, 32, 64, 128]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0]), + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_bbox=dict(type='L1Loss', loss_weight=1.0)), + # model training and testing settings + train_cfg=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.4, + min_pos_iou=0, + ignore_iof_thr=-1), + allowed_border=-1, + pos_weight=-1, + debug=False), + test_cfg=dict( + nms_pre=1000, + min_bbox_size=0, + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.5), + max_per_img=100)) diff --git a/configs/_base_/models/rpn_r50_caffe_c4.py b/configs/_base_/models/rpn_r50_caffe_c4.py new file mode 100644 index 0000000..8b32ca9 --- /dev/null +++ b/configs/_base_/models/rpn_r50_caffe_c4.py @@ -0,0 +1,58 @@ +# model settings +model = dict( + type='RPN', + backbone=dict( + type='ResNet', + depth=50, + num_stages=3, + strides=(1, 2, 2), + dilations=(1, 1, 1), + out_indices=(2, ), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=False), + norm_eval=True, + style='caffe', + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://detectron2/resnet50_caffe')), + neck=None, + rpn_head=dict( + 
type='RPNHead', + in_channels=1024, + feat_channels=1024, + anchor_generator=dict( + type='AnchorGenerator', + scales=[2, 4, 8, 16, 32], + ratios=[0.5, 1.0, 2.0], + strides=[16]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0]), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type='L1Loss', loss_weight=1.0)), + # model training and testing settings + train_cfg=dict( + rpn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + allowed_border=0, + pos_weight=-1, + debug=False)), + test_cfg=dict( + rpn=dict( + nms_pre=12000, + max_per_img=2000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0))) diff --git a/configs/_base_/models/rpn_r50_fpn.py b/configs/_base_/models/rpn_r50_fpn.py new file mode 100644 index 0000000..edaf4d4 --- /dev/null +++ b/configs/_base_/models/rpn_r50_fpn.py @@ -0,0 +1,58 @@ +# model settings +model = dict( + type='RPN', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5), + rpn_head=dict( + type='RPNHead', + in_channels=256, + feat_channels=256, + anchor_generator=dict( + type='AnchorGenerator', + scales=[8], + ratios=[0.5, 1.0, 2.0], + strides=[4, 8, 16, 32, 64]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0]), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type='L1Loss', loss_weight=1.0)), + # model 
training and testing settings + train_cfg=dict( + rpn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + allowed_border=0, + pos_weight=-1, + debug=False)), + test_cfg=dict( + rpn=dict( + nms_pre=2000, + max_per_img=1000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0))) diff --git a/configs/_base_/models/ssd300.py b/configs/_base_/models/ssd300.py new file mode 100644 index 0000000..f17df01 --- /dev/null +++ b/configs/_base_/models/ssd300.py @@ -0,0 +1,56 @@ +# model settings +input_size = 300 +model = dict( + type='SingleStageDetector', + backbone=dict( + type='SSDVGG', + depth=16, + with_last_pool=False, + ceil_mode=True, + out_indices=(3, 4), + out_feature_indices=(22, 34), + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://vgg16_caffe')), + neck=dict( + type='SSDNeck', + in_channels=(512, 1024), + out_channels=(512, 1024, 512, 256, 256, 256), + level_strides=(2, 2, 1, 1), + level_paddings=(1, 1, 0, 0), + l2_norm_scale=20), + bbox_head=dict( + type='SSDHead', + in_channels=(512, 1024, 512, 256, 256, 256), + num_classes=80, + anchor_generator=dict( + type='SSDAnchorGenerator', + scale_major=False, + input_size=input_size, + basesize_ratio_range=(0.15, 0.9), + strides=[8, 16, 32, 64, 100, 300], + ratios=[[2], [2, 3], [2, 3], [2, 3], [2], [2]]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[0.1, 0.1, 0.2, 0.2])), + # model training and testing settings + train_cfg=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0., + ignore_iof_thr=-1, + gt_max_assign_all=False), + smoothl1_beta=1., + allowed_border=-1, + pos_weight=-1, + neg_pos_ratio=3, + debug=False), + test_cfg=dict( + nms_pre=1000, + nms=dict(type='nms', iou_threshold=0.45), + 
min_bbox_size=0, + score_thr=0.02, + max_per_img=200)) +cudnn_benchmark = True diff --git a/configs/_base_/schedules/schedule_1x.py b/configs/_base_/schedules/schedule_1x.py new file mode 100644 index 0000000..13b3783 --- /dev/null +++ b/configs/_base_/schedules/schedule_1x.py @@ -0,0 +1,11 @@ +# optimizer +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[8, 11]) +runner = dict(type='EpochBasedRunner', max_epochs=12) diff --git a/configs/_base_/schedules/schedule_20e.py b/configs/_base_/schedules/schedule_20e.py new file mode 100644 index 0000000..00e8590 --- /dev/null +++ b/configs/_base_/schedules/schedule_20e.py @@ -0,0 +1,11 @@ +# optimizer +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[16, 19]) +runner = dict(type='EpochBasedRunner', max_epochs=20) diff --git a/configs/_base_/schedules/schedule_2x.py b/configs/_base_/schedules/schedule_2x.py new file mode 100644 index 0000000..69dc9ee --- /dev/null +++ b/configs/_base_/schedules/schedule_2x.py @@ -0,0 +1,11 @@ +# optimizer +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/albu_example/README.md b/configs/albu_example/README.md new file mode 100644 index 0000000..b11ae56 --- /dev/null +++ b/configs/albu_example/README.md @@ -0,0 +1,19 @@ +# Albu Example + + + +``` +@article{2018arXiv180906839B, + author = {A. Buslaev, A. Parinov, E. Khvedchenya, V.~I. 
Iglovikov and A.~A. Kalinin}, + title = "{Albumentations: fast and flexible image augmentations}", + journal = {ArXiv e-prints}, + eprint = {1809.06839}, + year = 2018 +} +``` + +## Results and Models + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +|:---------:|:-------:|:-------:|:--------:|:--------------:|:------:|:-------:|:------:|:--------:| +| R-50 | pytorch | 1x | 4.4 | 16.6 | 38.0 | 34.5 |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/albu_example/mask_rcnn_r50_fpn_albu_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/albu_example/mask_rcnn_r50_fpn_albu_1x_coco/mask_rcnn_r50_fpn_albu_1x_coco_20200208-ab203bcd.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/albu_example/mask_rcnn_r50_fpn_albu_1x_coco/mask_rcnn_r50_fpn_albu_1x_coco_20200208_225520.log.json) | diff --git a/configs/albu_example/mask_rcnn_r50_fpn_albu_1x_coco.py b/configs/albu_example/mask_rcnn_r50_fpn_albu_1x_coco.py new file mode 100644 index 0000000..b3f879a --- /dev/null +++ b/configs/albu_example/mask_rcnn_r50_fpn_albu_1x_coco.py @@ -0,0 +1,73 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +albu_train_transforms = [ + dict( + type='ShiftScaleRotate', + shift_limit=0.0625, + scale_limit=0.0, + rotate_limit=0, + interpolation=1, + p=0.5), + dict( + type='RandomBrightnessContrast', + brightness_limit=[0.1, 0.3], + contrast_limit=[0.1, 0.3], + p=0.2), + dict( + type='OneOf', + transforms=[ + dict( + type='RGBShift', + r_shift_limit=10, + g_shift_limit=10, + b_shift_limit=10, + p=1.0), + dict( + type='HueSaturationValue', + hue_shift_limit=20, + sat_shift_limit=30, + val_shift_limit=20, + p=1.0) + ], + p=0.1), + dict(type='JpegCompression', quality_lower=85, quality_upper=95, p=0.2), + dict(type='ChannelShuffle', p=0.1), + dict( + type='OneOf', + transforms=[ + dict(type='Blur', 
blur_limit=3, p=1.0), + dict(type='MedianBlur', blur_limit=3, p=1.0) + ], + p=0.1), +] +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='Pad', size_divisor=32), + dict( + type='Albu', + transforms=albu_train_transforms, + bbox_params=dict( + type='BboxParams', + format='pascal_voc', + label_fields=['gt_labels'], + min_visibility=0.0, + filter_lost_elements=True), + keymap={ + 'img': 'image', + 'gt_masks': 'masks', + 'gt_bboxes': 'bboxes' + }, + update_pad_shape=False, + skip_img_without_anno=True), + dict(type='Normalize', **img_norm_cfg), + dict(type='DefaultFormatBundle'), + dict( + type='Collect', + keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks'], + meta_keys=('filename', 'ori_shape', 'img_shape', 'img_norm_cfg', + 'pad_shape', 'scale_factor')) +] +data = dict(train=dict(pipeline=train_pipeline)) diff --git a/configs/atss/README.md b/configs/atss/README.md new file mode 100644 index 0000000..8aa7746 --- /dev/null +++ b/configs/atss/README.md @@ -0,0 +1,21 @@ +# Bridging the Gap Between Anchor-based and Anchor-free Detection via Adaptive Training Sample Selection + +## Introduction + + + +```latex +@article{zhang2019bridging, + title = {Bridging the Gap Between Anchor-based and Anchor-free Detection via Adaptive Training Sample Selection}, + author = {Zhang, Shifeng and Chi, Cheng and Yao, Yongqiang and Lei, Zhen and Li, Stan Z.}, + journal = {arXiv preprint arXiv:1912.02424}, + year = {2019} +} +``` + +## Results and Models + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +|:---------:|:-------:|:-------:|:--------:|:--------------:|:------:|:------:|:--------:| +| R-50 | pytorch | 1x | 3.7 | 19.7 | 39.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/atss/atss_r50_fpn_1x_coco.py) | 
[model](https://download.openmmlab.com/mmdetection/v2.0/atss/atss_r50_fpn_1x_coco/atss_r50_fpn_1x_coco_20200209-985f7bd0.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/atss/atss_r50_fpn_1x_coco/atss_r50_fpn_1x_coco_20200209_102539.log.json) | +| R-101 | pytorch | 1x | 5.6 | 12.3 | 41.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/atss/atss_r101_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/atss/atss_r101_fpn_1x_coco/atss_r101_fpn_1x_20200825-dfcadd6f.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/atss/atss_r101_fpn_1x_coco/atss_r101_fpn_1x_20200825-dfcadd6f.log.json) | diff --git a/configs/atss/atss_r101_fpn_1x_coco.py b/configs/atss/atss_r101_fpn_1x_coco.py new file mode 100644 index 0000000..5225d2a --- /dev/null +++ b/configs/atss/atss_r101_fpn_1x_coco.py @@ -0,0 +1,6 @@ +_base_ = './atss_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + depth=101, + init_cfg=dict(type='Pretrained', + checkpoint='torchvision://resnet101'))) diff --git a/configs/atss/atss_r50_fpn_1x_coco.py b/configs/atss/atss_r50_fpn_1x_coco.py new file mode 100644 index 0000000..42ff4c5 --- /dev/null +++ b/configs/atss/atss_r50_fpn_1x_coco.py @@ -0,0 +1,62 @@ +_base_ = [ + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +model = dict( + type='ATSS', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + start_level=1, + add_extra_convs='on_output', + num_outs=5), + bbox_head=dict( + type='ATSSHead', + num_classes=80, + in_channels=256, + stacked_convs=4, + feat_channels=256, + anchor_generator=dict( + type='AnchorGenerator', + ratios=[1.0], + 
octave_base_scale=8, + scales_per_octave=1, + strides=[8, 16, 32, 64, 128]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[0.1, 0.1, 0.2, 0.2]), + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_bbox=dict(type='GIoULoss', loss_weight=2.0), + loss_centerness=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)), + # training and testing settings + train_cfg=dict( + assigner=dict(type='ATSSAssigner', topk=9), + allowed_border=-1, + pos_weight=-1, + debug=False), + test_cfg=dict( + nms_pre=1000, + min_bbox_size=0, + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.6), + max_per_img=100)) +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) diff --git a/configs/atss/metafile.yml b/configs/atss/metafile.yml new file mode 100644 index 0000000..f4c567e --- /dev/null +++ b/configs/atss/metafile.yml @@ -0,0 +1,60 @@ +Collections: + - Name: ATSS + Metadata: + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - ATSS + - FPN + - ResNet + Paper: + URL: https://arxiv.org/abs/1912.02424 + Title: 'Bridging the Gap Between Anchor-based and Anchor-free Detection via Adaptive Training Sample Selection' + README: configs/atss/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/detectors/atss.py#L6 + Version: v2.0.0 + +Models: + - Name: atss_r50_fpn_1x_coco + In Collection: ATSS + Config: configs/atss/atss_r50_fpn_1x_coco.py + Metadata: + Training Memory (GB): 3.7 + inference time (ms/im): + - value: 50.76 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 39.4 + Weights: 
https://download.openmmlab.com/mmdetection/v2.0/atss/atss_r50_fpn_1x_coco/atss_r50_fpn_1x_coco_20200209-985f7bd0.pth + + - Name: atss_r101_fpn_1x_coco + In Collection: ATSS + Config: configs/atss/atss_r101_fpn_1x_coco.py + Metadata: + Training Memory (GB): 5.6 + inference time (ms/im): + - value: 81.3 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.5 + Weights: https://download.openmmlab.com/mmdetection/v2.0/atss/atss_r101_fpn_1x_coco/atss_r101_fpn_1x_20200825-dfcadd6f.pth diff --git a/configs/autoassign/README.md b/configs/autoassign/README.md new file mode 100644 index 0000000..4d5f376 --- /dev/null +++ b/configs/autoassign/README.md @@ -0,0 +1,25 @@ +# AutoAssign: Differentiable Label Assignment for Dense Object Detection + +## Introduction + + + +``` +@article{zhu2020autoassign, + title={AutoAssign: Differentiable Label Assignment for Dense Object Detection}, + author={Zhu, Benjin and Wang, Jianfeng and Jiang, Zhengkai and Zong, Fuhang and Liu, Songtao and Li, Zeming and Sun, Jian}, + journal={arXiv preprint arXiv:2007.03496}, + year={2020} +} +``` + +## Results and Models + +| Backbone | Style | Lr schd | Mem (GB) | box AP | Config | Download | +|:---------:|:-------:|:-------:|:--------:|:------:|:------:|:--------:| +| R-50 | caffe | 1x | 4.08 | 40.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/autoassign/autoassign_r50_fpn_8x2_1x_coco.py) |[model](https://download.openmmlab.com/mmdetection/v2.0/autoassign/auto_assign_r50_fpn_1x_coco/auto_assign_r50_fpn_1x_coco_20210413_115540-5e17991f.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/autoassign/auto_assign_r50_fpn_1x_coco/auto_assign_r50_fpn_1x_coco_20210413_115540-5e17991f.log.json) | + +**Note**: + +1. We find that the performance is unstable with 1x setting and may fluctuate by about 0.3 mAP. mAP 40.3 ~ 40.6 is acceptable. 
Such fluctuation can also be found in the original implementation. +2. You can get more stable results (~ mAP 40.6) with a 13-epoch schedule in which the learning rate is divided by 10 at the 10th and 13th epochs. diff --git a/configs/autoassign/autoassign_r50_fpn_8x2_1x_coco.py b/configs/autoassign/autoassign_r50_fpn_8x2_1x_coco.py new file mode 100644 index 0000000..db548dc --- /dev/null +++ b/configs/autoassign/autoassign_r50_fpn_8x2_1x_coco.py @@ -0,0 +1,85 @@ +# We follow the original implementation which +# adopts the Caffe pre-trained backbone. +_base_ = [ + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +model = dict( + type='AutoAssign', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=False), + norm_eval=True, + style='caffe', + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://detectron2/resnet50_caffe')), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + start_level=1, + add_extra_convs=True, + num_outs=5, + relu_before_extra_convs=True, + init_cfg=dict(type='Caffe2Xavier', layer='Conv2d')), + bbox_head=dict( + type='AutoAssignHead', + num_classes=80, + in_channels=256, + stacked_convs=4, + feat_channels=256, + strides=[8, 16, 32, 64, 128], + loss_bbox=dict(type='GIoULoss', loss_weight=5.0)), + train_cfg=None, + test_cfg=dict( + nms_pre=1000, + min_bbox_size=0, + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.6), + max_per_img=100)) +img_norm_cfg = dict( + mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + 
dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +# optimizer +optimizer = dict(lr=0.01, paramwise_cfg=dict(norm_decay_mult=0.)) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=1000, + warmup_ratio=1.0 / 1000, + step=[8, 11]) +total_epochs = 12 diff --git a/configs/autoassign/metafile.yml b/configs/autoassign/metafile.yml new file mode 100644 index 0000000..f1e9051 --- /dev/null +++ b/configs/autoassign/metafile.yml @@ -0,0 +1,33 @@ +Collections: + - Name: AutoAssign + Metadata: + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - AutoAssign + - FPN + - ResNet + Paper: + URL: https://arxiv.org/abs/2007.03496 + Title: 'AutoAssign: Differentiable Label Assignment for Dense Object Detection' + README: configs/autoassign/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.12.0/mmdet/models/detectors/autoassign.py#L6 + Version: v2.12.0 + +Models: + - Name: autoassign_r50_fpn_8x2_1x_coco + In Collection: AutoAssign + Config: configs/autoassign/autoassign_r50_fpn_8x2_1x_coco.py + Metadata: + Training Memory (GB): 4.08 + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 40.4 + Weights: https://download.openmmlab.com/mmdetection/v2.0/autoassign/auto_assign_r50_fpn_1x_coco/auto_assign_r50_fpn_1x_coco_20210413_115540-5e17991f.pth diff --git 
a/configs/carafe/README.md b/configs/carafe/README.md new file mode 100644 index 0000000..768cb98 --- /dev/null +++ b/configs/carafe/README.md @@ -0,0 +1,32 @@ +# CARAFE: Content-Aware ReAssembly of FEatures + +## Introduction + + + +We provide config files to reproduce the object detection & instance segmentation results in the ICCV 2019 Oral paper for [CARAFE: Content-Aware ReAssembly of FEatures](https://arxiv.org/abs/1905.02188). + +``` +@inproceedings{Wang_2019_ICCV, + title = {CARAFE: Content-Aware ReAssembly of FEatures}, + author = {Wang, Jiaqi and Chen, Kai and Xu, Rui and Liu, Ziwei and Loy, Chen Change and Lin, Dahua}, + booktitle = {The IEEE International Conference on Computer Vision (ICCV)}, + month = {October}, + year = {2019} +} +``` + +## Results and Models + +The results on COCO 2017 val are shown in the table below. + +| Method | Backbone | Style | Lr schd | Test Proposal Num | Inf time (fps) | Box AP | Mask AP | Config | Download | +|:--------------------:|:--------:|:-------:|:-------:|:-----------------:|:--------------:|:------:|:-------:|:------:|:--------:| +| Faster R-CNN w/ CARAFE | R-50-FPN | pytorch | 1x | 1000 | 16.5 | 38.6 | 38.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/carafe/faster_rcnn_r50_fpn_carafe_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/carafe/faster_rcnn_r50_fpn_carafe_1x_coco/faster_rcnn_r50_fpn_carafe_1x_coco_bbox_mAP-0.386_20200504_175733-385a75b7.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/carafe/faster_rcnn_r50_fpn_carafe_1x_coco/faster_rcnn_r50_fpn_carafe_1x_coco_20200504_175733.log.json) | +| - | - | - | - | 2000 | | | | | +| Mask R-CNN w/ CARAFE | R-50-FPN | pytorch | 1x | 1000 | 14.0 | 39.3 | 35.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/carafe/mask_rcnn_r50_fpn_carafe_1x_coco.py) | 
[model](https://download.openmmlab.com/mmdetection/v2.0/carafe/mask_rcnn_r50_fpn_carafe_1x_coco/mask_rcnn_r50_fpn_carafe_1x_coco_bbox_mAP-0.393__segm_mAP-0.358_20200503_135957-8687f195.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/carafe/mask_rcnn_r50_fpn_carafe_1x_coco/mask_rcnn_r50_fpn_carafe_1x_coco_20200503_135957.log.json) | +| - | - | - | - | 2000 | | | | | + +## Implementation + +The CUDA implementation of CARAFE can be found at https://github.com/myownskyW7/CARAFE. diff --git a/configs/carafe/faster_rcnn_r50_fpn_carafe_1x_coco.py b/configs/carafe/faster_rcnn_r50_fpn_carafe_1x_coco.py new file mode 100644 index 0000000..dedac3f --- /dev/null +++ b/configs/carafe/faster_rcnn_r50_fpn_carafe_1x_coco.py @@ -0,0 +1,50 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + neck=dict( + type='FPN_CARAFE', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5, + start_level=0, + end_level=-1, + norm_cfg=None, + act_cfg=None, + order=('conv', 'norm', 'act'), + upsample_cfg=dict( + type='carafe', + up_kernel=5, + up_group=1, + encoder_kernel=3, + encoder_dilation=1, + compressed_channels=64))) +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=64), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=64), + dict(type='ImageToTensor', keys=['img']), + 
dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/carafe/mask_rcnn_r50_fpn_carafe_1x_coco.py b/configs/carafe/mask_rcnn_r50_fpn_carafe_1x_coco.py new file mode 100644 index 0000000..668c023 --- /dev/null +++ b/configs/carafe/mask_rcnn_r50_fpn_carafe_1x_coco.py @@ -0,0 +1,60 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' +model = dict( + neck=dict( + type='FPN_CARAFE', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5, + start_level=0, + end_level=-1, + norm_cfg=None, + act_cfg=None, + order=('conv', 'norm', 'act'), + upsample_cfg=dict( + type='carafe', + up_kernel=5, + up_group=1, + encoder_kernel=3, + encoder_dilation=1, + compressed_channels=64)), + roi_head=dict( + mask_head=dict( + upsample_cfg=dict( + type='carafe', + scale_factor=2, + up_kernel=5, + up_group=1, + encoder_kernel=3, + encoder_dilation=1, + compressed_channels=64)))) +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=64), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=64), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + 
val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/cascade_rcnn/README.md b/configs/cascade_rcnn/README.md new file mode 100644 index 0000000..7d18ab1 --- /dev/null +++ b/configs/cascade_rcnn/README.md @@ -0,0 +1,69 @@ +# Cascade R-CNN: High Quality Object Detection and Instance Segmentation + +## Introduction + + + +```latex +@article{Cai_2019, + title={Cascade R-CNN: High Quality Object Detection and Instance Segmentation}, + ISSN={1939-3539}, + url={http://dx.doi.org/10.1109/tpami.2019.2956516}, + DOI={10.1109/tpami.2019.2956516}, + journal={IEEE Transactions on Pattern Analysis and Machine Intelligence}, + publisher={Institute of Electrical and Electronics Engineers (IEEE)}, + author={Cai, Zhaowei and Vasconcelos, Nuno}, + year={2019}, + pages={1–1} +} +``` + +## Results and models + +### Cascade R-CNN + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :------------: | :----: |:------:|:--------:| +| R-50-FPN | caffe | 1x | 4.2 | | 40.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_rcnn_r50_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r50_caffe_fpn_1x_coco/cascade_rcnn_r50_caffe_fpn_1x_coco_bbox_mAP-0.404_20200504_174853-b857be87.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r50_caffe_fpn_1x_coco/cascade_rcnn_r50_caffe_fpn_1x_coco_20200504_174853.log.json) | +| R-50-FPN | pytorch | 1x | 4.4 | 16.1 | 40.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco/cascade_rcnn_r50_fpn_1x_coco_20200316-3dc56deb.pth) | 
[log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco/cascade_rcnn_r50_fpn_1x_coco_20200316_214748.log.json) | +| R-50-FPN | pytorch | 20e | - | - | 41.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_rcnn_r50_fpn_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r50_fpn_20e_coco/cascade_rcnn_r50_fpn_20e_coco_bbox_mAP-0.41_20200504_175131-e9872a90.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r50_fpn_20e_coco/cascade_rcnn_r50_fpn_20e_coco_20200504_175131.log.json) | +| R-101-FPN | caffe | 1x | 6.2 | | 42.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_rcnn_r101_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r101_caffe_fpn_1x_coco/cascade_rcnn_r101_caffe_fpn_1x_coco_bbox_mAP-0.423_20200504_175649-cab8dbd5.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r101_caffe_fpn_1x_coco/cascade_rcnn_r101_caffe_fpn_1x_coco_20200504_175649.log.json) | +| R-101-FPN | pytorch | 1x | 6.4 | 13.5 | 42.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_rcnn_r101_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r101_fpn_1x_coco/cascade_rcnn_r101_fpn_1x_coco_20200317-0b6a2fbf.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r101_fpn_1x_coco/cascade_rcnn_r101_fpn_1x_coco_20200317_101744.log.json) | +| R-101-FPN | pytorch | 20e | - | - | 42.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_rcnn_r101_fpn_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r101_fpn_20e_coco/cascade_rcnn_r101_fpn_20e_coco_bbox_mAP-0.425_20200504_231812-5057dcc5.pth) | 
[log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r101_fpn_20e_coco/cascade_rcnn_r101_fpn_20e_coco_20200504_231812.log.json) | +| X-101-32x4d-FPN | pytorch | 1x | 7.6 | 10.9 | 43.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_1x_coco/cascade_rcnn_x101_32x4d_fpn_1x_coco_20200316-95c2deb6.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_1x_coco/cascade_rcnn_x101_32x4d_fpn_1x_coco_20200316_055608.log.json) | +| X-101-32x4d-FPN | pytorch | 20e | 7.6 | | 43.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_20e_coco/cascade_rcnn_x101_32x4d_fpn_20e_coco_20200906_134608-9ae0a720.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_20e_coco/cascade_rcnn_x101_32x4d_fpn_20e_coco_20200906_134608.log.json) | +| X-101-64x4d-FPN | pytorch | 1x | 10.7 | | 44.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_1x_coco/cascade_rcnn_x101_64x4d_fpn_1x_coco_20200515_075702-43ce6a30.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_1x_coco/cascade_rcnn_x101_64x4d_fpn_1x_coco_20200515_075702.log.json) | +| X-101-64x4d-FPN | pytorch | 20e | 10.7 | | 44.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py) | 
[model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco/cascade_rcnn_x101_64x4d_fpn_20e_coco_20200509_224357-051557b1.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco/cascade_rcnn_x101_64x4d_fpn_20e_coco_20200509_224357.log.json)| + +### Cascade Mask R-CNN + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :------: | :--------: | +| R-50-FPN | caffe | 1x | 5.9 | | 41.2 | 36.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_r50_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r50_caffe_fpn_1x_coco/cascade_mask_rcnn_r50_caffe_fpn_1x_coco_bbox_mAP-0.412__segm_mAP-0.36_20200504_174659-5004b251.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r50_caffe_fpn_1x_coco/cascade_mask_rcnn_r50_caffe_fpn_1x_coco_20200504_174659.log.json) | +| R-50-FPN | pytorch | 1x | 6.0 | 11.2 | 41.2 | 35.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco/cascade_mask_rcnn_r50_fpn_1x_coco_20200203-9d4dcb24.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco/cascade_mask_rcnn_r50_fpn_1x_coco_20200203_170449.log.json) | +| R-50-FPN | pytorch | 20e | - | - | 41.9 | 36.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_20e_coco.py) | 
[model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r50_fpn_20e_coco/cascade_mask_rcnn_r50_fpn_20e_coco_bbox_mAP-0.419__segm_mAP-0.365_20200504_174711-4af8e66e.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r50_fpn_20e_coco/cascade_mask_rcnn_r50_fpn_20e_coco_20200504_174711.log.json)| +| R-101-FPN | caffe | 1x | 7.8 | | 43.2 | 37.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_r101_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r101_caffe_fpn_1x_coco/cascade_mask_rcnn_r101_caffe_fpn_1x_coco_bbox_mAP-0.432__segm_mAP-0.376_20200504_174813-5c1e9599.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r101_caffe_fpn_1x_coco/cascade_mask_rcnn_r101_caffe_fpn_1x_coco_20200504_174813.log.json)| +| R-101-FPN | pytorch | 1x | 7.9 | 9.8 | 42.9 | 37.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_r101_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r101_fpn_1x_coco/cascade_mask_rcnn_r101_fpn_1x_coco_20200203-befdf6ee.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r101_fpn_1x_coco/cascade_mask_rcnn_r101_fpn_1x_coco_20200203_092521.log.json) | +| R-101-FPN | pytorch | 20e | - | - | 43.4 | 37.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_r101_fpn_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r101_fpn_20e_coco/cascade_mask_rcnn_r101_fpn_20e_coco_bbox_mAP-0.434__segm_mAP-0.378_20200504_174836-005947da.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r101_fpn_20e_coco/cascade_mask_rcnn_r101_fpn_20e_coco_20200504_174836.log.json)| +| 
X-101-32x4d-FPN | pytorch | 1x | 9.2 | 8.6 | 44.3 | 38.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco_20200201-0f411b1f.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco_20200201_052416.log.json) | +| X-101-32x4d-FPN | pytorch | 20e | 9.2 | - | 45.0 | 39.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_20e_coco/cascade_mask_rcnn_x101_32x4d_fpn_20e_coco_20200528_083917-ed1f4751.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_20e_coco/cascade_mask_rcnn_x101_32x4d_fpn_20e_coco_20200528_083917.log.json) | +| X-101-64x4d-FPN | pytorch | 1x | 12.2 | 6.7 | 45.3 | 39.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_1x_coco/cascade_mask_rcnn_x101_64x4d_fpn_1x_coco_20200203-9a2db89d.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_1x_coco/cascade_mask_rcnn_x101_64x4d_fpn_1x_coco_20200203_044059.log.json) | +| X-101-64x4d-FPN | pytorch | 20e | 12.2 | | 45.6 |39.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_20e_coco.py) | 
[model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_20e_coco/cascade_mask_rcnn_x101_64x4d_fpn_20e_coco_20200512_161033-bdb5126a.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_20e_coco/cascade_mask_rcnn_x101_64x4d_fpn_20e_coco_20200512_161033.log.json)| + +**Notes:** + +- The `20e` schedule in Cascade (Mask) R-CNN indicates decreasing the lr at 16 and 19 epochs, with a total of 20 epochs. + +## Pre-trained Models + +We also train some models with longer schedules and multi-scale training for Cascade Mask R-CNN. The users could finetune them for downstream tasks. + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :----------------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :------: | :--------: | +| R-50-FPN | caffe | 3x | 5.7 | | 44.0 | 38.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_r50_caffe_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r50_caffe_fpn_mstrain_3x_coco/cascade_mask_rcnn_r50_caffe_fpn_mstrain_3x_coco_20210707_002651-6e29b3a6.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r50_caffe_fpn_mstrain_3x_coco/cascade_mask_rcnn_r50_caffe_fpn_mstrain_3x_coco_20210707_002651.log.json) +| R-50-FPN | pytorch| 3x | 5.9 | | 44.3 | 38.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r50_fpn_mstrain_3x_coco/cascade_mask_rcnn_r50_fpn_mstrain_3x_coco_20210628_164719-5bdc3824.pth) | 
[log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r50_fpn_mstrain_3x_coco/cascade_mask_rcnn_r50_fpn_mstrain_3x_coco_20210628_164719.log.json) +| R-101-FPN | caffe | 3x | 7.7 | | 45.4 | 39.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_r101_caffe_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r101_caffe_fpn_mstrain_3x_coco/cascade_mask_rcnn_r101_caffe_fpn_mstrain_3x_coco_20210707_002620-a5bd2389.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r101_caffe_fpn_mstrain_3x_coco/cascade_mask_rcnn_r101_caffe_fpn_mstrain_3x_coco_20210707_002620.log.json) +| R-101-FPN | pytorch| 3x | 7.8 | | 45.5 | 39.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_r101_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r101_fpn_mstrain_3x_coco/cascade_mask_rcnn_r101_fpn_mstrain_3x_coco_20210628_165236-51a2d363.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r101_fpn_mstrain_3x_coco/cascade_mask_rcnn_r101_fpn_mstrain_3x_coco_20210628_165236.log.json) +| X-101-32x4d-FPN | pytorch| 3x | 9.0 | | 46.3 | 40.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_mstrain_3x_coco/cascade_mask_rcnn_x101_32x4d_fpn_mstrain_3x_coco_20210706_225234-40773067.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_mstrain_3x_coco/cascade_mask_rcnn_x101_32x4d_fpn_mstrain_3x_coco_20210706_225234.log.json) +| X-101-32x8d-FPN | pytorch| 3x | 12.1 | | 46.1 | 39.9 | 
[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_x101_32x8d_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_32x8d_fpn_mstrain_3x_coco/cascade_mask_rcnn_x101_32x8d_fpn_mstrain_3x_coco_20210719_180640-9ff7e76f.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_32x8d_fpn_mstrain_3x_coco/cascade_mask_rcnn_x101_32x8d_fpn_mstrain_3x_coco_20210719_180640.log.json) +| X-101-64x4d-FPN | pytorch| 3x | 12.0 | | 46.6 | 40.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_mstrain_3x_coco/cascade_mask_rcnn_x101_64x4d_fpn_mstrain_3x_coco_20210719_210311-d3e64ba0.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_mstrain_3x_coco/cascade_mask_rcnn_x101_64x4d_fpn_mstrain_3x_coco_20210719_210311.log.json) diff --git a/configs/cascade_rcnn/cascade_mask_rcnn_r101_caffe_fpn_1x_coco.py b/configs/cascade_rcnn/cascade_mask_rcnn_r101_caffe_fpn_1x_coco.py new file mode 100644 index 0000000..5ee6231 --- /dev/null +++ b/configs/cascade_rcnn/cascade_mask_rcnn_r101_caffe_fpn_1x_coco.py @@ -0,0 +1,7 @@ +_base_ = './cascade_mask_rcnn_r50_caffe_fpn_1x_coco.py' +model = dict( + backbone=dict( + depth=101, + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://detectron2/resnet101_caffe'))) diff --git a/configs/cascade_rcnn/cascade_mask_rcnn_r101_caffe_fpn_mstrain_3x_coco.py b/configs/cascade_rcnn/cascade_mask_rcnn_r101_caffe_fpn_mstrain_3x_coco.py new file mode 100644 index 0000000..1df87fc --- /dev/null +++ b/configs/cascade_rcnn/cascade_mask_rcnn_r101_caffe_fpn_mstrain_3x_coco.py @@ -0,0 +1,7 @@ +_base_ = './cascade_mask_rcnn_r50_caffe_fpn_mstrain_3x_coco.py' +model = dict( + 
backbone=dict( + depth=101, + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://detectron2/resnet101_caffe'))) diff --git a/configs/cascade_rcnn/cascade_mask_rcnn_r101_fpn_1x_coco.py b/configs/cascade_rcnn/cascade_mask_rcnn_r101_fpn_1x_coco.py new file mode 100644 index 0000000..f59c155 --- /dev/null +++ b/configs/cascade_rcnn/cascade_mask_rcnn_r101_fpn_1x_coco.py @@ -0,0 +1,6 @@ +_base_ = './cascade_mask_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + depth=101, + init_cfg=dict(type='Pretrained', + checkpoint='torchvision://resnet101'))) diff --git a/configs/cascade_rcnn/cascade_mask_rcnn_r101_fpn_20e_coco.py b/configs/cascade_rcnn/cascade_mask_rcnn_r101_fpn_20e_coco.py new file mode 100644 index 0000000..45ab7ed --- /dev/null +++ b/configs/cascade_rcnn/cascade_mask_rcnn_r101_fpn_20e_coco.py @@ -0,0 +1,6 @@ +_base_ = './cascade_mask_rcnn_r50_fpn_20e_coco.py' +model = dict( + backbone=dict( + depth=101, + init_cfg=dict(type='Pretrained', + checkpoint='torchvision://resnet101'))) diff --git a/configs/cascade_rcnn/cascade_mask_rcnn_r101_fpn_mstrain_3x_coco.py b/configs/cascade_rcnn/cascade_mask_rcnn_r101_fpn_mstrain_3x_coco.py new file mode 100644 index 0000000..1b20f16 --- /dev/null +++ b/configs/cascade_rcnn/cascade_mask_rcnn_r101_fpn_mstrain_3x_coco.py @@ -0,0 +1,6 @@ +_base_ = './cascade_mask_rcnn_r50_fpn_mstrain_3x_coco.py' +model = dict( + backbone=dict( + depth=101, + init_cfg=dict(type='Pretrained', + checkpoint='torchvision://resnet101'))) diff --git a/configs/cascade_rcnn/cascade_mask_rcnn_r50_caffe_fpn_1x_coco.py b/configs/cascade_rcnn/cascade_mask_rcnn_r50_caffe_fpn_1x_coco.py new file mode 100644 index 0000000..12d37ef --- /dev/null +++ b/configs/cascade_rcnn/cascade_mask_rcnn_r50_caffe_fpn_1x_coco.py @@ -0,0 +1,41 @@ +_base_ = ['./cascade_mask_rcnn_r50_fpn_1x_coco.py'] + +model = dict( + backbone=dict( + norm_cfg=dict(requires_grad=False), + norm_eval=True, + style='caffe', + init_cfg=dict( + type='Pretrained', + 
checkpoint='open-mmlab://detectron2/resnet50_caffe'))) +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/cascade_rcnn/cascade_mask_rcnn_r50_caffe_fpn_mstrain_3x_coco.py b/configs/cascade_rcnn/cascade_mask_rcnn_r50_caffe_fpn_mstrain_3x_coco.py new file mode 100644 index 0000000..9fb817e --- /dev/null +++ b/configs/cascade_rcnn/cascade_mask_rcnn_r50_caffe_fpn_mstrain_3x_coco.py @@ -0,0 +1,49 @@ +_base_ = ['./cascade_mask_rcnn_r50_fpn_mstrain_3x_coco.py'] +model = dict( + backbone=dict( + norm_cfg=dict(requires_grad=False), + norm_eval=True, + style='caffe', + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://detectron2/resnet50_caffe'))) + +# use caffe img_norm +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) +# In mstrain 3x config, img_scale=[(1333, 640), (1333, 800)], +# multiscale_mode='range' +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict( + type='Resize', + 
img_scale=[(1333, 640), (1333, 800)], + multiscale_mode='range', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] + +data = dict( + train=dict(dataset=dict(pipeline=train_pipeline)), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py b/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py new file mode 100644 index 0000000..49ab539 --- /dev/null +++ b/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py @@ -0,0 +1,5 @@ +_base_ = [ + '../_base_/models/cascade_mask_rcnn_r50_fpn.py', + '../_base_/datasets/coco_instance.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] diff --git a/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_20e_coco.py b/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_20e_coco.py new file mode 100644 index 0000000..1296dc4 --- /dev/null +++ b/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_20e_coco.py @@ -0,0 +1,5 @@ +_base_ = [ + '../_base_/models/cascade_mask_rcnn_r50_fpn.py', + '../_base_/datasets/coco_instance.py', + '../_base_/schedules/schedule_20e.py', '../_base_/default_runtime.py' +] diff --git a/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_mstrain_3x_coco.py b/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_mstrain_3x_coco.py new file mode 100644 index 0000000..ed0c6d1 --- /dev/null +++ 
b/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_mstrain_3x_coco.py @@ -0,0 +1,4 @@ +_base_ = [ + '../common/mstrain_3x_coco_instance.py', + '../_base_/models/cascade_mask_rcnn_r50_fpn.py' +] diff --git a/configs/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco.py b/configs/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco.py new file mode 100644 index 0000000..06cbbe7 --- /dev/null +++ b/configs/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco.py @@ -0,0 +1,14 @@ +_base_ = './cascade_mask_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d'))) diff --git a/configs/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_20e_coco.py b/configs/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_20e_coco.py new file mode 100644 index 0000000..4e35236 --- /dev/null +++ b/configs/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_20e_coco.py @@ -0,0 +1,14 @@ +_base_ = './cascade_mask_rcnn_r50_fpn_20e_coco.py' +model = dict( + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d'))) diff --git a/configs/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_mstrain_3x_coco.py b/configs/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_mstrain_3x_coco.py new file mode 100644 index 0000000..7d37d17 --- /dev/null +++ b/configs/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_mstrain_3x_coco.py @@ -0,0 +1,14 @@ +_base_ = './cascade_mask_rcnn_r50_fpn_mstrain_3x_coco.py' +model = dict( + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 
3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d'))) diff --git a/configs/cascade_rcnn/cascade_mask_rcnn_x101_32x8d_fpn_mstrain_3x_coco.py b/configs/cascade_rcnn/cascade_mask_rcnn_x101_32x8d_fpn_mstrain_3x_coco.py new file mode 100644 index 0000000..eeec1aa --- /dev/null +++ b/configs/cascade_rcnn/cascade_mask_rcnn_x101_32x8d_fpn_mstrain_3x_coco.py @@ -0,0 +1,60 @@ +_base_ = './cascade_mask_rcnn_r50_fpn_mstrain_3x_coco.py' + +model = dict( + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=8, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=False), + style='pytorch', + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://detectron2/resnext101_32x8d'))) + +# ResNeXt-101-32x8d model trained with Caffe2 at FB, +# so the mean and std need to be changed. +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], + std=[57.375, 57.120, 58.395], + to_rgb=False) + +# In mstrain 3x config, img_scale=[(1333, 640), (1333, 800)], +# multiscale_mode='range' +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 800)], + multiscale_mode='range', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', 
keys=['img']), + ]) +] + +data = dict( + train=dict(dataset=dict(pipeline=train_pipeline)), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_1x_coco.py b/configs/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_1x_coco.py new file mode 100644 index 0000000..7dbef5f --- /dev/null +++ b/configs/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_1x_coco.py @@ -0,0 +1,14 @@ +_base_ = './cascade_mask_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d'))) diff --git a/configs/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_20e_coco.py b/configs/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_20e_coco.py new file mode 100644 index 0000000..579b1ac --- /dev/null +++ b/configs/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_20e_coco.py @@ -0,0 +1,14 @@ +_base_ = './cascade_mask_rcnn_r50_fpn_20e_coco.py' +model = dict( + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d'))) diff --git a/configs/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_mstrain_3x_coco.py b/configs/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_mstrain_3x_coco.py new file mode 100644 index 0000000..ed6cf4b --- /dev/null +++ b/configs/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_mstrain_3x_coco.py @@ -0,0 +1,14 @@ +_base_ = './cascade_mask_rcnn_r50_fpn_mstrain_3x_coco.py' +model = dict( + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + 
norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d'))) diff --git a/configs/cascade_rcnn/cascade_rcnn_r101_caffe_fpn_1x_coco.py b/configs/cascade_rcnn/cascade_rcnn_r101_caffe_fpn_1x_coco.py new file mode 100644 index 0000000..1e90f4b --- /dev/null +++ b/configs/cascade_rcnn/cascade_rcnn_r101_caffe_fpn_1x_coco.py @@ -0,0 +1,7 @@ +_base_ = './cascade_rcnn_r50_caffe_fpn_1x_coco.py' +model = dict( + backbone=dict( + depth=101, + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://detectron2/resnet101_caffe'))) diff --git a/configs/cascade_rcnn/cascade_rcnn_r101_fpn_1x_coco.py b/configs/cascade_rcnn/cascade_rcnn_r101_fpn_1x_coco.py new file mode 100644 index 0000000..5c07776 --- /dev/null +++ b/configs/cascade_rcnn/cascade_rcnn_r101_fpn_1x_coco.py @@ -0,0 +1,6 @@ +_base_ = './cascade_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + depth=101, + init_cfg=dict(type='Pretrained', + checkpoint='torchvision://resnet101'))) diff --git a/configs/cascade_rcnn/cascade_rcnn_r101_fpn_20e_coco.py b/configs/cascade_rcnn/cascade_rcnn_r101_fpn_20e_coco.py new file mode 100644 index 0000000..b1719c2 --- /dev/null +++ b/configs/cascade_rcnn/cascade_rcnn_r101_fpn_20e_coco.py @@ -0,0 +1,6 @@ +_base_ = './cascade_rcnn_r50_fpn_20e_coco.py' +model = dict( + backbone=dict( + depth=101, + init_cfg=dict(type='Pretrained', + checkpoint='torchvision://resnet101'))) diff --git a/configs/cascade_rcnn/cascade_rcnn_r50_caffe_fpn_1x_coco.py b/configs/cascade_rcnn/cascade_rcnn_r50_caffe_fpn_1x_coco.py new file mode 100644 index 0000000..696bcfb --- /dev/null +++ b/configs/cascade_rcnn/cascade_rcnn_r50_caffe_fpn_1x_coco.py @@ -0,0 +1,42 @@ +_base_ = './cascade_rcnn_r50_fpn_1x_coco.py' + +model = dict( + backbone=dict( + norm_cfg=dict(requires_grad=False), + style='caffe', + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://detectron2/resnet50_caffe'))) + +# use caffe img_norm 
+img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.py b/configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.py new file mode 100644 index 0000000..87e21fb --- /dev/null +++ b/configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.py @@ -0,0 +1,5 @@ +_base_ = [ + '../_base_/models/cascade_rcnn_r50_fpn.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] diff --git a/configs/cascade_rcnn/cascade_rcnn_r50_fpn_20e_coco.py b/configs/cascade_rcnn/cascade_rcnn_r50_fpn_20e_coco.py new file mode 100644 index 0000000..6f886e1 --- /dev/null +++ b/configs/cascade_rcnn/cascade_rcnn_r50_fpn_20e_coco.py @@ -0,0 +1,4 @@ +_base_ = './cascade_rcnn_r50_fpn_1x_coco.py' +# learning policy +lr_config = dict(step=[16, 19]) +runner = dict(type='EpochBasedRunner', max_epochs=20) diff --git a/configs/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_1x_coco.py b/configs/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_1x_coco.py new file mode 100644 index 
0000000..5ac02c1 --- /dev/null +++ b/configs/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_1x_coco.py @@ -0,0 +1,14 @@ +_base_ = './cascade_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d'))) diff --git a/configs/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_20e_coco.py b/configs/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_20e_coco.py new file mode 100644 index 0000000..486e45e --- /dev/null +++ b/configs/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_20e_coco.py @@ -0,0 +1,14 @@ +_base_ = './cascade_rcnn_r50_fpn_20e_coco.py' +model = dict( + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d'))) diff --git a/configs/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_1x_coco.py b/configs/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_1x_coco.py new file mode 100644 index 0000000..78229f0 --- /dev/null +++ b/configs/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_1x_coco.py @@ -0,0 +1,15 @@ +_base_ = './cascade_rcnn_r50_fpn_1x_coco.py' +model = dict( + type='CascadeRCNN', + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d'))) diff --git a/configs/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py b/configs/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py new file mode 100644 index 0000000..58812de --- /dev/null +++ 
b/configs/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py @@ -0,0 +1,15 @@ +_base_ = './cascade_rcnn_r50_fpn_20e_coco.py' +model = dict( + type='CascadeRCNN', + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d'))) diff --git a/configs/cascade_rcnn/metafile.yml b/configs/cascade_rcnn/metafile.yml new file mode 100644 index 0000000..1007f2e --- /dev/null +++ b/configs/cascade_rcnn/metafile.yml @@ -0,0 +1,525 @@ +Collections: + - Name: Cascade R-CNN + Metadata: + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - Cascade R-CNN + - FPN + - RPN + - ResNet + - RoIAlign + Paper: + URL: http://dx.doi.org/10.1109/tpami.2019.2956516 + Title: 'Cascade R-CNN: Delving into High Quality Object Detection' + README: configs/cascade_rcnn/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/detectors/cascade_rcnn.py#L6 + Version: v2.0.0 + +Models: + - Name: cascade_rcnn_r50_caffe_fpn_1x_coco + In Collection: Cascade R-CNN + Config: configs/cascade_rcnn/cascade_rcnn_r50_caffe_fpn_1x_coco.py + Metadata: + Training Memory (GB): 4.2 + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 40.4 + Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r50_caffe_fpn_1x_coco/cascade_rcnn_r50_caffe_fpn_1x_coco_bbox_mAP-0.404_20200504_174853-b857be87.pth + + - Name: cascade_rcnn_r50_fpn_1x_coco + In Collection: Cascade R-CNN + Config: configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.py + Metadata: + Training Memory (GB): 4.4 + inference time (ms/im): + - value: 62.11 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + 
Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 40.3 + Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco/cascade_rcnn_r50_fpn_1x_coco_20200316-3dc56deb.pth + + - Name: cascade_rcnn_r50_fpn_20e_coco + In Collection: Cascade R-CNN + Config: configs/cascade_rcnn/cascade_rcnn_r50_fpn_20e_coco.py + Metadata: + Training Memory (GB): 4.4 + inference time (ms/im): + - value: 62.11 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 20 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.0 + Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r50_fpn_20e_coco/cascade_rcnn_r50_fpn_20e_coco_bbox_mAP-0.41_20200504_175131-e9872a90.pth + + - Name: cascade_rcnn_r101_caffe_fpn_1x_coco + In Collection: Cascade R-CNN + Config: configs/cascade_rcnn/cascade_rcnn_r101_caffe_fpn_1x_coco.py + Metadata: + Training Memory (GB): 6.2 + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 42.3 + Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r101_caffe_fpn_1x_coco/cascade_rcnn_r101_caffe_fpn_1x_coco_bbox_mAP-0.423_20200504_175649-cab8dbd5.pth + + - Name: cascade_rcnn_r101_fpn_1x_coco + In Collection: Cascade R-CNN + Config: configs/cascade_rcnn/cascade_rcnn_r101_fpn_1x_coco.py + Metadata: + Training Memory (GB): 6.4 + inference time (ms/im): + - value: 74.07 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 42.0 + Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r101_fpn_1x_coco/cascade_rcnn_r101_fpn_1x_coco_20200317-0b6a2fbf.pth + + - Name: cascade_rcnn_r101_fpn_20e_coco + In Collection: Cascade R-CNN + Config: configs/cascade_rcnn/cascade_rcnn_r101_fpn_20e_coco.py + Metadata: + Training 
Memory (GB): 6.4 + inference time (ms/im): + - value: 74.07 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 20 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 42.5 + Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r101_fpn_20e_coco/cascade_rcnn_r101_fpn_20e_coco_bbox_mAP-0.425_20200504_231812-5057dcc5.pth + + - Name: cascade_rcnn_x101_32x4d_fpn_1x_coco + In Collection: Cascade R-CNN + Config: configs/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_1x_coco.py + Metadata: + Training Memory (GB): 7.6 + inference time (ms/im): + - value: 91.74 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 43.7 + Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_1x_coco/cascade_rcnn_x101_32x4d_fpn_1x_coco_20200316-95c2deb6.pth + + - Name: cascade_rcnn_x101_32x4d_fpn_20e_coco + In Collection: Cascade R-CNN + Config: configs/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_20e_coco.py + Metadata: + Training Memory (GB): 7.6 + Epochs: 20 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 43.7 + Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_20e_coco/cascade_rcnn_x101_32x4d_fpn_20e_coco_20200906_134608-9ae0a720.pth + + - Name: cascade_rcnn_x101_64x4d_fpn_1x_coco + In Collection: Cascade R-CNN + Config: configs/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_1x_coco.py + Metadata: + Training Memory (GB): 10.7 + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 44.7 + Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_1x_coco/cascade_rcnn_x101_64x4d_fpn_1x_coco_20200515_075702-43ce6a30.pth + + - Name: cascade_rcnn_x101_64x4d_fpn_20e_coco + In Collection: Cascade R-CNN 
+ Config: configs/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py + Metadata: + Training Memory (GB): 10.7 + Epochs: 20 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 44.5 + Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco/cascade_rcnn_x101_64x4d_fpn_20e_coco_20200509_224357-051557b1.pth + + - Name: cascade_mask_rcnn_r50_caffe_fpn_1x_coco + In Collection: Cascade R-CNN + Config: configs/cascade_rcnn/cascade_mask_rcnn_r50_caffe_fpn_1x_coco.py + Metadata: + Training Memory (GB): 5.9 + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.2 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 36.0 + Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r50_caffe_fpn_1x_coco/cascade_mask_rcnn_r50_caffe_fpn_1x_coco_bbox_mAP-0.412__segm_mAP-0.36_20200504_174659-5004b251.pth + + - Name: cascade_mask_rcnn_r50_fpn_1x_coco + In Collection: Cascade R-CNN + Config: configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py + Metadata: + Training Memory (GB): 6.0 + inference time (ms/im): + - value: 89.29 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.2 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 35.9 + Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco/cascade_mask_rcnn_r50_fpn_1x_coco_20200203-9d4dcb24.pth + + - Name: cascade_mask_rcnn_r50_fpn_20e_coco + In Collection: Cascade R-CNN + Config: configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_20e_coco.py + Metadata: + Training Memory (GB): 6.0 + inference time (ms/im): + - value: 89.29 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 20 + Results: + - Task: Object Detection + Dataset: COCO + 
Metrics: + box AP: 41.9 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 36.5 + Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r50_fpn_20e_coco/cascade_mask_rcnn_r50_fpn_20e_coco_bbox_mAP-0.419__segm_mAP-0.365_20200504_174711-4af8e66e.pth + + - Name: cascade_mask_rcnn_r101_caffe_fpn_1x_coco + In Collection: Cascade R-CNN + Config: configs/cascade_rcnn/cascade_mask_rcnn_r101_caffe_fpn_1x_coco.py + Metadata: + Training Memory (GB): 7.8 + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 43.2 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 37.6 + Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r101_caffe_fpn_1x_coco/cascade_mask_rcnn_r101_caffe_fpn_1x_coco_bbox_mAP-0.432__segm_mAP-0.376_20200504_174813-5c1e9599.pth + + - Name: cascade_mask_rcnn_r101_fpn_1x_coco + In Collection: Cascade R-CNN + Config: configs/cascade_rcnn/cascade_mask_rcnn_r101_fpn_1x_coco.py + Metadata: + Training Memory (GB): 7.9 + inference time (ms/im): + - value: 102.04 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 42.9 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 37.3 + Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r101_fpn_1x_coco/cascade_mask_rcnn_r101_fpn_1x_coco_20200203-befdf6ee.pth + + - Name: cascade_mask_rcnn_r101_fpn_20e_coco + In Collection: Cascade R-CNN + Config: configs/cascade_rcnn/cascade_mask_rcnn_r101_fpn_20e_coco.py + Metadata: + Training Memory (GB): 7.9 + inference time (ms/im): + - value: 102.04 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 20 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 43.4 + - Task: Instance Segmentation + Dataset: 
COCO + Metrics: + mask AP: 37.8 + Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r101_fpn_20e_coco/cascade_mask_rcnn_r101_fpn_20e_coco_bbox_mAP-0.434__segm_mAP-0.378_20200504_174836-005947da.pth + + - Name: cascade_mask_rcnn_x101_32x4d_fpn_1x_coco + In Collection: Cascade R-CNN + Config: configs/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco.py + Metadata: + Training Memory (GB): 9.2 + inference time (ms/im): + - value: 116.28 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 44.3 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 38.3 + Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco_20200201-0f411b1f.pth + + - Name: cascade_mask_rcnn_x101_32x4d_fpn_20e_coco + In Collection: Cascade R-CNN + Config: configs/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_20e_coco.py + Metadata: + Training Memory (GB): 9.2 + inference time (ms/im): + - value: 116.28 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 20 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 45.0 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 39.0 + Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_20e_coco/cascade_mask_rcnn_x101_32x4d_fpn_20e_coco_20200528_083917-ed1f4751.pth + + - Name: cascade_mask_rcnn_x101_64x4d_fpn_1x_coco + In Collection: Cascade R-CNN + Config: configs/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_1x_coco.py + Metadata: + Training Memory (GB): 12.2 + inference time (ms/im): + - value: 149.25 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: 
COCO + Metrics: + box AP: 45.3 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 39.2 + Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_1x_coco/cascade_mask_rcnn_x101_64x4d_fpn_1x_coco_20200203-9a2db89d.pth + + - Name: cascade_mask_rcnn_x101_64x4d_fpn_20e_coco + In Collection: Cascade R-CNN + Config: configs/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_20e_coco.py + Metadata: + Training Memory (GB): 12.2 + Epochs: 20 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 45.6 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 39.5 + Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_20e_coco/cascade_mask_rcnn_x101_64x4d_fpn_20e_coco_20200512_161033-bdb5126a.pth + + - Name: cascade_mask_rcnn_r50_caffe_fpn_mstrain_3x_coco + In Collection: Cascade R-CNN + Config: configs/cascade_rcnn/cascade_mask_rcnn_r50_caffe_fpn_mstrain_3x_coco.py + Metadata: + Training Memory (GB): 5.7 + Epochs: 36 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 44.0 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 38.1 + Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r50_caffe_fpn_mstrain_3x_coco/cascade_mask_rcnn_r50_caffe_fpn_mstrain_3x_coco_20210707_002651-6e29b3a6.pth + + - Name: cascade_mask_rcnn_r50_fpn_mstrain_3x_coco + In Collection: Cascade R-CNN + Config: configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_mstrain_3x_coco.py + Metadata: + Training Memory (GB): 5.9 + Epochs: 36 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 44.3 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 38.5 + Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r50_fpn_mstrain_3x_coco/cascade_mask_rcnn_r50_fpn_mstrain_3x_coco_20210628_164719-5bdc3824.pth + + - Name: 
cascade_mask_rcnn_r101_caffe_fpn_mstrain_3x_coco + In Collection: Cascade R-CNN + Config: configs/cascade_rcnn/cascade_mask_rcnn_r101_caffe_fpn_mstrain_3x_coco.py + Metadata: + Training Memory (GB): 7.7 + Epochs: 36 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 45.4 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 39.5 + Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r101_caffe_fpn_mstrain_3x_coco/cascade_mask_rcnn_r101_caffe_fpn_mstrain_3x_coco_20210707_002620-a5bd2389.pth + + - Name: cascade_mask_rcnn_r101_fpn_mstrain_3x_coco + In Collection: Cascade R-CNN + Config: configs/cascade_rcnn/cascade_mask_rcnn_r101_fpn_mstrain_3x_coco.py + Metadata: + Training Memory (GB): 7.8 + Epochs: 36 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 45.5 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 39.6 + Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r101_fpn_mstrain_3x_coco/cascade_mask_rcnn_r101_fpn_mstrain_3x_coco_20210628_165236-51a2d363.pth + + - Name: cascade_mask_rcnn_x101_32x4d_fpn_mstrain_3x_coco + In Collection: Cascade R-CNN + Config: configs/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_mstrain_3x_coco.py + Metadata: + Training Memory (GB): 9.0 + Epochs: 36 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 46.3 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 40.1 + Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_mstrain_3x_coco/cascade_mask_rcnn_x101_32x4d_fpn_mstrain_3x_coco_20210706_225234-40773067.pth + + - Name: cascade_mask_rcnn_x101_32x8d_fpn_mstrain_3x_coco + In Collection: Cascade R-CNN + Config: configs/cascade_rcnn/cascade_mask_rcnn_x101_32x8d_fpn_mstrain_3x_coco.py + Metadata: + Training Memory (GB): 12.1 + Epochs: 36 + Results: + - Task: Object Detection + Dataset: COCO + 
Metrics: + box AP: 46.1 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 39.9 + Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_32x8d_fpn_mstrain_3x_coco/cascade_mask_rcnn_x101_32x8d_fpn_mstrain_3x_coco_20210719_180640-9ff7e76f.pth + + - Name: cascade_mask_rcnn_x101_64x4d_fpn_mstrain_3x_coco + In Collection: Cascade R-CNN + Config: configs/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_mstrain_3x_coco.py + Metadata: + Training Memory (GB): 12.0 + Epochs: 36 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 46.6 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 40.3 + Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_mstrain_3x_coco/cascade_mask_rcnn_x101_64x4d_fpn_mstrain_3x_coco_20210719_210311-d3e64ba0.pth diff --git a/configs/cascade_rpn/README.md b/configs/cascade_rpn/README.md new file mode 100644 index 0000000..5e1e60a --- /dev/null +++ b/configs/cascade_rpn/README.md @@ -0,0 +1,29 @@ +# Cascade RPN + + + +We provide the code for reproducing experiment results of [Cascade RPN](https://arxiv.org/abs/1909.06720). 
+ +``` +@inproceedings{vu2019cascade, + title={Cascade RPN: Delving into High-Quality Region Proposal Network with Adaptive Convolution}, + author={Vu, Thang and Jang, Hyunjun and Pham, Trung X and Yoo, Chang D}, + booktitle={Conference on Neural Information Processing Systems (NeurIPS)}, + year={2019} +} +``` + +## Benchmark + +### Region proposal performance + +| Method | Backbone | Style | Mem (GB) | Train time (s/iter) | Inf time (fps) | AR 1000 | Config | Download | +|:------:|:--------:|:-----:|:--------:|:-------------------:|:--------------:|:-------:|:-------:|:--------------------------------------:| +| CRPN | R-50-FPN | caffe | - | - | - | 72.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rpn/crpn_r50_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rpn/crpn_r50_caffe_fpn_1x_coco/cascade_rpn_r50_caffe_fpn_1x_coco-7aa93cef.pth) | + +### Detection performance + +| Method | Proposal | Backbone | Style | Schedule | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Config | Download | +|:-------------:|:-----------:|:--------:|:-------:|:--------:|:--------:|:-------------------:|:--------------:|:------:|:-------:|:--------------------------------------------:| +| Fast R-CNN | Cascade RPN | R-50-FPN | caffe | 1x | - | - | - | 39.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rpn/crpn_fast_rcnn_r50_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rpn/crpn_fast_rcnn_r50_caffe_fpn_1x_coco/crpn_fast_rcnn_r50_caffe_fpn_1x_coco-cb486e66.pth) | +| Faster R-CNN | Cascade RPN | R-50-FPN | caffe | 1x | - | - | - | 40.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rpn/crpn_faster_rcnn_r50_caffe_fpn_1x_coco.py) |[model](https://download.openmmlab.com/mmdetection/v2.0/cascade_rpn/crpn_faster_rcnn_r50_caffe_fpn_1x_coco/crpn_faster_rcnn_r50_caffe_fpn_1x_coco-c8283cca.pth) | diff 
--git a/configs/cascade_rpn/crpn_fast_rcnn_r50_caffe_fpn_1x_coco.py b/configs/cascade_rpn/crpn_fast_rcnn_r50_caffe_fpn_1x_coco.py new file mode 100644 index 0000000..29f5d07 --- /dev/null +++ b/configs/cascade_rpn/crpn_fast_rcnn_r50_caffe_fpn_1x_coco.py @@ -0,0 +1,77 @@ +_base_ = '../fast_rcnn/fast_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=False), + norm_eval=True, + style='caffe', + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://detectron2/resnet50_caffe')), + roi_head=dict( + bbox_head=dict( + bbox_coder=dict(target_stds=[0.04, 0.04, 0.08, 0.08]), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.5), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))), + # model training and testing settings + train_cfg=dict( + rcnn=dict( + assigner=dict( + pos_iou_thr=0.65, neg_iou_thr=0.65, min_pos_iou=0.65), + sampler=dict(num=256))), + test_cfg=dict(rcnn=dict(score_thr=1e-3))) +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadProposals', num_max_proposals=300), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'proposals', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadProposals', num_max_proposals=300), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + 
dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='ToTensor', keys=['proposals']), + dict( + type='ToDataContainer', + fields=[dict(key='proposals', stack=False)]), + dict(type='Collect', keys=['img', 'proposals']), + ]) +] +data = dict( + train=dict( + proposal_file=data_root + + 'proposals/crpn_r50_caffe_fpn_1x_train2017.pkl', + pipeline=train_pipeline), + val=dict( + proposal_file=data_root + + 'proposals/crpn_r50_caffe_fpn_1x_val2017.pkl', + pipeline=test_pipeline), + test=dict( + proposal_file=data_root + + 'proposals/crpn_r50_caffe_fpn_1x_val2017.pkl', + pipeline=test_pipeline)) +optimizer_config = dict( + _delete_=True, grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/cascade_rpn/crpn_faster_rcnn_r50_caffe_fpn_1x_coco.py b/configs/cascade_rpn/crpn_faster_rcnn_r50_caffe_fpn_1x_coco.py new file mode 100644 index 0000000..bad86e6 --- /dev/null +++ b/configs/cascade_rpn/crpn_faster_rcnn_r50_caffe_fpn_1x_coco.py @@ -0,0 +1,92 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_caffe_fpn_1x_coco.py' +rpn_weight = 0.7 +model = dict( + rpn_head=dict( + _delete_=True, + type='CascadeRPNHead', + num_stages=2, + stages=[ + dict( + type='StageCascadeRPNHead', + in_channels=256, + feat_channels=256, + anchor_generator=dict( + type='AnchorGenerator', + scales=[8], + ratios=[1.0], + strides=[4, 8, 16, 32, 64]), + adapt_cfg=dict(type='dilation', dilation=3), + bridged_feature=True, + sampling=False, + with_cls=False, + reg_decoded_bbox=True, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=(.0, .0, .0, .0), + target_stds=(0.1, 0.1, 0.5, 0.5)), + loss_bbox=dict( + type='IoULoss', linear=True, + loss_weight=10.0 * rpn_weight)), + dict( + type='StageCascadeRPNHead', + in_channels=256, + feat_channels=256, + adapt_cfg=dict(type='offset'), + bridged_feature=False, + sampling=True, + with_cls=True, + reg_decoded_bbox=True, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=(.0, .0, .0, .0), + 
target_stds=(0.05, 0.05, 0.1, 0.1)), + loss_cls=dict( + type='CrossEntropyLoss', + use_sigmoid=True, + loss_weight=1.0 * rpn_weight), + loss_bbox=dict( + type='IoULoss', linear=True, + loss_weight=10.0 * rpn_weight)) + ]), + roi_head=dict( + bbox_head=dict( + bbox_coder=dict(target_stds=[0.04, 0.04, 0.08, 0.08]), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.5), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))), + # model training and testing settings + train_cfg=dict( + rpn=[ + dict( + assigner=dict( + type='RegionAssigner', center_ratio=0.2, ignore_ratio=0.5), + allowed_border=-1, + pos_weight=-1, + debug=False), + dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.7, + min_pos_iou=0.3, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + allowed_border=-1, + pos_weight=-1, + debug=False) + ], + rpn_proposal=dict(max_per_img=300, nms=dict(iou_threshold=0.8)), + rcnn=dict( + assigner=dict( + pos_iou_thr=0.65, neg_iou_thr=0.65, min_pos_iou=0.65), + sampler=dict(type='RandomSampler', num=256))), + test_cfg=dict( + rpn=dict(max_per_img=300, nms=dict(iou_threshold=0.8)), + rcnn=dict(score_thr=1e-3))) +optimizer_config = dict( + _delete_=True, grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/cascade_rpn/crpn_r50_caffe_fpn_1x_coco.py b/configs/cascade_rpn/crpn_r50_caffe_fpn_1x_coco.py new file mode 100644 index 0000000..5562e69 --- /dev/null +++ b/configs/cascade_rpn/crpn_r50_caffe_fpn_1x_coco.py @@ -0,0 +1,77 @@ +_base_ = '../rpn/rpn_r50_caffe_fpn_1x_coco.py' +model = dict( + rpn_head=dict( + _delete_=True, + type='CascadeRPNHead', + num_stages=2, + stages=[ + dict( + type='StageCascadeRPNHead', + in_channels=256, + feat_channels=256, + anchor_generator=dict( + type='AnchorGenerator', + scales=[8], + ratios=[1.0], + strides=[4, 8, 16, 32, 64]), + adapt_cfg=dict(type='dilation', 
dilation=3), + bridged_feature=True, + sampling=False, + with_cls=False, + reg_decoded_bbox=True, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=(.0, .0, .0, .0), + target_stds=(0.1, 0.1, 0.5, 0.5)), + loss_bbox=dict(type='IoULoss', linear=True, loss_weight=10.0)), + dict( + type='StageCascadeRPNHead', + in_channels=256, + feat_channels=256, + adapt_cfg=dict(type='offset'), + bridged_feature=False, + sampling=True, + with_cls=True, + reg_decoded_bbox=True, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=(.0, .0, .0, .0), + target_stds=(0.05, 0.05, 0.1, 0.1)), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, + loss_weight=1.0), + loss_bbox=dict(type='IoULoss', linear=True, loss_weight=10.0)) + ]), + train_cfg=dict(rpn=[ + dict( + assigner=dict( + type='RegionAssigner', center_ratio=0.2, ignore_ratio=0.5), + allowed_border=-1, + pos_weight=-1, + debug=False), + dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.7, + min_pos_iou=0.3, + ignore_iof_thr=-1, + iou_calculator=dict(type='BboxOverlaps2D')), + sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + allowed_border=-1, + pos_weight=-1, + debug=False) + ]), + test_cfg=dict( + rpn=dict( + nms_pre=2000, + max_per_img=2000, + nms=dict(type='nms', iou_threshold=0.8), + min_bbox_size=0))) +optimizer_config = dict( + _delete_=True, grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/centernet/README.md b/configs/centernet/README.md new file mode 100644 index 0000000..a1fcf3b --- /dev/null +++ b/configs/centernet/README.md @@ -0,0 +1,30 @@ +# CenterNet + +## Introduction + + + +```latex +@article{zhou2019objects, + title={Objects as Points}, + author={Zhou, Xingyi and Wang, Dequan and Kr{\"a}henb{\"u}hl, Philipp}, + booktitle={arXiv preprint arXiv:1904.07850}, + year={2019} +} +``` + +## Results and models + +| Backbone | DCN | Mem (GB) | Box AP | Flip box AP| Config | 
Download | +| :-------------: | :--------: |:----------------: | :------: | :------------: | :----: | :----: | +| ResNet-18 | N | 3.45 | 25.9 | 27.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/centernet/centernet_resnet18_140e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/centernet/centernet_resnet18_140e_coco/centernet_resnet18_140e_coco_20210705_093630-bb5b3bf7.pth) &#124; [log](https://download.openmmlab.com/mmdetection/v2.0/centernet/centernet_resnet18_140e_coco/centernet_resnet18_140e_coco_20210705_093630.log.json) | +| ResNet-18 | Y | 3.47 | 29.5 | 30.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/centernet/centernet_resnet18_dcnv2_140e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/centernet/centernet_resnet18_dcnv2_140e_coco/centernet_resnet18_dcnv2_140e_coco_20210702_155131-c8cd631f.pth) &#124; [log](https://download.openmmlab.com/mmdetection/v2.0/centernet/centernet_resnet18_dcnv2_140e_coco/centernet_resnet18_dcnv2_140e_coco_20210702_155131.log.json) | + +Note: + +- Flip box AP setting is single-scale and `flip=True`. +- Due to complex data augmentation, we find that the performance is unstable and may fluctuate by about 0.4 mAP. mAP 29.4 ~ 29.8 is acceptable in ResNet-18-DCNv2. +- Compared to the source code, we refer to [CenterNet-Better](https://github.com/FateScript/CenterNet-better), and make the following changes: + - Fix the wrong image mean and variance in image normalization to be compatible with the pre-trained backbone. + - Use SGD rather than the Adam optimizer, and add warmup and grad clip. + - Use DistributedDataParallel, like other models in MMDetection, rather than DataParallel. 
diff --git a/configs/centernet/centernet_resnet18_140e_coco.py b/configs/centernet/centernet_resnet18_140e_coco.py new file mode 100644 index 0000000..52c86a5 --- /dev/null +++ b/configs/centernet/centernet_resnet18_140e_coco.py @@ -0,0 +1,3 @@ +_base_ = './centernet_resnet18_dcnv2_140e_coco.py' + +model = dict(neck=dict(use_dcn=False)) diff --git a/configs/centernet/centernet_resnet18_dcnv2_140e_coco.py b/configs/centernet/centernet_resnet18_dcnv2_140e_coco.py new file mode 100644 index 0000000..9eb1db7 --- /dev/null +++ b/configs/centernet/centernet_resnet18_dcnv2_140e_coco.py @@ -0,0 +1,122 @@ +_base_ = [ + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] + +model = dict( + type='CenterNet', + backbone=dict( + type='ResNet', + depth=18, + norm_eval=False, + norm_cfg=dict(type='BN'), + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet18')), + neck=dict( + type='CTResNetNeck', + in_channel=512, + num_deconv_filters=(256, 128, 64), + num_deconv_kernels=(4, 4, 4), + use_dcn=True), + bbox_head=dict( + type='CenterNetHead', + num_classes=80, + in_channel=64, + feat_channel=64, + loss_center_heatmap=dict(type='GaussianFocalLoss', loss_weight=1.0), + loss_wh=dict(type='L1Loss', loss_weight=0.1), + loss_offset=dict(type='L1Loss', loss_weight=1.0)), + train_cfg=None, + test_cfg=dict(topk=100, local_maximum_kernel=3, max_per_img=100)) + +# We fixed the incorrect img_norm_cfg problem in the source code. 
+img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) + +train_pipeline = [ + dict(type='LoadImageFromFile', to_float32=True, color_type='color'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='PhotoMetricDistortion', + brightness_delta=32, + contrast_range=(0.5, 1.5), + saturation_range=(0.5, 1.5), + hue_delta=18), + dict( + type='RandomCenterCropPad', + crop_size=(512, 512), + ratios=(0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3), + mean=[0, 0, 0], + std=[1, 1, 1], + to_rgb=True, + test_pad_mode=None), + dict(type='Resize', img_scale=(512, 512), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']) +] +test_pipeline = [ + dict(type='LoadImageFromFile', to_float32=True), + dict( + type='MultiScaleFlipAug', + scale_factor=1.0, + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict( + type='RandomCenterCropPad', + ratios=None, + border=None, + mean=[0, 0, 0], + std=[1, 1, 1], + to_rgb=True, + test_mode=True, + test_pad_mode=['logical_or', 31], + test_pad_add_pix=1), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='DefaultFormatBundle'), + dict( + type='Collect', + meta_keys=('filename', 'ori_shape', 'img_shape', 'pad_shape', + 'scale_factor', 'flip', 'flip_direction', + 'img_norm_cfg', 'border'), + keys=['img']) + ]) +] + +dataset_type = 'CocoDataset' +data_root = 'data/coco/' + +# Use RepeatDataset to speed up training +data = dict( + samples_per_gpu=16, + workers_per_gpu=4, + train=dict( + _delete_=True, + type='RepeatDataset', + times=5, + dataset=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + pipeline=train_pipeline)), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) + +# optimizer +# Based on the 
default settings of modern detectors, the SGD effect is better +# than the Adam in the source code, so we use SGD default settings and +# if you use adam+lr5e-4, the map is 29.1. +optimizer_config = dict( + _delete_=True, grad_clip=dict(max_norm=35, norm_type=2)) + +# learning policy +# Based on the default settings of modern detectors, we added warmup settings. +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=1000, + warmup_ratio=1.0 / 1000, + step=[18, 24]) # the real step is [18*5, 24*5] +runner = dict(max_epochs=28) # the real epoch is 28*5=140 diff --git a/configs/centernet/metafile.yml b/configs/centernet/metafile.yml new file mode 100644 index 0000000..e86e57b --- /dev/null +++ b/configs/centernet/metafile.yml @@ -0,0 +1,46 @@ +Collections: + - Name: CenterNet + Metadata: + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x TITANXP GPUs + Architecture: + - ResNet + Paper: + URL: https://arxiv.org/abs/1904.07850 + Title: 'Objects as Points' + README: configs/centernet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.13.0/mmdet/models/detectors/centernet.py#L10 + Version: v2.13.0 + +Models: + - Name: centernet_resnet18_dcnv2_140e_coco + In Collection: CenterNet + Config: configs/centernet/centernet_resnet18_dcnv2_140e_coco.py + Metadata: + Batch Size: 128 + Training Memory (GB): 3.47 + Epochs: 140 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 29.5 + Weights: https://download.openmmlab.com/mmdetection/v2.0/centernet/centernet_resnet18_dcnv2_140e_coco/centernet_resnet18_dcnv2_140e_coco_20210702_155131-c8cd631f.pth + + - Name: centernet_resnet18_140e_coco + In Collection: CenterNet + Config: configs/centernet/centernet_resnet18_140e_coco.py + Metadata: + Batch Size: 128 + Training Memory (GB): 3.45 + Epochs: 140 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 25.9 + Weights: 
https://download.openmmlab.com/mmdetection/v2.0/centernet/centernet_resnet18_140e_coco/centernet_resnet18_140e_coco_20210705_093630-bb5b3bf7.pth diff --git a/configs/centripetalnet/README.md b/configs/centripetalnet/README.md new file mode 100644 index 0000000..543cf06 --- /dev/null +++ b/configs/centripetalnet/README.md @@ -0,0 +1,26 @@ +# CentripetalNet + +## Introduction + + + +```latex +@InProceedings{Dong_2020_CVPR, +author = {Dong, Zhiwei and Li, Guoxuan and Liao, Yue and Wang, Fei and Ren, Pengju and Qian, Chen}, +title = {CentripetalNet: Pursuing High-Quality Keypoint Pairs for Object Detection}, +booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, +month = {June}, +year = {2020} +} +``` + +## Results and models + +| Backbone | Batch Size | Step/Total Epochs | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :-------------: | :--------: |:----------------: | :------: | :------------: | :----: | :------: | :--------: | +| HourglassNet-104 | [16 x 6](./centripetalnet_hourglass104_mstest_16x6_210e_coco.py) | 190/210 | 16.7 | 3.7 | 44.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/centripetalnet/centripetalnet_hourglass104_mstest_16x6_210e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/centripetalnet/centripetalnet_hourglass104_mstest_16x6_210e_coco/centripetalnet_hourglass104_mstest_16x6_210e_coco_20200915_204804-3ccc61e5.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/centripetalnet/centripetalnet_hourglass104_mstest_16x6_210e_coco/centripetalnet_hourglass104_mstest_16x6_210e_coco_20200915_204804.log.json) | + +Note: + +- TTA setting is single-scale and `flip=True`. +- The model we released is the best checkpoint rather than the latest checkpoint (box AP 44.8 vs 44.6 in our experiment). 
diff --git a/configs/centripetalnet/centripetalnet_hourglass104_mstest_16x6_210e_coco.py b/configs/centripetalnet/centripetalnet_hourglass104_mstest_16x6_210e_coco.py new file mode 100644 index 0000000..e9c5def --- /dev/null +++ b/configs/centripetalnet/centripetalnet_hourglass104_mstest_16x6_210e_coco.py @@ -0,0 +1,105 @@ +_base_ = [ + '../_base_/default_runtime.py', '../_base_/datasets/coco_detection.py' +] + +# model settings +model = dict( + type='CornerNet', + backbone=dict( + type='HourglassNet', + downsample_times=5, + num_stacks=2, + stage_channels=[256, 256, 384, 384, 384, 512], + stage_blocks=[2, 2, 2, 2, 2, 4], + norm_cfg=dict(type='BN', requires_grad=True)), + neck=None, + bbox_head=dict( + type='CentripetalHead', + num_classes=80, + in_channels=256, + num_feat_levels=2, + corner_emb_channels=0, + loss_heatmap=dict( + type='GaussianFocalLoss', alpha=2.0, gamma=4.0, loss_weight=1), + loss_offset=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1), + loss_guiding_shift=dict( + type='SmoothL1Loss', beta=1.0, loss_weight=0.05), + loss_centripetal_shift=dict( + type='SmoothL1Loss', beta=1.0, loss_weight=1)), + # training and testing settings + train_cfg=None, + test_cfg=dict( + corner_topk=100, + local_maximum_kernel=3, + distance_threshold=0.5, + score_thr=0.05, + max_per_img=100, + nms=dict(type='soft_nms', iou_threshold=0.5, method='gaussian'))) +# data settings +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile', to_float32=True), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='PhotoMetricDistortion', + brightness_delta=32, + contrast_range=(0.5, 1.5), + saturation_range=(0.5, 1.5), + hue_delta=18), + dict( + type='RandomCenterCropPad', + crop_size=(511, 511), + ratios=(0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3), + test_mode=False, + test_pad_mode=None, + **img_norm_cfg), + dict(type='Resize', img_scale=(511, 511), keep_ratio=False), + 
dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile', to_float32=True), + dict( + type='MultiScaleFlipAug', + scale_factor=1.0, + flip=True, + transforms=[ + dict(type='Resize'), + dict( + type='RandomCenterCropPad', + crop_size=None, + ratios=None, + border=None, + test_mode=True, + test_pad_mode=['logical_or', 127], + **img_norm_cfg), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict( + type='Collect', + keys=['img'], + meta_keys=('filename', 'ori_shape', 'img_shape', 'pad_shape', + 'scale_factor', 'flip', 'img_norm_cfg', 'border')), + ]) +] +data = dict( + samples_per_gpu=6, + workers_per_gpu=3, + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +# optimizer +optimizer = dict(type='Adam', lr=0.0005) +optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=1.0 / 3, + step=[190]) +runner = dict(type='EpochBasedRunner', max_epochs=210) diff --git a/configs/centripetalnet/metafile.yml b/configs/centripetalnet/metafile.yml new file mode 100644 index 0000000..61aed3e --- /dev/null +++ b/configs/centripetalnet/metafile.yml @@ -0,0 +1,39 @@ +Collections: + - Name: CentripetalNet + Metadata: + Training Data: COCO + Training Techniques: + - Adam + Training Resources: 16x V100 GPUs + Architecture: + - Corner Pooling + - Stacked Hourglass Network + Paper: + URL: https://arxiv.org/abs/2003.09119 + Title: 'CentripetalNet: Pursuing High-quality Keypoint Pairs for Object Detection' + README: configs/centripetalnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.5.0/mmdet/models/detectors/cornernet.py#L9 + Version: v2.5.0 + 
+Models: + - Name: centripetalnet_hourglass104_mstest_16x6_210e_coco + In Collection: CentripetalNet + Config: configs/centripetalnet/centripetalnet_hourglass104_mstest_16x6_210e_coco.py + Metadata: + Batch Size: 96 + Training Memory (GB): 16.7 + inference time (ms/im): + - value: 270.27 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 210 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 44.8 + Weights: https://download.openmmlab.com/mmdetection/v2.0/centripetalnet/centripetalnet_hourglass104_mstest_16x6_210e_coco/centripetalnet_hourglass104_mstest_16x6_210e_coco_20200915_204804-3ccc61e5.pth diff --git a/configs/cityscapes/README.md b/configs/cityscapes/README.md new file mode 100644 index 0000000..b790771 --- /dev/null +++ b/configs/cityscapes/README.md @@ -0,0 +1,33 @@ +# Cityscapes Dataset + + + +``` +@inproceedings{Cordts2016Cityscapes, + title={The Cityscapes Dataset for Semantic Urban Scene Understanding}, + author={Cordts, Marius and Omran, Mohamed and Ramos, Sebastian and Rehfeld, Timo and Enzweiler, Markus and Benenson, Rodrigo and Franke, Uwe and Roth, Stefan and Schiele, Bernt}, + booktitle={Proc. of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, + year={2016} +} +``` + +## Common settings + +- All baselines were trained using 8 GPUs with a batch size of 8 (1 image per GPU) using the [linear scaling rule](https://arxiv.org/abs/1706.02677) to scale the learning rate. +- All models were trained on `cityscapes_train`, and tested on `cityscapes_val`. +- 1x training schedule indicates 64 epochs, which corresponds to slightly less than the 24k iterations reported in the original schedule from the [Mask R-CNN paper](https://arxiv.org/abs/1703.06870). +- COCO pre-trained weights are used to initialize. +- A conversion [script](../../tools/dataset_converters/cityscapes.py) is provided to convert Cityscapes into COCO format.
Please refer to [install.md](../../docs/1_exist_data_model.md#prepare-datasets) for details. +- `CityscapesDataset` implemented three evaluation methods. `bbox` and `segm` are standard COCO bbox/mask AP. `cityscapes` is the cityscapes dataset official evaluation, which may be slightly higher than COCO. + +### Faster R-CNN + +| Backbone | Style | Lr schd | Scale | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :-------------: | :-----: | :-----: | :---: | :------: | :------------: | :----: | :------: | :--------: | +| R-50-FPN | pytorch | 1x | 800-1024 | 5.2 | - | 40.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cityscapes/faster_rcnn_r50_fpn_1x_cityscapes.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cityscapes/faster_rcnn_r50_fpn_1x_cityscapes_20200502-829424c0.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cityscapes/faster_rcnn_r50_fpn_1x_cityscapes_20200502_114915.log.json) | + +### Mask R-CNN + +| Backbone | Style | Lr schd | Scale | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :------: | :------------: | :----: | :-----: | :------: | :------: | +| R-50-FPN | pytorch | 1x | 800-1024 | 5.3 | - | 40.9 | 36.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cityscapes/mask_rcnn_r50_fpn_1x_cityscapes.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cityscapes/mask_rcnn_r50_fpn_1x_cityscapes/mask_rcnn_r50_fpn_1x_cityscapes_20201211_133733-d2858245.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cityscapes/mask_rcnn_r50_fpn_1x_cityscapes/mask_rcnn_r50_fpn_1x_cityscapes_20201211_133733.log.json) | diff --git a/configs/cityscapes/faster_rcnn_r50_fpn_1x_cityscapes.py b/configs/cityscapes/faster_rcnn_r50_fpn_1x_cityscapes.py new file mode 100644 index 0000000..c6da80c --- /dev/null +++ b/configs/cityscapes/faster_rcnn_r50_fpn_1x_cityscapes.py @@ -0,0 +1,39 @@ +_base_ = [ + 
'../_base_/models/faster_rcnn_r50_fpn.py', + '../_base_/datasets/cityscapes_detection.py', + '../_base_/default_runtime.py' +] +model = dict( + backbone=dict(init_cfg=None), + roi_head=dict( + bbox_head=dict( + type='Shared2FCBBoxHead', + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=8, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2]), + reg_class_agnostic=False, + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)))) +# optimizer +# lr is set for a batch size of 8 +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + # [7] yields higher performance than [6] + step=[7]) +runner = dict( + type='EpochBasedRunner', max_epochs=8) # actual epoch = 8 * 8 = 64 +log_config = dict(interval=100) +# For better, more stable performance initialize from COCO +load_from = 'https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth' # noqa diff --git a/configs/cityscapes/mask_rcnn_r50_fpn_1x_cityscapes.py b/configs/cityscapes/mask_rcnn_r50_fpn_1x_cityscapes.py new file mode 100644 index 0000000..679890d --- /dev/null +++ b/configs/cityscapes/mask_rcnn_r50_fpn_1x_cityscapes.py @@ -0,0 +1,46 @@ +_base_ = [ + '../_base_/models/mask_rcnn_r50_fpn.py', + '../_base_/datasets/cityscapes_instance.py', '../_base_/default_runtime.py' +] +model = dict( + backbone=dict(init_cfg=None), + roi_head=dict( + bbox_head=dict( + type='Shared2FCBBoxHead', + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=8, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2]), + 
reg_class_agnostic=False, + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), + mask_head=dict( + type='FCNMaskHead', + num_convs=4, + in_channels=256, + conv_out_channels=256, + num_classes=8, + loss_mask=dict( + type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)))) +# optimizer +# lr is set for a batch size of 8 +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + # [7] yields higher performance than [6] + step=[7]) +runner = dict( + type='EpochBasedRunner', max_epochs=8) # actual epoch = 8 * 8 = 64 +log_config = dict(interval=100) +# For better, more stable performance initialize from COCO +load_from = 'https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_1x_coco/mask_rcnn_r50_fpn_1x_coco_20200205-d4b0c5d6.pth' # noqa diff --git a/configs/common/lsj_100e_coco_instance.py b/configs/common/lsj_100e_coco_instance.py new file mode 100644 index 0000000..cacf23d --- /dev/null +++ b/configs/common/lsj_100e_coco_instance.py @@ -0,0 +1,90 @@ +_base_ = '../_base_/default_runtime.py' +# dataset settings +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +image_size = (1024, 1024) + +file_client_args = dict(backend='disk') +# uncomment the code below to use a different file client +# file_client_args = dict( +# backend='petrel', +# path_mapping=dict({ +# './data/': 's3://openmmlab/datasets/detection/', +# 'data/': 's3://openmmlab/datasets/detection/' +# })) + +train_pipeline = [ + dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict( + type='Resize', + img_scale=image_size,
ratio_range=(0.1, 2.0), + multiscale_mode='range', + keep_ratio=True), + dict( + type='RandomCrop', + crop_type='absolute_range', + crop_size=image_size, + recompute_bbox=True, + allow_negative_crop=True), + dict(type='FilterAnnotations', min_gt_bbox_wh=(1e-2, 1e-2)), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=image_size), # padding to image_size leads 0.5+ mAP + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile', file_client_args=file_client_args), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] + +# Use RepeatDataset to speed up training +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type='RepeatDataset', + times=4, # simply change this from 2 to 16 for 50e - 400e training. 
+ dataset=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline)) +evaluation = dict(interval=5, metric=['bbox', 'segm']) + +# optimizer assumes bs=64 +optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.00004) +optimizer_config = dict(grad_clip=None) + +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.067, + step=[22, 24]) +runner = dict(type='EpochBasedRunner', max_epochs=25) diff --git a/configs/common/mstrain-poly_3x_coco_instance.py b/configs/common/mstrain-poly_3x_coco_instance.py new file mode 100644 index 0000000..c22ed94 --- /dev/null +++ b/configs/common/mstrain-poly_3x_coco_instance.py @@ -0,0 +1,80 @@ +_base_ = '../_base_/default_runtime.py' +# dataset settings +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) + +# In mstrain 3x config, img_scale=[(1333, 640), (1333, 800)], +# multiscale_mode='range' +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='LoadAnnotations', + with_bbox=True, + with_mask=True, + poly2mask=False), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 800)], + multiscale_mode='range', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + 
img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] + +# Use RepeatDataset to speed up training +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type='RepeatDataset', + times=3, + dataset=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline)) +evaluation = dict(interval=1, metric=['bbox', 'segm']) + +# optimizer +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=None) + +# learning policy +# Experiments show that using step=[9, 11] has higher performance +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[9, 11]) +runner = dict(type='EpochBasedRunner', max_epochs=12) diff --git a/configs/common/mstrain_3x_coco.py b/configs/common/mstrain_3x_coco.py new file mode 100644 index 0000000..80ec8b8 --- /dev/null +++ b/configs/common/mstrain_3x_coco.py @@ -0,0 +1,76 @@ +_base_ = '../_base_/default_runtime.py' +# dataset settings +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) + +# In mstrain 3x config, img_scale=[(1333, 640), (1333, 800)], +# multiscale_mode='range' +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', + 
img_scale=[(1333, 640), (1333, 800)], + multiscale_mode='range', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] + +# Use RepeatDataset to speed up training +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type='RepeatDataset', + times=3, + dataset=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline)) +evaluation = dict(interval=1, metric='bbox') + +# optimizer +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=None) + +# learning policy +# Experiments show that using step=[9, 11] has higher performance +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[9, 11]) +runner = dict(type='EpochBasedRunner', max_epochs=12) diff --git a/configs/common/mstrain_3x_coco_instance.py b/configs/common/mstrain_3x_coco_instance.py new file mode 100644 index 0000000..50f39be --- /dev/null +++ b/configs/common/mstrain_3x_coco_instance.py @@ -0,0 +1,76 @@ +_base_ = 
'../_base_/default_runtime.py' +# dataset settings +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) + +# In mstrain 3x config, img_scale=[(1333, 640), (1333, 800)], +# multiscale_mode='range' +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 800)], + multiscale_mode='range', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] + +# Use RepeatDataset to speed up training +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type='RepeatDataset', + times=3, + dataset=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline)) +evaluation = dict(interval=1, metric=['bbox', 'segm']) + +# optimizer +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=None) + +# learning policy +# Experiments 
show that using step=[9, 11] has higher performance +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[9, 11]) +runner = dict(type='EpochBasedRunner', max_epochs=12) diff --git a/configs/cornernet/README.md b/configs/cornernet/README.md new file mode 100644 index 0000000..a060196 --- /dev/null +++ b/configs/cornernet/README.md @@ -0,0 +1,33 @@ +# CornerNet + +## Introduction + + + +```latex +@inproceedings{law2018cornernet, + title={Cornernet: Detecting objects as paired keypoints}, + author={Law, Hei and Deng, Jia}, + booktitle={15th European Conference on Computer Vision, ECCV 2018}, + pages={765--781}, + year={2018}, + organization={Springer Verlag} +} +``` + +## Results and models + +| Backbone | Batch Size | Step/Total Epochs | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :-------------: | :--------: |:----------------: | :------: | :------------: | :----: | :------: | :--------: | +| HourglassNet-104 | [10 x 5](./cornernet_hourglass104_mstest_10x5_210e_coco.py) | 180/210 | 13.9 | 4.2 | 41.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cornernet/cornernet_hourglass104_mstest_10x5_210e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cornernet/cornernet_hourglass104_mstest_10x5_210e_coco/cornernet_hourglass104_mstest_10x5_210e_coco_20200824_185720-5fefbf1c.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cornernet/cornernet_hourglass104_mstest_10x5_210e_coco/cornernet_hourglass104_mstest_10x5_210e_coco_20200824_185720.log.json) | +| HourglassNet-104 | [8 x 6](./cornernet_hourglass104_mstest_8x6_210e_coco.py) | 180/210 | 15.9 | 4.2 | 41.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cornernet/cornernet_hourglass104_mstest_8x6_210e_coco.py) | 
[model](https://download.openmmlab.com/mmdetection/v2.0/cornernet/cornernet_hourglass104_mstest_8x6_210e_coco/cornernet_hourglass104_mstest_8x6_210e_coco_20200825_150618-79b44c30.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cornernet/cornernet_hourglass104_mstest_8x6_210e_coco/cornernet_hourglass104_mstest_8x6_210e_coco_20200825_150618.log.json) | +| HourglassNet-104 | [32 x 3](./cornernet_hourglass104_mstest_32x3_210e_coco.py) | 180/210 | 9.5 | 3.9 | 40.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cornernet/cornernet_hourglass104_mstest_32x3_210e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/cornernet/cornernet_hourglass104_mstest_32x3_210e_coco/cornernet_hourglass104_mstest_32x3_210e_coco_20200819_203110-1efaea91.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/cornernet/cornernet_hourglass104_mstest_32x3_210e_coco/cornernet_hourglass104_mstest_32x3_210e_coco_20200819_203110.log.json) | + +Note: + +- TTA setting is single-scale and `flip=True`. +- Experiments with `images_per_gpu=6` are conducted on Tesla V100-SXM2-32GB; those with `images_per_gpu=3` are conducted on GeForce GTX 1080 Ti. +- Here are the descriptions of each experiment setting: + - 10 x 5: 10 GPUs with 5 images per GPU. This is the same setting as that reported in the original paper. + - 8 x 6: 8 GPUs with 6 images per GPU. The total batch size is similar to that of the paper, and only 1 node is needed to train. + - 32 x 3: 32 GPUs with 3 images per GPU. This is the default setting for the 1080 Ti and needs 4 nodes to train.
diff --git a/configs/cornernet/cornernet_hourglass104_mstest_10x5_210e_coco.py b/configs/cornernet/cornernet_hourglass104_mstest_10x5_210e_coco.py new file mode 100644 index 0000000..89f3876 --- /dev/null +++ b/configs/cornernet/cornernet_hourglass104_mstest_10x5_210e_coco.py @@ -0,0 +1,105 @@ +_base_ = [ + '../_base_/default_runtime.py', '../_base_/datasets/coco_detection.py' +] + +# model settings +model = dict( + type='CornerNet', + backbone=dict( + type='HourglassNet', + downsample_times=5, + num_stacks=2, + stage_channels=[256, 256, 384, 384, 384, 512], + stage_blocks=[2, 2, 2, 2, 2, 4], + norm_cfg=dict(type='BN', requires_grad=True)), + neck=None, + bbox_head=dict( + type='CornerHead', + num_classes=80, + in_channels=256, + num_feat_levels=2, + corner_emb_channels=1, + loss_heatmap=dict( + type='GaussianFocalLoss', alpha=2.0, gamma=4.0, loss_weight=1), + loss_embedding=dict( + type='AssociativeEmbeddingLoss', + pull_weight=0.10, + push_weight=0.10), + loss_offset=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1)), + # training and testing settings + train_cfg=None, + test_cfg=dict( + corner_topk=100, + local_maximum_kernel=3, + distance_threshold=0.5, + score_thr=0.05, + max_per_img=100, + nms=dict(type='soft_nms', iou_threshold=0.5, method='gaussian'))) +# data settings +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile', to_float32=True), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='PhotoMetricDistortion', + brightness_delta=32, + contrast_range=(0.5, 1.5), + saturation_range=(0.5, 1.5), + hue_delta=18), + dict( + type='RandomCenterCropPad', + crop_size=(511, 511), + ratios=(0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3), + test_mode=False, + test_pad_mode=None, + **img_norm_cfg), + dict(type='Resize', img_scale=(511, 511), keep_ratio=False), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + 
dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile', to_float32=True), + dict( + type='MultiScaleFlipAug', + scale_factor=1.0, + flip=True, + transforms=[ + dict(type='Resize'), + dict( + type='RandomCenterCropPad', + crop_size=None, + ratios=None, + border=None, + test_mode=True, + test_pad_mode=['logical_or', 127], + **img_norm_cfg), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict( + type='Collect', + keys=['img'], + meta_keys=('filename', 'ori_shape', 'img_shape', 'pad_shape', + 'scale_factor', 'flip', 'img_norm_cfg', 'border')), + ]) +] +data = dict( + samples_per_gpu=5, + workers_per_gpu=3, + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +# optimizer +optimizer = dict(type='Adam', lr=0.0005) +optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=1.0 / 3, + step=[180]) +runner = dict(type='EpochBasedRunner', max_epochs=210) diff --git a/configs/cornernet/cornernet_hourglass104_mstest_32x3_210e_coco.py b/configs/cornernet/cornernet_hourglass104_mstest_32x3_210e_coco.py new file mode 100644 index 0000000..873d598 --- /dev/null +++ b/configs/cornernet/cornernet_hourglass104_mstest_32x3_210e_coco.py @@ -0,0 +1,105 @@ +_base_ = [ + '../_base_/default_runtime.py', '../_base_/datasets/coco_detection.py' +] + +# model settings +model = dict( + type='CornerNet', + backbone=dict( + type='HourglassNet', + downsample_times=5, + num_stacks=2, + stage_channels=[256, 256, 384, 384, 384, 512], + stage_blocks=[2, 2, 2, 2, 2, 4], + norm_cfg=dict(type='BN', requires_grad=True)), + neck=None, + bbox_head=dict( + type='CornerHead', + num_classes=80, + in_channels=256, + num_feat_levels=2, + corner_emb_channels=1, + 
loss_heatmap=dict( + type='GaussianFocalLoss', alpha=2.0, gamma=4.0, loss_weight=1), + loss_embedding=dict( + type='AssociativeEmbeddingLoss', + pull_weight=0.10, + push_weight=0.10), + loss_offset=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1)), + # training and testing settings + train_cfg=None, + test_cfg=dict( + corner_topk=100, + local_maximum_kernel=3, + distance_threshold=0.5, + score_thr=0.05, + max_per_img=100, + nms=dict(type='soft_nms', iou_threshold=0.5, method='gaussian'))) +# data settings +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile', to_float32=True), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='PhotoMetricDistortion', + brightness_delta=32, + contrast_range=(0.5, 1.5), + saturation_range=(0.5, 1.5), + hue_delta=18), + dict( + type='RandomCenterCropPad', + crop_size=(511, 511), + ratios=(0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3), + test_mode=False, + test_pad_mode=None, + **img_norm_cfg), + dict(type='Resize', img_scale=(511, 511), keep_ratio=False), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile', to_float32=True), + dict( + type='MultiScaleFlipAug', + scale_factor=1.0, + flip=True, + transforms=[ + dict(type='Resize'), + dict( + type='RandomCenterCropPad', + crop_size=None, + ratios=None, + border=None, + test_mode=True, + test_pad_mode=['logical_or', 127], + **img_norm_cfg), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict( + type='Collect', + keys=['img'], + meta_keys=('filename', 'ori_shape', 'img_shape', 'pad_shape', + 'scale_factor', 'flip', 'img_norm_cfg', 'border')), + ]) +] +data = dict( + samples_per_gpu=3, + workers_per_gpu=3, + 
train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +# optimizer +optimizer = dict(type='Adam', lr=0.0005) +optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=1.0 / 3, + step=[180]) +runner = dict(type='EpochBasedRunner', max_epochs=210) diff --git a/configs/cornernet/cornernet_hourglass104_mstest_8x6_210e_coco.py b/configs/cornernet/cornernet_hourglass104_mstest_8x6_210e_coco.py new file mode 100644 index 0000000..ef749cc --- /dev/null +++ b/configs/cornernet/cornernet_hourglass104_mstest_8x6_210e_coco.py @@ -0,0 +1,105 @@ +_base_ = [ + '../_base_/default_runtime.py', '../_base_/datasets/coco_detection.py' +] + +# model settings +model = dict( + type='CornerNet', + backbone=dict( + type='HourglassNet', + downsample_times=5, + num_stacks=2, + stage_channels=[256, 256, 384, 384, 384, 512], + stage_blocks=[2, 2, 2, 2, 2, 4], + norm_cfg=dict(type='BN', requires_grad=True)), + neck=None, + bbox_head=dict( + type='CornerHead', + num_classes=80, + in_channels=256, + num_feat_levels=2, + corner_emb_channels=1, + loss_heatmap=dict( + type='GaussianFocalLoss', alpha=2.0, gamma=4.0, loss_weight=1), + loss_embedding=dict( + type='AssociativeEmbeddingLoss', + pull_weight=0.10, + push_weight=0.10), + loss_offset=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1)), + # training and testing settings + train_cfg=None, + test_cfg=dict( + corner_topk=100, + local_maximum_kernel=3, + distance_threshold=0.5, + score_thr=0.05, + max_per_img=100, + nms=dict(type='soft_nms', iou_threshold=0.5, method='gaussian'))) +# data settings +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile', to_float32=True), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='PhotoMetricDistortion', + brightness_delta=32, + 
contrast_range=(0.5, 1.5), + saturation_range=(0.5, 1.5), + hue_delta=18), + dict( + type='RandomCenterCropPad', + crop_size=(511, 511), + ratios=(0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3), + test_mode=False, + test_pad_mode=None, + **img_norm_cfg), + dict(type='Resize', img_scale=(511, 511), keep_ratio=False), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile', to_float32=True), + dict( + type='MultiScaleFlipAug', + scale_factor=1.0, + flip=True, + transforms=[ + dict(type='Resize'), + dict( + type='RandomCenterCropPad', + crop_size=None, + ratios=None, + border=None, + test_mode=True, + test_pad_mode=['logical_or', 127], + **img_norm_cfg), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict( + type='Collect', + keys=['img'], + meta_keys=('filename', 'ori_shape', 'img_shape', 'pad_shape', + 'scale_factor', 'flip', 'img_norm_cfg', 'border')), + ]) +] +data = dict( + samples_per_gpu=6, + workers_per_gpu=3, + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +# optimizer +optimizer = dict(type='Adam', lr=0.0005) +optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=1.0 / 3, + step=[180]) +runner = dict(type='EpochBasedRunner', max_epochs=210) diff --git a/configs/cornernet/metafile.yml b/configs/cornernet/metafile.yml new file mode 100644 index 0000000..c2f6143 --- /dev/null +++ b/configs/cornernet/metafile.yml @@ -0,0 +1,83 @@ +Collections: + - Name: CornerNet + Metadata: + Training Data: COCO + Training Techniques: + - Adam + Training Resources: 8x V100 GPUs + Architecture: + - Corner Pooling + - Stacked Hourglass Network + Paper: + URL: 
https://arxiv.org/abs/1808.01244 + Title: 'CornerNet: Detecting Objects as Paired Keypoints' + README: configs/cornernet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.3.0/mmdet/models/detectors/cornernet.py#L9 + Version: v2.3.0 + +Models: + - Name: cornernet_hourglass104_mstest_10x5_210e_coco + In Collection: CornerNet + Config: configs/cornernet/cornernet_hourglass104_mstest_10x5_210e_coco.py + Metadata: + Training Resources: 10x V100 GPUs + Batch Size: 50 + Training Memory (GB): 13.9 + inference time (ms/im): + - value: 238.1 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 210 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.2 + Weights: https://download.openmmlab.com/mmdetection/v2.0/cornernet/cornernet_hourglass104_mstest_10x5_210e_coco/cornernet_hourglass104_mstest_10x5_210e_coco_20200824_185720-5fefbf1c.pth + + - Name: cornernet_hourglass104_mstest_8x6_210e_coco + In Collection: CornerNet + Config: configs/cornernet/cornernet_hourglass104_mstest_8x6_210e_coco.py + Metadata: + Batch Size: 48 + Training Memory (GB): 15.9 + inference time (ms/im): + - value: 238.1 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 210 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.2 + Weights: https://download.openmmlab.com/mmdetection/v2.0/cornernet/cornernet_hourglass104_mstest_8x6_210e_coco/cornernet_hourglass104_mstest_8x6_210e_coco_20200825_150618-79b44c30.pth + + - Name: cornernet_hourglass104_mstest_32x3_210e_coco + In Collection: CornerNet + Config: configs/cornernet/cornernet_hourglass104_mstest_32x3_210e_coco.py + Metadata: + Training Resources: 32x V100 GPUs + Batch Size: 96 + Training Memory (GB): 9.5 + inference time (ms/im): + - value: 256.41 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 210 + Results: + - Task: Object 
Detection + Dataset: COCO + Metrics: + box AP: 40.4 + Weights: https://download.openmmlab.com/mmdetection/v2.0/cornernet/cornernet_hourglass104_mstest_32x3_210e_coco/cornernet_hourglass104_mstest_32x3_210e_coco_20200819_203110-1efaea91.pth diff --git a/configs/dcn/README.md b/configs/dcn/README.md new file mode 100644 index 0000000..044ff29 --- /dev/null +++ b/configs/dcn/README.md @@ -0,0 +1,52 @@ +# Deformable Convolutional Networks + +## Introduction + + + +```none +@inproceedings{dai2017deformable, + title={Deformable Convolutional Networks}, + author={Dai, Jifeng and Qi, Haozhi and Xiong, Yuwen and Li, Yi and Zhang, Guodong and Hu, Han and Wei, Yichen}, + booktitle={Proceedings of the IEEE international conference on computer vision}, + year={2017} +} +``` + + + +``` +@article{zhu2018deformable, + title={Deformable ConvNets v2: More Deformable, Better Results}, + author={Zhu, Xizhou and Hu, Han and Lin, Stephen and Dai, Jifeng}, + journal={arXiv preprint arXiv:1811.11168}, + year={2018} +} +``` + +## Results and Models + +| Backbone | Model | Style | Conv | Pool | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +|:----------------:|:------------:|:-------:|:-------------:|:------:|:-------:|:--------:|:--------------:|:------:|:-------:|:------:|:--------:| +| R-50-FPN | Faster | pytorch | dconv(c3-c5) | - | 1x | 4.0 | 17.8 | 41.3 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco/faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco_20200130-d68aed1e.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco/faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco_20200130_212941.log.json) | +| R-50-FPN | Faster | pytorch | mdconv(c3-c5) | - | 1x | 4.1 | 17.6 | 41.4 | | 
[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco/faster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco_20200130-d099253b.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco/faster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco_20200130_222144.log.json) | +| *R-50-FPN (dg=4) | Faster | pytorch | mdconv(c3-c5) | - | 1x | 4.2 | 17.4 | 41.5 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco/faster_rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco_20200130-01262257.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco/faster_rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco_20200130_222058.log.json) | +| R-50-FPN | Faster | pytorch | - | dpool | 1x | 5.0 | 17.2 | 38.9 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/faster_rcnn_r50_fpn_dpool_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_dpool_1x_coco/faster_rcnn_r50_fpn_dpool_1x_coco_20200307-90d3c01d.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_dpool_1x_coco/faster_rcnn_r50_fpn_dpool_1x_coco_20200307_203250.log.json) | +| R-50-FPN | Faster | pytorch | - | mdpool | 1x | 5.8 | 16.6 | 38.7 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/faster_rcnn_r50_fpn_mdpool_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_mdpool_1x_coco/faster_rcnn_r50_fpn_mdpool_1x_coco_20200307-c0df27ff.pth) | 
[log](https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_mdpool_1x_coco/faster_rcnn_r50_fpn_mdpool_1x_coco_20200307_203304.log.json) | +| R-101-FPN | Faster | pytorch | dconv(c3-c5) | - | 1x | 6.0 | 12.5 | 42.7 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/faster_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r101_fpn_dconv_c3-c5_1x_coco/faster_rcnn_r101_fpn_dconv_c3-c5_1x_coco_20200203-1377f13d.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r101_fpn_dconv_c3-c5_1x_coco/faster_rcnn_r101_fpn_dconv_c3-c5_1x_coco_20200203_230019.log.json) | +| X-101-32x4d-FPN | Faster | pytorch | dconv(c3-c5) | - | 1x | 7.3 | 10.0 | 44.5 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/faster_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco/faster_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco_20200203-4f85c69c.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco/faster_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco_20200203_001325.log.json) | +| R-50-FPN | Mask | pytorch | dconv(c3-c5) | - | 1x | 4.5 | 15.4 | 41.8 | 37.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/dcn/mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco/mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco_20200203-4d9ad43b.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/dcn/mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco/mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco_20200203_061339.log.json) | +| R-50-FPN | Mask | pytorch | mdconv(c3-c5) | - | 1x | 4.5 | 15.1 | 41.5 | 37.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py) | 
[model](https://download.openmmlab.com/mmdetection/v2.0/dcn/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco_20200203-ad97591f.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/dcn/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco_20200203_063443.log.json) | +| R-101-FPN | Mask | pytorch | dconv(c3-c5) | - | 1x | 6.5 | 11.7 | 43.5 | 38.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/dcn/mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco/mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco_20200216-a71f5bce.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/dcn/mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco/mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco_20200216_191601.log.json) | +| R-50-FPN | Cascade | pytorch | dconv(c3-c5) | - | 1x | 4.5 | 14.6 | 43.8 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/cascade_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/dcn/cascade_rcnn_r50_fpn_dconv_c3-c5_1x_coco/cascade_rcnn_r50_fpn_dconv_c3-c5_1x_coco_20200130-2f1fca44.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/dcn/cascade_rcnn_r50_fpn_dconv_c3-c5_1x_coco/cascade_rcnn_r50_fpn_dconv_c3-c5_1x_coco_20200130_220843.log.json) | +| R-101-FPN | Cascade | pytorch | dconv(c3-c5) | - | 1x | 6.4 | 11.0 | 45.0 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/cascade_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/dcn/cascade_rcnn_r101_fpn_dconv_c3-c5_1x_coco/cascade_rcnn_r101_fpn_dconv_c3-c5_1x_coco_20200203-3b2f0594.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/dcn/cascade_rcnn_r101_fpn_dconv_c3-c5_1x_coco/cascade_rcnn_r101_fpn_dconv_c3-c5_1x_coco_20200203_224829.log.json) | +| R-50-FPN | Cascade Mask | pytorch | dconv(c3-c5) | - | 1x | 
6.0 | 10.0 | 44.4 | 38.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/cascade_mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/dcn/cascade_mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco/cascade_mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco_20200202-42e767a2.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/dcn/cascade_mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco/cascade_mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco_20200202_010309.log.json) | +| R-101-FPN | Cascade Mask | pytorch | dconv(c3-c5) | - | 1x | 8.0 | 8.6 | 45.8 | 39.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/cascade_mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/dcn/cascade_mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco/cascade_mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco_20200204-df0c5f10.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/dcn/cascade_mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco/cascade_mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco_20200204_134006.log.json) | +| X-101-32x4d-FPN | Cascade Mask | pytorch | dconv(c3-c5) | - | 1x | 9.2 | | 47.3 | 41.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dcn/cascade_mask_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/dcn/cascade_mask_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco-e75f90c8.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/dcn/cascade_mask_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco-20200606_183737.log.json) | + +**Notes:** + +- `dconv` and `mdconv` denote (modulated) deformable convolution, `c3-c5` means adding dconv in resnet stage 3 to 5. `dpool` and `mdpool` denote (modulated) deformable roi pooling. 
+- The dcn ops are modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch, which should be more memory efficient and slightly faster. +- (*) For R-50-FPN (dg=4), dg is short for deformable groups (`deform_groups` in the config). This model is trained and tested on an Amazon EC2 p3dn.24xlarge instance. +- **Memory and Train/Inf time figures are outdated.** diff --git a/configs/dcn/cascade_mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py b/configs/dcn/cascade_mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py new file mode 100644 index 0000000..081b998 --- /dev/null +++ b/configs/dcn/cascade_mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py @@ -0,0 +1,5 @@ +_base_ = '../cascade_rcnn/cascade_mask_rcnn_r101_fpn_1x_coco.py' +model = dict( + backbone=dict( + dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True))) diff --git a/configs/dcn/cascade_mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py b/configs/dcn/cascade_mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py new file mode 100644 index 0000000..3b3683a --- /dev/null +++ b/configs/dcn/cascade_mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py @@ -0,0 +1,5 @@ +_base_ = '../cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True))) diff --git a/configs/dcn/cascade_mask_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco.py b/configs/dcn/cascade_mask_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco.py new file mode 100644 index 0000000..daaa472 --- /dev/null +++ b/configs/dcn/cascade_mask_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco.py @@ -0,0 +1,5 @@ +_base_ = '../cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco.py' +model = dict( + backbone=dict( + dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True))) diff --git a/configs/dcn/cascade_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py b/configs/dcn/cascade_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py new file mode 100644 index 0000000..a01df33 
--- /dev/null +++ b/configs/dcn/cascade_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py @@ -0,0 +1,5 @@ +_base_ = '../cascade_rcnn/cascade_rcnn_r101_fpn_1x_coco.py' +model = dict( + backbone=dict( + dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True))) diff --git a/configs/dcn/cascade_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py b/configs/dcn/cascade_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py new file mode 100644 index 0000000..aa664bd --- /dev/null +++ b/configs/dcn/cascade_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py @@ -0,0 +1,5 @@ +_base_ = '../cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True))) diff --git a/configs/dcn/faster_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py b/configs/dcn/faster_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py new file mode 100644 index 0000000..f5fee7e --- /dev/null +++ b/configs/dcn/faster_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py @@ -0,0 +1,5 @@ +_base_ = '../faster_rcnn/faster_rcnn_r101_fpn_1x_coco.py' +model = dict( + backbone=dict( + dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True))) diff --git a/configs/dcn/faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py b/configs/dcn/faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py new file mode 100644 index 0000000..8787088 --- /dev/null +++ b/configs/dcn/faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py @@ -0,0 +1,5 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True))) diff --git a/configs/dcn/faster_rcnn_r50_fpn_dpool_1x_coco.py b/configs/dcn/faster_rcnn_r50_fpn_dpool_1x_coco.py new file mode 100644 index 0000000..1b695f0 --- /dev/null +++ b/configs/dcn/faster_rcnn_r50_fpn_dpool_1x_coco.py @@ -0,0 +1,12 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' 
+model = dict( + roi_head=dict( + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict( + _delete_=True, + type='DeformRoIPoolPack', + output_size=7, + output_channels=256), + out_channels=256, + featmap_strides=[4, 8, 16, 32]))) diff --git a/configs/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py b/configs/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py new file mode 100644 index 0000000..d1bcf3c --- /dev/null +++ b/configs/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py @@ -0,0 +1,5 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True))) diff --git a/configs/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco.py b/configs/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco.py new file mode 100644 index 0000000..d0ab89c --- /dev/null +++ b/configs/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco.py @@ -0,0 +1,5 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + dcn=dict(type='DCNv2', deform_groups=4, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True))) diff --git a/configs/dcn/faster_rcnn_r50_fpn_mdpool_1x_coco.py b/configs/dcn/faster_rcnn_r50_fpn_mdpool_1x_coco.py new file mode 100644 index 0000000..ad7b034 --- /dev/null +++ b/configs/dcn/faster_rcnn_r50_fpn_mdpool_1x_coco.py @@ -0,0 +1,12 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + roi_head=dict( + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict( + _delete_=True, + type='ModulatedDeformRoIPoolPack', + output_size=7, + output_channels=256), + out_channels=256, + featmap_strides=[4, 8, 16, 32]))) diff --git a/configs/dcn/faster_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco.py b/configs/dcn/faster_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco.py new file mode 100644 index 0000000..e3bea19 --- /dev/null +++ 
b/configs/dcn/faster_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco.py @@ -0,0 +1,16 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch', + dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True), + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d'))) diff --git a/configs/dcn/mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py b/configs/dcn/mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py new file mode 100644 index 0000000..cb34002 --- /dev/null +++ b/configs/dcn/mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py @@ -0,0 +1,5 @@ +_base_ = '../mask_rcnn/mask_rcnn_r101_fpn_1x_coco.py' +model = dict( + backbone=dict( + dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True))) diff --git a/configs/dcn/mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py b/configs/dcn/mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py new file mode 100644 index 0000000..ababe58 --- /dev/null +++ b/configs/dcn/mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py @@ -0,0 +1,5 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True))) diff --git a/configs/dcn/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py b/configs/dcn/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py new file mode 100644 index 0000000..5ca2a67 --- /dev/null +++ b/configs/dcn/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py @@ -0,0 +1,5 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True))) diff --git a/configs/dcn/metafile.yml b/configs/dcn/metafile.yml new file mode 
100644 index 0000000..4fcbce6 --- /dev/null +++ b/configs/dcn/metafile.yml @@ -0,0 +1,335 @@ +Collections: + - Name: Deformable Convolutional Networks + Metadata: + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - Deformable Convolution + Paper: + URL: https://arxiv.org/abs/1811.11168 + Title: 'Deformable ConvNets v2: More Deformable, Better Results' + README: configs/dcn/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/ops/dcn/deform_conv.py#L15 + Version: v2.0.0 + +Models: + - Name: faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco + In Collection: Deformable Convolutional Networks + Config: configs/dcn/faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py + Metadata: + Training Memory (GB): 4.0 + inference time (ms/im): + - value: 56.18 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.3 + Weights: https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco/faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco_20200130-d68aed1e.pth + + - Name: faster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco + In Collection: Deformable Convolutional Networks + Config: configs/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py + Metadata: + Training Memory (GB): 4.1 + inference time (ms/im): + - value: 56.82 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.4 + Weights: https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco/faster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco_20200130-d099253b.pth + + - Name: faster_rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco + In Collection: Deformable Convolutional Networks + Config: configs/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco.py + 
Metadata: + Training Memory (GB): 4.2 + inference time (ms/im): + - value: 57.47 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.5 + Weights: https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco/faster_rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco_20200130-01262257.pth + + - Name: faster_rcnn_r50_fpn_dpool_1x_coco + In Collection: Deformable Convolutional Networks + Config: configs/dcn/faster_rcnn_r50_fpn_dpool_1x_coco.py + Metadata: + Training Memory (GB): 5.0 + inference time (ms/im): + - value: 58.14 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 38.9 + Weights: https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_dpool_1x_coco/faster_rcnn_r50_fpn_dpool_1x_coco_20200307-90d3c01d.pth + + - Name: faster_rcnn_r50_fpn_mdpool_1x_coco + In Collection: Deformable Convolutional Networks + Config: configs/dcn/faster_rcnn_r50_fpn_mdpool_1x_coco.py + Metadata: + Training Memory (GB): 5.8 + inference time (ms/im): + - value: 60.24 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 38.7 + Weights: https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_mdpool_1x_coco/faster_rcnn_r50_fpn_mdpool_1x_coco_20200307-c0df27ff.pth + + - Name: faster_rcnn_r101_fpn_dconv_c3-c5_1x_coco + In Collection: Deformable Convolutional Networks + Config: configs/dcn/faster_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py + Metadata: + Training Memory (GB): 6.0 + inference time (ms/im): + - value: 80 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + 
Dataset: COCO + Metrics: + box AP: 42.7 + Weights: https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r101_fpn_dconv_c3-c5_1x_coco/faster_rcnn_r101_fpn_dconv_c3-c5_1x_coco_20200203-1377f13d.pth + + - Name: faster_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco + In Collection: Deformable Convolutional Networks + Config: configs/dcn/faster_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco.py + Metadata: + Training Memory (GB): 7.3 + inference time (ms/im): + - value: 100 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 44.5 + Weights: https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco/faster_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco_20200203-4f85c69c.pth + + - Name: mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco + In Collection: Deformable Convolutional Networks + Config: configs/dcn/mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py + Metadata: + Training Memory (GB): 4.5 + inference time (ms/im): + - value: 64.94 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.8 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 37.4 + Weights: https://download.openmmlab.com/mmdetection/v2.0/dcn/mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco/mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco_20200203-4d9ad43b.pth + + - Name: mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco + In Collection: Deformable Convolutional Networks + Config: configs/dcn/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py + Metadata: + Training Memory (GB): 4.5 + inference time (ms/im): + - value: 66.23 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.5 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask 
AP: 37.1 + Weights: https://download.openmmlab.com/mmdetection/v2.0/dcn/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco_20200203-ad97591f.pth + + - Name: mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco + In Collection: Deformable Convolutional Networks + Config: configs/dcn/mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py + Metadata: + Training Memory (GB): 6.5 + inference time (ms/im): + - value: 85.47 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 43.5 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 38.9 + Weights: https://download.openmmlab.com/mmdetection/v2.0/dcn/mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco/mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco_20200216-a71f5bce.pth + + - Name: cascade_rcnn_r50_fpn_dconv_c3-c5_1x_coco + In Collection: Deformable Convolutional Networks + Config: configs/dcn/cascade_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py + Metadata: + Training Memory (GB): 4.5 + inference time (ms/im): + - value: 68.49 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 43.8 + Weights: https://download.openmmlab.com/mmdetection/v2.0/dcn/cascade_rcnn_r50_fpn_dconv_c3-c5_1x_coco/cascade_rcnn_r50_fpn_dconv_c3-c5_1x_coco_20200130-2f1fca44.pth + + - Name: cascade_rcnn_r101_fpn_dconv_c3-c5_1x_coco + In Collection: Deformable Convolutional Networks + Config: configs/dcn/cascade_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py + Metadata: + Training Memory (GB): 6.4 + inference time (ms/im): + - value: 90.91 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 45.0 + Weights: 
https://download.openmmlab.com/mmdetection/v2.0/dcn/cascade_rcnn_r101_fpn_dconv_c3-c5_1x_coco/cascade_rcnn_r101_fpn_dconv_c3-c5_1x_coco_20200203-3b2f0594.pth + + - Name: cascade_mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco + In Collection: Deformable Convolutional Networks + Config: configs/dcn/cascade_mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py + Metadata: + Training Memory (GB): 6.0 + inference time (ms/im): + - value: 100 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 44.4 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 38.6 + Weights: https://download.openmmlab.com/mmdetection/v2.0/dcn/cascade_mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco/cascade_mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco_20200202-42e767a2.pth + + - Name: cascade_mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco + In Collection: Deformable Convolutional Networks + Config: configs/dcn/cascade_mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py + Metadata: + Training Memory (GB): 8.0 + inference time (ms/im): + - value: 116.28 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 45.8 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 39.7 + Weights: https://download.openmmlab.com/mmdetection/v2.0/dcn/cascade_mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco/cascade_mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco_20200204-df0c5f10.pth + + - Name: cascade_mask_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco + In Collection: Deformable Convolutional Networks + Config: configs/dcn/cascade_mask_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco.py + Metadata: + Training Memory (GB): 9.2 + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 47.3 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 41.1 + Weights: 
https://download.openmmlab.com/mmdetection/v2.0/dcn/cascade_mask_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco-e75f90c8.pth diff --git a/configs/deepfashion/README.md b/configs/deepfashion/README.md new file mode 100644 index 0000000..e21cce3 --- /dev/null +++ b/configs/deepfashion/README.md @@ -0,0 +1,56 @@ +# DeepFashion + + + +[MMFashion](https://github.com/open-mmlab/mmfashion) develops a "fashion parsing and segmentation" module +based on the dataset +[DeepFashion-Inshop](https://drive.google.com/drive/folders/0B7EVK8r0v71pVDZFQXRsMDZCX1E?usp=sharing). +Its annotation follows COCO style. +To use it, you need to first download the data. Note that we only use "img_highres" in this task. +The file tree should be like this: + +```sh +mmdetection +├── mmdet +├── tools +├── configs +├── data +│ ├── DeepFashion +│ │ ├── In-shop +│ │ ├── Anno +│ │ │ ├── segmentation +│ │ │ │ ├── DeepFashion_segmentation_train.json +│ │ │ │ ├── DeepFashion_segmentation_query.json +│ │ │ │ ├── DeepFashion_segmentation_gallery.json +│ │ │ ├── list_bbox_inshop.txt +│ │ │ ├── list_description_inshop.json +│ │ │ ├── list_item_inshop.txt +│ │ │ └── list_landmarks_inshop.txt +│ │ ├── Eval +│ │ │ └── list_eval_partition.txt +│ │ ├── Img +│ │ │ ├── img +│ │ │ │ ├──XXX.jpg +│ │ │ ├── img_highres +│ │ │ └── ├──XXX.jpg + +``` + +After that you can train the Mask RCNN r50 on the DeepFashion-In-shop dataset by launching training with the `mask_rcnn_r50_fpn_15e_deepfashion.py` config +or creating your own config file. 
+ +``` +@inproceedings{liuLQWTcvpr16DeepFashion, + author = {Liu, Ziwei and Luo, Ping and Qiu, Shi and Wang, Xiaogang and Tang, Xiaoou}, + title = {DeepFashion: Powering Robust Clothes Recognition and Retrieval with Rich Annotations}, + booktitle = {Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, + month = {June}, + year = {2016} +} +``` + +## Model Zoo + +| Backbone | Model type | Dataset | bbox detection Average Precision | segmentation Average Precision | Config | Download (Google) | +| :---------: | :----------: | :-----------------: | :--------------------------------: | :----------------------------: | :---------:| :-------------------------: | +| ResNet50 | Mask RCNN | DeepFashion-In-shop | 0.599 | 0.584 |[config](https://github.com/open-mmlab/mmdetection/blob/master/configs/deepfashion/mask_rcnn_r50_fpn_15e_deepfashion.py)| [model](https://download.openmmlab.com/mmdetection/v2.0/deepfashion/mask_rcnn_r50_fpn_15e_deepfashion/mask_rcnn_r50_fpn_15e_deepfashion_20200329_192752.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/deepfashion/mask_rcnn_r50_fpn_15e_deepfashion/20200329_192752.log.json) | diff --git a/configs/deepfashion/mask_rcnn_r50_fpn_15e_deepfashion.py b/configs/deepfashion/mask_rcnn_r50_fpn_15e_deepfashion.py new file mode 100644 index 0000000..c4e8638 --- /dev/null +++ b/configs/deepfashion/mask_rcnn_r50_fpn_15e_deepfashion.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/mask_rcnn_r50_fpn.py', + '../_base_/datasets/deepfashion.py', '../_base_/schedules/schedule_1x.py', + '../_base_/default_runtime.py' +] +model = dict( + roi_head=dict( + bbox_head=dict(num_classes=15), mask_head=dict(num_classes=15))) +# runtime settings +runner = dict(type='EpochBasedRunner', max_epochs=15) diff --git a/configs/deformable_detr/README.md b/configs/deformable_detr/README.md new file mode 100644 index 0000000..fe68002 --- /dev/null +++ b/configs/deformable_detr/README.md @@ -0,0 +1,31 @@ +# Deformable DETR + +## 
Introduction + + + +We provide the config files for Deformable DETR: [Deformable DETR: Deformable Transformers for End-to-End Object Detection](https://arxiv.org/abs/2010.04159). + +``` +@inproceedings{ +zhu2021deformable, +title={Deformable DETR: Deformable Transformers for End-to-End Object Detection}, +author={Xizhou Zhu and Weijie Su and Lewei Lu and Bin Li and Xiaogang Wang and Jifeng Dai}, +booktitle={International Conference on Learning Representations}, +year={2021}, +url={https://openreview.net/forum?id=gZ9hCDWe6ke} +} +``` + +## Results and Models + +| Backbone | Model | Lr schd | box AP | Config | Download | +|:------:|:--------:|:--------------:|:------:|:------:|:--------:| +| R-50 | Deformable DETR |50e | 44.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/deformable_detr/deformable_detr_r50_16x2_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/deformable_detr/deformable_detr_r50_16x2_50e_coco/deformable_detr_r50_16x2_50e_coco_20210419_220030-a12b9512.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/deformable_detr/deformable_detr_r50_16x2_50e_coco/deformable_detr_r50_16x2_50e_coco_20210419_220030-a12b9512.log.json) | +| R-50 | + iterative bounding box refinement |50e | 46.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/deformable_detr/deformable_detr_refine_r50_16x2_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/deformable_detr/deformable_detr_refine_r50_16x2_50e_coco/deformable_detr_refine_r50_16x2_50e_coco_20210419_220503-5f5dff21.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/deformable_detr/deformable_detr_refine_r50_16x2_50e_coco/deformable_detr_refine_r50_16x2_50e_coco_20210419_220503-5f5dff21.log.json) | +| R-50 | ++ two-stage Deformable DETR |50e | 46.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/deformable_detr/deformable_detr_twostage_refine_r50_16x2_50e_coco.py) | 
[model](https://download.openmmlab.com/mmdetection/v2.0/deformable_detr/deformable_detr_twostage_refine_r50_16x2_50e_coco/deformable_detr_twostage_refine_r50_16x2_50e_coco_20210419_220613-9d28ab72.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/deformable_detr/deformable_detr_twostage_refine_r50_16x2_50e_coco/deformable_detr_twostage_refine_r50_16x2_50e_coco_20210419_220613-9d28ab72.log.json) | + +# NOTE + +1. All models are trained with batch size 32. +2. The performance is unstable. `Deformable DETR` and `iterative bounding box refinement` may fluctuate about 0.3 mAP. `two-stage Deformable DETR` may fluctuate about 0.2 mAP. diff --git a/configs/deformable_detr/deformable_detr_r50_16x2_50e_coco.py b/configs/deformable_detr/deformable_detr_r50_16x2_50e_coco.py new file mode 100644 index 0000000..0a58d9a --- /dev/null +++ b/configs/deformable_detr/deformable_detr_r50_16x2_50e_coco.py @@ -0,0 +1,172 @@ +_base_ = [ + '../_base_/datasets/coco_detection.py', '../_base_/default_runtime.py' +] +model = dict( + type='DeformableDETR', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=False), + norm_eval=True, + style='pytorch', + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), + neck=dict( + type='ChannelMapper', + in_channels=[512, 1024, 2048], + kernel_size=1, + out_channels=256, + act_cfg=None, + norm_cfg=dict(type='GN', num_groups=32), + num_outs=4), + bbox_head=dict( + type='DeformableDETRHead', + num_query=300, + num_classes=80, + in_channels=2048, + sync_cls_avg_factor=True, + as_two_stage=False, + transformer=dict( + type='DeformableDetrTransformer', + encoder=dict( + type='DetrTransformerEncoder', + num_layers=6, + transformerlayers=dict( + type='BaseTransformerLayer', + attn_cfgs=dict( + type='MultiScaleDeformableAttention', embed_dims=256), + feedforward_channels=1024, + ffn_dropout=0.1, + operation_order=('self_attn', 'norm', 
'ffn', 'norm'))), + decoder=dict( + type='DeformableDetrTransformerDecoder', + num_layers=6, + return_intermediate=True, + transformerlayers=dict( + type='DetrTransformerDecoderLayer', + attn_cfgs=[ + dict( + type='MultiheadAttention', + embed_dims=256, + num_heads=8, + dropout=0.1), + dict( + type='MultiScaleDeformableAttention', + embed_dims=256) + ], + feedforward_channels=1024, + ffn_dropout=0.1, + operation_order=('self_attn', 'norm', 'cross_attn', 'norm', + 'ffn', 'norm')))), + positional_encoding=dict( + type='SinePositionalEncoding', + num_feats=128, + normalize=True, + offset=-0.5), + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=2.0), + loss_bbox=dict(type='L1Loss', loss_weight=5.0), + loss_iou=dict(type='GIoULoss', loss_weight=2.0)), + # training and testing settings + train_cfg=dict( + assigner=dict( + type='HungarianAssigner', + cls_cost=dict(type='FocalLossCost', weight=2.0), + reg_cost=dict(type='BBoxL1Cost', weight=5.0, box_format='xywh'), + iou_cost=dict(type='IoUCost', iou_mode='giou', weight=2.0))), + test_cfg=dict(max_per_img=100)) +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +# train_pipeline, NOTE the img_scale and the Pad's size_divisor are different +# from the default settings in mmdet. 
+train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict( + type='AutoAugment', + policies=[ + [ + dict( + type='Resize', + img_scale=[(480, 1333), (512, 1333), (544, 1333), + (576, 1333), (608, 1333), (640, 1333), + (672, 1333), (704, 1333), (736, 1333), + (768, 1333), (800, 1333)], + multiscale_mode='value', + keep_ratio=True) + ], + [ + dict( + type='Resize', + # The ratio of every image in the train dataset is < 7; + # follow the original impl + img_scale=[(400, 4200), (500, 4200), (600, 4200)], + multiscale_mode='value', + keep_ratio=True), + dict( + type='RandomCrop', + crop_type='absolute_range', + crop_size=(384, 600), + allow_negative_crop=True), + dict( + type='Resize', + img_scale=[(480, 1333), (512, 1333), (544, 1333), + (576, 1333), (608, 1333), (640, 1333), + (672, 1333), (704, 1333), (736, 1333), + (768, 1333), (800, 1333)], + multiscale_mode='value', + override=True, + keep_ratio=True) + ] + ]), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=1), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']) +] +# test_pipeline, NOTE the Pad's size_divisor is different from the default +# setting (size_divisor=32). However, there is little effect on the performance +# whether we use the default setting or use size_divisor=1. 
+test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=1), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict(filter_empty_gt=False, pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +# optimizer +optimizer = dict( + type='AdamW', + lr=2e-4, + weight_decay=0.0001, + paramwise_cfg=dict( + custom_keys={ + 'backbone': dict(lr_mult=0.1), + 'sampling_offsets': dict(lr_mult=0.1), + 'reference_points': dict(lr_mult=0.1) + })) +optimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2)) +# learning policy +lr_config = dict(policy='step', step=[40]) +runner = dict(type='EpochBasedRunner', max_epochs=50) diff --git a/configs/deformable_detr/deformable_detr_refine_r50_16x2_50e_coco.py b/configs/deformable_detr/deformable_detr_refine_r50_16x2_50e_coco.py new file mode 100644 index 0000000..01f13df --- /dev/null +++ b/configs/deformable_detr/deformable_detr_refine_r50_16x2_50e_coco.py @@ -0,0 +1,2 @@ +_base_ = 'deformable_detr_r50_16x2_50e_coco.py' +model = dict(bbox_head=dict(with_box_refine=True)) diff --git a/configs/deformable_detr/deformable_detr_twostage_refine_r50_16x2_50e_coco.py b/configs/deformable_detr/deformable_detr_twostage_refine_r50_16x2_50e_coco.py new file mode 100644 index 0000000..2aa840d --- /dev/null +++ b/configs/deformable_detr/deformable_detr_twostage_refine_r50_16x2_50e_coco.py @@ -0,0 +1,2 @@ +_base_ = 'deformable_detr_refine_r50_16x2_50e_coco.py' +model = dict(bbox_head=dict(as_two_stage=True)) diff --git a/configs/deformable_detr/metafile.yml b/configs/deformable_detr/metafile.yml new file mode 100644 index 0000000..873292d --- /dev/null +++ 
b/configs/deformable_detr/metafile.yml @@ -0,0 +1,56 @@ +Collections: + - Name: Deformable DETR + Metadata: + Training Data: COCO + Training Techniques: + - AdamW + - Multi Scale Train + - Gradient Clip + Training Resources: 8x V100 GPUs + Architecture: + - ResNet + - Transformer + Paper: + URL: https://openreview.net/forum?id=gZ9hCDWe6ke + Title: 'Deformable DETR: Deformable Transformers for End-to-End Object Detection' + README: configs/deformable_detr/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.12.0/mmdet/models/detectors/deformable_detr.py#L6 + Version: v2.12.0 + +Models: + - Name: deformable_detr_r50_16x2_50e_coco + In Collection: Deformable DETR + Config: configs/deformable_detr/deformable_detr_r50_16x2_50e_coco.py + Metadata: + Epochs: 50 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 44.5 + Weights: https://download.openmmlab.com/mmdetection/v2.0/deformable_detr/deformable_detr_r50_16x2_50e_coco/deformable_detr_r50_16x2_50e_coco_20210419_220030-a12b9512.pth + + - Name: deformable_detr_refine_r50_16x2_50e_coco + In Collection: Deformable DETR + Config: configs/deformable_detr/deformable_detr_refine_r50_16x2_50e_coco.py + Metadata: + Epochs: 50 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 46.1 + Weights: https://download.openmmlab.com/mmdetection/v2.0/deformable_detr/deformable_detr_refine_r50_16x2_50e_coco/deformable_detr_refine_r50_16x2_50e_coco_20210419_220503-5f5dff21.pth + + - Name: deformable_detr_twostage_refine_r50_16x2_50e_coco + In Collection: Deformable DETR + Config: configs/deformable_detr/deformable_detr_twostage_refine_r50_16x2_50e_coco.py + Metadata: + Epochs: 50 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 46.8 + Weights: https://download.openmmlab.com/mmdetection/v2.0/deformable_detr/deformable_detr_twostage_refine_r50_16x2_50e_coco/deformable_detr_twostage_refine_r50_16x2_50e_coco_20210419_220613-9d28ab72.pth diff --git 
a/configs/detectors/README.md b/configs/detectors/README.md new file mode 100644 index 0000000..37c151f --- /dev/null +++ b/configs/detectors/README.md @@ -0,0 +1,59 @@ +# DetectoRS + +## Introduction + + + +We provide the config files for [DetectoRS: Detecting Objects with Recursive Feature Pyramid and Switchable Atrous Convolution](https://arxiv.org/pdf/2006.02334.pdf). + +```BibTeX +@article{qiao2020detectors, + title={DetectoRS: Detecting Objects with Recursive Feature Pyramid and Switchable Atrous Convolution}, + author={Qiao, Siyuan and Chen, Liang-Chieh and Yuille, Alan}, + journal={arXiv preprint arXiv:2006.02334}, + year={2020} +} +``` + +## Dataset + +DetectoRS requires the COCO and [COCO-stuff](http://calvin.inf.ed.ac.uk/wp-content/uploads/data/cocostuffdataset/stuffthingmaps_trainval2017.zip) datasets for training. You need to download COCO-stuff and extract it into the COCO dataset path. +The directory should look like this: + +```none +mmdetection +├── mmdet +├── tools +├── configs +├── data +│ ├── coco +│ │ ├── annotations +│ │ ├── train2017 +│ │ ├── val2017 +│ │ ├── test2017 +│ │ ├── stuffthingmaps +``` + +## Results and Models + +DetectoRS includes two major components: + +- Recursive Feature Pyramid (RFP). +- Switchable Atrous Convolution (SAC). + +They can be used independently. +Combining them together results in DetectoRS. +The results on COCO 2017 val are shown in the table below. 
+ +| Method | Detector | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +|:------:|:--------:|:-------:|:--------:|:--------------:|:------:|:-------:|:------:|:--------:| +| RFP | Cascade + ResNet-50 | 1x | 7.5 | - | 44.8 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/detectors/cascade_rcnn_r50_rfp_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/detectors/cascade_rcnn_r50_rfp_1x_coco/cascade_rcnn_r50_rfp_1x_coco-8cf51bfd.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/detectors/cascade_rcnn_r50_rfp_1x_coco/cascade_rcnn_r50_rfp_1x_coco_20200624_104126.log.json) | +| SAC | Cascade + ResNet-50 | 1x | 5.6 | - | 45.0| | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/detectors/cascade_rcnn_r50_sac_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/detectors/cascade_rcnn_r50_sac_1x_coco/cascade_rcnn_r50_sac_1x_coco-24bfda62.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/detectors/cascade_rcnn_r50_sac_1x_coco/cascade_rcnn_r50_sac_1x_coco_20200624_104402.log.json) | +| DetectoRS | Cascade + ResNet-50 | 1x | 9.9 | - | 47.4 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/detectors/detectors_cascade_rcnn_r50_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/detectors/detectors_cascade_rcnn_r50_1x_coco/detectors_cascade_rcnn_r50_1x_coco-32a10ba0.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/detectors/detectors_cascade_rcnn_r50_1x_coco/detectors_cascade_rcnn_r50_1x_coco_20200706_001203.log.json) | +| RFP | HTC + ResNet-50 | 1x | 11.2 | - | 46.6 | 40.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/detectors/htc_r50_rfp_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/detectors/htc_r50_rfp_1x_coco/htc_r50_rfp_1x_coco-8ff87c51.pth) | 
[log](https://download.openmmlab.com/mmdetection/v2.0/detectors/htc_r50_rfp_1x_coco/htc_r50_rfp_1x_coco_20200624_103053.log.json) | +| SAC | HTC + ResNet-50 | 1x | 9.3 | - | 46.4 | 40.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/detectors/htc_r50_sac_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/detectors/htc_r50_sac_1x_coco/htc_r50_sac_1x_coco-bfa60c54.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/detectors/htc_r50_sac_1x_coco/htc_r50_sac_1x_coco_20200624_103111.log.json) | +| DetectoRS | HTC + ResNet-50 | 1x | 13.6 | - | 49.1 | 42.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/detectors/detectors_htc_r50_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/detectors/detectors_htc_r50_1x_coco/detectors_htc_r50_1x_coco-329b1453.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/detectors/detectors_htc_r50_1x_coco/detectors_htc_r50_1x_coco_20200624_103659.log.json) | +| DetectoRS | HTC + ResNet-101 | 20e | 19.6 | | 50.5 | 43.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/detectors/detectors_htc_r101_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/detectors/detectors_htc_r101_20e_coco/detectors_htc_r101_20e_coco_20210419_203638-348d533b.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/detectors/detectors_htc_r101_20e_coco/detectors_htc_r101_20e_coco_20210419_203638.log.json) | + +*Note*: This is a re-implementation based on MMDetection-V2. +The original implementation is based on MMDetection-V1. 
diff --git a/configs/detectors/cascade_rcnn_r50_rfp_1x_coco.py b/configs/detectors/cascade_rcnn_r50_rfp_1x_coco.py new file mode 100644 index 0000000..4430d8a --- /dev/null +++ b/configs/detectors/cascade_rcnn_r50_rfp_1x_coco.py @@ -0,0 +1,28 @@ +_base_ = [ + '../_base_/models/cascade_rcnn_r50_fpn.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] + +model = dict( + backbone=dict( + type='DetectoRS_ResNet', + conv_cfg=dict(type='ConvAWS'), + output_img=True), + neck=dict( + type='RFP', + rfp_steps=2, + aspp_out_channels=64, + aspp_dilations=(1, 3, 6, 1), + rfp_backbone=dict( + rfp_inplanes=256, + type='DetectoRS_ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + conv_cfg=dict(type='ConvAWS'), + pretrained='torchvision://resnet50', + style='pytorch'))) diff --git a/configs/detectors/cascade_rcnn_r50_sac_1x_coco.py b/configs/detectors/cascade_rcnn_r50_sac_1x_coco.py new file mode 100644 index 0000000..ccd9319 --- /dev/null +++ b/configs/detectors/cascade_rcnn_r50_sac_1x_coco.py @@ -0,0 +1,12 @@ +_base_ = [ + '../_base_/models/cascade_rcnn_r50_fpn.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] + +model = dict( + backbone=dict( + type='DetectoRS_ResNet', + conv_cfg=dict(type='ConvAWS'), + sac=dict(type='SAC', use_deform=True), + stage_with_sac=(False, True, True, True))) diff --git a/configs/detectors/detectors_cascade_rcnn_r50_1x_coco.py b/configs/detectors/detectors_cascade_rcnn_r50_1x_coco.py new file mode 100644 index 0000000..f760404 --- /dev/null +++ b/configs/detectors/detectors_cascade_rcnn_r50_1x_coco.py @@ -0,0 +1,32 @@ +_base_ = [ + '../_base_/models/cascade_rcnn_r50_fpn.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] + +model = dict( + 
backbone=dict( + type='DetectoRS_ResNet', + conv_cfg=dict(type='ConvAWS'), + sac=dict(type='SAC', use_deform=True), + stage_with_sac=(False, True, True, True), + output_img=True), + neck=dict( + type='RFP', + rfp_steps=2, + aspp_out_channels=64, + aspp_dilations=(1, 3, 6, 1), + rfp_backbone=dict( + rfp_inplanes=256, + type='DetectoRS_ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + conv_cfg=dict(type='ConvAWS'), + sac=dict(type='SAC', use_deform=True), + stage_with_sac=(False, True, True, True), + pretrained='torchvision://resnet50', + style='pytorch'))) diff --git a/configs/detectors/detectors_htc_r101_20e_coco.py b/configs/detectors/detectors_htc_r101_20e_coco.py new file mode 100644 index 0000000..93d7d2b --- /dev/null +++ b/configs/detectors/detectors_htc_r101_20e_coco.py @@ -0,0 +1,28 @@ +_base_ = '../htc/htc_r101_fpn_20e_coco.py' + +model = dict( + backbone=dict( + type='DetectoRS_ResNet', + conv_cfg=dict(type='ConvAWS'), + sac=dict(type='SAC', use_deform=True), + stage_with_sac=(False, True, True, True), + output_img=True), + neck=dict( + type='RFP', + rfp_steps=2, + aspp_out_channels=64, + aspp_dilations=(1, 3, 6, 1), + rfp_backbone=dict( + rfp_inplanes=256, + type='DetectoRS_ResNet', + depth=101, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + conv_cfg=dict(type='ConvAWS'), + sac=dict(type='SAC', use_deform=True), + stage_with_sac=(False, True, True, True), + pretrained='torchvision://resnet101', + style='pytorch'))) diff --git a/configs/detectors/detectors_htc_r50_1x_coco.py b/configs/detectors/detectors_htc_r50_1x_coco.py new file mode 100644 index 0000000..0d2fc4f --- /dev/null +++ b/configs/detectors/detectors_htc_r50_1x_coco.py @@ -0,0 +1,28 @@ +_base_ = '../htc/htc_r50_fpn_1x_coco.py' + +model = dict( + backbone=dict( + type='DetectoRS_ResNet', + 
conv_cfg=dict(type='ConvAWS'), + sac=dict(type='SAC', use_deform=True), + stage_with_sac=(False, True, True, True), + output_img=True), + neck=dict( + type='RFP', + rfp_steps=2, + aspp_out_channels=64, + aspp_dilations=(1, 3, 6, 1), + rfp_backbone=dict( + rfp_inplanes=256, + type='DetectoRS_ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + conv_cfg=dict(type='ConvAWS'), + sac=dict(type='SAC', use_deform=True), + stage_with_sac=(False, True, True, True), + pretrained='torchvision://resnet50', + style='pytorch'))) diff --git a/configs/detectors/htc_r50_rfp_1x_coco.py b/configs/detectors/htc_r50_rfp_1x_coco.py new file mode 100644 index 0000000..496104e --- /dev/null +++ b/configs/detectors/htc_r50_rfp_1x_coco.py @@ -0,0 +1,24 @@ +_base_ = '../htc/htc_r50_fpn_1x_coco.py' + +model = dict( + backbone=dict( + type='DetectoRS_ResNet', + conv_cfg=dict(type='ConvAWS'), + output_img=True), + neck=dict( + type='RFP', + rfp_steps=2, + aspp_out_channels=64, + aspp_dilations=(1, 3, 6, 1), + rfp_backbone=dict( + rfp_inplanes=256, + type='DetectoRS_ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + conv_cfg=dict(type='ConvAWS'), + pretrained='torchvision://resnet50', + style='pytorch'))) diff --git a/configs/detectors/htc_r50_sac_1x_coco.py b/configs/detectors/htc_r50_sac_1x_coco.py new file mode 100644 index 0000000..72d4db9 --- /dev/null +++ b/configs/detectors/htc_r50_sac_1x_coco.py @@ -0,0 +1,8 @@ +_base_ = '../htc/htc_r50_fpn_1x_coco.py' + +model = dict( + backbone=dict( + type='DetectoRS_ResNet', + conv_cfg=dict(type='ConvAWS'), + sac=dict(type='SAC', use_deform=True), + stage_with_sac=(False, True, True, True))) diff --git a/configs/detectors/metafile.yml b/configs/detectors/metafile.yml new file mode 100644 index 0000000..4bed569 --- /dev/null +++ 
b/configs/detectors/metafile.yml @@ -0,0 +1,114 @@ +Collections: + - Name: DetectoRS + Metadata: + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - ASPP + - FPN + - RFP + - RPN + - ResNet + - RoIAlign + - SAC + Paper: + URL: https://arxiv.org/abs/2006.02334 + Title: 'DetectoRS: Detecting Objects with Recursive Feature Pyramid and Switchable Atrous Convolution' + README: configs/detectors/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.2.0/mmdet/models/backbones/detectors_resnet.py#L205 + Version: v2.2.0 + +Models: + - Name: cascade_rcnn_r50_rfp_1x_coco + In Collection: DetectoRS + Config: configs/detectors/cascade_rcnn_r50_rfp_1x_coco.py + Metadata: + Training Memory (GB): 7.5 + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 44.8 + Weights: https://download.openmmlab.com/mmdetection/v2.0/detectors/cascade_rcnn_r50_rfp_1x_coco/cascade_rcnn_r50_rfp_1x_coco-8cf51bfd.pth + + - Name: cascade_rcnn_r50_sac_1x_coco + In Collection: DetectoRS + Config: configs/detectors/cascade_rcnn_r50_sac_1x_coco.py + Metadata: + Training Memory (GB): 5.6 + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 45.0 + Weights: https://download.openmmlab.com/mmdetection/v2.0/detectors/cascade_rcnn_r50_sac_1x_coco/cascade_rcnn_r50_sac_1x_coco-24bfda62.pth + + - Name: detectors_cascade_rcnn_r50_1x_coco + In Collection: DetectoRS + Config: configs/detectors/detectors_cascade_rcnn_r50_1x_coco.py + Metadata: + Training Memory (GB): 9.9 + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 47.4 + Weights: https://download.openmmlab.com/mmdetection/v2.0/detectors/detectors_cascade_rcnn_r50_1x_coco/detectors_cascade_rcnn_r50_1x_coco-32a10ba0.pth + + - Name: htc_r50_rfp_1x_coco + In Collection: DetectoRS + Config: configs/detectors/htc_r50_rfp_1x_coco.py + Metadata: + Training Memory 
(GB): 11.2 + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 46.6 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 40.9 + Weights: https://download.openmmlab.com/mmdetection/v2.0/detectors/htc_r50_rfp_1x_coco/htc_r50_rfp_1x_coco-8ff87c51.pth + + - Name: htc_r50_sac_1x_coco + In Collection: DetectoRS + Config: configs/detectors/htc_r50_sac_1x_coco.py + Metadata: + Training Memory (GB): 9.3 + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 46.4 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 40.9 + Weights: https://download.openmmlab.com/mmdetection/v2.0/detectors/htc_r50_sac_1x_coco/htc_r50_sac_1x_coco-bfa60c54.pth + + - Name: detectors_htc_r50_1x_coco + In Collection: DetectoRS + Config: configs/detectors/detectors_htc_r50_1x_coco.py + Metadata: + Training Memory (GB): 13.6 + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 49.1 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 42.6 + Weights: https://download.openmmlab.com/mmdetection/v2.0/detectors/detectors_htc_r50_1x_coco/detectors_htc_r50_1x_coco-329b1453.pth diff --git a/configs/detr/README.md b/configs/detr/README.md new file mode 100644 index 0000000..617a8fb --- /dev/null +++ b/configs/detr/README.md @@ -0,0 +1,27 @@ +# DETR + +## Introduction + + + +We provide the config files for DETR: [End-to-End Object Detection with Transformers](https://arxiv.org/abs/2005.12872). 
+ +```BibTeX +@inproceedings{detr, + author = {Nicolas Carion and + Francisco Massa and + Gabriel Synnaeve and + Nicolas Usunier and + Alexander Kirillov and + Sergey Zagoruyko}, + title = {End-to-End Object Detection with Transformers}, + booktitle = {ECCV}, + year = {2020} +} +``` + +## Results and Models + +| Backbone | Model | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +|:------:|:--------:|:-------:|:--------:|:--------------:|:------:|:------:|:--------:| +| R-50 | DETR |150e |7.9| | 40.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/detr/detr_r50_8x2_150e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/detr/detr_r50_8x2_150e_coco/detr_r50_8x2_150e_coco_20201130_194835-2c4b8974.pth) &#124; [log](https://download.openmmlab.com/mmdetection/v2.0/detr/detr_r50_8x2_150e_coco/detr_r50_8x2_150e_coco_20201130_194835.log.json) | diff --git a/configs/detr/detr_r50_8x2_150e_coco.py b/configs/detr/detr_r50_8x2_150e_coco.py new file mode 100644 index 0000000..892447d --- /dev/null +++ b/configs/detr/detr_r50_8x2_150e_coco.py @@ -0,0 +1,150 @@ +_base_ = [ + '../_base_/datasets/coco_detection.py', '../_base_/default_runtime.py' +] +model = dict( + type='DETR', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(3, ), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=False), + norm_eval=True, + style='pytorch', + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), + bbox_head=dict( + type='DETRHead', + num_classes=80, + in_channels=2048, + transformer=dict( + type='Transformer', + encoder=dict( + type='DetrTransformerEncoder', + num_layers=6, + transformerlayers=dict( + type='BaseTransformerLayer', + attn_cfgs=[ + dict( + type='MultiheadAttention', + embed_dims=256, + num_heads=8, + dropout=0.1) + ], + feedforward_channels=2048, + ffn_dropout=0.1, + operation_order=('self_attn', 'norm', 'ffn', 'norm'))), + decoder=dict( + type='DetrTransformerDecoder', + 
return_intermediate=True, + num_layers=6, + transformerlayers=dict( + type='DetrTransformerDecoderLayer', + attn_cfgs=dict( + type='MultiheadAttention', + embed_dims=256, + num_heads=8, + dropout=0.1), + feedforward_channels=2048, + ffn_dropout=0.1, + operation_order=('self_attn', 'norm', 'cross_attn', 'norm', + 'ffn', 'norm')), + )), + positional_encoding=dict( + type='SinePositionalEncoding', num_feats=128, normalize=True), + loss_cls=dict( + type='CrossEntropyLoss', + bg_cls_weight=0.1, + use_sigmoid=False, + loss_weight=1.0, + class_weight=1.0), + loss_bbox=dict(type='L1Loss', loss_weight=5.0), + loss_iou=dict(type='GIoULoss', loss_weight=2.0)), + # training and testing settings + train_cfg=dict( + assigner=dict( + type='HungarianAssigner', + cls_cost=dict(type='ClassificationCost', weight=1.), + reg_cost=dict(type='BBoxL1Cost', weight=5.0, box_format='xywh'), + iou_cost=dict(type='IoUCost', iou_mode='giou', weight=2.0))), + test_cfg=dict(max_per_img=100)) +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +# train_pipeline, NOTE the img_scale and the Pad's size_divisor is different +# from the default setting in mmdet. 
+train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict( + type='AutoAugment', + policies=[[ + dict( + type='Resize', + img_scale=[(480, 1333), (512, 1333), (544, 1333), (576, 1333), + (608, 1333), (640, 1333), (672, 1333), (704, 1333), + (736, 1333), (768, 1333), (800, 1333)], + multiscale_mode='value', + keep_ratio=True) + ], + [ + dict( + type='Resize', + img_scale=[(400, 1333), (500, 1333), (600, 1333)], + multiscale_mode='value', + keep_ratio=True), + dict( + type='RandomCrop', + crop_type='absolute_range', + crop_size=(384, 600), + allow_negative_crop=True), + dict( + type='Resize', + img_scale=[(480, 1333), (512, 1333), (544, 1333), + (576, 1333), (608, 1333), (640, 1333), + (672, 1333), (704, 1333), (736, 1333), + (768, 1333), (800, 1333)], + multiscale_mode='value', + override=True, + keep_ratio=True) + ]]), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=1), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']) +] +# test_pipeline, NOTE the Pad's size_divisor is different from the default +# setting (size_divisor=32), though there is little effect on the performance +# whether we use the default setting or use size_divisor=1. 
+test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=1), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +# optimizer +optimizer = dict( + type='AdamW', + lr=0.0001, + weight_decay=0.0001, + paramwise_cfg=dict( + custom_keys={'backbone': dict(lr_mult=0.1, decay_mult=1.0)})) +optimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2)) +# learning policy +lr_config = dict(policy='step', step=[100]) +runner = dict(type='EpochBasedRunner', max_epochs=150) diff --git a/configs/detr/detr_r50_8x2_150e_coco_merge.py b/configs/detr/detr_r50_8x2_150e_coco_merge.py new file mode 100644 index 0000000..947cd02 --- /dev/null +++ b/configs/detr/detr_r50_8x2_150e_coco_merge.py @@ -0,0 +1,152 @@ +_base_ = [ + '../_base_/datasets/coco_detection.py', '../_base_/default_runtime.py' +] +model = dict( + type='DETRBase', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(3, ), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=False), + norm_eval=True, + style='pytorch', + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), + bbox_head=dict( + type='DETRProHead', + num_classes=80, + in_channels=2048, + num_vocal=4096, + split_loss=True, + transformer=dict( + type='Transformer', + encoder=dict( + type='DetrTransformerEncoder', + num_layers=6, + transformerlayers=dict( + type='BaseTransformerLayer', + attn_cfgs=[ + dict( + type='MultiheadAttention', + embed_dims=256, + num_heads=8, + dropout=0.1) + ], + feedforward_channels=2048, + ffn_dropout=0.1, + operation_order=('self_attn', 'norm', 'ffn', 
'norm'))), + decoder=dict( + type='DetrTransformerDecoder', + return_intermediate=True, + num_layers=6, + transformerlayers=dict( + type='DetrTransformerDecoderLayer', + attn_cfgs=dict( + type='MultiheadAttention', + embed_dims=256, + num_heads=8, + dropout=0.1), + feedforward_channels=2048, + ffn_dropout=0.1, + operation_order=('self_attn', 'norm', 'cross_attn', 'norm', + 'ffn', 'norm')), + )), + positional_encoding=dict( + type='SinePositionalEncoding', num_feats=128, normalize=True), + loss_cls=dict( + type='CrossEntropyLoss', + bg_cls_weight=0.1, + use_sigmoid=False, + loss_weight=1.0, + class_weight=1.0), + loss_bbox=dict(type='L1Loss', loss_weight=5.0), + loss_iou=dict(type='GIoULoss', loss_weight=2.0)), + # training and testing settings + train_cfg=dict( + assigner=dict( + type='HungarianAssigner', + cls_cost=dict(type='ClassificationCost', weight=1.), + reg_cost=dict(type='BBoxL1Cost', weight=5.0, box_format='xywh'), + iou_cost=dict(type='IoUCost', iou_mode='giou', weight=2.0))), + test_cfg=dict(max_per_img=100)) +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +# train_pipeline, NOTE the img_scale and the Pad's size_divisor is different +# from the default setting in mmdet. 
+train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict( + type='AutoAugment', + policies=[[ + dict( + type='Resize', + img_scale=[(480, 1333), (512, 1333), (544, 1333), (576, 1333), + (608, 1333), (640, 1333), (672, 1333), (704, 1333), + (736, 1333), (768, 1333), (800, 1333)], + multiscale_mode='value', + keep_ratio=True) + ], + [ + dict( + type='Resize', + img_scale=[(400, 1333), (500, 1333), (600, 1333)], + multiscale_mode='value', + keep_ratio=True), + dict( + type='RandomCrop', + crop_type='absolute_range', + crop_size=(384, 600), + allow_negative_crop=True), + dict( + type='Resize', + img_scale=[(480, 1333), (512, 1333), (544, 1333), + (576, 1333), (608, 1333), (640, 1333), + (672, 1333), (704, 1333), (736, 1333), + (768, 1333), (800, 1333)], + multiscale_mode='value', + override=True, + keep_ratio=True) + ]]), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=1), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']) +] +# test_pipeline, NOTE the Pad's size_divisor is different from the default +# setting (size_divisor=32), though there is little effect on the performance +# whether we use the default setting or use size_divisor=1. 
+test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=1), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +# optimizer +optimizer = dict( + type='AdamW', + lr=0.0001, + weight_decay=0.0001, + paramwise_cfg=dict( + custom_keys={'backbone': dict(lr_mult=0.1, decay_mult=1.0)})) +optimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2)) +# learning policy +lr_config = dict(policy='step', step=[100]) +runner = dict(type='EpochBasedRunner', max_epochs=150) diff --git a/configs/detr/metafile.yml b/configs/detr/metafile.yml new file mode 100644 index 0000000..45622cf --- /dev/null +++ b/configs/detr/metafile.yml @@ -0,0 +1,33 @@ +Collections: + - Name: DETR + Metadata: + Training Data: COCO + Training Techniques: + - AdamW + - Multi Scale Train + - Gradient Clip + Training Resources: 8x V100 GPUs + Architecture: + - ResNet + - Transformer + Paper: + URL: https://arxiv.org/abs/2005.12872 + Title: 'End-to-End Object Detection with Transformers' + README: configs/detr/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.7.0/mmdet/models/detectors/detr.py#L7 + Version: v2.7.0 + +Models: + - Name: detr_r50_8x2_150e_coco + In Collection: DETR + Config: configs/detr/detr_r50_8x2_150e_coco.py + Metadata: + Training Memory (GB): 7.9 + Epochs: 150 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 40.1 + Weights: https://download.openmmlab.com/mmdetection/v2.0/detr/detr_r50_8x2_150e_coco/detr_r50_8x2_150e_coco_20201130_194835-2c4b8974.pth diff --git a/configs/double_heads/README.md 
b/configs/double_heads/README.md new file mode 100644 index 0000000..872211d --- /dev/null +++ b/configs/double_heads/README.md @@ -0,0 +1,22 @@ +# Rethinking Classification and Localization for Object Detection + +## Introduction + + + +```latex +@article{wu2019rethinking, + title={Rethinking Classification and Localization for Object Detection}, + author={Yue Wu and Yinpeng Chen and Lu Yuan and Zicheng Liu and Lijuan Wang and Hongzhi Li and Yun Fu}, + year={2019}, + eprint={1904.06493}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} +``` + +## Results and models + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :------: | :--------: | +| R-50-FPN | pytorch | 1x | 6.8 | 9.5 | 40.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/double_heads/dh_faster_rcnn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/double_heads/dh_faster_rcnn_r50_fpn_1x_coco/dh_faster_rcnn_r50_fpn_1x_coco_20200130-586b67df.pth) &#124; [log](https://download.openmmlab.com/mmdetection/v2.0/double_heads/dh_faster_rcnn_r50_fpn_1x_coco/dh_faster_rcnn_r50_fpn_1x_coco_20200130_220238.log.json) | diff --git a/configs/double_heads/dh_faster_rcnn_r50_fpn_1x_coco.py b/configs/double_heads/dh_faster_rcnn_r50_fpn_1x_coco.py new file mode 100644 index 0000000..9b8118b --- /dev/null +++ b/configs/double_heads/dh_faster_rcnn_r50_fpn_1x_coco.py @@ -0,0 +1,23 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + roi_head=dict( + type='DoubleHeadRoIHead', + reg_roi_scale_factor=1.3, + bbox_head=dict( + _delete_=True, + type='DoubleConvFCBBoxHead', + num_convs=4, + num_fcs=2, + in_channels=256, + conv_out_channels=1024, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2]), + 
reg_class_agnostic=False, + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=2.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=2.0)))) diff --git a/configs/double_heads/metafile.yml b/configs/double_heads/metafile.yml new file mode 100644 index 0000000..6fe9b7a --- /dev/null +++ b/configs/double_heads/metafile.yml @@ -0,0 +1,41 @@ +Collections: + - Name: Rethinking Classification and Localization for Object Detection + Metadata: + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - FPN + - RPN + - ResNet + - RoIAlign + Paper: + URL: https://arxiv.org/pdf/1904.06493 + Title: 'Rethinking Classification and Localization for Object Detection' + README: configs/double_heads/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/roi_heads/double_roi_head.py#L6 + Version: v2.0.0 + +Models: + - Name: dh_faster_rcnn_r50_fpn_1x_coco + In Collection: Rethinking Classification and Localization for Object Detection + Config: configs/double_heads/dh_faster_rcnn_r50_fpn_1x_coco.py + Metadata: + Training Memory (GB): 6.8 + inference time (ms/im): + - value: 105.26 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 40.0 + Weights: https://download.openmmlab.com/mmdetection/v2.0/double_heads/dh_faster_rcnn_r50_fpn_1x_coco/dh_faster_rcnn_r50_fpn_1x_coco_20200130-586b67df.pth diff --git a/configs/dynamic_rcnn/README.md b/configs/dynamic_rcnn/README.md new file mode 100644 index 0000000..2b087c3 --- /dev/null +++ b/configs/dynamic_rcnn/README.md @@ -0,0 +1,20 @@ +# Dynamic R-CNN: Towards High Quality Object Detection via Dynamic Training + +## Introduction + + + +``` +@article{DynamicRCNN, + author = {Hongkai Zhang and Hong Chang and Bingpeng Ma and Naiyan Wang and Xilin Chen}, + title = {Dynamic 
{R-CNN}: Towards High Quality Object Detection via Dynamic Training}, + journal = {arXiv preprint arXiv:2004.06002}, + year = {2020} +} +``` + +## Results and Models + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +|:---------:|:-------:|:-------:|:--------:|:--------------:|:------:|:------:|:--------:| +| R-50 | pytorch | 1x | 3.8 | | 38.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/dynamic_rcnn/dynamic_rcnn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/dynamic_rcnn/dynamic_rcnn_r50_fpn_1x/dynamic_rcnn_r50_fpn_1x-62a3f276.pth) &#124; [log](https://download.openmmlab.com/mmdetection/v2.0/dynamic_rcnn/dynamic_rcnn_r50_fpn_1x/dynamic_rcnn_r50_fpn_1x_20200618_095048.log.json) | diff --git a/configs/dynamic_rcnn/dynamic_rcnn_r50_fpn_1x_coco.py b/configs/dynamic_rcnn/dynamic_rcnn_r50_fpn_1x_coco.py new file mode 100644 index 0000000..f2deb99 --- /dev/null +++ b/configs/dynamic_rcnn/dynamic_rcnn_r50_fpn_1x_coco.py @@ -0,0 +1,28 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + roi_head=dict( + type='DynamicRoIHead', + bbox_head=dict( + type='Shared2FCBBoxHead', + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2]), + reg_class_agnostic=False, + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))), + train_cfg=dict( + rpn_proposal=dict(nms=dict(iou_threshold=0.85)), + rcnn=dict( + dynamic_rcnn=dict( + iou_topk=75, + beta_topk=10, + update_iter_interval=100, + initial_iou=0.4, + initial_beta=1.0))), + test_cfg=dict(rpn=dict(nms=dict(iou_threshold=0.85)))) diff --git a/configs/dynamic_rcnn/metafile.yml b/configs/dynamic_rcnn/metafile.yml new file mode 100644 index 0000000..fec43db --- /dev/null +++ 
b/configs/dynamic_rcnn/metafile.yml @@ -0,0 +1,35 @@ +Collections: + - Name: Dynamic R-CNN + Metadata: + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - Dynamic R-CNN + - FPN + - RPN + - ResNet + - RoIAlign + Paper: + URL: https://arxiv.org/pdf/2004.06002 + Title: 'Dynamic R-CNN: Towards High Quality Object Detection via Dynamic Training' + README: configs/dynamic_rcnn/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.2.0/mmdet/models/roi_heads/dynamic_roi_head.py#L11 + Version: v2.2.0 + +Models: + - Name: dynamic_rcnn_r50_fpn_1x_coco + In Collection: Dynamic R-CNN + Config: configs/dynamic_rcnn/dynamic_rcnn_r50_fpn_1x_coco.py + Metadata: + Training Memory (GB): 3.8 + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 38.9 + Weights: https://download.openmmlab.com/mmdetection/v2.0/dynamic_rcnn/dynamic_rcnn_r50_fpn_1x/dynamic_rcnn_r50_fpn_1x-62a3f276.pth diff --git a/configs/empirical_attention/README.md b/configs/empirical_attention/README.md new file mode 100644 index 0000000..f8647bd --- /dev/null +++ b/configs/empirical_attention/README.md @@ -0,0 +1,23 @@ +# An Empirical Study of Spatial Attention Mechanisms in Deep Networks + +## Introduction + + + +```latex +@article{zhu2019empirical, + title={An Empirical Study of Spatial Attention Mechanisms in Deep Networks}, + author={Zhu, Xizhou and Cheng, Dazhi and Zhang, Zheng and Lin, Stephen and Dai, Jifeng}, + journal={arXiv preprint arXiv:1904.05873}, + year={2019} +} +``` + +## Results and Models + +| Backbone | Attention Component | DCN | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +|:---------:|:-------------------:|:----:|:-------:|:--------:|:--------------:|:------:|:------:|:--------:| +| R-50 | 1111 | N | 1x | 8.0 | 13.8 | 40.0 | 
[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/empirical_attention/faster_rcnn_r50_fpn_attention_1111_1x_coco/faster_rcnn_r50_fpn_attention_1111_1x_coco_20200130-403cccba.pth) &#124; [log](https://download.openmmlab.com/mmdetection/v2.0/empirical_attention/faster_rcnn_r50_fpn_attention_1111_1x_coco/faster_rcnn_r50_fpn_attention_1111_1x_coco_20200130_210344.log.json) | +| R-50 | 0010 | N | 1x | 4.2 | 18.4 | 39.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/empirical_attention/faster_rcnn_r50_fpn_attention_0010_1x_coco/faster_rcnn_r50_fpn_attention_0010_1x_coco_20200130-7cb0c14d.pth) &#124; [log](https://download.openmmlab.com/mmdetection/v2.0/empirical_attention/faster_rcnn_r50_fpn_attention_0010_1x_coco/faster_rcnn_r50_fpn_attention_0010_1x_coco_20200130_210125.log.json) | +| R-50 | 1111 | Y | 1x | 8.0 | 12.7 | 42.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_dcn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/empirical_attention/faster_rcnn_r50_fpn_attention_1111_dcn_1x_coco/faster_rcnn_r50_fpn_attention_1111_dcn_1x_coco_20200130-8b2523a6.pth) &#124; [log](https://download.openmmlab.com/mmdetection/v2.0/empirical_attention/faster_rcnn_r50_fpn_attention_1111_dcn_1x_coco/faster_rcnn_r50_fpn_attention_1111_dcn_1x_coco_20200130_204442.log.json) | +| R-50 | 0010 | Y | 1x | 4.2 | 17.1 | 42.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_dcn_1x_coco.py) | 
[model](https://download.openmmlab.com/mmdetection/v2.0/empirical_attention/faster_rcnn_r50_fpn_attention_0010_dcn_1x_coco/faster_rcnn_r50_fpn_attention_0010_dcn_1x_coco_20200130-1a2e831d.pth) &#124; [log](https://download.openmmlab.com/mmdetection/v2.0/empirical_attention/faster_rcnn_r50_fpn_attention_0010_dcn_1x_coco/faster_rcnn_r50_fpn_attention_0010_dcn_1x_coco_20200130_210410.log.json) | diff --git a/configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_1x_coco.py b/configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_1x_coco.py new file mode 100644 index 0000000..a544e3a --- /dev/null +++ b/configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_1x_coco.py @@ -0,0 +1,13 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict(plugins=[ + dict( + cfg=dict( + type='GeneralizedAttention', + spatial_range=-1, + num_heads=8, + attention_type='0010', + kv_stride=2), + stages=(False, False, True, True), + position='after_conv2') + ])) diff --git a/configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_dcn_1x_coco.py b/configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_dcn_1x_coco.py new file mode 100644 index 0000000..bbefd27 --- /dev/null +++ b/configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_dcn_1x_coco.py @@ -0,0 +1,16 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + plugins=[ + dict( + cfg=dict( + type='GeneralizedAttention', + spatial_range=-1, + num_heads=8, + attention_type='0010', + kv_stride=2), + stages=(False, False, True, True), + position='after_conv2') + ], + dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True))) diff --git a/configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_1x_coco.py b/configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_1x_coco.py new file mode 100644 index 0000000..13a4645 --- /dev/null +++ 
b/configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_1x_coco.py @@ -0,0 +1,13 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict(plugins=[ + dict( + cfg=dict( + type='GeneralizedAttention', + spatial_range=-1, + num_heads=8, + attention_type='1111', + kv_stride=2), + stages=(False, False, True, True), + position='after_conv2') + ])) diff --git a/configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_dcn_1x_coco.py b/configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_dcn_1x_coco.py new file mode 100644 index 0000000..b1f26c0 --- /dev/null +++ b/configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_dcn_1x_coco.py @@ -0,0 +1,16 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + plugins=[ + dict( + cfg=dict( + type='GeneralizedAttention', + spatial_range=-1, + num_heads=8, + attention_type='1111', + kv_stride=2), + stages=(False, False, True, True), + position='after_conv2') + ], + dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True))) diff --git a/configs/empirical_attention/metafile.yml b/configs/empirical_attention/metafile.yml new file mode 100644 index 0000000..923bcb2 --- /dev/null +++ b/configs/empirical_attention/metafile.yml @@ -0,0 +1,103 @@ +Collections: + - Name: Empirical Attention + Metadata: + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - Deformable Convolution + - FPN + - RPN + - ResNet + - RoIAlign + - Spatial Attention + Paper: + URL: https://arxiv.org/pdf/1904.05873 + Title: 'An Empirical Study of Spatial Attention Mechanisms in Deep Networks' + README: configs/empirical_attention/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/ops/generalized_attention.py#L10 + Version: v2.0.0 + +Models: + - Name: faster_rcnn_r50_fpn_attention_1111_1x_coco + In 
Collection: Empirical Attention + Config: configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_1x_coco.py + Metadata: + Training Memory (GB): 8.0 + inference time (ms/im): + - value: 72.46 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 40.0 + Weights: https://download.openmmlab.com/mmdetection/v2.0/empirical_attention/faster_rcnn_r50_fpn_attention_1111_1x_coco/faster_rcnn_r50_fpn_attention_1111_1x_coco_20200130-403cccba.pth + + - Name: faster_rcnn_r50_fpn_attention_0010_1x_coco + In Collection: Empirical Attention + Config: configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_1x_coco.py + Metadata: + Training Memory (GB): 4.2 + inference time (ms/im): + - value: 54.35 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 39.1 + Weights: https://download.openmmlab.com/mmdetection/v2.0/empirical_attention/faster_rcnn_r50_fpn_attention_0010_1x_coco/faster_rcnn_r50_fpn_attention_0010_1x_coco_20200130-7cb0c14d.pth + + - Name: faster_rcnn_r50_fpn_attention_1111_dcn_1x_coco + In Collection: Empirical Attention + Config: configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_dcn_1x_coco.py + Metadata: + Training Memory (GB): 8.0 + inference time (ms/im): + - value: 78.74 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 42.1 + Weights: https://download.openmmlab.com/mmdetection/v2.0/empirical_attention/faster_rcnn_r50_fpn_attention_1111_dcn_1x_coco/faster_rcnn_r50_fpn_attention_1111_dcn_1x_coco_20200130-8b2523a6.pth + + - Name: faster_rcnn_r50_fpn_attention_0010_dcn_1x_coco + In Collection: Empirical Attention + Config: 
configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_dcn_1x_coco.py + Metadata: + Training Memory (GB): 4.2 + inference time (ms/im): + - value: 58.48 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 42.0 + Weights: https://download.openmmlab.com/mmdetection/v2.0/empirical_attention/faster_rcnn_r50_fpn_attention_0010_dcn_1x_coco/faster_rcnn_r50_fpn_attention_0010_dcn_1x_coco_20200130-1a2e831d.pth diff --git a/configs/fast_rcnn/README.md b/configs/fast_rcnn/README.md new file mode 100644 index 0000000..3be862a --- /dev/null +++ b/configs/fast_rcnn/README.md @@ -0,0 +1,16 @@ +# Fast R-CNN + +## Introduction + + + +```latex +@inproceedings{girshick2015fast, + title={Fast r-cnn}, + author={Girshick, Ross}, + booktitle={Proceedings of the IEEE international conference on computer vision}, + year={2015} +} +``` + +## Results and models diff --git a/configs/fast_rcnn/fast_rcnn_r101_caffe_fpn_1x_coco.py b/configs/fast_rcnn/fast_rcnn_r101_caffe_fpn_1x_coco.py new file mode 100644 index 0000000..3ab8e98 --- /dev/null +++ b/configs/fast_rcnn/fast_rcnn_r101_caffe_fpn_1x_coco.py @@ -0,0 +1,7 @@ +_base_ = './fast_rcnn_r50_caffe_fpn_1x_coco.py' +model = dict( + backbone=dict( + depth=101, + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://detectron2/resnet101_caffe'))) diff --git a/configs/fast_rcnn/fast_rcnn_r101_fpn_1x_coco.py b/configs/fast_rcnn/fast_rcnn_r101_fpn_1x_coco.py new file mode 100644 index 0000000..83852b2 --- /dev/null +++ b/configs/fast_rcnn/fast_rcnn_r101_fpn_1x_coco.py @@ -0,0 +1,6 @@ +_base_ = './fast_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + depth=101, + init_cfg=dict(type='Pretrained', + checkpoint='torchvision://resnet101'))) diff --git a/configs/fast_rcnn/fast_rcnn_r101_fpn_2x_coco.py b/configs/fast_rcnn/fast_rcnn_r101_fpn_2x_coco.py new file mode 100644 index 0000000..c220885 --- 
/dev/null +++ b/configs/fast_rcnn/fast_rcnn_r101_fpn_2x_coco.py @@ -0,0 +1,6 @@ +_base_ = './fast_rcnn_r50_fpn_2x_coco.py' +model = dict( + backbone=dict( + depth=101, + init_cfg=dict(type='Pretrained', + checkpoint='torchvision://resnet101'))) diff --git a/configs/fast_rcnn/fast_rcnn_r50_caffe_fpn_1x_coco.py b/configs/fast_rcnn/fast_rcnn_r50_caffe_fpn_1x_coco.py new file mode 100644 index 0000000..f1b29ef --- /dev/null +++ b/configs/fast_rcnn/fast_rcnn_r50_caffe_fpn_1x_coco.py @@ -0,0 +1,48 @@ +_base_ = './fast_rcnn_r50_fpn_1x_coco.py' + +model = dict( + backbone=dict( + norm_cfg=dict(type='BN', requires_grad=False), + style='caffe', + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://detectron2/resnet50_caffe'))) + +# use caffe img_norm +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadProposals', num_max_proposals=2000), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'proposals', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadProposals', num_max_proposals=None), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='ToTensor', keys=['proposals']), + dict( + type='ToDataContainer', + fields=[dict(key='proposals', stack=False)]), + dict(type='Collect', keys=['img', 'proposals']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + 
test=dict(pipeline=test_pipeline)) diff --git a/configs/fast_rcnn/fast_rcnn_r50_fpn_1x_coco.py b/configs/fast_rcnn/fast_rcnn_r50_fpn_1x_coco.py new file mode 100644 index 0000000..d2f080e --- /dev/null +++ b/configs/fast_rcnn/fast_rcnn_r50_fpn_1x_coco.py @@ -0,0 +1,52 @@ +_base_ = [ + '../_base_/models/fast_rcnn_r50_fpn.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadProposals', num_max_proposals=2000), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'proposals', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadProposals', num_max_proposals=None), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='ToTensor', keys=['proposals']), + dict( + type='ToDataContainer', + fields=[dict(key='proposals', stack=False)]), + dict(type='Collect', keys=['img', 'proposals']), + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_train2017.pkl', + pipeline=train_pipeline), + val=dict( + proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl', + pipeline=test_pipeline), + test=dict( + proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl', + 
pipeline=test_pipeline)) diff --git a/configs/fast_rcnn/fast_rcnn_r50_fpn_2x_coco.py b/configs/fast_rcnn/fast_rcnn_r50_fpn_2x_coco.py new file mode 100644 index 0000000..228e856 --- /dev/null +++ b/configs/fast_rcnn/fast_rcnn_r50_fpn_2x_coco.py @@ -0,0 +1,5 @@ +_base_ = './fast_rcnn_r50_fpn_1x_coco.py' + +# learning policy +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/faster_rcnn/README.md b/configs/faster_rcnn/README.md new file mode 100644 index 0000000..772a6c4 --- /dev/null +++ b/configs/faster_rcnn/README.md @@ -0,0 +1,67 @@ +# Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks + +## Introduction + + + +```latex +@article{Ren_2017, + title={Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks}, + journal={IEEE Transactions on Pattern Analysis and Machine Intelligence}, + publisher={Institute of Electrical and Electronics Engineers (IEEE)}, + author={Ren, Shaoqing and He, Kaiming and Girshick, Ross and Sun, Jian}, + year={2017}, + month={Jun}, +} +``` + +## Results and models + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :------: | :--------: | +| R-50-DC5 | caffe | 1x | - | - | 37.2 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/faster_rcnn/faster_rcnn_r50_caffe_dc5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_dc5_1x_coco/faster_rcnn_r50_caffe_dc5_1x_coco_20201030_151909-531f0f43.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_dc5_1x_coco/faster_rcnn_r50_caffe_dc5_1x_coco_20201030_151909.log.json) | +| R-50-FPN | caffe | 1x | 3.8 | | 37.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_1x_coco.py) | 
[model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_fpn_1x_coco/faster_rcnn_r50_caffe_fpn_1x_coco_bbox_mAP-0.378_20200504_180032-c5925ee5.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_fpn_1x_coco/faster_rcnn_r50_caffe_fpn_1x_coco_20200504_180032.log.json) | +| R-50-FPN | pytorch | 1x | 4.0 | 21.4 | 37.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130_204655.log.json) | +| R-50-FPN | pytorch | 2x | - | - | 38.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r50_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_2x_coco/faster_rcnn_r50_fpn_2x_coco_bbox_mAP-0.384_20200504_210434-a5d8aa15.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_2x_coco/faster_rcnn_r50_fpn_2x_coco_20200504_210434.log.json) | +| R-101-FPN | caffe | 1x | 5.7 | | 39.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r101_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r101_caffe_fpn_1x_coco/faster_rcnn_r101_caffe_fpn_1x_coco_bbox_mAP-0.398_20200504_180057-b269e9dd.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r101_caffe_fpn_1x_coco/faster_rcnn_r101_caffe_fpn_1x_coco_20200504_180057.log.json) | +| R-101-FPN | pytorch | 1x | 6.0 | 15.6 | 39.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r101_fpn_1x_coco.py) | 
[model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r101_fpn_1x_coco/faster_rcnn_r101_fpn_1x_coco_20200130-f513f705.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r101_fpn_1x_coco/faster_rcnn_r101_fpn_1x_coco_20200130_204655.log.json) | +| R-101-FPN | pytorch | 2x | - | - | 39.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r101_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r101_fpn_2x_coco/faster_rcnn_r101_fpn_2x_coco_bbox_mAP-0.398_20200504_210455-1d2dac9c.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r101_fpn_2x_coco/faster_rcnn_r101_fpn_2x_coco_20200504_210455.log.json) | +| X-101-32x4d-FPN | pytorch | 1x | 7.2 | 13.8 | 41.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_x101_32x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_32x4d_fpn_1x_coco/faster_rcnn_x101_32x4d_fpn_1x_coco_20200203-cff10310.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_32x4d_fpn_1x_coco/faster_rcnn_x101_32x4d_fpn_1x_coco_20200203_000520.log.json) | +| X-101-32x4d-FPN | pytorch | 2x | - | - | 41.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_x101_32x4d_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_32x4d_fpn_2x_coco/faster_rcnn_x101_32x4d_fpn_2x_coco_bbox_mAP-0.412_20200506_041400-64a12c0b.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_32x4d_fpn_2x_coco/faster_rcnn_x101_32x4d_fpn_2x_coco_20200506_041400.log.json) | +| X-101-64x4d-FPN | pytorch | 1x | 10.3 | 9.4 | 42.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_x101_64x4d_fpn_1x_coco.py) 
| [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_64x4d_fpn_1x_coco/faster_rcnn_x101_64x4d_fpn_1x_coco_20200204-833ee192.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_64x4d_fpn_1x_coco/faster_rcnn_x101_64x4d_fpn_1x_coco_20200204_134340.log.json) | +| X-101-64x4d-FPN | pytorch | 2x | - | - | 41.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_x101_64x4d_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_64x4d_fpn_2x_coco/faster_rcnn_x101_64x4d_fpn_2x_coco_20200512_161033-5961fa95.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_64x4d_fpn_2x_coco/faster_rcnn_x101_64x4d_fpn_2x_coco_20200512_161033.log.json) | + +## Different regression loss + +We trained with R-50-FPN pytorch style backbone for 1x schedule. + +| Backbone | Loss type | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :-------------: | :-------: | :------: | :------------: | :----: | :------: | :--------: | +| R-50-FPN | L1Loss | 4.0 | 21.4 | 37.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130_204655.log.json) | +| R-50-FPN | IoULoss | | | 37.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r50_fpn_iou_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_iou_1x_coco-fdd207f3.pth) |
[log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_iou_1x_coco_20200506_095954.log.json) | +| R-50-FPN | GIoULoss | | | 37.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r50_fpn_giou_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_giou_1x_coco-0eada910.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_giou_1x_coco_20200505_161120.log.json) | +| R-50-FPN | BoundedIoULoss | | | 37.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r50_fpn_bounded_iou_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_bounded_iou_1x_coco-98ad993b.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_bounded_iou_1x_coco_20200505_160738.log.json) | + +## Pre-trained Models + +We also train some models with longer schedules and multi-scale training. Users can finetune them for downstream tasks.
+ +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :------: | :--------: | +| [R-50-DC5](./faster_rcnn_r50_caffe_dc5_mstrain_1x_coco.py) | caffe | 1x | - | | 37.4 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_1x_coco/faster_rcnn_r50_caffe_dc5_mstrain_1x_coco_20201028_233851-b33d21b9.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_1x_coco/faster_rcnn_r50_caffe_dc5_mstrain_1x_coco_20201028_233851.log.json) | +| [R-50-DC5](./faster_rcnn_r50_caffe_dc5_mstrain_3x_coco.py) | caffe | 3x | - | | 38.7 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_3x_coco/faster_rcnn_r50_caffe_dc5_mstrain_3x_coco_20201028_002107-34a53b2c.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_3x_coco/faster_rcnn_r50_caffe_dc5_mstrain_3x_coco_20201028_002107.log.json) | +| [R-50-FPN](./faster_rcnn_r50_caffe_fpn_mstrain_2x_coco.py) | caffe | 2x | 3.7 | | 39.7 |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_2x_coco/faster_rcnn_r50_caffe_fpn_mstrain_2x_coco_bbox_mAP-0.397_20200504_231813-10b2de58.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_2x_coco/faster_rcnn_r50_caffe_fpn_mstrain_2x_coco_20200504_231813.log.json) | +| 
[R-50-FPN](./faster_rcnn_r50_caffe_fpn_mstrain_3x_coco.py) | caffe | 3x | 3.7 | | 39.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco_20210526_095054-1f77628b.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco_20210526_095054.log.json) | +| [R-50-FPN](./faster_rcnn_r50_fpn_mstrain_3x_coco.py) | pytorch | 3x | 3.9 | | 40.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r50_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_mstrain_3x_coco/faster_rcnn_r50_fpn_mstrain_3x_coco_20210524_110822-e10bd31c.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_mstrain_3x_coco/faster_rcnn_r50_fpn_mstrain_3x_coco_20210524_110822.log.json) | +| [R-101-FPN](./faster_rcnn_r101_caffe_fpn_mstrain_3x_coco.py) | caffe | 3x | 5.6 | | 42.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r101_caffe_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r101_caffe_fpn_mstrain_3x_coco/faster_rcnn_r101_caffe_fpn_mstrain_3x_coco_20210526_095742-a7ae426d.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r101_caffe_fpn_mstrain_3x_coco/faster_rcnn_r101_caffe_fpn_mstrain_3x_coco_20210526_095742.log.json) | +| [R-101-FPN](./faster_rcnn_r101_fpn_mstrain_3x_coco.py) | pytorch | 3x | 5.8 | | 41.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r101_fpn_mstrain_3x_coco.py) | 
[model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r101_fpn_mstrain_3x_coco/faster_rcnn_r101_fpn_mstrain_3x_coco_20210524_110822-4d4d2ca8.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r101_fpn_mstrain_3x_coco/faster_rcnn_r101_fpn_mstrain_3x_coco_20210524_110822.log.json) | +| [X-101-32x4d-FPN](./faster_rcnn_x101_32x4d_fpn_mstrain_3x_coco.py) | pytorch | 3x | 7.0 | | 42.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_x101_32x4d_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_32x4d_fpn_mstrain_3x_coco/faster_rcnn_x101_32x4d_fpn_mstrain_3x_coco_20210524_124151-16b9b260.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_32x4d_fpn_mstrain_3x_coco/faster_rcnn_x101_32x4d_fpn_mstrain_3x_coco_20210524_124151.log.json) | +| [X-101-32x8d-FPN](./faster_rcnn_x101_32x8d_fpn_mstrain_3x_coco.py) | pytorch | 3x | 10.1 | | 42.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_x101_32x8d_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_32x8d_fpn_mstrain_3x_coco/faster_rcnn_x101_32x8d_fpn_mstrain_3x_coco_20210604_182954-002e082a.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_32x8d_fpn_mstrain_3x_coco/faster_rcnn_x101_32x8d_fpn_mstrain_3x_coco_20210604_182954.log.json) | +| [X-101-64x4d-FPN](./faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco.py) | pytorch | 3x | 10.0 | | 43.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco_20210524_124528-26c63de6.pth) | 
[log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco_20210524_124528.log.json) | + +We further finetune some pre-trained models on the COCO subsets, which contain only a few of the 80 categories. + +| Backbone | Style | Class name | Pre-trained model | Mem (GB) | box AP | Config | Download | +| ------------------------------------------------------------ | ----- | ------------------ | ------------------------------------------------------------ | -------- | ------ | ------------------------------------------------------------ | ------------------------------------------------------------ | +| [R-50-FPN](./faster_rcnn_r50_caffe_fpn_mstrain_1x_coco-person.py) | caffe | person | [R-50-FPN-Caffe-3x](./faster_rcnn_r50_caffe_fpn_mstrain_3x_coco.py) | 3.7 | 55.8 | [config](./faster_rcnn_r50_caffe_fpn_mstrain_1x_coco-person.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco-person/faster_rcnn_r50_fpn_1x_coco-person_20201216_175929-d022e227.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco-person/faster_rcnn_r50_fpn_1x_coco-person_20201216_175929.log.json) | +| [R-50-FPN](./faster_rcnn_r50_caffe_fpn_mstrain_1x_coco-person-bicycle-car.py) | caffe | person-bicycle-car | [R-50-FPN-Caffe-3x](./faster_rcnn_r50_caffe_fpn_mstrain_3x_coco.py) | 3.7 | 44.1 | [config](./faster_rcnn_r50_caffe_fpn_mstrain_1x_coco-person-bicycle-car.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco-person-bicycle-car/faster_rcnn_r50_fpn_1x_coco-person-bicycle-car_20201216_173117-6eda6d92.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco-person-bicycle-car/faster_rcnn_r50_fpn_1x_coco-person-bicycle-car_20201216_173117.log.json) | diff --git a/configs/faster_rcnn/faster_rcnn_r101_caffe_fpn_1x_coco.py
b/configs/faster_rcnn/faster_rcnn_r101_caffe_fpn_1x_coco.py new file mode 100644 index 0000000..c6f078c --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_r101_caffe_fpn_1x_coco.py @@ -0,0 +1,7 @@ +_base_ = './faster_rcnn_r50_caffe_fpn_1x_coco.py' +model = dict( + backbone=dict( + depth=101, + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://detectron2/resnet101_caffe'))) diff --git a/configs/faster_rcnn/faster_rcnn_r101_caffe_fpn_mstrain_3x_coco.py b/configs/faster_rcnn/faster_rcnn_r101_caffe_fpn_mstrain_3x_coco.py new file mode 100644 index 0000000..6a13fe9 --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_r101_caffe_fpn_mstrain_3x_coco.py @@ -0,0 +1,49 @@ +_base_ = 'faster_rcnn_r50_fpn_mstrain_3x_coco.py' + +model = dict( + backbone=dict( + depth=101, + norm_cfg=dict(requires_grad=False), + norm_eval=True, + style='caffe', + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://detectron2/resnet101_caffe'))) + +# use caffe img_norm +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 800)], + multiscale_mode='range', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] + +data = dict( + train=dict(dataset=dict(pipeline=train_pipeline)), + val=dict(pipeline=test_pipeline), + 
test=dict(pipeline=test_pipeline)) diff --git a/configs/faster_rcnn/faster_rcnn_r101_fpn_1x_coco.py b/configs/faster_rcnn/faster_rcnn_r101_fpn_1x_coco.py new file mode 100644 index 0000000..1de53a6 --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_r101_fpn_1x_coco.py @@ -0,0 +1,6 @@ +_base_ = './faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + depth=101, + init_cfg=dict(type='Pretrained', + checkpoint='torchvision://resnet101'))) diff --git a/configs/faster_rcnn/faster_rcnn_r101_fpn_2x_coco.py b/configs/faster_rcnn/faster_rcnn_r101_fpn_2x_coco.py new file mode 100644 index 0000000..0d41599 --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_r101_fpn_2x_coco.py @@ -0,0 +1,6 @@ +_base_ = './faster_rcnn_r50_fpn_2x_coco.py' +model = dict( + backbone=dict( + depth=101, + init_cfg=dict(type='Pretrained', + checkpoint='torchvision://resnet101'))) diff --git a/configs/faster_rcnn/faster_rcnn_r101_fpn_mstrain_3x_coco.py b/configs/faster_rcnn/faster_rcnn_r101_fpn_mstrain_3x_coco.py new file mode 100644 index 0000000..0b498bb --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_r101_fpn_mstrain_3x_coco.py @@ -0,0 +1,7 @@ +_base_ = 'faster_rcnn_r50_fpn_mstrain_3x_coco.py' + +model = dict( + backbone=dict( + depth=101, + init_cfg=dict(type='Pretrained', + checkpoint='torchvision://resnet101'))) diff --git a/configs/faster_rcnn/faster_rcnn_r50_caffe_c4_1x_coco.py b/configs/faster_rcnn/faster_rcnn_r50_caffe_c4_1x_coco.py new file mode 100644 index 0000000..92344a1 --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_r50_caffe_c4_1x_coco.py @@ -0,0 +1,39 @@ +_base_ = [ + '../_base_/models/faster_rcnn_r50_caffe_c4.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +# use caffe img_norm +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + 
dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) diff --git a/configs/faster_rcnn/faster_rcnn_r50_caffe_dc5_1x_coco.py b/configs/faster_rcnn/faster_rcnn_r50_caffe_dc5_1x_coco.py new file mode 100644 index 0000000..ee2010c --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_r50_caffe_dc5_1x_coco.py @@ -0,0 +1,37 @@ +_base_ = [ + '../_base_/models/faster_rcnn_r50_caffe_dc5.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +# use caffe img_norm +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', 
keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_1x_coco.py b/configs/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_1x_coco.py new file mode 100644 index 0000000..14eaef2 --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_1x_coco.py @@ -0,0 +1,42 @@ +_base_ = [ + '../_base_/models/faster_rcnn_r50_caffe_dc5.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +# use caffe img_norm +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), + (1333, 768), (1333, 800)], + multiscale_mode='value', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git 
a/configs/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_3x_coco.py b/configs/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_3x_coco.py new file mode 100644 index 0000000..403747f --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_3x_coco.py @@ -0,0 +1,4 @@ +_base_ = './faster_rcnn_r50_caffe_dc5_mstrain_1x_coco.py' +# learning policy +lr_config = dict(step=[28, 34]) +runner = dict(type='EpochBasedRunner', max_epochs=36) diff --git a/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_1x_coco.py b/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_1x_coco.py new file mode 100644 index 0000000..56c01bd --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_1x_coco.py @@ -0,0 +1,41 @@ +_base_ = './faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + norm_cfg=dict(requires_grad=False), + norm_eval=True, + style='caffe', + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://detectron2/resnet50_caffe'))) +# use caffe img_norm +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git 
a/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_90k_coco.py b/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_90k_coco.py new file mode 100644 index 0000000..b5aea6a --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_90k_coco.py @@ -0,0 +1,15 @@ +_base_ = 'faster_rcnn_r50_caffe_fpn_1x_coco.py' + +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[60000, 80000]) + +# Runner type +runner = dict(_delete_=True, type='IterBasedRunner', max_iters=90000) + +checkpoint_config = dict(interval=10000) +evaluation = dict(interval=10000, metric='bbox') diff --git a/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_1x_coco-person-bicycle-car.py b/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_1x_coco-person-bicycle-car.py new file mode 100644 index 0000000..4f1f376 --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_1x_coco-person-bicycle-car.py @@ -0,0 +1,9 @@ +_base_ = './faster_rcnn_r50_caffe_fpn_mstrain_1x_coco.py' +model = dict(roi_head=dict(bbox_head=dict(num_classes=3))) +classes = ('person', 'bicycle', 'car') +data = dict( + train=dict(classes=classes), + val=dict(classes=classes), + test=dict(classes=classes)) + +load_from = 'https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco_bbox_mAP-0.398_20200504_163323-30042637.pth' # noqa diff --git a/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_1x_coco-person.py b/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_1x_coco-person.py new file mode 100644 index 0000000..b5dfb4f --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_1x_coco-person.py @@ -0,0 +1,9 @@ +_base_ = './faster_rcnn_r50_caffe_fpn_mstrain_1x_coco.py' +model = dict(roi_head=dict(bbox_head=dict(num_classes=1))) +classes = ('person', ) +data = dict( + train=dict(classes=classes), + val=dict(classes=classes), + 
test=dict(classes=classes)) + +load_from = 'https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco_bbox_mAP-0.398_20200504_163323-30042637.pth' # noqa diff --git a/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_1x_coco.py b/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_1x_coco.py new file mode 100644 index 0000000..f807a19 --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_1x_coco.py @@ -0,0 +1,46 @@ +_base_ = './faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + norm_cfg=dict(requires_grad=False), + norm_eval=True, + style='caffe', + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://detectron2/resnet50_caffe'))) +# use caffe img_norm +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), + (1333, 768), (1333, 800)], + multiscale_mode='value', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_2x_coco.py 
b/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_2x_coco.py new file mode 100644 index 0000000..df58973 --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_2x_coco.py @@ -0,0 +1,4 @@ +_base_ = './faster_rcnn_r50_caffe_fpn_mstrain_1x_coco.py' +# learning policy +lr_config = dict(step=[16, 23]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco.py b/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco.py new file mode 100644 index 0000000..9eeaace --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco.py @@ -0,0 +1,47 @@ +_base_ = 'faster_rcnn_r50_fpn_mstrain_3x_coco.py' +model = dict( + backbone=dict( + norm_cfg=dict(requires_grad=False), + norm_eval=True, + style='caffe', + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://detectron2/resnet50_caffe'))) + +# use caffe img_norm +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 800)], + multiscale_mode='range', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] + +data = dict( + train=dict(dataset=dict(pipeline=train_pipeline)), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) 
diff --git a/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_90k_coco.py b/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_90k_coco.py new file mode 100644 index 0000000..74dca24 --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_90k_coco.py @@ -0,0 +1,15 @@ +_base_ = 'faster_rcnn_r50_caffe_fpn_mstrain_1x_coco.py' + +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[60000, 80000]) + +# Runner type +runner = dict(_delete_=True, type='IterBasedRunner', max_iters=90000) + +checkpoint_config = dict(interval=10000) +evaluation = dict(interval=10000, metric='bbox') diff --git a/configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py b/configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py new file mode 100644 index 0000000..009bd93 --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py @@ -0,0 +1,5 @@ +_base_ = [ + '../_base_/models/faster_rcnn_r50_fpn.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] diff --git a/configs/faster_rcnn/faster_rcnn_r50_fpn_2x_coco.py b/configs/faster_rcnn/faster_rcnn_r50_fpn_2x_coco.py new file mode 100644 index 0000000..e77a7fa --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_r50_fpn_2x_coco.py @@ -0,0 +1,5 @@ +_base_ = [ + '../_base_/models/faster_rcnn_r50_fpn.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_2x.py', '../_base_/default_runtime.py' +] diff --git a/configs/faster_rcnn/faster_rcnn_r50_fpn_bounded_iou_1x_coco.py b/configs/faster_rcnn/faster_rcnn_r50_fpn_bounded_iou_1x_coco.py new file mode 100644 index 0000000..648081f --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_r50_fpn_bounded_iou_1x_coco.py @@ -0,0 +1,6 @@ +_base_ = './faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + roi_head=dict( + bbox_head=dict( + reg_decoded_bbox=True, + loss_bbox=dict(type='BoundedIoULoss', loss_weight=10.0)))) diff --git 
a/configs/faster_rcnn/faster_rcnn_r50_fpn_ciou_1x_coco.py b/configs/faster_rcnn/faster_rcnn_r50_fpn_ciou_1x_coco.py new file mode 100644 index 0000000..886d566 --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_r50_fpn_ciou_1x_coco.py @@ -0,0 +1,6 @@ +_base_ = './faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + roi_head=dict( + bbox_head=dict( + reg_decoded_bbox=True, + loss_bbox=dict(type='CIoULoss', loss_weight=12.0)))) diff --git a/configs/faster_rcnn/faster_rcnn_r50_fpn_giou_1x_coco.py b/configs/faster_rcnn/faster_rcnn_r50_fpn_giou_1x_coco.py new file mode 100644 index 0000000..5556c49 --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_r50_fpn_giou_1x_coco.py @@ -0,0 +1,6 @@ +_base_ = './faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + roi_head=dict( + bbox_head=dict( + reg_decoded_bbox=True, + loss_bbox=dict(type='GIoULoss', loss_weight=10.0)))) diff --git a/configs/faster_rcnn/faster_rcnn_r50_fpn_iou_1x_coco.py b/configs/faster_rcnn/faster_rcnn_r50_fpn_iou_1x_coco.py new file mode 100644 index 0000000..ddf663e --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_r50_fpn_iou_1x_coco.py @@ -0,0 +1,6 @@ +_base_ = './faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + roi_head=dict( + bbox_head=dict( + reg_decoded_bbox=True, + loss_bbox=dict(type='IoULoss', loss_weight=10.0)))) diff --git a/configs/faster_rcnn/faster_rcnn_r50_fpn_mstrain_3x_coco.py b/configs/faster_rcnn/faster_rcnn_r50_fpn_mstrain_3x_coco.py new file mode 100644 index 0000000..faf8f92 --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_r50_fpn_mstrain_3x_coco.py @@ -0,0 +1,3 @@ +_base_ = [ + '../common/mstrain_3x_coco.py', '../_base_/models/faster_rcnn_r50_fpn.py' +] diff --git a/configs/faster_rcnn/faster_rcnn_r50_fpn_ohem_1x_coco.py b/configs/faster_rcnn/faster_rcnn_r50_fpn_ohem_1x_coco.py new file mode 100644 index 0000000..f897e7c --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_r50_fpn_ohem_1x_coco.py @@ -0,0 +1,2 @@ +_base_ = './faster_rcnn_r50_fpn_1x_coco.py' +model = 
dict(train_cfg=dict(rcnn=dict(sampler=dict(type='OHEMSampler')))) diff --git a/configs/faster_rcnn/faster_rcnn_r50_fpn_soft_nms_1x_coco.py b/configs/faster_rcnn/faster_rcnn_r50_fpn_soft_nms_1x_coco.py new file mode 100644 index 0000000..759ae3a --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_r50_fpn_soft_nms_1x_coco.py @@ -0,0 +1,12 @@ +_base_ = [ + '../_base_/models/faster_rcnn_r50_fpn.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] + +model = dict( + test_cfg=dict( + rcnn=dict( + score_thr=0.05, + nms=dict(type='soft_nms', iou_threshold=0.5), + max_per_img=100))) diff --git a/configs/faster_rcnn/faster_rcnn_x101_32x4d_fpn_1x_coco.py b/configs/faster_rcnn/faster_rcnn_x101_32x4d_fpn_1x_coco.py new file mode 100644 index 0000000..3808c9f --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_x101_32x4d_fpn_1x_coco.py @@ -0,0 +1,14 @@ +_base_ = './faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d'))) diff --git a/configs/faster_rcnn/faster_rcnn_x101_32x4d_fpn_2x_coco.py b/configs/faster_rcnn/faster_rcnn_x101_32x4d_fpn_2x_coco.py new file mode 100644 index 0000000..e93f5d8 --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_x101_32x4d_fpn_2x_coco.py @@ -0,0 +1,14 @@ +_base_ = './faster_rcnn_r50_fpn_2x_coco.py' +model = dict( + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d'))) diff --git a/configs/faster_rcnn/faster_rcnn_x101_32x4d_fpn_mstrain_3x_coco.py 
b/configs/faster_rcnn/faster_rcnn_x101_32x4d_fpn_mstrain_3x_coco.py new file mode 100644 index 0000000..f55985d --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_x101_32x4d_fpn_mstrain_3x_coco.py @@ -0,0 +1,16 @@ +_base_ = [ + '../common/mstrain_3x_coco.py', '../_base_/models/faster_rcnn_r50_fpn.py' +] +model = dict( + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d'))) diff --git a/configs/faster_rcnn/faster_rcnn_x101_32x8d_fpn_mstrain_3x_coco.py b/configs/faster_rcnn/faster_rcnn_x101_32x8d_fpn_mstrain_3x_coco.py new file mode 100644 index 0000000..a5d5aeb --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_x101_32x8d_fpn_mstrain_3x_coco.py @@ -0,0 +1,62 @@ +_base_ = [ + '../common/mstrain_3x_coco.py', '../_base_/models/faster_rcnn_r50_fpn.py' +] +model = dict( + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=8, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=False), + style='pytorch', + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://detectron2/resnext101_32x8d'))) + +# ResNeXt-101-32x8d model trained with Caffe2 at FB, +# so the mean and std need to be changed. 
+img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], + std=[57.375, 57.120, 58.395], + to_rgb=False) + +# In mstrain 3x config, img_scale=[(1333, 640), (1333, 800)], +# multiscale_mode='range' +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 800)], + multiscale_mode='range', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] + +# Use RepeatDataset to speed up training +data = dict( + train=dict(dataset=dict(pipeline=train_pipeline)), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/faster_rcnn/faster_rcnn_x101_64x4d_fpn_1x_coco.py b/configs/faster_rcnn/faster_rcnn_x101_64x4d_fpn_1x_coco.py new file mode 100644 index 0000000..8bf2b65 --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_x101_64x4d_fpn_1x_coco.py @@ -0,0 +1,14 @@ +_base_ = './faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d'))) diff --git a/configs/faster_rcnn/faster_rcnn_x101_64x4d_fpn_2x_coco.py b/configs/faster_rcnn/faster_rcnn_x101_64x4d_fpn_2x_coco.py new file mode 100644 index 0000000..7ea9b2d --- 
/dev/null +++ b/configs/faster_rcnn/faster_rcnn_x101_64x4d_fpn_2x_coco.py @@ -0,0 +1,14 @@ +_base_ = './faster_rcnn_r50_fpn_2x_coco.py' +model = dict( + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d'))) diff --git a/configs/faster_rcnn/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco.py b/configs/faster_rcnn/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco.py new file mode 100644 index 0000000..80397f4 --- /dev/null +++ b/configs/faster_rcnn/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco.py @@ -0,0 +1,16 @@ +_base_ = [ + '../common/mstrain_3x_coco.py', '../_base_/models/faster_rcnn_r50_fpn.py' +] +model = dict( + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d'))) diff --git a/configs/faster_rcnn/metafile.yml b/configs/faster_rcnn/metafile.yml new file mode 100644 index 0000000..97faf53 --- /dev/null +++ b/configs/faster_rcnn/metafile.yml @@ -0,0 +1,383 @@ +Collections: + - Name: Faster R-CNN + Metadata: + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - FPN + - RPN + - ResNet + - RoIPool + Paper: + URL: https://arxiv.org/abs/1506.01497 + Title: 'Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks' + README: configs/faster_rcnn/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/detectors/faster_rcnn.py#L6 + Version: v2.0.0 + +Models: + - Name: faster_rcnn_r50_caffe_dc5_1x_coco + In Collection: Faster R-CNN + Config: 
configs/faster_rcnn/faster_rcnn_r50_caffe_dc5_1x_coco.py + Metadata: + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 37.2 + Weights: https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_dc5_1x_coco/faster_rcnn_r50_caffe_dc5_1x_coco_20201030_151909-531f0f43.pth + + - Name: faster_rcnn_r50_caffe_fpn_1x_coco + In Collection: Faster R-CNN + Config: configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_1x_coco.py + Metadata: + Training Memory (GB): 3.8 + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 37.8 + Weights: https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_fpn_1x_coco/faster_rcnn_r50_caffe_fpn_1x_coco_bbox_mAP-0.378_20200504_180032-c5925ee5.pth + + - Name: faster_rcnn_r50_fpn_1x_coco + In Collection: Faster R-CNN + Config: configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py + Metadata: + Training Memory (GB): 4.0 + inference time (ms/im): + - value: 46.73 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 37.4 + Weights: https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth + + - Name: faster_rcnn_r50_fpn_2x_coco + In Collection: Faster R-CNN + Config: configs/faster_rcnn/faster_rcnn_r50_fpn_2x_coco.py + Metadata: + Training Memory (GB): 4.0 + inference time (ms/im): + - value: 46.73 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 38.4 + Weights: https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_2x_coco/faster_rcnn_r50_fpn_2x_coco_bbox_mAP-0.384_20200504_210434-a5d8aa15.pth + + - Name: faster_rcnn_r101_caffe_fpn_1x_coco + In Collection: Faster R-CNN + Config: 
configs/faster_rcnn/faster_rcnn_r101_caffe_fpn_1x_coco.py + Metadata: + Training Memory (GB): 5.7 + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 39.8 + Weights: https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r101_caffe_fpn_1x_coco/faster_rcnn_r101_caffe_fpn_1x_coco_bbox_mAP-0.398_20200504_180057-b269e9dd.pth + + - Name: faster_rcnn_r101_fpn_1x_coco + In Collection: Faster R-CNN + Config: configs/faster_rcnn/faster_rcnn_r101_fpn_1x_coco.py + Metadata: + Training Memory (GB): 6.0 + inference time (ms/im): + - value: 64.1 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 39.4 + Weights: https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r101_fpn_1x_coco/faster_rcnn_r101_fpn_1x_coco_20200130-f513f705.pth + + - Name: faster_rcnn_r101_fpn_2x_coco + In Collection: Faster R-CNN + Config: configs/faster_rcnn/faster_rcnn_r101_fpn_2x_coco.py + Metadata: + Training Memory (GB): 6.0 + inference time (ms/im): + - value: 64.1 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 39.8 + Weights: https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r101_fpn_2x_coco/faster_rcnn_r101_fpn_2x_coco_bbox_mAP-0.398_20200504_210455-1d2dac9c.pth + + - Name: faster_rcnn_x101_32x4d_fpn_1x_coco + In Collection: Faster R-CNN + Config: configs/faster_rcnn/faster_rcnn_x101_32x4d_fpn_1x_coco.py + Metadata: + Training Memory (GB): 7.2 + inference time (ms/im): + - value: 72.46 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.2 + Weights: 
https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_32x4d_fpn_1x_coco/faster_rcnn_x101_32x4d_fpn_1x_coco_20200203-cff10310.pth + + - Name: faster_rcnn_x101_32x4d_fpn_2x_coco + In Collection: Faster R-CNN + Config: configs/faster_rcnn/faster_rcnn_x101_32x4d_fpn_2x_coco.py + Metadata: + Training Memory (GB): 7.2 + inference time (ms/im): + - value: 72.46 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.2 + Weights: https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_32x4d_fpn_2x_coco/faster_rcnn_x101_32x4d_fpn_2x_coco_bbox_mAP-0.412_20200506_041400-64a12c0b.pth + + - Name: faster_rcnn_x101_64x4d_fpn_1x_coco + In Collection: Faster R-CNN + Config: configs/faster_rcnn/faster_rcnn_x101_64x4d_fpn_1x_coco.py + Metadata: + Training Memory (GB): 10.3 + inference time (ms/im): + - value: 106.38 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 42.1 + Weights: https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_64x4d_fpn_1x_coco/faster_rcnn_x101_64x4d_fpn_1x_coco_20200204-833ee192.pth + + - Name: faster_rcnn_x101_64x4d_fpn_2x_coco + In Collection: Faster R-CNN + Config: configs/faster_rcnn/faster_rcnn_x101_64x4d_fpn_2x_coco.py + Metadata: + Training Memory (GB): 10.3 + inference time (ms/im): + - value: 106.38 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.6 + Weights: https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_64x4d_fpn_2x_coco/faster_rcnn_x101_64x4d_fpn_2x_coco_20200512_161033-5961fa95.pth + + - Name: faster_rcnn_r50_fpn_iou_1x_coco + In Collection: Faster R-CNN + Config: 
configs/faster_rcnn/faster_rcnn_r50_fpn_iou_1x_coco.py + Metadata: + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 37.9 + Weights: https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_iou_1x_coco-fdd207f3.pth + + - Name: faster_rcnn_r50_fpn_giou_1x_coco + In Collection: Faster R-CNN + Config: configs/faster_rcnn/faster_rcnn_r50_fpn_giou_1x_coco.py + Metadata: + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 37.6 + Weights: https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_giou_1x_coco-0eada910.pth + + - Name: faster_rcnn_r50_fpn_bounded_iou_1x_coco + In Collection: Faster R-CNN + Config: configs/faster_rcnn/faster_rcnn_r50_fpn_bounded_iou_1x_coco.py + Metadata: + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 37.4 + Weights: https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_bounded_iou_1x_coco-98ad993b.pth + + - Name: faster_rcnn_r50_caffe_dc5_mstrain_1x_coco + In Collection: Faster R-CNN + Config: configs/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_1x_coco.py + Metadata: + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 37.4 + Weights: https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_1x_coco/faster_rcnn_r50_caffe_dc5_mstrain_1x_coco_20201028_233851-b33d21b9.pth + + - Name: faster_rcnn_r50_caffe_dc5_mstrain_3x_coco + In Collection: Faster R-CNN + Config: configs/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_3x_coco.py + Metadata: + Epochs: 36 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 38.7 + Weights: 
https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_3x_coco/faster_rcnn_r50_caffe_dc5_mstrain_3x_coco_20201028_002107-34a53b2c.pth + + - Name: faster_rcnn_r50_caffe_fpn_mstrain_2x_coco + In Collection: Faster R-CNN + Config: configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_2x_coco.py + Metadata: + Training Memory (GB): 4.3 + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 39.7 + Weights: https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_2x_coco/faster_rcnn_r50_caffe_fpn_mstrain_2x_coco_bbox_mAP-0.397_20200504_231813-10b2de58.pth + + - Name: faster_rcnn_r50_caffe_fpn_mstrain_3x_coco + In Collection: Faster R-CNN + Config: configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco.py + Metadata: + Training Memory (GB): 3.7 + Epochs: 36 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 39.9 + Weights: https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco_20210526_095054-1f77628b.pth + + - Name: faster_rcnn_r50_fpn_mstrain_3x_coco + In Collection: Faster R-CNN + Config: configs/faster_rcnn/faster_rcnn_r50_fpn_mstrain_3x_coco.py + Metadata: + Training Memory (GB): 3.9 + Epochs: 36 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 40.3 + Weights: https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_mstrain_3x_coco/faster_rcnn_r50_fpn_mstrain_3x_coco_20210524_110822-e10bd31c.pth + + - Name: faster_rcnn_r101_caffe_fpn_mstrain_3x_coco + In Collection: Faster R-CNN + Config: configs/faster_rcnn/faster_rcnn_r101_caffe_fpn_mstrain_3x_coco.py + Metadata: + Training Memory (GB): 5.6 + Epochs: 36 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 42.0 + Weights: 
https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r101_caffe_fpn_mstrain_3x_coco/faster_rcnn_r101_caffe_fpn_mstrain_3x_coco_20210526_095742-a7ae426d.pth + + - Name: faster_rcnn_r101_fpn_mstrain_3x_coco + In Collection: Faster R-CNN + Config: configs/faster_rcnn/faster_rcnn_r101_fpn_mstrain_3x_coco.py + Metadata: + Training Memory (GB): 5.8 + Epochs: 36 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.8 + Weights: https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r101_fpn_mstrain_3x_coco/faster_rcnn_r101_fpn_mstrain_3x_coco_20210524_110822-4d4d2ca8.pth + + - Name: faster_rcnn_x101_32x4d_fpn_mstrain_3x_coco + In Collection: Faster R-CNN + Config: configs/faster_rcnn/faster_rcnn_x101_32x4d_fpn_mstrain_3x_coco.py + Metadata: + Training Memory (GB): 7.0 + Epochs: 36 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 42.5 + Weights: https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_32x4d_fpn_mstrain_3x_coco/faster_rcnn_x101_32x4d_fpn_mstrain_3x_coco_20210524_124151-16b9b260.pth + + - Name: faster_rcnn_x101_32x8d_fpn_mstrain_3x_coco + In Collection: Faster R-CNN + Config: configs/faster_rcnn/faster_rcnn_x101_32x8d_fpn_mstrain_3x_coco.py + Metadata: + Training Memory (GB): 10.1 + Epochs: 36 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 42.4 + Weights: https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_32x8d_fpn_mstrain_3x_coco/faster_rcnn_x101_32x8d_fpn_mstrain_3x_coco_20210604_182954-002e082a.pth + + - Name: faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco + In Collection: Faster R-CNN + Config: configs/faster_rcnn/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco.py + Metadata: + Training Memory (GB): 10.0 + Epochs: 36 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 43.1 + Weights: 
https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco_20210524_124528-26c63de6.pth diff --git a/configs/fcos/README.md b/configs/fcos/README.md new file mode 100644 index 0000000..dd20d57 --- /dev/null +++ b/configs/fcos/README.md @@ -0,0 +1,35 @@ +# FCOS: Fully Convolutional One-Stage Object Detection + +## Introduction + + + +```latex +@article{tian2019fcos, + title={FCOS: Fully Convolutional One-Stage Object Detection}, + author={Tian, Zhi and Shen, Chunhua and Chen, Hao and He, Tong}, + journal={arXiv preprint arXiv:1904.01355}, + year={2019} +} +``` + +## Results and Models + +| Backbone | Style | GN | MS train | Tricks | DCN | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +|:---------:|:-------:|:-------:|:--------:|:-------:|:-------:|:-------:|:--------:|:--------------:|:------:|:------:|:--------:| +| R-50 | caffe | Y | N | N | N | 1x | 3.6 | 22.7 | 36.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fcos/fcos_r50_caffe_fpn_gn-head_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_r50_caffe_fpn_gn-head_1x_coco/fcos_r50_caffe_fpn_gn-head_1x_coco-821213aa.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_r50_caffe_fpn_gn-head_1x_coco/20201227_180009.log.json) | +| R-50 | caffe | Y | N | Y | N | 1x | 3.7 | - | 38.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco-0a0d75a8.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco/20210105_135818.log.json)| +| R-50 | caffe | Y | N | Y | Y | 1x | 3.8 | - | 
42.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_dcn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_dcn_1x_coco/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_dcn_1x_coco-ae4d8b3d.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_dcn_1x_coco/20210105_224556.log.json)| +| R-101 | caffe | Y | N | N | N | 1x | 5.5 | 17.3 | 39.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fcos/fcos_r101_caffe_fpn_gn-head_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_r101_caffe_fpn_gn-head_1x_coco/fcos_r101_caffe_fpn_gn-head_1x_coco-0e37b982.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_r101_caffe_fpn_gn-head_1x_coco/20210103_155046.log.json) | + +| Backbone | Style | GN | MS train | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +|:---------:|:-------:|:-------:|:--------:|:-------:|:--------:|:--------------:|:------:|:------:|:--------:| +| R-50 | caffe | Y | Y | 2x | 2.6 | 22.9 | 38.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fcos/fcos_r50_caffe_fpn_gn-head_mstrain_640-800_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_r50_caffe_fpn_gn-head_mstrain_640-800_2x_coco/fcos_r50_caffe_fpn_gn-head_mstrain_640-800_2x_coco-d92ceeea.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_r50_caffe_fpn_gn-head_mstrain_640-800_2x_coco/20201227_161900.log.json) | +| R-101 | caffe | Y | Y | 2x | 5.5 | 17.3 | 40.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fcos/fcos_r101_caffe_fpn_gn-head_mstrain_640-800_2x_coco.py) | 
[model](https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_r101_caffe_fpn_gn-head_mstrain_640-800_2x_coco/fcos_r101_caffe_fpn_gn-head_mstrain_640-800_2x_coco-511424d6.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_r101_caffe_fpn_gn-head_mstrain_640-800_2x_coco/20210103_155046.log.json) | +| X-101 | pytorch | Y | Y | 2x | 10.0 | 9.7 | 42.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fcos/fcos_x101_64x4d_fpn_gn-head_mstrain_640-800_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_x101_64x4d_fpn_gn-head_mstrain_640-800_2x_coco/fcos_x101_64x4d_fpn_gn-head_mstrain_640-800_2x_coco-ede514a8.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_x101_64x4d_fpn_gn-head_mstrain_640-800_2x_coco/20210114_133041.log.json) | + +**Notes:** + +- The X-101 backbone is X-101-64x4d. +- Tricks means setting `norm_on_bbox`, `centerness_on_reg`, `center_sampling` as `True`. +- DCN means using `DCNv2` in both backbone and head. 
diff --git a/configs/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco.py b/configs/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco.py new file mode 100644 index 0000000..2699bdb --- /dev/null +++ b/configs/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco.py @@ -0,0 +1,54 @@ +_base_ = 'fcos_r50_caffe_fpn_gn-head_1x_coco.py' + +model = dict( + backbone=dict( + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://detectron2/resnet50_caffe')), + bbox_head=dict( + norm_on_bbox=True, + centerness_on_reg=True, + dcn_on_last_conv=False, + center_sampling=True, + conv_bias=True, + loss_bbox=dict(type='GIoULoss', loss_weight=1.0)), + # training and testing settings + test_cfg=dict(nms=dict(type='nms', iou_threshold=0.6))) + +# dataset settings +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +optimizer_config = dict(_delete_=True, grad_clip=None) + +lr_config = dict(warmup='linear') diff --git 
a/configs/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_dcn_1x_coco.py b/configs/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_dcn_1x_coco.py new file mode 100644 index 0000000..cf93c91 --- /dev/null +++ b/configs/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_dcn_1x_coco.py @@ -0,0 +1,56 @@ +_base_ = 'fcos_r50_caffe_fpn_gn-head_1x_coco.py' + +model = dict( + backbone=dict( + dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True), + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://detectron2/resnet50_caffe')), + bbox_head=dict( + norm_on_bbox=True, + centerness_on_reg=True, + dcn_on_last_conv=True, + center_sampling=True, + conv_bias=True, + loss_bbox=dict(type='GIoULoss', loss_weight=1.0)), + # training and testing settings + test_cfg=dict(nms=dict(type='nms', iou_threshold=0.6))) + +# dataset settings +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +optimizer_config = 
dict(_delete_=True, grad_clip=None) + +lr_config = dict(warmup='linear') diff --git a/configs/fcos/fcos_center_r50_caffe_fpn_gn-head_1x_coco.py b/configs/fcos/fcos_center_r50_caffe_fpn_gn-head_1x_coco.py new file mode 100644 index 0000000..9f502e7 --- /dev/null +++ b/configs/fcos/fcos_center_r50_caffe_fpn_gn-head_1x_coco.py @@ -0,0 +1,2 @@ +_base_ = './fcos_r50_caffe_fpn_gn-head_1x_coco.py' +model = dict(bbox_head=dict(center_sampling=True, center_sample_radius=1.5)) diff --git a/configs/fcos/fcos_r101_caffe_fpn_gn-head_1x_coco.py b/configs/fcos/fcos_r101_caffe_fpn_gn-head_1x_coco.py new file mode 100644 index 0000000..45bea48 --- /dev/null +++ b/configs/fcos/fcos_r101_caffe_fpn_gn-head_1x_coco.py @@ -0,0 +1,7 @@ +_base_ = './fcos_r50_caffe_fpn_gn-head_1x_coco.py' +model = dict( + backbone=dict( + depth=101, + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://detectron/resnet101_caffe'))) diff --git a/configs/fcos/fcos_r101_caffe_fpn_gn-head_mstrain_640-800_2x_coco.py b/configs/fcos/fcos_r101_caffe_fpn_gn-head_mstrain_640-800_2x_coco.py new file mode 100644 index 0000000..f4d36f1 --- /dev/null +++ b/configs/fcos/fcos_r101_caffe_fpn_gn-head_mstrain_640-800_2x_coco.py @@ -0,0 +1,47 @@ +_base_ = './fcos_r50_caffe_fpn_gn-head_1x_coco.py' +model = dict( + backbone=dict( + depth=101, + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://detectron/resnet101_caffe'))) +img_norm_cfg = dict( + mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 800)], + multiscale_mode='value', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + 
dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +# learning policy +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/fcos/fcos_r50_caffe_fpn_gn-head_1x_coco.py b/configs/fcos/fcos_r50_caffe_fpn_gn-head_1x_coco.py new file mode 100644 index 0000000..955787b --- /dev/null +++ b/configs/fcos/fcos_r50_caffe_fpn_gn-head_1x_coco.py @@ -0,0 +1,106 @@ +_base_ = [ + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +# model settings +model = dict( + type='FCOS', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=False), + norm_eval=True, + style='caffe', + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://detectron/resnet50_caffe')), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + start_level=1, + add_extra_convs='on_output', # use P5 + num_outs=5, + relu_before_extra_convs=True), + bbox_head=dict( + type='FCOSHead', + num_classes=80, + in_channels=256, + stacked_convs=4, + feat_channels=256, + strides=[8, 16, 32, 64, 128], + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_bbox=dict(type='IoULoss', loss_weight=1.0), + loss_centerness=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)), + # training and testing settings + train_cfg=dict( + assigner=dict( + type='MaxIoUAssigner', + 
pos_iou_thr=0.5, + neg_iou_thr=0.4, + min_pos_iou=0, + ignore_iof_thr=-1), + allowed_border=-1, + pos_weight=-1, + debug=False), + test_cfg=dict( + nms_pre=1000, + min_bbox_size=0, + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.5), + max_per_img=100)) +img_norm_cfg = dict( + mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +# optimizer +optimizer = dict( + lr=0.01, paramwise_cfg=dict(bias_lr_mult=2., bias_decay_mult=0.)) +optimizer_config = dict( + _delete_=True, grad_clip=dict(max_norm=35, norm_type=2)) +# learning policy +lr_config = dict( + policy='step', + warmup='constant', + warmup_iters=500, + warmup_ratio=1.0 / 3, + step=[8, 11]) +runner = dict(type='EpochBasedRunner', max_epochs=12) diff --git a/configs/fcos/fcos_r50_caffe_fpn_gn-head_4x4_1x_coco.py b/configs/fcos/fcos_r50_caffe_fpn_gn-head_4x4_1x_coco.py new file mode 100644 index 0000000..2816b16 --- /dev/null +++ b/configs/fcos/fcos_r50_caffe_fpn_gn-head_4x4_1x_coco.py @@ -0,0 +1,4 @@ +# TODO: Remove this config after benchmarking all related configs 
+_base_ = 'fcos_r50_caffe_fpn_gn-head_1x_coco.py' + +data = dict(samples_per_gpu=4, workers_per_gpu=4) diff --git a/configs/fcos/fcos_r50_caffe_fpn_gn-head_mstrain_640-800_2x_coco.py b/configs/fcos/fcos_r50_caffe_fpn_gn-head_mstrain_640-800_2x_coco.py new file mode 100644 index 0000000..497d03f --- /dev/null +++ b/configs/fcos/fcos_r50_caffe_fpn_gn-head_mstrain_640-800_2x_coco.py @@ -0,0 +1,39 @@ +_base_ = './fcos_r50_caffe_fpn_gn-head_1x_coco.py' +img_norm_cfg = dict( + mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 800)], + multiscale_mode='value', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +# learning policy +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/fcos/fcos_x101_64x4d_fpn_gn-head_mstrain_640-800_2x_coco.py b/configs/fcos/fcos_x101_64x4d_fpn_gn-head_mstrain_640-800_2x_coco.py new file mode 100644 index 0000000..e70e465 --- /dev/null +++ b/configs/fcos/fcos_x101_64x4d_fpn_gn-head_mstrain_640-800_2x_coco.py @@ -0,0 +1,60 @@ +_base_ = './fcos_r50_caffe_fpn_gn-head_1x_coco.py' +model = dict( + backbone=dict( + 
type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d'))) +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 800)], + multiscale_mode='value', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +# optimizer +optimizer = dict( + lr=0.01, paramwise_cfg=dict(bias_lr_mult=2., bias_decay_mult=0.)) +optimizer_config = dict( + _delete_=True, grad_clip=dict(max_norm=35, norm_type=2)) +# learning policy +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/fcos/metafile.yml b/configs/fcos/metafile.yml new file mode 100644 index 0000000..ae922eb --- /dev/null +++ b/configs/fcos/metafile.yml @@ -0,0 +1,146 @@ +Collections: + - Name: FCOS + Metadata: + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 
GPUs + Architecture: + - FPN + - Group Normalization + - ResNet + Paper: + URL: https://arxiv.org/abs/1904.01355 + Title: 'FCOS: Fully Convolutional One-Stage Object Detection' + README: configs/fcos/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/detectors/fcos.py#L6 + Version: v2.0.0 + +Models: + - Name: fcos_r50_caffe_fpn_gn-head_1x_coco + In Collection: FCOS + Config: configs/fcos/fcos_r50_caffe_fpn_gn-head_1x_coco.py + Metadata: + Training Memory (GB): 3.6 + inference time (ms/im): + - value: 44.05 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 36.6 + Weights: https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_r50_caffe_fpn_gn-head_1x_coco/fcos_r50_caffe_fpn_gn-head_1x_coco-821213aa.pth + + - Name: fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco + In Collection: FCOS + Config: configs/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco.py + Metadata: + Training Memory (GB): 3.7 + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 38.7 + Weights: https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco-0a0d75a8.pth + + - Name: fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_dcn_1x_coco + In Collection: FCOS + Config: configs/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_dcn_1x_coco.py + Metadata: + Training Memory (GB): 3.8 + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 42.3 + Weights: https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_dcn_1x_coco/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_dcn_1x_coco-ae4d8b3d.pth + + - Name: 
fcos_r101_caffe_fpn_gn-head_1x_coco + In Collection: FCOS + Config: configs/fcos/fcos_r101_caffe_fpn_gn-head_1x_coco.py + Metadata: + Training Memory (GB): 5.5 + inference time (ms/im): + - value: 57.8 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 39.1 + Weights: https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_r101_caffe_fpn_gn-head_1x_coco/fcos_r101_caffe_fpn_gn-head_1x_coco-0e37b982.pth + + - Name: fcos_r50_caffe_fpn_gn-head_mstrain_640-800_2x_coco + In Collection: FCOS + Config: configs/fcos/fcos_r50_caffe_fpn_gn-head_mstrain_640-800_2x_coco.py + Metadata: + Training Memory (GB): 2.6 + inference time (ms/im): + - value: 43.67 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 38.5 + Weights: https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_r50_caffe_fpn_gn-head_mstrain_640-800_2x_coco/fcos_r50_caffe_fpn_gn-head_mstrain_640-800_2x_coco-d92ceeea.pth + + - Name: fcos_r101_caffe_fpn_gn-head_mstrain_640-800_2x_coco + In Collection: FCOS + Config: configs/fcos/fcos_r101_caffe_fpn_gn-head_mstrain_640-800_2x_coco.py + Metadata: + Training Memory (GB): 5.5 + inference time (ms/im): + - value: 57.8 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 40.8 + Weights: https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_r101_caffe_fpn_gn-head_mstrain_640-800_2x_coco/fcos_r101_caffe_fpn_gn-head_mstrain_640-800_2x_coco-511424d6.pth + + - Name: fcos_x101_64x4d_fpn_gn-head_mstrain_640-800_2x_coco + In Collection: FCOS + Config: configs/fcos/fcos_x101_64x4d_fpn_gn-head_mstrain_640-800_2x_coco.py + Metadata: + Training Memory (GB): 10.0 + inference time (ms/im): + - value: 
103.09 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 42.6 + Weights: https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_x101_64x4d_fpn_gn-head_mstrain_640-800_2x_coco/fcos_x101_64x4d_fpn_gn-head_mstrain_640-800_2x_coco-ede514a8.pth diff --git a/configs/foveabox/README.md b/configs/foveabox/README.md new file mode 100644 index 0000000..b9b5684 --- /dev/null +++ b/configs/foveabox/README.md @@ -0,0 +1,41 @@ +# FoveaBox: Beyond Anchor-based Object Detector + + + +FoveaBox is an accurate, flexible, and completely anchor-free object detection framework, as presented in our paper [https://arxiv.org/abs/1904.03797](https://arxiv.org/abs/1904.03797). +Different from previous anchor-based methods, FoveaBox directly learns the possibility that an object exists and the bounding box coordinates without anchor reference. This is achieved by: (a) predicting category-sensitive semantic maps for the possibility that an object exists, and (b) producing a category-agnostic bounding box for each position that potentially contains an object. 
+ +## Main Results + +### Results on R50/101-FPN + +| Backbone | Style | align | ms-train| Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | Log | +|:---------:|:-------:|:-------:|:-------:|:-------:|:--------:|:--------------:|:------:|:------:|:--------:|:---:| +| R-50 | pytorch | N | N | 1x | 5.6 | 24.1 | 36.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/foveabox/fovea_r50_fpn_4x4_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_r50_fpn_4x4_1x_coco/fovea_r50_fpn_4x4_1x_coco_20200219-ee4d5303.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_r50_fpn_4x4_1x_coco/fovea_r50_fpn_4x4_1x_coco_20200219_223025.log.json) | +| R-50 | pytorch | N | N | 2x | 5.6 | - | 37.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/foveabox/fovea_r50_fpn_4x4_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_r50_fpn_4x4_2x_coco/fovea_r50_fpn_4x4_2x_coco_20200203-2df792b1.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_r50_fpn_4x4_2x_coco/fovea_r50_fpn_4x4_2x_coco_20200203_112043.log.json) | +| R-50 | pytorch | Y | N | 2x | 8.1 | 19.4 | 37.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/foveabox/fovea_align_r50_fpn_gn-head_4x4_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_align_r50_fpn_gn-head_4x4_2x_coco/fovea_align_r50_fpn_gn-head_4x4_2x_coco_20200203-8987880d.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_align_r50_fpn_gn-head_4x4_2x_coco/fovea_align_r50_fpn_gn-head_4x4_2x_coco_20200203_134252.log.json) | +| R-50 | pytorch | Y | Y | 2x | 8.1 | 18.3 | 40.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/foveabox/fovea_align_r50_fpn_gn-head_mstrain_640-800_4x4_2x_coco.py) | 
[model](https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_align_r50_fpn_gn-head_mstrain_640-800_4x4_2x_coco/fovea_align_r50_fpn_gn-head_mstrain_640-800_4x4_2x_coco_20200205-85ce26cb.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_align_r50_fpn_gn-head_mstrain_640-800_4x4_2x_coco/fovea_align_r50_fpn_gn-head_mstrain_640-800_4x4_2x_coco_20200205_112557.log.json) | +| R-101 | pytorch | N | N | 1x | 9.2 | 17.4 | 38.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/foveabox/fovea_r101_fpn_4x4_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_r101_fpn_4x4_1x_coco/fovea_r101_fpn_4x4_1x_coco_20200219-05e38f1c.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_r101_fpn_4x4_1x_coco/fovea_r101_fpn_4x4_1x_coco_20200219_011740.log.json) | +| R-101 | pytorch | N | N | 2x | 11.7 | - | 40.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/foveabox/fovea_r101_fpn_4x4_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_r101_fpn_4x4_2x_coco/fovea_r101_fpn_4x4_2x_coco_20200208-02320ea4.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_r101_fpn_4x4_2x_coco/fovea_r101_fpn_4x4_2x_coco_20200208_202059.log.json) | +| R-101 | pytorch | Y | N | 2x | 11.7 | 14.7 | 40.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/foveabox/fovea_align_r101_fpn_gn-head_4x4_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_align_r101_fpn_gn-head_4x4_2x_coco/fovea_align_r101_fpn_gn-head_4x4_2x_coco_20200208-c39a027a.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_align_r101_fpn_gn-head_4x4_2x_coco/fovea_align_r101_fpn_gn-head_4x4_2x_coco_20200208_203337.log.json) | +| R-101 | pytorch | Y | Y | 2x | 11.7 | 14.7 | 42.0 | 
[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/foveabox/fovea_align_r101_fpn_gn-head_mstrain_640-800_4x4_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_align_r101_fpn_gn-head_mstrain_640-800_4x4_2x_coco/fovea_align_r101_fpn_gn-head_mstrain_640-800_4x4_2x_coco_20200208-649c5eb6.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_align_r101_fpn_gn-head_mstrain_640-800_4x4_2x_coco/fovea_align_r101_fpn_gn-head_mstrain_640-800_4x4_2x_coco_20200208_202124.log.json) | + +[1] *1x and 2x mean the model is trained for 12 and 24 epochs, respectively.* \ +[2] *Align means utilizing deformable convolution to align the cls branch.* \ +[3] *All results are obtained with a single model and without any test time data augmentation.*\ +[4] *We use 4 GPUs for training.* + +Any pull requests or issues are welcome. + +## Citations + +Please consider citing our paper in your publications if the project helps your research. BibTeX reference is as follows. 
+ +```latex +@article{kong2019foveabox, + title={FoveaBox: Beyond Anchor-based Object Detector}, + author={Kong, Tao and Sun, Fuchun and Liu, Huaping and Jiang, Yuning and Shi, Jianbo}, + journal={arXiv preprint arXiv:1904.03797}, + year={2019} +} +``` diff --git a/configs/foveabox/fovea_align_r101_fpn_gn-head_4x4_2x_coco.py b/configs/foveabox/fovea_align_r101_fpn_gn-head_4x4_2x_coco.py new file mode 100644 index 0000000..c5d1784 --- /dev/null +++ b/configs/foveabox/fovea_align_r101_fpn_gn-head_4x4_2x_coco.py @@ -0,0 +1,12 @@ +_base_ = './fovea_r50_fpn_4x4_1x_coco.py' +model = dict( + backbone=dict( + depth=101, + init_cfg=dict(type='Pretrained', + checkpoint='torchvision://resnet101')), + bbox_head=dict( + with_deform=True, + norm_cfg=dict(type='GN', num_groups=32, requires_grad=True))) +# learning policy +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/foveabox/fovea_align_r101_fpn_gn-head_mstrain_640-800_4x4_2x_coco.py b/configs/foveabox/fovea_align_r101_fpn_gn-head_mstrain_640-800_4x4_2x_coco.py new file mode 100644 index 0000000..cc5affe --- /dev/null +++ b/configs/foveabox/fovea_align_r101_fpn_gn-head_mstrain_640-800_4x4_2x_coco.py @@ -0,0 +1,29 @@ +_base_ = './fovea_r50_fpn_4x4_1x_coco.py' +model = dict( + backbone=dict( + depth=101, + init_cfg=dict(type='Pretrained', + checkpoint='torchvision://resnet101')), + bbox_head=dict( + with_deform=True, + norm_cfg=dict(type='GN', num_groups=32, requires_grad=True))) +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 800)], + multiscale_mode='value', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', 
keys=['img', 'gt_bboxes', 'gt_labels']), +] +data = dict(train=dict(pipeline=train_pipeline)) +# learning policy +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/foveabox/fovea_align_r50_fpn_gn-head_4x4_2x_coco.py b/configs/foveabox/fovea_align_r50_fpn_gn-head_4x4_2x_coco.py new file mode 100644 index 0000000..e7265bc --- /dev/null +++ b/configs/foveabox/fovea_align_r50_fpn_gn-head_4x4_2x_coco.py @@ -0,0 +1,10 @@ +_base_ = './fovea_r50_fpn_4x4_1x_coco.py' +model = dict( + bbox_head=dict( + with_deform=True, + norm_cfg=dict(type='GN', num_groups=32, requires_grad=True))) +# learning policy +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) +optimizer_config = dict( + _delete_=True, grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/foveabox/fovea_align_r50_fpn_gn-head_mstrain_640-800_4x4_2x_coco.py b/configs/foveabox/fovea_align_r50_fpn_gn-head_mstrain_640-800_4x4_2x_coco.py new file mode 100644 index 0000000..8fc39be --- /dev/null +++ b/configs/foveabox/fovea_align_r50_fpn_gn-head_mstrain_640-800_4x4_2x_coco.py @@ -0,0 +1,25 @@ +_base_ = './fovea_r50_fpn_4x4_1x_coco.py' +model = dict( + bbox_head=dict( + with_deform=True, + norm_cfg=dict(type='GN', num_groups=32, requires_grad=True))) +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 800)], + multiscale_mode='value', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +data = dict(train=dict(pipeline=train_pipeline)) +# learning policy +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', 
max_epochs=24) diff --git a/configs/foveabox/fovea_r101_fpn_4x4_1x_coco.py b/configs/foveabox/fovea_r101_fpn_4x4_1x_coco.py new file mode 100644 index 0000000..9201af1 --- /dev/null +++ b/configs/foveabox/fovea_r101_fpn_4x4_1x_coco.py @@ -0,0 +1,6 @@ +_base_ = './fovea_r50_fpn_4x4_1x_coco.py' +model = dict( + backbone=dict( + depth=101, + init_cfg=dict(type='Pretrained', + checkpoint='torchvision://resnet101'))) diff --git a/configs/foveabox/fovea_r101_fpn_4x4_2x_coco.py b/configs/foveabox/fovea_r101_fpn_4x4_2x_coco.py new file mode 100644 index 0000000..1ef5243 --- /dev/null +++ b/configs/foveabox/fovea_r101_fpn_4x4_2x_coco.py @@ -0,0 +1,6 @@ +_base_ = './fovea_r50_fpn_4x4_2x_coco.py' +model = dict( + backbone=dict( + depth=101, + init_cfg=dict(type='Pretrained', + checkpoint='torchvision://resnet101'))) diff --git a/configs/foveabox/fovea_r50_fpn_4x4_1x_coco.py b/configs/foveabox/fovea_r50_fpn_4x4_1x_coco.py new file mode 100644 index 0000000..7e986eb --- /dev/null +++ b/configs/foveabox/fovea_r50_fpn_4x4_1x_coco.py @@ -0,0 +1,52 @@ +_base_ = [ + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +# model settings +model = dict( + type='FOVEA', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + start_level=1, + num_outs=5, + add_extra_convs='on_input'), + bbox_head=dict( + type='FoveaHead', + num_classes=80, + in_channels=256, + stacked_convs=4, + feat_channels=256, + strides=[8, 16, 32, 64, 128], + base_edge_list=[16, 32, 64, 128, 256], + scale_ranges=((1, 64), (32, 128), (64, 256), (128, 512), (256, 2048)), + sigma=0.4, + with_deform=False, + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + 
gamma=1.50, + alpha=0.4, + loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0)), + # training and testing settings + train_cfg=dict(), + test_cfg=dict( + nms_pre=1000, + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.5), + max_per_img=100)) +data = dict(samples_per_gpu=4, workers_per_gpu=4) +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) diff --git a/configs/foveabox/fovea_r50_fpn_4x4_2x_coco.py b/configs/foveabox/fovea_r50_fpn_4x4_2x_coco.py new file mode 100644 index 0000000..68ce4d2 --- /dev/null +++ b/configs/foveabox/fovea_r50_fpn_4x4_2x_coco.py @@ -0,0 +1,4 @@ +_base_ = './fovea_r50_fpn_4x4_1x_coco.py' +# learning policy +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/foveabox/metafile.yml b/configs/foveabox/metafile.yml new file mode 100644 index 0000000..fe9a283 --- /dev/null +++ b/configs/foveabox/metafile.yml @@ -0,0 +1,172 @@ +Collections: + - Name: FoveaBox + Metadata: + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 4x V100 GPUs + Architecture: + - FPN + - ResNet + Paper: + URL: https://arxiv.org/abs/1904.03797 + Title: 'FoveaBox: Beyond Anchor-based Object Detector' + README: configs/foveabox/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/detectors/fovea.py#L6 + Version: v2.0.0 + +Models: + - Name: fovea_r50_fpn_4x4_1x_coco + In Collection: FoveaBox + Config: configs/foveabox/fovea_r50_fpn_4x4_1x_coco.py + Metadata: + Training Memory (GB): 5.6 + inference time (ms/im): + - value: 41.49 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 36.5 + Weights: https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_r50_fpn_4x4_1x_coco/fovea_r50_fpn_4x4_1x_coco_20200219-ee4d5303.pth + + - 
Name: fovea_r50_fpn_4x4_2x_coco + In Collection: FoveaBox + Config: configs/foveabox/fovea_r50_fpn_4x4_2x_coco.py + Metadata: + Training Memory (GB): 5.6 + inference time (ms/im): + - value: 41.49 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 37.2 + Weights: https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_r50_fpn_4x4_2x_coco/fovea_r50_fpn_4x4_2x_coco_20200203-2df792b1.pth + + - Name: fovea_align_r50_fpn_gn-head_4x4_2x_coco + In Collection: FoveaBox + Config: configs/foveabox/fovea_align_r50_fpn_gn-head_4x4_2x_coco.py + Metadata: + Training Memory (GB): 8.1 + inference time (ms/im): + - value: 51.55 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 37.9 + Weights: https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_align_r50_fpn_gn-head_4x4_2x_coco/fovea_align_r50_fpn_gn-head_4x4_2x_coco_20200203-8987880d.pth + + - Name: fovea_align_r50_fpn_gn-head_mstrain_640-800_4x4_2x_coco + In Collection: FoveaBox + Config: configs/foveabox/fovea_align_r50_fpn_gn-head_mstrain_640-800_4x4_2x_coco.py + Metadata: + Training Memory (GB): 8.1 + inference time (ms/im): + - value: 54.64 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 40.4 + Weights: https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_align_r50_fpn_gn-head_mstrain_640-800_4x4_2x_coco/fovea_align_r50_fpn_gn-head_mstrain_640-800_4x4_2x_coco_20200205-85ce26cb.pth + + - Name: fovea_r101_fpn_4x4_1x_coco + In Collection: FoveaBox + Config: configs/foveabox/fovea_r101_fpn_4x4_1x_coco.py + Metadata: + Training Memory (GB): 9.2 + inference time (ms/im): + - value: 57.47 + hardware: V100 + backend: PyTorch + 
batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 38.6 + Weights: https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_r101_fpn_4x4_1x_coco/fovea_r101_fpn_4x4_1x_coco_20200219-05e38f1c.pth + + - Name: fovea_r101_fpn_4x4_2x_coco + In Collection: FoveaBox + Config: configs/foveabox/fovea_r101_fpn_4x4_2x_coco.py + Metadata: + Training Memory (GB): 11.7 + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 40.0 + Weights: https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_r101_fpn_4x4_2x_coco/fovea_r101_fpn_4x4_2x_coco_20200208-02320ea4.pth + + - Name: fovea_align_r101_fpn_gn-head_4x4_2x_coco + In Collection: FoveaBox + Config: configs/foveabox/fovea_align_r101_fpn_gn-head_4x4_2x_coco.py + Metadata: + Training Memory (GB): 11.7 + inference time (ms/im): + - value: 68.03 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 40.0 + Weights: https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_align_r101_fpn_gn-head_4x4_2x_coco/fovea_align_r101_fpn_gn-head_4x4_2x_coco_20200208-c39a027a.pth + + - Name: fovea_align_r101_fpn_gn-head_mstrain_640-800_4x4_2x_coco + In Collection: FoveaBox + Config: configs/foveabox/fovea_align_r101_fpn_gn-head_mstrain_640-800_4x4_2x_coco.py + Metadata: + Training Memory (GB): 11.7 + inference time (ms/im): + - value: 68.03 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 42.0 + Weights: https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_align_r101_fpn_gn-head_mstrain_640-800_4x4_2x_coco/fovea_align_r101_fpn_gn-head_mstrain_640-800_4x4_2x_coco_20200208-649c5eb6.pth diff --git a/configs/fp16/README.md b/configs/fp16/README.md new 
file mode 100644 index 0000000..171aa1b --- /dev/null +++ b/configs/fp16/README.md @@ -0,0 +1,24 @@ +# Mixed Precision Training + +## Introduction + + + +```latex +@article{micikevicius2017mixed, + title={Mixed precision training}, + author={Micikevicius, Paulius and Narang, Sharan and Alben, Jonah and Diamos, Gregory and Elsen, Erich and Garcia, David and Ginsburg, Boris and Houston, Michael and Kuchaiev, Oleksii and Venkatesh, Ganesh and others}, + journal={arXiv preprint arXiv:1710.03740}, + year={2017} +} +``` + +## Results and Models + +| Architecture | Backbone | Style | Conv | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | Log | +|:------------:|:---------:|:-------:|:------------:|:-------:|:--------:|:--------------:|:------:|:-------:|:------:|:--------:|:---:| +| Faster R-CNN | R-50 | pytorch | - | 1x | 3.4 | 28.8 | 37.5 | - |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fp16/faster_rcnn_r50_fpn_fp16_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/fp16/faster_rcnn_r50_fpn_fp16_1x_coco/faster_rcnn_r50_fpn_fp16_1x_coco_20200204-d4dc1471.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/fp16/faster_rcnn_r50_fpn_fp16_1x_coco/faster_rcnn_r50_fpn_fp16_1x_coco_20200204_143530.log.json) | +| Mask R-CNN | R-50 | pytorch | - | 1x | 3.6 | 24.1 | 38.1 | 34.7 |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fp16/mask_rcnn_r50_fpn_fp16_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/fp16/mask_rcnn_r50_fpn_fp16_1x_coco/mask_rcnn_r50_fpn_fp16_1x_coco_20200205-59faf7e4.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/fp16/mask_rcnn_r50_fpn_fp16_1x_coco/mask_rcnn_r50_fpn_fp16_1x_coco_20200205_130539.log.json) | +| Mask R-CNN | R-50 | pytorch | dconv(c3-c5) | 1x | 3.0 | | 41.9 | 37.5 |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fp16/mask_rcnn_r50_fpn_fp16_dconv_c3-c5_1x_coco.py) | 
[model](https://download.openmmlab.com/mmdetection/v2.0/fp16/mask_rcnn_r50_fpn_fp16_dconv_c3-c5_1x_coco/mask_rcnn_r50_fpn_fp16_dconv_c3-c5_1x_coco_20210520_180247-c06429d2.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/fp16/mask_rcnn_r50_fpn_fp16_dconv_c3-c5_1x_coco/mask_rcnn_r50_fpn_fp16_dconv_c3-c5_1x_coco_20210520_180247.log.json) | +| Mask R-CNN | R-50 | pytorch | mdconv(c3-c5)| 1x | 3.1 | | 42.0 | 37.6 |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fp16/mask_rcnn_r50_fpn_fp16_mdconv_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/fp16/mask_rcnn_r50_fpn_fp16_mdconv_c3-c5_1x_coco/mask_rcnn_r50_fpn_fp16_mdconv_c3-c5_1x_coco_20210520_180434-cf8fefa5.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/fp16/mask_rcnn_r50_fpn_fp16_mdconv_c3-c5_1x_coco/mask_rcnn_r50_fpn_fp16_mdconv_c3-c5_1x_coco_20210520_180434.log.json) | +| Retinanet | R-50 | pytorch | - | 1x | 2.8 | 31.6 | 36.4 | |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fp16/retinanet_r50_fpn_fp16_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/fp16/retinanet_r50_fpn_fp16_1x_coco/retinanet_r50_fpn_fp16_1x_coco_20200702-0dbfb212.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/fp16/retinanet_r50_fpn_fp16_1x_coco/retinanet_r50_fpn_fp16_1x_coco_20200702_020127.log.json) | diff --git a/configs/fp16/faster_rcnn_r50_fpn_fp16_1x_coco.py b/configs/fp16/faster_rcnn_r50_fpn_fp16_1x_coco.py new file mode 100644 index 0000000..78fa5b6 --- /dev/null +++ b/configs/fp16/faster_rcnn_r50_fpn_fp16_1x_coco.py @@ -0,0 +1,3 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' +# fp16 settings +fp16 = dict(loss_scale=512.) 
diff --git a/configs/fp16/mask_rcnn_r50_fpn_fp16_1x_coco.py b/configs/fp16/mask_rcnn_r50_fpn_fp16_1x_coco.py new file mode 100644 index 0000000..f506ea8 --- /dev/null +++ b/configs/fp16/mask_rcnn_r50_fpn_fp16_1x_coco.py @@ -0,0 +1,3 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' +# fp16 settings +fp16 = dict(loss_scale=512.) diff --git a/configs/fp16/mask_rcnn_r50_fpn_fp16_dconv_c3-c5_1x_coco.py b/configs/fp16/mask_rcnn_r50_fpn_fp16_dconv_c3-c5_1x_coco.py new file mode 100644 index 0000000..ee5cca7 --- /dev/null +++ b/configs/fp16/mask_rcnn_r50_fpn_fp16_dconv_c3-c5_1x_coco.py @@ -0,0 +1,7 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True))) + +fp16 = dict(loss_scale=512.) diff --git a/configs/fp16/mask_rcnn_r50_fpn_fp16_mdconv_c3-c5_1x_coco.py b/configs/fp16/mask_rcnn_r50_fpn_fp16_mdconv_c3-c5_1x_coco.py new file mode 100644 index 0000000..7e21454 --- /dev/null +++ b/configs/fp16/mask_rcnn_r50_fpn_fp16_mdconv_c3-c5_1x_coco.py @@ -0,0 +1,7 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True))) + +fp16 = dict(loss_scale=512.) 
diff --git a/configs/fp16/metafile.yml b/configs/fp16/metafile.yml new file mode 100644 index 0000000..70d6649 --- /dev/null +++ b/configs/fp16/metafile.yml @@ -0,0 +1,113 @@ +Collections: + - Name: FP16 + Metadata: + Training Data: COCO + Training Techniques: + - Mixed Precision Training + Training Resources: 8x V100 GPUs + Paper: + URL: https://arxiv.org/abs/1710.03740 + Title: 'Mixed Precision Training' + README: configs/fp16/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/core/fp16/hooks.py#L11 + Version: v2.0.0 + +Models: + - Name: faster_rcnn_r50_fpn_fp16_1x_coco + In Collection: FP16 + Config: configs/fp16/faster_rcnn_r50_fpn_fp16_1x_coco.py + Metadata: + Training Memory (GB): 3.4 + inference time (ms/im): + - value: 34.72 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP16 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 37.5 + Weights: https://download.openmmlab.com/mmdetection/v2.0/fp16/faster_rcnn_r50_fpn_fp16_1x_coco/faster_rcnn_r50_fpn_fp16_1x_coco_20200204-d4dc1471.pth + + - Name: mask_rcnn_r50_fpn_fp16_1x_coco + In Collection: FP16 + Config: configs/fp16/mask_rcnn_r50_fpn_fp16_1x_coco.py + Metadata: + Training Memory (GB): 3.6 + inference time (ms/im): + - value: 41.49 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP16 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 38.1 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 34.7 + Weights: https://download.openmmlab.com/mmdetection/v2.0/fp16/mask_rcnn_r50_fpn_fp16_1x_coco/mask_rcnn_r50_fpn_fp16_1x_coco_20200205-59faf7e4.pth + + - Name: mask_rcnn_r50_fpn_fp16_dconv_c3-c5_1x_coco + In Collection: FP16 + Config: configs/fp16/mask_rcnn_r50_fpn_fp16_dconv_c3-c5_1x_coco.py + Metadata: + Training Memory (GB): 3.0 + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 
41.9 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 37.5 + Weights: https://download.openmmlab.com/mmdetection/v2.0/fp16/mask_rcnn_r50_fpn_fp16_dconv_c3-c5_1x_coco/mask_rcnn_r50_fpn_fp16_dconv_c3-c5_1x_coco_20210520_180247-c06429d2.pth + + - Name: mask_rcnn_r50_fpn_fp16_mdconv_c3-c5_1x_coco + In Collection: FP16 + Config: configs/fp16/mask_rcnn_r50_fpn_fp16_mdconv_c3-c5_1x_coco.py + Metadata: + Training Memory (GB): 3.1 + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 42.0 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 37.6 + Weights: https://download.openmmlab.com/mmdetection/v2.0/fp16/mask_rcnn_r50_fpn_fp16_mdconv_c3-c5_1x_coco/mask_rcnn_r50_fpn_fp16_mdconv_c3-c5_1x_coco_20210520_180434-cf8fefa5.pth + + - Name: retinanet_r50_fpn_fp16_1x_coco + In Collection: FP16 + Config: configs/fp16/retinanet_r50_fpn_fp16_1x_coco.py + Metadata: + Training Memory (GB): 2.8 + inference time (ms/im): + - value: 31.65 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP16 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 36.4 + Weights: https://download.openmmlab.com/mmdetection/v2.0/fp16/retinanet_r50_fpn_fp16_1x_coco/retinanet_r50_fpn_fp16_1x_coco_20200702-0dbfb212.pth diff --git a/configs/fp16/retinanet_r50_fpn_fp16_1x_coco.py b/configs/fp16/retinanet_r50_fpn_fp16_1x_coco.py new file mode 100644 index 0000000..519c4db --- /dev/null +++ b/configs/fp16/retinanet_r50_fpn_fp16_1x_coco.py @@ -0,0 +1,3 @@ +_base_ = '../retinanet/retinanet_r50_fpn_1x_coco.py' +# fp16 settings +fp16 = dict(loss_scale=512.) 
diff --git a/configs/fpg/README.md b/configs/fpg/README.md new file mode 100644 index 0000000..c0545da --- /dev/null +++ b/configs/fpg/README.md @@ -0,0 +1,30 @@ +# Feature Pyramid Grids + + +## Introduction + +```latex +@article{chen2020feature, + title={Feature pyramid grids}, + author={Chen, Kai and Cao, Yuhang and Loy, Chen Change and Lin, Dahua and Feichtenhofer, Christoph}, + journal={arXiv preprint arXiv:2004.03580}, + year={2020} +} +``` + +## Results and Models + +We benchmark the new training schedule (crop training, large batch, unfrozen BN, 50 epochs) introduced in NAS-FPN. +All backbones are Resnet-50 in pytorch style. + +| Method | Neck | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +|:------------:|:-----------:|:-------:|:--------:|:--------------:|:------:|:-------:|:-------:|:--------:| +| Faster R-CNN | FPG | 50e | 20.0 | - | 42.2 | - |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fpg/faster_rcnn_r50_fpg_crop640_50e_coco.py) |[model](https://download.openmmlab.com/mmdetection/v2.0/fpg/faster_rcnn_r50_fpg_crop640_50e_coco/faster_rcnn_r50_fpg_crop640_50e_coco-76220505.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/fpg/faster_rcnn_r50_fpg_crop640_50e_coco/20210218_223520.log.json) | +| Faster R-CNN | FPG-chn128 | 50e | 11.9 | - | 41.2 | - |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fpg/faster_rcnn_r50_fpg-chn128_crop640_50e_coco.py) |[model](https://download.openmmlab.com/mmdetection/v2.0/fpg/faster_rcnn_r50_fpg-chn128_crop640_50e_coco/faster_rcnn_r50_fpg-chn128_crop640_50e_coco-24257de9.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/fpg/faster_rcnn_r50_fpg-chn128_crop640_50e_coco/20210218_221412.log.json) | +| Mask R-CNN | FPG | 50e | 23.2 | - | 42.7 | 37.8 |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fpg/mask_rcnn_r50_fpg_crop640_50e_coco.py) 
|[model](https://download.openmmlab.com/mmdetection/v2.0/fpg/mask_rcnn_r50_fpg_crop640_50e_coco/mask_rcnn_r50_fpg_crop640_50e_coco-c5860453.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/fpg/mask_rcnn_r50_fpg_crop640_50e_coco/20210222_205447.log.json) | +| Mask R-CNN | FPG-chn128 | 50e | 15.3 | - | 41.7 | 36.9 |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fpg/mask_rcnn_r50_fpg-chn128_crop640_50e_coco.py) |[model](https://download.openmmlab.com/mmdetection/v2.0/fpg/mask_rcnn_r50_fpg-chn128_crop640_50e_coco/mask_rcnn_r50_fpg-chn128_crop640_50e_coco-5c6ea10d.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/fpg/mask_rcnn_r50_fpg-chn128_crop640_50e_coco/20210223_025039.log.json) | +| RetinaNet | FPG | 50e | 20.8 | - | 40.5 | - |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fpg/retinanet_r50_fpg_crop640_50e_coco.py) |[model](https://download.openmmlab.com/mmdetection/v2.0/fpg/retinanet_r50_fpg_crop640_50e_coco/retinanet_r50_fpg_crop640_50e_coco-46fdd1c6.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/fpg/retinanet_r50_fpg_crop640_50e_coco/20210225_143957.log.json) | +| RetinaNet | FPG-chn128 | 50e | 19.9 | - | 40.3 | - |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fpg/retinanet_r50_fpg-chn128_crop640_50e_coco.py) |[model](https://download.openmmlab.com/mmdetection/v2.0/fpg/retinanet_r50_fpg-chn128_crop640_50e_coco/retinanet_r50_fpg-chn128_crop640_50e_coco-5cf33c76.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/fpg/retinanet_r50_fpg-chn128_crop640_50e_coco/20210225_184328.log.json) | + +**Note**: Chn128 means to decrease the number of channels of features and convs from 256 (default) to 128 in +Neck and BBox Head, which can greatly decrease memory consumption without sacrificing much precision. 
diff --git a/configs/fpg/faster_rcnn_r50_fpg-chn128_crop640_50e_coco.py b/configs/fpg/faster_rcnn_r50_fpg-chn128_crop640_50e_coco.py new file mode 100644 index 0000000..4535034 --- /dev/null +++ b/configs/fpg/faster_rcnn_r50_fpg-chn128_crop640_50e_coco.py @@ -0,0 +1,9 @@ +_base_ = 'faster_rcnn_r50_fpg_crop640_50e_coco.py' + +norm_cfg = dict(type='BN', requires_grad=True) +model = dict( + neck=dict(out_channels=128, inter_channels=128), + rpn_head=dict(in_channels=128), + roi_head=dict( + bbox_roi_extractor=dict(out_channels=128), + bbox_head=dict(in_channels=128))) diff --git a/configs/fpg/faster_rcnn_r50_fpg_crop640_50e_coco.py b/configs/fpg/faster_rcnn_r50_fpg_crop640_50e_coco.py new file mode 100644 index 0000000..3ab2a2c --- /dev/null +++ b/configs/fpg/faster_rcnn_r50_fpg_crop640_50e_coco.py @@ -0,0 +1,48 @@ +_base_ = 'faster_rcnn_r50_fpn_crop640_50e_coco.py' + +norm_cfg = dict(type='BN', requires_grad=True) +model = dict( + neck=dict( + type='FPG', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + inter_channels=256, + num_outs=5, + stack_times=9, + paths=['bu'] * 9, + same_down_trans=None, + same_up_trans=dict( + type='conv', + kernel_size=3, + stride=2, + padding=1, + norm_cfg=norm_cfg, + inplace=False, + order=('act', 'conv', 'norm')), + across_lateral_trans=dict( + type='conv', + kernel_size=1, + norm_cfg=norm_cfg, + inplace=False, + order=('act', 'conv', 'norm')), + across_down_trans=dict( + type='interpolation_conv', + mode='nearest', + kernel_size=3, + norm_cfg=norm_cfg, + order=('act', 'conv', 'norm'), + inplace=False), + across_up_trans=None, + across_skip_trans=dict( + type='conv', + kernel_size=1, + norm_cfg=norm_cfg, + inplace=False, + order=('act', 'conv', 'norm')), + output_trans=dict( + type='last_conv', + kernel_size=3, + order=('act', 'conv', 'norm'), + inplace=False), + norm_cfg=norm_cfg, + skip_inds=[(0, 1, 2, 3), (0, 1, 2), (0, 1), (0, ), ()])) diff --git a/configs/fpg/faster_rcnn_r50_fpn_crop640_50e_coco.py 
b/configs/fpg/faster_rcnn_r50_fpn_crop640_50e_coco.py new file mode 100644 index 0000000..95f4e91 --- /dev/null +++ b/configs/fpg/faster_rcnn_r50_fpn_crop640_50e_coco.py @@ -0,0 +1,68 @@ +_base_ = [ + '../_base_/models/faster_rcnn_r50_fpn.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +norm_cfg = dict(type='BN', requires_grad=True) +model = dict( + backbone=dict(norm_cfg=norm_cfg, norm_eval=False), + neck=dict(norm_cfg=norm_cfg), + roi_head=dict(bbox_head=dict(norm_cfg=norm_cfg))) +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict( + type='Resize', + img_scale=(640, 640), + ratio_range=(0.8, 1.2), + keep_ratio=True), + dict(type='RandomCrop', crop_size=(640, 640)), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=(640, 640)), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(640, 640), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=64), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=8, + workers_per_gpu=4, + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +# optimizer +optimizer = dict( + type='SGD', + lr=0.08, + momentum=0.9, + weight_decay=0.0001, + paramwise_cfg=dict(norm_decay_mult=0, bypass_duplicate=True)) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + 
policy='step', + warmup='linear', + warmup_iters=1000, + warmup_ratio=0.1, + step=[30, 40]) +# runtime settings +runner = dict(max_epochs=50) +evaluation = dict(interval=2) diff --git a/configs/fpg/mask_rcnn_r50_fpg-chn128_crop640_50e_coco.py b/configs/fpg/mask_rcnn_r50_fpg-chn128_crop640_50e_coco.py new file mode 100644 index 0000000..baa4a5a --- /dev/null +++ b/configs/fpg/mask_rcnn_r50_fpg-chn128_crop640_50e_coco.py @@ -0,0 +1,10 @@ +_base_ = 'mask_rcnn_r50_fpg_crop640_50e_coco.py' + +model = dict( + neck=dict(out_channels=128, inter_channels=128), + rpn_head=dict(in_channels=128), + roi_head=dict( + bbox_roi_extractor=dict(out_channels=128), + bbox_head=dict(in_channels=128), + mask_roi_extractor=dict(out_channels=128), + mask_head=dict(in_channels=128))) diff --git a/configs/fpg/mask_rcnn_r50_fpg_crop640_50e_coco.py b/configs/fpg/mask_rcnn_r50_fpg_crop640_50e_coco.py new file mode 100644 index 0000000..3c9ea27 --- /dev/null +++ b/configs/fpg/mask_rcnn_r50_fpg_crop640_50e_coco.py @@ -0,0 +1,48 @@ +_base_ = 'mask_rcnn_r50_fpn_crop640_50e_coco.py' + +norm_cfg = dict(type='BN', requires_grad=True) +model = dict( + neck=dict( + type='FPG', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + inter_channels=256, + num_outs=5, + stack_times=9, + paths=['bu'] * 9, + same_down_trans=None, + same_up_trans=dict( + type='conv', + kernel_size=3, + stride=2, + padding=1, + norm_cfg=norm_cfg, + inplace=False, + order=('act', 'conv', 'norm')), + across_lateral_trans=dict( + type='conv', + kernel_size=1, + norm_cfg=norm_cfg, + inplace=False, + order=('act', 'conv', 'norm')), + across_down_trans=dict( + type='interpolation_conv', + mode='nearest', + kernel_size=3, + norm_cfg=norm_cfg, + order=('act', 'conv', 'norm'), + inplace=False), + across_up_trans=None, + across_skip_trans=dict( + type='conv', + kernel_size=1, + norm_cfg=norm_cfg, + inplace=False, + order=('act', 'conv', 'norm')), + output_trans=dict( + type='last_conv', + kernel_size=3, + order=('act', 'conv', 
'norm'), + inplace=False), + norm_cfg=norm_cfg, + skip_inds=[(0, 1, 2, 3), (0, 1, 2), (0, 1), (0, ), ()])) diff --git a/configs/fpg/mask_rcnn_r50_fpn_crop640_50e_coco.py b/configs/fpg/mask_rcnn_r50_fpn_crop640_50e_coco.py new file mode 100644 index 0000000..8dfdbb4 --- /dev/null +++ b/configs/fpg/mask_rcnn_r50_fpn_crop640_50e_coco.py @@ -0,0 +1,74 @@ +_base_ = [ + '../_base_/models/mask_rcnn_r50_fpn.py', + '../_base_/datasets/coco_instance.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +norm_cfg = dict(type='BN', requires_grad=True) +model = dict( + backbone=dict(norm_cfg=norm_cfg, norm_eval=False), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + norm_cfg=norm_cfg, + num_outs=5), + roi_head=dict( + bbox_head=dict(norm_cfg=norm_cfg), mask_head=dict(norm_cfg=norm_cfg))) +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict( + type='Resize', + img_scale=(640, 640), + ratio_range=(0.8, 1.2), + keep_ratio=True), + dict(type='RandomCrop', crop_size=(640, 640)), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=(640, 640)), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(640, 640), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=64), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=8, + workers_per_gpu=4, + train=dict(pipeline=train_pipeline), + 
val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +# optimizer +optimizer = dict( + type='SGD', + lr=0.08, + momentum=0.9, + weight_decay=0.0001, + paramwise_cfg=dict(norm_decay_mult=0, bypass_duplicate=True)) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=1000, + warmup_ratio=0.1, + step=[30, 40]) +# runtime settings +runner = dict(max_epochs=50) +evaluation = dict(interval=2) diff --git a/configs/fpg/metafile.yml b/configs/fpg/metafile.yml new file mode 100644 index 0000000..885d857 --- /dev/null +++ b/configs/fpg/metafile.yml @@ -0,0 +1,104 @@ +Collections: + - Name: Feature Pyramid Grids + Metadata: + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - Feature Pyramid Grids + Paper: + URL: https://arxiv.org/abs/2004.03580 + Title: 'Feature Pyramid Grids' + README: configs/fpg/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.10.0/mmdet/models/necks/fpg.py#L101 + Version: v2.10.0 + +Models: + - Name: faster_rcnn_r50_fpg_crop640_50e_coco + In Collection: Feature Pyramid Grids + Config: configs/fpg/faster_rcnn_r50_fpg_crop640_50e_coco.py + Metadata: + Training Memory (GB): 20.0 + Epochs: 50 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 42.2 + Weights: https://download.openmmlab.com/mmdetection/v2.0/fpg/faster_rcnn_r50_fpg_crop640_50e_coco/faster_rcnn_r50_fpg_crop640_50e_coco-76220505.pth + + - Name: faster_rcnn_r50_fpg-chn128_crop640_50e_coco + In Collection: Feature Pyramid Grids + Config: configs/fpg/faster_rcnn_r50_fpg-chn128_crop640_50e_coco.py + Metadata: + Training Memory (GB): 11.9 + Epochs: 50 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.2 + Weights: 
https://download.openmmlab.com/mmdetection/v2.0/fpg/faster_rcnn_r50_fpg-chn128_crop640_50e_coco/faster_rcnn_r50_fpg-chn128_crop640_50e_coco-24257de9.pth + + - Name: mask_rcnn_r50_fpg_crop640_50e_coco + In Collection: Feature Pyramid Grids + Config: configs/fpg/mask_rcnn_r50_fpg_crop640_50e_coco.py + Metadata: + Training Memory (GB): 23.2 + Epochs: 50 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 42.7 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 37.8 + Weights: https://download.openmmlab.com/mmdetection/v2.0/fpg/mask_rcnn_r50_fpg_crop640_50e_coco/mask_rcnn_r50_fpg_crop640_50e_coco-c5860453.pth + + - Name: mask_rcnn_r50_fpg-chn128_crop640_50e_coco + In Collection: Feature Pyramid Grids + Config: configs/fpg/mask_rcnn_r50_fpg-chn128_crop640_50e_coco.py + Metadata: + Training Memory (GB): 15.3 + Epochs: 50 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.7 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 36.9 + Weights: https://download.openmmlab.com/mmdetection/v2.0/fpg/mask_rcnn_r50_fpg-chn128_crop640_50e_coco/mask_rcnn_r50_fpg-chn128_crop640_50e_coco-5c6ea10d.pth + + - Name: retinanet_r50_fpg_crop640_50e_coco + In Collection: Feature Pyramid Grids + Config: configs/fpg/retinanet_r50_fpg_crop640_50e_coco.py + Metadata: + Training Memory (GB): 20.8 + Epochs: 50 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 40.5 + Weights: https://download.openmmlab.com/mmdetection/v2.0/fpg/retinanet_r50_fpg_crop640_50e_coco/retinanet_r50_fpg_crop640_50e_coco-46fdd1c6.pth + + - Name: retinanet_r50_fpg-chn128_crop640_50e_coco + In Collection: Feature Pyramid Grids + Config: configs/fpg/retinanet_r50_fpg-chn128_crop640_50e_coco.py + Metadata: + Training Memory (GB): 19.9 + Epochs: 50 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 40.3 + Weights: 
https://download.openmmlab.com/mmdetection/v2.0/fpg/retinanet_r50_fpg-chn128_crop640_50e_coco/retinanet_r50_fpg-chn128_crop640_50e_coco-5cf33c76.pth diff --git a/configs/fpg/retinanet_r50_fpg-chn128_crop640_50e_coco.py b/configs/fpg/retinanet_r50_fpg-chn128_crop640_50e_coco.py new file mode 100644 index 0000000..9a6cf7e --- /dev/null +++ b/configs/fpg/retinanet_r50_fpg-chn128_crop640_50e_coco.py @@ -0,0 +1,5 @@ +_base_ = 'retinanet_r50_fpg_crop640_50e_coco.py' + +model = dict( + neck=dict(out_channels=128, inter_channels=128), + bbox_head=dict(in_channels=128)) diff --git a/configs/fpg/retinanet_r50_fpg_crop640_50e_coco.py b/configs/fpg/retinanet_r50_fpg_crop640_50e_coco.py new file mode 100644 index 0000000..504ed5e --- /dev/null +++ b/configs/fpg/retinanet_r50_fpg_crop640_50e_coco.py @@ -0,0 +1,53 @@ +_base_ = '../nas_fpn/retinanet_r50_nasfpn_crop640_50e_coco.py' + +norm_cfg = dict(type='BN', requires_grad=True) +model = dict( + neck=dict( + _delete_=True, + type='FPG', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + inter_channels=256, + num_outs=5, + add_extra_convs=True, + start_level=1, + stack_times=9, + paths=['bu'] * 9, + same_down_trans=None, + same_up_trans=dict( + type='conv', + kernel_size=3, + stride=2, + padding=1, + norm_cfg=norm_cfg, + inplace=False, + order=('act', 'conv', 'norm')), + across_lateral_trans=dict( + type='conv', + kernel_size=1, + norm_cfg=norm_cfg, + inplace=False, + order=('act', 'conv', 'norm')), + across_down_trans=dict( + type='interpolation_conv', + mode='nearest', + kernel_size=3, + norm_cfg=norm_cfg, + order=('act', 'conv', 'norm'), + inplace=False), + across_up_trans=None, + across_skip_trans=dict( + type='conv', + kernel_size=1, + norm_cfg=norm_cfg, + inplace=False, + order=('act', 'conv', 'norm')), + output_trans=dict( + type='last_conv', + kernel_size=3, + order=('act', 'conv', 'norm'), + inplace=False), + norm_cfg=norm_cfg, + skip_inds=[(0, 1, 2, 3), (0, 1, 2), (0, 1), (0, ), ()])) + +evaluation = 
dict(interval=2) diff --git a/configs/free_anchor/README.md b/configs/free_anchor/README.md new file mode 100644 index 0000000..5014bc0 --- /dev/null +++ b/configs/free_anchor/README.md @@ -0,0 +1,27 @@ +# FreeAnchor: Learning to Match Anchors for Visual Object Detection + +## Introduction + + + +```latex +@inproceedings{zhang2019freeanchor, + title = {{FreeAnchor}: Learning to Match Anchors for Visual Object Detection}, + author = {Zhang, Xiaosong and Wan, Fang and Liu, Chang and Ji, Rongrong and Ye, Qixiang}, + booktitle = {Neural Information Processing Systems}, + year = {2019} +} +``` + +## Results and Models + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +|:--------:|:-------:|:-------:|:--------:|:--------------:|:------:|:------:|:--------:| +| R-50 | pytorch | 1x | 4.9 | 18.4 | 38.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/free_anchor/retinanet_free_anchor_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/free_anchor/retinanet_free_anchor_r50_fpn_1x_coco/retinanet_free_anchor_r50_fpn_1x_coco_20200130-0f67375f.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/free_anchor/retinanet_free_anchor_r50_fpn_1x_coco/retinanet_free_anchor_r50_fpn_1x_coco_20200130_095625.log.json) | +| R-101 | pytorch | 1x | 6.8 | 14.9 | 40.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/free_anchor/retinanet_free_anchor_r101_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/free_anchor/retinanet_free_anchor_r101_fpn_1x_coco/retinanet_free_anchor_r101_fpn_1x_coco_20200130-358324e6.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/free_anchor/retinanet_free_anchor_r101_fpn_1x_coco/retinanet_free_anchor_r101_fpn_1x_coco_20200130_100723.log.json) | +| X-101-32x4d | pytorch | 1x | 8.1 | 11.1 | 41.9 | 
[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/free_anchor/retinanet_free_anchor_x101_32x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/free_anchor/retinanet_free_anchor_x101_32x4d_fpn_1x_coco/retinanet_free_anchor_x101_32x4d_fpn_1x_coco_20200130-d4846968.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/free_anchor/retinanet_free_anchor_x101_32x4d_fpn_1x_coco/retinanet_free_anchor_x101_32x4d_fpn_1x_coco_20200130_095627.log.json) | + +**Notes:** + +- We use 8 GPUs with 2 images/GPU. +- For more settings and models, please refer to the [official repo](https://github.com/zhangxiaosong18/FreeAnchor). diff --git a/configs/free_anchor/metafile.yml b/configs/free_anchor/metafile.yml new file mode 100644 index 0000000..170fb5c --- /dev/null +++ b/configs/free_anchor/metafile.yml @@ -0,0 +1,79 @@ +Collections: + - Name: FreeAnchor + Metadata: + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - FreeAnchor + - ResNet + Paper: + URL: https://arxiv.org/abs/1909.02466 + Title: 'FreeAnchor: Learning to Match Anchors for Visual Object Detection' + README: configs/free_anchor/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/dense_heads/free_anchor_retina_head.py#L10 + Version: v2.0.0 + +Models: + - Name: retinanet_free_anchor_r50_fpn_1x_coco + In Collection: FreeAnchor + Config: configs/free_anchor/retinanet_free_anchor_r50_fpn_1x_coco.py + Metadata: + Training Memory (GB): 4.9 + inference time (ms/im): + - value: 54.35 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 38.7 + Weights: https://download.openmmlab.com/mmdetection/v2.0/free_anchor/retinanet_free_anchor_r50_fpn_1x_coco/retinanet_free_anchor_r50_fpn_1x_coco_20200130-0f67375f.pth + + - Name: 
retinanet_free_anchor_r101_fpn_1x_coco + In Collection: FreeAnchor + Config: configs/free_anchor/retinanet_free_anchor_r101_fpn_1x_coco.py + Metadata: + Training Memory (GB): 6.8 + inference time (ms/im): + - value: 67.11 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 40.3 + Weights: https://download.openmmlab.com/mmdetection/v2.0/free_anchor/retinanet_free_anchor_r101_fpn_1x_coco/retinanet_free_anchor_r101_fpn_1x_coco_20200130-358324e6.pth + + - Name: retinanet_free_anchor_x101_32x4d_fpn_1x_coco + In Collection: FreeAnchor + Config: configs/free_anchor/retinanet_free_anchor_x101_32x4d_fpn_1x_coco.py + Metadata: + Training Memory (GB): 8.1 + inference time (ms/im): + - value: 90.09 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.9 + Weights: https://download.openmmlab.com/mmdetection/v2.0/free_anchor/retinanet_free_anchor_x101_32x4d_fpn_1x_coco/retinanet_free_anchor_x101_32x4d_fpn_1x_coco_20200130-d4846968.pth diff --git a/configs/free_anchor/retinanet_free_anchor_r101_fpn_1x_coco.py b/configs/free_anchor/retinanet_free_anchor_r101_fpn_1x_coco.py new file mode 100644 index 0000000..f4aea53 --- /dev/null +++ b/configs/free_anchor/retinanet_free_anchor_r101_fpn_1x_coco.py @@ -0,0 +1,6 @@ +_base_ = './retinanet_free_anchor_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + depth=101, + init_cfg=dict(type='Pretrained', + checkpoint='torchvision://resnet101'))) diff --git a/configs/free_anchor/retinanet_free_anchor_r50_fpn_1x_coco.py b/configs/free_anchor/retinanet_free_anchor_r50_fpn_1x_coco.py new file mode 100644 index 0000000..28f983c --- /dev/null +++ b/configs/free_anchor/retinanet_free_anchor_r50_fpn_1x_coco.py @@ -0,0 +1,22 @@ +_base_ = '../retinanet/retinanet_r50_fpn_1x_coco.py' +model = dict( + 
bbox_head=dict( + _delete_=True, + type='FreeAnchorRetinaHead', + num_classes=80, + in_channels=256, + stacked_convs=4, + feat_channels=256, + anchor_generator=dict( + type='AnchorGenerator', + octave_base_scale=4, + scales_per_octave=3, + ratios=[0.5, 1.0, 2.0], + strides=[8, 16, 32, 64, 128]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[0.1, 0.1, 0.2, 0.2]), + loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=0.75))) +optimizer_config = dict( + _delete_=True, grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/free_anchor/retinanet_free_anchor_x101_32x4d_fpn_1x_coco.py b/configs/free_anchor/retinanet_free_anchor_x101_32x4d_fpn_1x_coco.py new file mode 100644 index 0000000..65f8a9e --- /dev/null +++ b/configs/free_anchor/retinanet_free_anchor_x101_32x4d_fpn_1x_coco.py @@ -0,0 +1,13 @@ +_base_ = './retinanet_free_anchor_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d'))) diff --git a/configs/fsaf/README.md b/configs/fsaf/README.md new file mode 100644 index 0000000..4d64392 --- /dev/null +++ b/configs/fsaf/README.md @@ -0,0 +1,45 @@ +# Feature Selective Anchor-Free Module for Single-Shot Object Detection + + + +FSAF is an anchor-free method published in CVPR2019 ([https://arxiv.org/pdf/1903.00621.pdf](https://arxiv.org/pdf/1903.00621.pdf)). +Actually it is equivalent to the anchor-based method with only one anchor at each feature map position in each FPN level. +And this is how we implemented it. +Only the anchor-free branch is released for its better compatibility with the current framework and less computational budget. + +In the original paper, feature maps within the central 0.2-0.5 area of a gt box are tagged as ignored. 
However, +it is empirically found that a hard threshold (0.2-0.2) gives a further gain on the performance. (see the table below) + +## Main Results + +### Results on R50/R101/X101-FPN + +| Backbone | ignore range | ms-train| Lr schd |Train Mem (GB)| Train time (s/iter) | Inf time (fps) | box AP | Config | Download | +|:----------:| :-------: |:-------:|:-------:|:------------:|:---------------:|:--------------:|:-------------:|:------:|:--------:| +| R-50 | 0.2-0.5 | N | 1x | 3.15 | 0.43 | 12.3 | 36.0 (35.9) | | [model](https://download.openmmlab.com/mmdetection/v2.0/fsaf/fsaf_pscale0.2_nscale0.5_r50_fpn_1x_coco/fsaf_pscale0.2_nscale0.5_r50_fpn_1x_coco_20200715-b555b0e0.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/fsaf/fsaf_pscale0.2_nscale0.5_r50_fpn_1x_coco/fsaf_pscale0.2_nscale0.5_r50_fpn_1x_coco_20200715_094657.log.json) | +| R-50 | 0.2-0.2 | N | 1x | 3.15 | 0.43 | 13.0 | 37.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fsaf/fsaf_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/fsaf/fsaf_r50_fpn_1x_coco/fsaf_r50_fpn_1x_coco-94ccc51f.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/fsaf/fsaf_r50_fpn_1x_coco/fsaf_r50_fpn_1x_coco_20200428_072327.log.json)| +| R-101 | 0.2-0.2 | N | 1x | 5.08 | 0.58 | 10.8 | 39.3 (37.9) | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fsaf/fsaf_r101_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/fsaf/fsaf_r101_fpn_1x_coco/fsaf_r101_fpn_1x_coco-9e71098f.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/fsaf/fsaf_r101_fpn_1x_coco/fsaf_r101_fpn_1x_coco_20200428_160348.log.json)| +| X-101 | 0.2-0.2 | N | 1x | 9.38 | 1.23 | 5.6 | 42.4 (41.0) | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fsaf/fsaf_x101_64x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/fsaf/fsaf_x101_64x4d_fpn_1x_coco/fsaf_x101_64x4d_fpn_1x_coco-e3f6e6fd.pth) | 
[log](https://download.openmmlab.com/mmdetection/v2.0/fsaf/fsaf_x101_64x4d_fpn_1x_coco/fsaf_x101_64x4d_fpn_1x_coco_20200428_160424.log.json)| + +**Notes:** + +- *1x means the model is trained for 12 epochs.* +- *AP values in the brackets represent those reported in the original paper.* +- *All results are obtained with a single model and single-scale test.* +- *X-101 backbone represents ResNext-101-64x4d.* +- *All pretrained backbones use pytorch style.* +- *All models are trained on 8 Titan-XP gpus and tested on a single gpu.* + +## Citations + +BibTeX reference is as follows. + +```latex +@inproceedings{zhu2019feature, + title={Feature Selective Anchor-Free Module for Single-Shot Object Detection}, + author={Zhu, Chenchen and He, Yihui and Savvides, Marios}, + booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition}, + pages={840--849}, + year={2019} +} +``` diff --git a/configs/fsaf/fsaf_r101_fpn_1x_coco.py b/configs/fsaf/fsaf_r101_fpn_1x_coco.py new file mode 100644 index 0000000..12b49fe --- /dev/null +++ b/configs/fsaf/fsaf_r101_fpn_1x_coco.py @@ -0,0 +1,6 @@ +_base_ = './fsaf_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + depth=101, + init_cfg=dict(type='Pretrained', + checkpoint='torchvision://resnet101'))) diff --git a/configs/fsaf/fsaf_r50_fpn_1x_coco.py b/configs/fsaf/fsaf_r50_fpn_1x_coco.py new file mode 100644 index 0000000..67f3ec1 --- /dev/null +++ b/configs/fsaf/fsaf_r50_fpn_1x_coco.py @@ -0,0 +1,48 @@ +_base_ = '../retinanet/retinanet_r50_fpn_1x_coco.py' +# model settings +model = dict( + type='FSAF', + bbox_head=dict( + type='FSAFHead', + num_classes=80, + in_channels=256, + stacked_convs=4, + feat_channels=256, + reg_decoded_bbox=True, + # Only anchor-free branch is implemented. The anchor generator only + # generates 1 anchor at each feature point, as a substitute of the + # grid of features. 
+ anchor_generator=dict( + type='AnchorGenerator', + octave_base_scale=1, + scales_per_octave=1, + ratios=[1.0], + strides=[8, 16, 32, 64, 128]), + bbox_coder=dict(_delete_=True, type='TBLRBBoxCoder', normalizer=4.0), + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0, + reduction='none'), + loss_bbox=dict( + _delete_=True, + type='IoULoss', + eps=1e-6, + loss_weight=1.0, + reduction='none')), + # training and testing settings + train_cfg=dict( + assigner=dict( + _delete_=True, + type='CenterRegionAssigner', + pos_scale=0.2, + neg_scale=0.2, + min_pos_iof=0.01), + allowed_border=-1, + pos_weight=-1, + debug=False)) +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict( + _delete_=True, grad_clip=dict(max_norm=10, norm_type=2)) diff --git a/configs/fsaf/fsaf_x101_64x4d_fpn_1x_coco.py b/configs/fsaf/fsaf_x101_64x4d_fpn_1x_coco.py new file mode 100644 index 0000000..89c0c63 --- /dev/null +++ b/configs/fsaf/fsaf_x101_64x4d_fpn_1x_coco.py @@ -0,0 +1,14 @@ +_base_ = './fsaf_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d'))) diff --git a/configs/fsaf/metafile.yml b/configs/fsaf/metafile.yml new file mode 100644 index 0000000..5434e9a --- /dev/null +++ b/configs/fsaf/metafile.yml @@ -0,0 +1,80 @@ +Collections: + - Name: FSAF + Metadata: + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x Titan-XP GPUs + Architecture: + - FPN + - FSAF + - ResNet + Paper: + URL: https://arxiv.org/abs/1903.00621 + Title: 'Feature Selective Anchor-Free Module for Single-Shot Object Detection' + README: configs/fsaf/README.md + Code: + URL: 
https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/detectors/fsaf.py#L6 + Version: v2.1.0 + +Models: + - Name: fsaf_r50_fpn_1x_coco + In Collection: FSAF + Config: configs/fsaf/fsaf_r50_fpn_1x_coco.py + Metadata: + Training Memory (GB): 3.15 + inference time (ms/im): + - value: 76.92 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 37.4 + Weights: https://download.openmmlab.com/mmdetection/v2.0/fsaf/fsaf_r50_fpn_1x_coco/fsaf_r50_fpn_1x_coco-94ccc51f.pth + + - Name: fsaf_r101_fpn_1x_coco + In Collection: FSAF + Config: configs/fsaf/fsaf_r101_fpn_1x_coco.py + Metadata: + Training Memory (GB): 5.08 + inference time (ms/im): + - value: 92.59 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 39.3 (37.9) + Weights: https://download.openmmlab.com/mmdetection/v2.0/fsaf/fsaf_r101_fpn_1x_coco/fsaf_r101_fpn_1x_coco-9e71098f.pth + + - Name: fsaf_x101_64x4d_fpn_1x_coco + In Collection: FSAF + Config: configs/fsaf/fsaf_x101_64x4d_fpn_1x_coco.py + Metadata: + Training Memory (GB): 9.38 + inference time (ms/im): + - value: 178.57 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 42.4 (41.0) + Weights: https://download.openmmlab.com/mmdetection/v2.0/fsaf/fsaf_x101_64x4d_fpn_1x_coco/fsaf_x101_64x4d_fpn_1x_coco-e3f6e6fd.pth diff --git a/configs/gcnet/README.md b/configs/gcnet/README.md new file mode 100644 index 0000000..0ee6a94 --- /dev/null +++ b/configs/gcnet/README.md @@ -0,0 +1,59 @@ +# GCNet for Object Detection + +By [Yue Cao](http://yue-cao.me), [Jiarui Xu](http://jerryxu.net), [Stephen Lin](https://scholar.google.com/citations?user=c3PYmxUAAAAJ&hl=en), Fangyun Wei, [Han 
Hu](https://sites.google.com/site/hanhushomepage/). + +We provide config files to reproduce the results in the paper for +["GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond"](https://arxiv.org/abs/1904.11492) on COCO object detection. + +## Introduction + + + +**GCNet** was first described on [arXiv](https://arxiv.org/abs/1904.11492). By absorbing the advantages of Non-Local Networks (NLNet) and Squeeze-Excitation Networks (SENet), GCNet provides a simple, fast and effective approach for global context modeling, which generally outperforms both NLNet and SENet on major benchmarks for various recognition tasks. + +## Citing GCNet + +```latex +@article{cao2019GCNet, + title={GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond}, + author={Cao, Yue and Xu, Jiarui and Lin, Stephen and Wei, Fangyun and Hu, Han}, + journal={arXiv preprint arXiv:1904.11492}, + year={2019} +} +``` + +## Results and models + +The results on COCO 2017val are shown in the table below.
+ +| Backbone | Model | Context | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :-------: | :--------------: | :------------: | :-----: | :------: | :------------: | :----: | :-----: | :------: | :--------: | +| R-50-FPN | Mask | GC(c3-c5, r16) | 1x | 5.0 | | 39.7 | 35.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/mask_rcnn_r50_fpn_r16_gcb_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_r16_gcb_c3-c5_1x_coco/mask_rcnn_r50_fpn_r16_gcb_c3-c5_1x_coco_20200515_211915-187da160.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_r16_gcb_c3-c5_1x_coco/mask_rcnn_r50_fpn_r16_gcb_c3-c5_1x_coco_20200515_211915.log.json) | +| R-50-FPN | Mask | GC(c3-c5, r4) | 1x | 5.1 | 15.0 | 39.9 | 36.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco/mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco_20200204-17235656.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco/mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco_20200204_024626.log.json) | +| R-101-FPN | Mask | GC(c3-c5, r16) | 1x | 7.6 | 11.4 | 41.3 | 37.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/mask_rcnn_r101_fpn_r16_gcb_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_r16_gcb_c3-c5_1x_coco/mask_rcnn_r101_fpn_r16_gcb_c3-c5_1x_coco_20200205-e58ae947.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_r16_gcb_c3-c5_1x_coco/mask_rcnn_r101_fpn_r16_gcb_c3-c5_1x_coco_20200205_192835.log.json) | +| R-101-FPN | Mask | GC(c3-c5, r4) | 1x | 7.8 | 11.6 | 42.2 | 37.8 | 
[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/mask_rcnn_r101_fpn_r4_gcb_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_r4_gcb_c3-c5_1x_coco/mask_rcnn_r101_fpn_r4_gcb_c3-c5_1x_coco_20200206-af22dc9d.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_r4_gcb_c3-c5_1x_coco/mask_rcnn_r101_fpn_r4_gcb_c3-c5_1x_coco_20200206_112128.log.json) | + +| Backbone | Model | Context | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :-------: | :--------------: | :------------: | :-----: | :------: | :------------: | :----: | :-----: | :------: | :-------: | +| R-50-FPN | Mask | - | 1x | 4.4 | 16.6 | 38.4 | 34.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_1x_coco/mask_rcnn_r50_fpn_syncbn-backbone_1x_coco_20200202-bb3eb55c.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_1x_coco/mask_rcnn_r50_fpn_syncbn-backbone_1x_coco_20200202_214122.log.json) | +| R-50-FPN | Mask | GC(c3-c5, r16) | 1x | 5.0 | 15.5 | 40.4 | 36.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco/mask_rcnn_r50_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco_20200202-587b99aa.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco/mask_rcnn_r50_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco_20200202_174907.log.json) | +| R-50-FPN | Mask | GC(c3-c5, r4) | 1x | 5.1 | 15.1 | 40.7 | 36.5 | 
[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200202-50b90e5c.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200202_085547.log.json) | +| R-101-FPN | Mask | - | 1x | 6.4 | 13.3 | 40.5 | 36.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_1x_coco/mask_rcnn_r101_fpn_syncbn-backbone_1x_coco_20200210-81658c8a.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_1x_coco/mask_rcnn_r101_fpn_syncbn-backbone_1x_coco_20200210_220422.log.json) | +| R-101-FPN | Mask | GC(c3-c5, r16) | 1x | 7.6 | 12.0 | 42.2 | 37.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco/mask_rcnn_r101_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco_20200207-945e77ca.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco/mask_rcnn_r101_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco_20200207_015330.log.json) | +| R-101-FPN | Mask | GC(c3-c5, r4) | 1x | 7.8 | 11.8 | 42.2 | 37.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py) | 
[model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200206-8407a3f0.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200206_142508.log.json) | +| X-101-FPN | Mask | - | 1x | 7.6 | 11.3 | 42.4 | 37.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco_20200211-7584841c.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco_20200211_054326.log.json) | +| X-101-FPN | Mask | GC(c3-c5, r16) | 1x | 8.8 | 9.8 | 43.5 | 38.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco_20200211-cbed3d2c.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco_20200211_164715.log.json) | +| X-101-FPN | Mask | GC(c3-c5, r4) | 1x | 9.0 | 9.7 | 43.9 | 39.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py) | 
[model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200212-68164964.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200212_070942.log.json) | +| X-101-FPN | Cascade Mask | - | 1x | 9.2 | 8.4 | 44.7 | 38.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco_20200310-d5ad2a5e.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco_20200310_115217.log.json) | +| X-101-FPN | Cascade Mask | GC(c3-c5, r16) | 1x | 10.3 | 7.7 | 46.2 | 39.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco_20200211-10bf2463.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco_20200211_184154.log.json) | +| X-101-FPN | Cascade Mask | GC(c3-c5, r4) | 1x | 10.6 | | 46.4 | 40.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py) | 
[model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200703_180653-ed035291.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200703_180653.log.json) | +| X-101-FPN | DCN Cascade Mask | - | 1x | | | 47.5 | 40.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_1x_coco_20210615_211019-abbc39ea.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_1x_coco_20210615_211019.log.json)| +| X-101-FPN | DCN Cascade Mask | GC(c3-c5, r16) | 1x | | | 48.0 | 41.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r16_gcb_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r16_gcb_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r16_gcb_c3-c5_1x_coco_20210615_215648-44aa598a.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r16_gcb_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r16_gcb_c3-c5_1x_coco_20210615_215648.log.json) | +| X-101-FPN | DCN Cascade Mask | GC(c3-c5, r4) | 1x | | | 47.9 | 41.1 | 
[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r4_gcb_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r4_gcb_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r4_gcb_c3-c5_1x_coco_20210615_161851-720338ec.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r4_gcb_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r4_gcb_c3-c5_1x_coco_20210615_161851.log.json) | + +**Notes:** + +- `SyncBN` is added to the backbone for all models in **Table 2** (the second table above). +- `GC` denotes that a Global Context (GC) block is inserted after the 1x1 conv of the backbone. +- `DCN` denotes replacing the 3x3 conv with a 3x3 Deformable Convolution in the `c3-c5` stages of the backbone. +- `r4` and `r16` denote ratio 4 and ratio 16 in the GC block, respectively.
diff --git a/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco.py b/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco.py new file mode 100644 index 0000000..5118895 --- /dev/null +++ b/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco.py @@ -0,0 +1,4 @@ +_base_ = '../cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco.py' +model = dict( + backbone=dict( + norm_cfg=dict(type='SyncBN', requires_grad=True), norm_eval=False)) diff --git a/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_1x_coco.py b/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_1x_coco.py new file mode 100644 index 0000000..413499d --- /dev/null +++ b/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_1x_coco.py @@ -0,0 +1,4 @@ +_base_ = '../dcn/cascade_mask_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco.py' +model = dict( + backbone=dict( + norm_cfg=dict(type='SyncBN', requires_grad=True), norm_eval=False)) diff --git a/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r16_gcb_c3-c5_1x_coco.py b/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r16_gcb_c3-c5_1x_coco.py new file mode 100644 index 0000000..50689aa --- /dev/null +++ b/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r16_gcb_c3-c5_1x_coco.py @@ -0,0 +1,11 @@ +_base_ = '../dcn/cascade_mask_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco.py' +model = dict( + backbone=dict( + norm_cfg=dict(type='SyncBN', requires_grad=True), + norm_eval=False, + plugins=[ + dict( + cfg=dict(type='ContextBlock', ratio=1. 
/ 16), + stages=(False, True, True, True), + position='after_conv3') + ])) diff --git a/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r4_gcb_c3-c5_1x_coco.py b/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r4_gcb_c3-c5_1x_coco.py new file mode 100644 index 0000000..1367231 --- /dev/null +++ b/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r4_gcb_c3-c5_1x_coco.py @@ -0,0 +1,11 @@ +_base_ = '../dcn/cascade_mask_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco.py' +model = dict( + backbone=dict( + norm_cfg=dict(type='SyncBN', requires_grad=True), + norm_eval=False, + plugins=[ + dict( + cfg=dict(type='ContextBlock', ratio=1. / 4), + stages=(False, True, True, True), + position='after_conv3') + ])) diff --git a/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py b/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py new file mode 100644 index 0000000..50883ff --- /dev/null +++ b/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py @@ -0,0 +1,11 @@ +_base_ = '../cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco.py' +model = dict( + backbone=dict( + norm_cfg=dict(type='SyncBN', requires_grad=True), + norm_eval=False, + plugins=[ + dict( + cfg=dict(type='ContextBlock', ratio=1. 
/ 16), + stages=(False, True, True, True), + position='after_conv3') + ])) diff --git a/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py b/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py new file mode 100644 index 0000000..31fdd07 --- /dev/null +++ b/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py @@ -0,0 +1,11 @@ +_base_ = '../cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco.py' +model = dict( + backbone=dict( + norm_cfg=dict(type='SyncBN', requires_grad=True), + norm_eval=False, + plugins=[ + dict( + cfg=dict(type='ContextBlock', ratio=1. / 4), + stages=(False, True, True, True), + position='after_conv3') + ])) diff --git a/configs/gcnet/mask_rcnn_r101_fpn_r16_gcb_c3-c5_1x_coco.py b/configs/gcnet/mask_rcnn_r101_fpn_r16_gcb_c3-c5_1x_coco.py new file mode 100644 index 0000000..ad6ad47 --- /dev/null +++ b/configs/gcnet/mask_rcnn_r101_fpn_r16_gcb_c3-c5_1x_coco.py @@ -0,0 +1,8 @@ +_base_ = '../mask_rcnn/mask_rcnn_r101_fpn_1x_coco.py' +model = dict( + backbone=dict(plugins=[ + dict( + cfg=dict(type='ContextBlock', ratio=1. / 16), + stages=(False, True, True, True), + position='after_conv3') + ])) diff --git a/configs/gcnet/mask_rcnn_r101_fpn_r4_gcb_c3-c5_1x_coco.py b/configs/gcnet/mask_rcnn_r101_fpn_r4_gcb_c3-c5_1x_coco.py new file mode 100644 index 0000000..29f9167 --- /dev/null +++ b/configs/gcnet/mask_rcnn_r101_fpn_r4_gcb_c3-c5_1x_coco.py @@ -0,0 +1,8 @@ +_base_ = '../mask_rcnn/mask_rcnn_r101_fpn_1x_coco.py' +model = dict( + backbone=dict(plugins=[ + dict( + cfg=dict(type='ContextBlock', ratio=1. 
/ 4), + stages=(False, True, True, True), + position='after_conv3') + ])) diff --git a/configs/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_1x_coco.py b/configs/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_1x_coco.py new file mode 100644 index 0000000..6e1c5d0 --- /dev/null +++ b/configs/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_1x_coco.py @@ -0,0 +1,4 @@ +_base_ = '../mask_rcnn/mask_rcnn_r101_fpn_1x_coco.py' +model = dict( + backbone=dict( + norm_cfg=dict(type='SyncBN', requires_grad=True), norm_eval=False)) diff --git a/configs/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py b/configs/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py new file mode 100644 index 0000000..781dba7 --- /dev/null +++ b/configs/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py @@ -0,0 +1,11 @@ +_base_ = '../mask_rcnn/mask_rcnn_r101_fpn_1x_coco.py' +model = dict( + backbone=dict( + norm_cfg=dict(type='SyncBN', requires_grad=True), + norm_eval=False, + plugins=[ + dict( + cfg=dict(type='ContextBlock', ratio=1. / 16), + stages=(False, True, True, True), + position='after_conv3') + ])) diff --git a/configs/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py b/configs/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py new file mode 100644 index 0000000..32972de --- /dev/null +++ b/configs/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py @@ -0,0 +1,11 @@ +_base_ = '../mask_rcnn/mask_rcnn_r101_fpn_1x_coco.py' +model = dict( + backbone=dict( + norm_cfg=dict(type='SyncBN', requires_grad=True), + norm_eval=False, + plugins=[ + dict( + cfg=dict(type='ContextBlock', ratio=1. 
/ 4), + stages=(False, True, True, True), + position='after_conv3') + ])) diff --git a/configs/gcnet/mask_rcnn_r50_fpn_r16_gcb_c3-c5_1x_coco.py b/configs/gcnet/mask_rcnn_r50_fpn_r16_gcb_c3-c5_1x_coco.py new file mode 100644 index 0000000..d299b69 --- /dev/null +++ b/configs/gcnet/mask_rcnn_r50_fpn_r16_gcb_c3-c5_1x_coco.py @@ -0,0 +1,8 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict(plugins=[ + dict( + cfg=dict(type='ContextBlock', ratio=1. / 16), + stages=(False, True, True, True), + position='after_conv3') + ])) diff --git a/configs/gcnet/mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco.py b/configs/gcnet/mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco.py new file mode 100644 index 0000000..5ac908e --- /dev/null +++ b/configs/gcnet/mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco.py @@ -0,0 +1,8 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict(plugins=[ + dict( + cfg=dict(type='ContextBlock', ratio=1. / 4), + stages=(False, True, True, True), + position='after_conv3') + ])) diff --git a/configs/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_1x_coco.py b/configs/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_1x_coco.py new file mode 100644 index 0000000..0308a56 --- /dev/null +++ b/configs/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_1x_coco.py @@ -0,0 +1,4 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + norm_cfg=dict(type='SyncBN', requires_grad=True), norm_eval=False)) diff --git a/configs/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py b/configs/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py new file mode 100644 index 0000000..e04780c --- /dev/null +++ b/configs/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py @@ -0,0 +1,11 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + norm_cfg=dict(type='SyncBN', requires_grad=True), + norm_eval=False, + plugins=[ + dict( + cfg=dict(type='ContextBlock', 
ratio=1. / 16), + stages=(False, True, True, True), + position='after_conv3') + ])) diff --git a/configs/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py b/configs/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py new file mode 100644 index 0000000..980f819 --- /dev/null +++ b/configs/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py @@ -0,0 +1,11 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + norm_cfg=dict(type='SyncBN', requires_grad=True), + norm_eval=False, + plugins=[ + dict( + cfg=dict(type='ContextBlock', ratio=1. / 4), + stages=(False, True, True, True), + position='after_conv3') + ])) diff --git a/configs/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco.py b/configs/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco.py new file mode 100644 index 0000000..f0c96e5 --- /dev/null +++ b/configs/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco.py @@ -0,0 +1,4 @@ +_base_ = '../mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco.py' +model = dict( + backbone=dict( + norm_cfg=dict(type='SyncBN', requires_grad=True), norm_eval=False)) diff --git a/configs/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py b/configs/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py new file mode 100644 index 0000000..7fb8e82 --- /dev/null +++ b/configs/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py @@ -0,0 +1,11 @@ +_base_ = '../mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco.py' +model = dict( + backbone=dict( + norm_cfg=dict(type='SyncBN', requires_grad=True), + norm_eval=False, + plugins=[ + dict( + cfg=dict(type='ContextBlock', ratio=1. 
/ 16), + stages=(False, True, True, True), + position='after_conv3') + ])) diff --git a/configs/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py b/configs/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py new file mode 100644 index 0000000..b1ddbee --- /dev/null +++ b/configs/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py @@ -0,0 +1,11 @@ +_base_ = '../mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco.py' +model = dict( + backbone=dict( + norm_cfg=dict(type='SyncBN', requires_grad=True), + norm_eval=False, + plugins=[ + dict( + cfg=dict(type='ContextBlock', ratio=1. / 4), + stages=(False, True, True, True), + position='after_conv3') + ])) diff --git a/configs/gcnet/metafile.yml b/configs/gcnet/metafile.yml new file mode 100644 index 0000000..1281122 --- /dev/null +++ b/configs/gcnet/metafile.yml @@ -0,0 +1,440 @@ +Collections: + - Name: GCNet + Metadata: + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - Global Context Block + - FPN + - RPN + - ResNet + - ResNeXt + Paper: + URL: https://arxiv.org/abs/1904.11492 + Title: 'GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond' + README: configs/gcnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/ops/context_block.py#L13 + Version: v2.0.0 + +Models: + - Name: mask_rcnn_r50_fpn_r16_gcb_c3-c5_1x_coco + In Collection: GCNet + Config: configs/gcnet/mask_rcnn_r50_fpn_r16_gcb_c3-c5_1x_coco.py + Metadata: + Training Memory (GB): 5.0 + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 39.7 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 35.9 + Weights: https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_r16_gcb_c3-c5_1x_coco/mask_rcnn_r50_fpn_r16_gcb_c3-c5_1x_coco_20200515_211915-187da160.pth + + - Name: mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco + In 
Collection: GCNet + Config: configs/gcnet/mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco.py + Metadata: + Training Memory (GB): 5.1 + inference time (ms/im): + - value: 66.67 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 39.9 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 36.0 + Weights: https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco/mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco_20200204-17235656.pth + + - Name: mask_rcnn_r101_fpn_r16_gcb_c3-c5_1x_coco + In Collection: GCNet + Config: configs/gcnet/mask_rcnn_r101_fpn_r16_gcb_c3-c5_1x_coco.py + Metadata: + Training Memory (GB): 7.6 + inference time (ms/im): + - value: 87.72 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.3 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 37.2 + Weights: https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_r16_gcb_c3-c5_1x_coco/mask_rcnn_r101_fpn_r16_gcb_c3-c5_1x_coco_20200205-e58ae947.pth + + - Name: mask_rcnn_r101_fpn_r4_gcb_c3-c5_1x_coco + In Collection: GCNet + Config: configs/gcnet/mask_rcnn_r101_fpn_r4_gcb_c3-c5_1x_coco.py + Metadata: + Training Memory (GB): 7.8 + inference time (ms/im): + - value: 86.21 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 42.2 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 37.8 + Weights: https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_r4_gcb_c3-c5_1x_coco/mask_rcnn_r101_fpn_r4_gcb_c3-c5_1x_coco_20200206-af22dc9d.pth + + - Name: mask_rcnn_r50_fpn_syncbn-backbone_1x_coco + In Collection: GCNet + Config: 
configs/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_1x_coco.py + Metadata: + Training Memory (GB): 4.4 + inference time (ms/im): + - value: 60.24 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 38.4 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 34.6 + Weights: https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_1x_coco/mask_rcnn_r50_fpn_syncbn-backbone_1x_coco_20200202-bb3eb55c.pth + + - Name: mask_rcnn_r50_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco + In Collection: GCNet + Config: configs/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py + Metadata: + Training Memory (GB): 5.0 + inference time (ms/im): + - value: 64.52 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 40.4 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 36.2 + Weights: https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco/mask_rcnn_r50_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco_20200202-587b99aa.pth + + - Name: mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco + In Collection: GCNet + Config: configs/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py + Metadata: + Training Memory (GB): 5.1 + inference time (ms/im): + - value: 66.23 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 40.7 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 36.5 + Weights: https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200202-50b90e5c.pth + 
+ - Name: mask_rcnn_r101_fpn_syncbn-backbone_1x_coco + In Collection: GCNet + Config: configs/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_1x_coco.py + Metadata: + Training Memory (GB): 6.4 + inference time (ms/im): + - value: 75.19 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 40.5 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 36.3 + Weights: https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_1x_coco/mask_rcnn_r101_fpn_syncbn-backbone_1x_coco_20200210-81658c8a.pth + + - Name: mask_rcnn_r101_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco + In Collection: GCNet + Config: configs/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py + Metadata: + Training Memory (GB): 7.6 + inference time (ms/im): + - value: 83.33 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 42.2 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 37.8 + Weights: https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco/mask_rcnn_r101_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco_20200207-945e77ca.pth + + - Name: mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco + In Collection: GCNet + Config: configs/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py + Metadata: + Training Memory (GB): 7.8 + inference time (ms/im): + - value: 84.75 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 42.2 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 37.8 + Weights: 
https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200206-8407a3f0.pth + + - Name: mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco + In Collection: GCNet + Config: configs/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco.py + Metadata: + Training Memory (GB): 7.6 + inference time (ms/im): + - value: 88.5 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 42.4 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 37.7 + Weights: https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco_20200211-7584841c.pth + + - Name: mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco + In Collection: GCNet + Config: configs/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py + Metadata: + Training Memory (GB): 8.8 + inference time (ms/im): + - value: 102.04 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 43.5 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 38.6 + Weights: https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco_20200211-cbed3d2c.pth + + - Name: mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco + In Collection: GCNet + Config: configs/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py + Metadata: + Training Memory (GB): 9.0 + inference time (ms/im): + - value: 103.09 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + 
Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 43.9 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 39.0 + Weights: https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200212-68164964.pth + + - Name: cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco + In Collection: GCNet + Config: configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco.py + Metadata: + Training Memory (GB): 9.2 + inference time (ms/im): + - value: 119.05 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 44.7 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 38.6 + Weights: https://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco_20200310-d5ad2a5e.pth + + - Name: cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco + In Collection: GCNet + Config: configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py + Metadata: + Training Memory (GB): 10.3 + inference time (ms/im): + - value: 129.87 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 46.2 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 39.7 + Weights: https://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco_20200211-10bf2463.pth + + - Name: cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco + In Collection: GCNet + Config: 
configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py + Metadata: + Training Memory (GB): 10.6 + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 46.4 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 40.1 + Weights: https://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200703_180653-ed035291.pth + + - Name: cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_1x_coco + In Collection: GCNet + Config: configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_1x_coco.py + Metadata: + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 47.5 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 40.9 + Weights: https://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_1x_coco_20210615_211019-abbc39ea.pth + + - Name: cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r16_gcb_c3-c5_1x_coco + In Collection: GCNet + Config: configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r16_gcb_c3-c5_1x_coco.py + Metadata: + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 48.0 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 41.3 + Weights: https://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r16_gcb_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r16_gcb_c3-c5_1x_coco_20210615_215648-44aa598a.pth + + - Name: cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r4_gcb_c3-c5_1x_coco + In Collection: GCNet + Config: 
configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r4_gcb_c3-c5_1x_coco.py + Metadata: + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 47.9 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 41.1 + Weights: https://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r4_gcb_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r4_gcb_c3-c5_1x_coco_20210615_161851-720338ec.pth diff --git a/configs/gfl/README.md b/configs/gfl/README.md new file mode 100644 index 0000000..51a6aa1 --- /dev/null +++ b/configs/gfl/README.md @@ -0,0 +1,32 @@ +# Generalized Focal Loss: Learning Qualified and Distributed Bounding Boxes for Dense Object Detection + +## Introduction + + + +We provide config files to reproduce the object detection results in the paper [Generalized Focal Loss: Learning Qualified and Distributed Bounding Boxes for Dense Object Detection](https://arxiv.org/abs/2006.04388) + +```latex +@article{li2020generalized, + title={Generalized Focal Loss: Learning Qualified and Distributed Bounding Boxes for Dense Object Detection}, + author={Li, Xiang and Wang, Wenhai and Wu, Lijun and Chen, Shuo and Hu, Xiaolin and Li, Jun and Tang, Jinhui and Yang, Jian}, + journal={arXiv preprint arXiv:2006.04388}, + year={2020} +} +``` + +## Results and Models + +| Backbone | Style | Lr schd | Multi-scale Training| Inf time (fps) | box AP | Config | Download | +|:-----------------:|:-------:|:-------:|:-------------------:|:--------------:|:------:|:------:|:--------:| +| R-50 | pytorch | 1x | No | 19.5 | 40.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gfl/gfl_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_r50_fpn_1x_coco/gfl_r50_fpn_1x_coco_20200629_121244-25944287.pth) | 
[log](https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_r50_fpn_1x_coco/gfl_r50_fpn_1x_coco_20200629_121244.log.json) | +| R-50 | pytorch | 2x | Yes | 19.5 | 42.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gfl/gfl_r50_fpn_mstrain_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_r50_fpn_mstrain_2x_coco/gfl_r50_fpn_mstrain_2x_coco_20200629_213802-37bb1edc.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_r50_fpn_mstrain_2x_coco/gfl_r50_fpn_mstrain_2x_coco_20200629_213802.log.json) | +| R-101 | pytorch | 2x | Yes | 14.7 | 44.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gfl/gfl_r101_fpn_mstrain_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_r101_fpn_mstrain_2x_coco/gfl_r101_fpn_mstrain_2x_coco_20200629_200126-dd12f847.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_r101_fpn_mstrain_2x_coco/gfl_r101_fpn_mstrain_2x_coco_20200629_200126.log.json) | +| R-101-dcnv2 | pytorch | 2x | Yes | 12.9 | 47.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gfl/gfl_r101_fpn_dconv_c3-c5_mstrain_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_r101_fpn_dconv_c3-c5_mstrain_2x_coco/gfl_r101_fpn_dconv_c3-c5_mstrain_2x_coco_20200630_102002-134b07df.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_r101_fpn_dconv_c3-c5_mstrain_2x_coco/gfl_r101_fpn_dconv_c3-c5_mstrain_2x_coco_20200630_102002.log.json) | +| X-101-32x4d | pytorch | 2x | Yes | 12.1 | 45.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gfl/gfl_x101_32x4d_fpn_mstrain_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_x101_32x4d_fpn_mstrain_2x_coco/gfl_x101_32x4d_fpn_mstrain_2x_coco_20200630_102002-50c1ffdb.pth) | 
[log](https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_x101_32x4d_fpn_mstrain_2x_coco/gfl_x101_32x4d_fpn_mstrain_2x_coco_20200630_102002.log.json) | +| X-101-32x4d-dcnv2 | pytorch | 2x | Yes | 10.7 | 48.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gfl/gfl_x101_32x4d_fpn_dconv_c4-c5_mstrain_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_x101_32x4d_fpn_dconv_c4-c5_mstrain_2x_coco/gfl_x101_32x4d_fpn_dconv_c4-c5_mstrain_2x_coco_20200630_102002-14a2bf25.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_x101_32x4d_fpn_dconv_c4-c5_mstrain_2x_coco/gfl_x101_32x4d_fpn_dconv_c4-c5_mstrain_2x_coco_20200630_102002.log.json) | + +[1] *1x and 2x mean the model is trained for 90K and 180K iterations, respectively.* \ +[2] *All results are obtained with a single model and without any test time data augmentation such as multi-scale testing or flipping.* \ +[3] *`dcnv2` denotes deformable convolutional networks v2.* \ +[4] *FPS is tested with a single GeForce RTX 2080Ti GPU, using a batch size of 1.* diff --git a/configs/gfl/gfl_r101_fpn_dconv_c3-c5_mstrain_2x_coco.py b/configs/gfl/gfl_r101_fpn_dconv_c3-c5_mstrain_2x_coco.py new file mode 100644 index 0000000..b72c2b6 --- /dev/null +++ b/configs/gfl/gfl_r101_fpn_dconv_c3-c5_mstrain_2x_coco.py @@ -0,0 +1,15 @@ +_base_ = './gfl_r50_fpn_mstrain_2x_coco.py' +model = dict( + backbone=dict( + type='ResNet', + depth=101, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True), + norm_eval=True, + style='pytorch', + init_cfg=dict(type='Pretrained', + checkpoint='torchvision://resnet101'))) diff --git a/configs/gfl/gfl_r101_fpn_mstrain_2x_coco.py b/configs/gfl/gfl_r101_fpn_mstrain_2x_coco.py new file mode 100644 index 0000000..e33b5c0 --- /dev/null +++ 
b/configs/gfl/gfl_r101_fpn_mstrain_2x_coco.py @@ -0,0 +1,13 @@ +_base_ = './gfl_r50_fpn_mstrain_2x_coco.py' +model = dict( + backbone=dict( + type='ResNet', + depth=101, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict(type='Pretrained', + checkpoint='torchvision://resnet101'))) diff --git a/configs/gfl/gfl_r50_fpn_1x_coco.py b/configs/gfl/gfl_r50_fpn_1x_coco.py new file mode 100644 index 0000000..cfd4b02 --- /dev/null +++ b/configs/gfl/gfl_r50_fpn_1x_coco.py @@ -0,0 +1,57 @@ +_base_ = [ + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +model = dict( + type='GFL', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + start_level=1, + add_extra_convs='on_output', + num_outs=5), + bbox_head=dict( + type='GFLHead', + num_classes=80, + in_channels=256, + stacked_convs=4, + feat_channels=256, + anchor_generator=dict( + type='AnchorGenerator', + ratios=[1.0], + octave_base_scale=8, + scales_per_octave=1, + strides=[8, 16, 32, 64, 128]), + loss_cls=dict( + type='QualityFocalLoss', + use_sigmoid=True, + beta=2.0, + loss_weight=1.0), + loss_dfl=dict(type='DistributionFocalLoss', loss_weight=0.25), + reg_max=16, + loss_bbox=dict(type='GIoULoss', loss_weight=2.0)), + # training and testing settings + train_cfg=dict( + assigner=dict(type='ATSSAssigner', topk=9), + allowed_border=-1, + pos_weight=-1, + debug=False), + test_cfg=dict( + nms_pre=1000, + min_bbox_size=0, + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.6), + max_per_img=100)) +# optimizer +optimizer = dict(type='SGD', lr=0.01, 
momentum=0.9, weight_decay=0.0001) diff --git a/configs/gfl/gfl_r50_fpn_mstrain_2x_coco.py b/configs/gfl/gfl_r50_fpn_mstrain_2x_coco.py new file mode 100644 index 0000000..b8be601 --- /dev/null +++ b/configs/gfl/gfl_r50_fpn_mstrain_2x_coco.py @@ -0,0 +1,22 @@ +_base_ = './gfl_r50_fpn_1x_coco.py' +# learning policy +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) +# multi-scale training +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', + img_scale=[(1333, 480), (1333, 800)], + multiscale_mode='range', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +data = dict(train=dict(pipeline=train_pipeline)) diff --git a/configs/gfl/gfl_x101_32x4d_fpn_dconv_c4-c5_mstrain_2x_coco.py b/configs/gfl/gfl_x101_32x4d_fpn_dconv_c4-c5_mstrain_2x_coco.py new file mode 100644 index 0000000..2539807 --- /dev/null +++ b/configs/gfl/gfl_x101_32x4d_fpn_dconv_c4-c5_mstrain_2x_coco.py @@ -0,0 +1,18 @@ +_base_ = './gfl_r50_fpn_mstrain_2x_coco.py' +model = dict( + type='GFL', + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, False, True, True), + norm_eval=True, + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d'))) diff --git a/configs/gfl/gfl_x101_32x4d_fpn_mstrain_2x_coco.py b/configs/gfl/gfl_x101_32x4d_fpn_mstrain_2x_coco.py new file mode 100644 index 0000000..effda19 --- /dev/null +++ 
b/configs/gfl/gfl_x101_32x4d_fpn_mstrain_2x_coco.py @@ -0,0 +1,16 @@ +_base_ = './gfl_r50_fpn_mstrain_2x_coco.py' +model = dict( + type='GFL', + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d'))) diff --git a/configs/gfl/metafile.yml b/configs/gfl/metafile.yml new file mode 100644 index 0000000..8f049c6 --- /dev/null +++ b/configs/gfl/metafile.yml @@ -0,0 +1,134 @@ +Collections: + - Name: Generalized Focal Loss + Metadata: + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - Generalized Focal Loss + - FPN + - ResNet + Paper: + URL: https://arxiv.org/abs/2006.04388 + Title: 'Generalized Focal Loss: Learning Qualified and Distributed Bounding Boxes for Dense Object Detection' + README: configs/gfl/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.2.0/mmdet/models/detectors/gfl.py#L6 + Version: v2.2.0 + +Models: + - Name: gfl_r50_fpn_1x_coco + In Collection: Generalized Focal Loss + Config: configs/gfl/gfl_r50_fpn_1x_coco.py + Metadata: + inference time (ms/im): + - value: 51.28 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 40.2 + Weights: https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_r50_fpn_1x_coco/gfl_r50_fpn_1x_coco_20200629_121244-25944287.pth + + - Name: gfl_r50_fpn_mstrain_2x_coco + In Collection: Generalized Focal Loss + Config: configs/gfl/gfl_r50_fpn_mstrain_2x_coco.py + Metadata: + inference time (ms/im): + - value: 51.28 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 24 + Results: + - Task: Object 
Detection + Dataset: COCO + Metrics: + box AP: 42.9 + Weights: https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_r50_fpn_mstrain_2x_coco/gfl_r50_fpn_mstrain_2x_coco_20200629_213802-37bb1edc.pth + + - Name: gfl_r101_fpn_mstrain_2x_coco + In Collection: Generalized Focal Loss + Config: configs/gfl/gfl_r101_fpn_mstrain_2x_coco.py + Metadata: + inference time (ms/im): + - value: 68.03 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 44.7 + Weights: https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_r101_fpn_mstrain_2x_coco/gfl_r101_fpn_mstrain_2x_coco_20200629_200126-dd12f847.pth + + - Name: gfl_r101_fpn_dconv_c3-c5_mstrain_2x_coco + In Collection: Generalized Focal Loss + Config: configs/gfl/gfl_r101_fpn_dconv_c3-c5_mstrain_2x_coco.py + Metadata: + inference time (ms/im): + - value: 77.52 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 47.1 + Weights: https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_r101_fpn_dconv_c3-c5_mstrain_2x_coco/gfl_r101_fpn_dconv_c3-c5_mstrain_2x_coco_20200630_102002-134b07df.pth + + - Name: gfl_x101_32x4d_fpn_mstrain_2x_coco + In Collection: Generalized Focal Loss + Config: configs/gfl/gfl_x101_32x4d_fpn_mstrain_2x_coco.py + Metadata: + inference time (ms/im): + - value: 82.64 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 45.9 + Weights: https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_x101_32x4d_fpn_mstrain_2x_coco/gfl_x101_32x4d_fpn_mstrain_2x_coco_20200630_102002-50c1ffdb.pth + + - Name: gfl_x101_32x4d_fpn_dconv_c4-c5_mstrain_2x_coco + In Collection: Generalized Focal Loss + Config: 
configs/gfl/gfl_x101_32x4d_fpn_dconv_c4-c5_mstrain_2x_coco.py + Metadata: + inference time (ms/im): + - value: 93.46 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 48.1 + Weights: https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_x101_32x4d_fpn_dconv_c4-c5_mstrain_2x_coco/gfl_x101_32x4d_fpn_dconv_c4-c5_mstrain_2x_coco_20200630_102002-14a2bf25.pth diff --git a/configs/ghm/README.md b/configs/ghm/README.md new file mode 100644 index 0000000..2a51ed8 --- /dev/null +++ b/configs/ghm/README.md @@ -0,0 +1,23 @@ +# Gradient Harmonized Single-stage Detector + +## Introduction + + + +``` +@inproceedings{li2019gradient, + title={Gradient Harmonized Single-stage Detector}, + author={Li, Buyu and Liu, Yu and Wang, Xiaogang}, + booktitle={AAAI Conference on Artificial Intelligence}, + year={2019} +} +``` + +## Results and Models + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :------: | :--------: | +| R-50-FPN | pytorch | 1x | 4.0 | 3.3 | 37.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ghm/retinanet_ghm_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/ghm/retinanet_ghm_r50_fpn_1x_coco/retinanet_ghm_r50_fpn_1x_coco_20200130-a437fda3.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/ghm/retinanet_ghm_r50_fpn_1x_coco/retinanet_ghm_r50_fpn_1x_coco_20200130_004213.log.json) | +| R-101-FPN | pytorch | 1x | 6.0 | 4.4 | 39.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ghm/retinanet_ghm_r101_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/ghm/retinanet_ghm_r101_fpn_1x_coco/retinanet_ghm_r101_fpn_1x_coco_20200130-c148ee8f.pth) | 
[log](https://download.openmmlab.com/mmdetection/v2.0/ghm/retinanet_ghm_r101_fpn_1x_coco/retinanet_ghm_r101_fpn_1x_coco_20200130_145259.log.json) | +| X-101-32x4d-FPN | pytorch | 1x | 7.2 | 5.1 | 40.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ghm/retinanet_ghm_x101_32x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/ghm/retinanet_ghm_x101_32x4d_fpn_1x_coco/retinanet_ghm_x101_32x4d_fpn_1x_coco_20200131-e4333bd0.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/ghm/retinanet_ghm_x101_32x4d_fpn_1x_coco/retinanet_ghm_x101_32x4d_fpn_1x_coco_20200131_113653.log.json) | +| X-101-64x4d-FPN | pytorch | 1x | 10.3 | 5.2 | 41.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ghm/retinanet_ghm_x101_64x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/ghm/retinanet_ghm_x101_64x4d_fpn_1x_coco/retinanet_ghm_x101_64x4d_fpn_1x_coco_20200131-dd381cef.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/ghm/retinanet_ghm_x101_64x4d_fpn_1x_coco/retinanet_ghm_x101_64x4d_fpn_1x_coco_20200131_113723.log.json) | diff --git a/configs/ghm/metafile.yml b/configs/ghm/metafile.yml new file mode 100644 index 0000000..b4f488c --- /dev/null +++ b/configs/ghm/metafile.yml @@ -0,0 +1,101 @@ +Collections: + - Name: GHM + Metadata: + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - GHM-C + - GHM-R + - FPN + - ResNet + Paper: + URL: https://arxiv.org/abs/1811.05181 + Title: 'Gradient Harmonized Single-stage Detector' + README: configs/ghm/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/losses/ghm_loss.py#L21 + Version: v2.0.0 + +Models: + - Name: retinanet_ghm_r50_fpn_1x_coco + In Collection: GHM + Config: configs/ghm/retinanet_ghm_r50_fpn_1x_coco.py + Metadata: + Training Memory (GB): 4.0 + inference time (ms/im): + - value: 303.03 + hardware: 
V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 37.0 + Weights: https://download.openmmlab.com/mmdetection/v2.0/ghm/retinanet_ghm_r50_fpn_1x_coco/retinanet_ghm_r50_fpn_1x_coco_20200130-a437fda3.pth + + - Name: retinanet_ghm_r101_fpn_1x_coco + In Collection: GHM + Config: configs/ghm/retinanet_ghm_r101_fpn_1x_coco.py + Metadata: + Training Memory (GB): 6.0 + inference time (ms/im): + - value: 227.27 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 39.1 + Weights: https://download.openmmlab.com/mmdetection/v2.0/ghm/retinanet_ghm_r101_fpn_1x_coco/retinanet_ghm_r101_fpn_1x_coco_20200130-c148ee8f.pth + + - Name: retinanet_ghm_x101_32x4d_fpn_1x_coco + In Collection: GHM + Config: configs/ghm/retinanet_ghm_x101_32x4d_fpn_1x_coco.py + Metadata: + Training Memory (GB): 7.2 + inference time (ms/im): + - value: 196.08 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 40.7 + Weights: https://download.openmmlab.com/mmdetection/v2.0/ghm/retinanet_ghm_x101_32x4d_fpn_1x_coco/retinanet_ghm_x101_32x4d_fpn_1x_coco_20200131-e4333bd0.pth + + - Name: retinanet_ghm_x101_64x4d_fpn_1x_coco + In Collection: GHM + Config: configs/ghm/retinanet_ghm_x101_64x4d_fpn_1x_coco.py + Metadata: + Training Memory (GB): 10.3 + inference time (ms/im): + - value: 192.31 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.4 + Weights: https://download.openmmlab.com/mmdetection/v2.0/ghm/retinanet_ghm_x101_64x4d_fpn_1x_coco/retinanet_ghm_x101_64x4d_fpn_1x_coco_20200131-dd381cef.pth diff --git 
a/configs/ghm/retinanet_ghm_r101_fpn_1x_coco.py b/configs/ghm/retinanet_ghm_r101_fpn_1x_coco.py new file mode 100644 index 0000000..aaf6fc2 --- /dev/null +++ b/configs/ghm/retinanet_ghm_r101_fpn_1x_coco.py @@ -0,0 +1,6 @@ +_base_ = './retinanet_ghm_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + depth=101, + init_cfg=dict(type='Pretrained', + checkpoint='torchvision://resnet101'))) diff --git a/configs/ghm/retinanet_ghm_r50_fpn_1x_coco.py b/configs/ghm/retinanet_ghm_r50_fpn_1x_coco.py new file mode 100644 index 0000000..61b9751 --- /dev/null +++ b/configs/ghm/retinanet_ghm_r50_fpn_1x_coco.py @@ -0,0 +1,19 @@ +_base_ = '../retinanet/retinanet_r50_fpn_1x_coco.py' +model = dict( + bbox_head=dict( + loss_cls=dict( + _delete_=True, + type='GHMC', + bins=30, + momentum=0.75, + use_sigmoid=True, + loss_weight=1.0), + loss_bbox=dict( + _delete_=True, + type='GHMR', + mu=0.02, + bins=10, + momentum=0.7, + loss_weight=10.0))) +optimizer_config = dict( + _delete_=True, grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/ghm/retinanet_ghm_x101_32x4d_fpn_1x_coco.py b/configs/ghm/retinanet_ghm_x101_32x4d_fpn_1x_coco.py new file mode 100644 index 0000000..cd2e4cc --- /dev/null +++ b/configs/ghm/retinanet_ghm_x101_32x4d_fpn_1x_coco.py @@ -0,0 +1,14 @@ +_base_ = './retinanet_ghm_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d'))) diff --git a/configs/ghm/retinanet_ghm_x101_64x4d_fpn_1x_coco.py b/configs/ghm/retinanet_ghm_x101_64x4d_fpn_1x_coco.py new file mode 100644 index 0000000..b6107d8 --- /dev/null +++ b/configs/ghm/retinanet_ghm_x101_64x4d_fpn_1x_coco.py @@ -0,0 +1,14 @@ +_base_ = './retinanet_ghm_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + type='ResNeXt', + depth=101, + 
groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d'))) diff --git a/configs/gn+ws/README.md b/configs/gn+ws/README.md new file mode 100644 index 0000000..8001b0d --- /dev/null +++ b/configs/gn+ws/README.md @@ -0,0 +1,44 @@ +# Weight Standardization + +## Introduction + + + +``` +@article{weightstandardization, + author = {Siyuan Qiao and Huiyu Wang and Chenxi Liu and Wei Shen and Alan Yuille}, + title = {Weight Standardization}, + journal = {arXiv preprint arXiv:1903.10520}, + year = {2019}, +} +``` + +## Results and Models + +Faster R-CNN + +| Backbone | Style | Normalization | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +|:---------:|:-------:|:-------------:|:-------:|:--------:|:--------------:|:------:|:-------:|:------:|:--------:| +| R-50-FPN | pytorch | GN+WS | 1x | 5.9 | 11.7 | 39.7 | - | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn%2Bws/faster_rcnn_r50_fpn_gn_ws-all_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/faster_rcnn_r50_fpn_gn_ws-all_1x_coco/faster_rcnn_r50_fpn_gn_ws-all_1x_coco_20200130-613d9fe2.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/faster_rcnn_r50_fpn_gn_ws-all_1x_coco/faster_rcnn_r50_fpn_gn_ws-all_1x_coco_20200130_210936.log.json) | +| R-101-FPN | pytorch | GN+WS | 1x | 8.9 | 9.0 | 41.7 | - | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn%2Bws/faster_rcnn_r101_fpn_gn_ws-all_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/faster_rcnn_r101_fpn_gn_ws-all_1x_coco/faster_rcnn_r101_fpn_gn_ws-all_1x_coco_20200205-a93b0d75.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/faster_rcnn_r101_fpn_gn_ws-all_1x_coco/faster_rcnn_r101_fpn_gn_ws-all_1x_coco_20200205_232146.log.json) 
| +| X-50-32x4d-FPN | pytorch | GN+WS | 1x | 7.0 | 10.3 | 40.7 | - | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn%2Bws/faster_rcnn_x50_32x4d_fpn_gn_ws-all_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/faster_rcnn_x50_32x4d_fpn_gn_ws-all_1x_coco/faster_rcnn_x50_32x4d_fpn_gn_ws-all_1x_coco_20200203-839c5d9d.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/faster_rcnn_x50_32x4d_fpn_gn_ws-all_1x_coco/faster_rcnn_x50_32x4d_fpn_gn_ws-all_1x_coco_20200203_220113.log.json) | +| X-101-32x4d-FPN | pytorch | GN+WS | 1x | 10.8 | 7.6 | 42.1 | - | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn%2Bws/faster_rcnn_x101_32x4d_fpn_gn_ws-all_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/faster_rcnn_x101_32x4d_fpn_gn_ws-all_1x_coco/faster_rcnn_x101_32x4d_fpn_gn_ws-all_1x_coco_20200212-27da1bc2.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/faster_rcnn_x101_32x4d_fpn_gn_ws-all_1x_coco/faster_rcnn_x101_32x4d_fpn_gn_ws-all_1x_coco_20200212_195302.log.json) | + +Mask R-CNN + +| Backbone | Style | Normalization | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +|:---------:|:-------:|:-------------:|:---------:|:--------:|:--------------:|:------:|:-------:|:------:|:--------:| +| R-50-FPN | pytorch | GN+WS | 2x | 7.3 | 10.5 | 40.6 | 36.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn%2Bws/mask_rcnn_r50_fpn_gn_ws-all_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_r50_fpn_gn_ws-all_2x_coco/mask_rcnn_r50_fpn_gn_ws-all_2x_coco_20200226-16acb762.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_r50_fpn_gn_ws-all_2x_coco/mask_rcnn_r50_fpn_gn_ws-all_2x_coco_20200226_062128.log.json) | +| R-101-FPN | pytorch | GN+WS | 2x | 10.3 | 8.6 | 42.0 | 37.7 | 
[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn%2Bws/mask_rcnn_r101_fpn_gn_ws-all_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_r101_fpn_gn_ws-all_2x_coco/mask_rcnn_r101_fpn_gn_ws-all_2x_coco_20200212-ea357cd9.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_r101_fpn_gn_ws-all_2x_coco/mask_rcnn_r101_fpn_gn_ws-all_2x_coco_20200212_213627.log.json) | +| X-50-32x4d-FPN | pytorch | GN+WS | 2x | 8.4 | 9.3 | 41.1 | 37.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn%2Bws/mask_rcnn_x50_32x4d_fpn_gn_ws-all_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_x50_32x4d_fpn_gn_ws-all_2x_coco/mask_rcnn_x50_32x4d_fpn_gn_ws-all_2x_coco_20200216-649fdb6f.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_x50_32x4d_fpn_gn_ws-all_2x_coco/mask_rcnn_x50_32x4d_fpn_gn_ws-all_2x_coco_20200216_201500.log.json) | +| X-101-32x4d-FPN | pytorch | GN+WS | 2x | 12.2 | 7.1 | 42.1 | 37.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn%2Bws/mask_rcnn_x101_32x4d_fpn_gn_ws-all_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_x101_32x4d_fpn_gn_ws-all_2x_coco/mask_rcnn_x101_32x4d_fpn_gn_ws-all_2x_coco_20200319-33fb95b5.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_x101_32x4d_fpn_gn_ws-all_2x_coco/mask_rcnn_x101_32x4d_fpn_gn_ws-all_2x_coco_20200319_104101.log.json) | +| R-50-FPN | pytorch | GN+WS | 20-23-24e | 7.3 | - | 41.1 | 37.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn%2Bws/mask_rcnn_r50_fpn_gn_ws-all_20_23_24e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_r50_fpn_gn_ws-all_20_23_24e_coco/mask_rcnn_r50_fpn_gn_ws-all_20_23_24e_coco_20200213-487d1283.pth) | 
[log](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_r50_fpn_gn_ws-all_20_23_24e_coco/mask_rcnn_r50_fpn_gn_ws-all_20_23_24e_coco_20200213_035123.log.json) | +| R-101-FPN | pytorch | GN+WS | 20-23-24e | 10.3 | - | 43.1 | 38.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn%2Bws/mask_rcnn_r101_fpn_gn_ws-all_20_23_24e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_r101_fpn_gn_ws-all_20_23_24e_coco/mask_rcnn_r101_fpn_gn_ws-all_20_23_24e_coco_20200213-57b5a50f.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_r101_fpn_gn_ws-all_20_23_24e_coco/mask_rcnn_r101_fpn_gn_ws-all_20_23_24e_coco_20200213_130142.log.json) | +| X-50-32x4d-FPN | pytorch | GN+WS | 20-23-24e | 8.4 | - | 42.1 | 38.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn%2Bws/mask_rcnn_x50_32x4d_fpn_gn_ws-all_20_23_24e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_x50_32x4d_fpn_gn_ws-all_20_23_24e_coco/mask_rcnn_x50_32x4d_fpn_gn_ws-all_20_23_24e_coco_20200226-969bcb2c.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_x50_32x4d_fpn_gn_ws-all_20_23_24e_coco/mask_rcnn_x50_32x4d_fpn_gn_ws-all_20_23_24e_coco_20200226_093732.log.json) | +| X-101-32x4d-FPN | pytorch | GN+WS | 20-23-24e | 12.2 | - | 42.7 | 38.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn%2Bws/mask_rcnn_x101_32x4d_fpn_gn_ws-all_20_23_24e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_x101_32x4d_fpn_gn_ws-all_20_23_24e_coco/mask_rcnn_x101_32x4d_fpn_gn_ws-all_20_23_24e_coco_20200316-e6cd35ef.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_x101_32x4d_fpn_gn_ws-all_20_23_24e_coco/mask_rcnn_x101_32x4d_fpn_gn_ws-all_20_23_24e_coco_20200316_013741.log.json) | + +Note: + +- GN+WS requires about 5% more memory than GN, and it is only 5% slower than GN. 
+- In the paper, a 20-23-24e lr schedule is used instead of 2x. +- The X-50-GN and X-101-GN pretrained models are also shared by the authors. diff --git a/configs/gn+ws/faster_rcnn_r101_fpn_gn_ws-all_1x_coco.py b/configs/gn+ws/faster_rcnn_r101_fpn_gn_ws-all_1x_coco.py new file mode 100644 index 0000000..cd2cb2b --- /dev/null +++ b/configs/gn+ws/faster_rcnn_r101_fpn_gn_ws-all_1x_coco.py @@ -0,0 +1,6 @@ +_base_ = './faster_rcnn_r50_fpn_gn_ws-all_1x_coco.py' +model = dict( + backbone=dict( + depth=101, + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://jhu/resnet101_gn_ws'))) diff --git a/configs/gn+ws/faster_rcnn_r50_fpn_gn_ws-all_1x_coco.py b/configs/gn+ws/faster_rcnn_r50_fpn_gn_ws-all_1x_coco.py new file mode 100644 index 0000000..1b326b8 --- /dev/null +++ b/configs/gn+ws/faster_rcnn_r50_fpn_gn_ws-all_1x_coco.py @@ -0,0 +1,16 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' +conv_cfg = dict(type='ConvWS') +norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) +model = dict( + backbone=dict( + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://jhu/resnet50_gn_ws')), + neck=dict(conv_cfg=conv_cfg, norm_cfg=norm_cfg), + roi_head=dict( + bbox_head=dict( + type='Shared4Conv1FCBBoxHead', + conv_out_channels=256, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg))) diff --git a/configs/gn+ws/faster_rcnn_x101_32x4d_fpn_gn_ws-all_1x_coco.py b/configs/gn+ws/faster_rcnn_x101_32x4d_fpn_gn_ws-all_1x_coco.py new file mode 100644 index 0000000..f64ae89 --- /dev/null +++ b/configs/gn+ws/faster_rcnn_x101_32x4d_fpn_gn_ws-all_1x_coco.py @@ -0,0 +1,18 @@ +_base_ = './faster_rcnn_r50_fpn_gn_ws-all_1x_coco.py' +conv_cfg = dict(type='ConvWS') +norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) +model = dict( + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + style='pytorch', + conv_cfg=conv_cfg, + 
norm_cfg=norm_cfg, + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://jhu/resnext101_32x4d_gn_ws'))) diff --git a/configs/gn+ws/faster_rcnn_x50_32x4d_fpn_gn_ws-all_1x_coco.py b/configs/gn+ws/faster_rcnn_x50_32x4d_fpn_gn_ws-all_1x_coco.py new file mode 100644 index 0000000..246851b --- /dev/null +++ b/configs/gn+ws/faster_rcnn_x50_32x4d_fpn_gn_ws-all_1x_coco.py @@ -0,0 +1,18 @@ +_base_ = './faster_rcnn_r50_fpn_gn_ws-all_1x_coco.py' +conv_cfg = dict(type='ConvWS') +norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) +model = dict( + backbone=dict( + type='ResNeXt', + depth=50, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + style='pytorch', + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://jhu/resnext50_32x4d_gn_ws'))) diff --git a/configs/gn+ws/mask_rcnn_r101_fpn_gn_ws-all_20_23_24e_coco.py b/configs/gn+ws/mask_rcnn_r101_fpn_gn_ws-all_20_23_24e_coco.py new file mode 100644 index 0000000..a790d93 --- /dev/null +++ b/configs/gn+ws/mask_rcnn_r101_fpn_gn_ws-all_20_23_24e_coco.py @@ -0,0 +1,4 @@ +_base_ = './mask_rcnn_r101_fpn_gn_ws-all_2x_coco.py' +# learning policy +lr_config = dict(step=[20, 23]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/gn+ws/mask_rcnn_r101_fpn_gn_ws-all_2x_coco.py b/configs/gn+ws/mask_rcnn_r101_fpn_gn_ws-all_2x_coco.py new file mode 100644 index 0000000..a9fa6a2 --- /dev/null +++ b/configs/gn+ws/mask_rcnn_r101_fpn_gn_ws-all_2x_coco.py @@ -0,0 +1,6 @@ +_base_ = './mask_rcnn_r50_fpn_gn_ws-all_2x_coco.py' +model = dict( + backbone=dict( + depth=101, + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://jhu/resnet101_gn_ws'))) diff --git a/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws-all_20_23_24e_coco.py b/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws-all_20_23_24e_coco.py new file mode 100644 index 0000000..5516808 --- /dev/null +++ b/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws-all_20_23_24e_coco.py @@ 
-0,0 +1,4 @@ +_base_ = './mask_rcnn_r50_fpn_gn_ws-all_2x_coco.py' +# learning policy +lr_config = dict(step=[20, 23]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws-all_2x_coco.py b/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws-all_2x_coco.py new file mode 100644 index 0000000..63be60f --- /dev/null +++ b/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws-all_2x_coco.py @@ -0,0 +1,20 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' +conv_cfg = dict(type='ConvWS') +norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) +model = dict( + backbone=dict( + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://jhu/resnet50_gn_ws')), + neck=dict(conv_cfg=conv_cfg, norm_cfg=norm_cfg), + roi_head=dict( + bbox_head=dict( + type='Shared4Conv1FCBBoxHead', + conv_out_channels=256, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg), + mask_head=dict(conv_cfg=conv_cfg, norm_cfg=norm_cfg))) +# learning policy +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/gn+ws/mask_rcnn_x101_32x4d_fpn_gn_ws-all_20_23_24e_coco.py b/configs/gn+ws/mask_rcnn_x101_32x4d_fpn_gn_ws-all_20_23_24e_coco.py new file mode 100644 index 0000000..cfa14c9 --- /dev/null +++ b/configs/gn+ws/mask_rcnn_x101_32x4d_fpn_gn_ws-all_20_23_24e_coco.py @@ -0,0 +1,4 @@ +_base_ = './mask_rcnn_x101_32x4d_fpn_gn_ws-all_2x_coco.py' +# learning policy +lr_config = dict(step=[20, 23]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/gn+ws/mask_rcnn_x101_32x4d_fpn_gn_ws-all_2x_coco.py b/configs/gn+ws/mask_rcnn_x101_32x4d_fpn_gn_ws-all_2x_coco.py new file mode 100644 index 0000000..6498b03 --- /dev/null +++ b/configs/gn+ws/mask_rcnn_x101_32x4d_fpn_gn_ws-all_2x_coco.py @@ -0,0 +1,19 @@ +_base_ = './mask_rcnn_r50_fpn_gn_ws-all_2x_coco.py' +# model settings +conv_cfg = dict(type='ConvWS') +norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) +model 
= dict( + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + style='pytorch', + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://jhu/resnext101_32x4d_gn_ws'))) diff --git a/configs/gn+ws/mask_rcnn_x50_32x4d_fpn_gn_ws-all_20_23_24e_coco.py b/configs/gn+ws/mask_rcnn_x50_32x4d_fpn_gn_ws-all_20_23_24e_coco.py new file mode 100644 index 0000000..79ce0ad --- /dev/null +++ b/configs/gn+ws/mask_rcnn_x50_32x4d_fpn_gn_ws-all_20_23_24e_coco.py @@ -0,0 +1,4 @@ +_base_ = './mask_rcnn_x50_32x4d_fpn_gn_ws-all_2x_coco.py' +# learning policy +lr_config = dict(step=[20, 23]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/gn+ws/mask_rcnn_x50_32x4d_fpn_gn_ws-all_2x_coco.py b/configs/gn+ws/mask_rcnn_x50_32x4d_fpn_gn_ws-all_2x_coco.py new file mode 100644 index 0000000..7fac317 --- /dev/null +++ b/configs/gn+ws/mask_rcnn_x50_32x4d_fpn_gn_ws-all_2x_coco.py @@ -0,0 +1,19 @@ +_base_ = './mask_rcnn_r50_fpn_gn_ws-all_2x_coco.py' +# model settings +conv_cfg = dict(type='ConvWS') +norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) +model = dict( + backbone=dict( + type='ResNeXt', + depth=50, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + style='pytorch', + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://jhu/resnext50_32x4d_gn_ws'))) diff --git a/configs/gn+ws/metafile.yml b/configs/gn+ws/metafile.yml new file mode 100644 index 0000000..bc89359 --- /dev/null +++ b/configs/gn+ws/metafile.yml @@ -0,0 +1,263 @@ +Collections: + - Name: Weight Standardization + Metadata: + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - Group Normalization + - Weight Standardization + Paper: + URL: https://arxiv.org/abs/1903.10520 + Title: 
'Weight Standardization' + README: configs/gn+ws/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/configs/gn%2Bws/mask_rcnn_r50_fpn_gn_ws-all_2x_coco.py + Version: v2.0.0 + +Models: + - Name: faster_rcnn_r50_fpn_gn_ws-all_1x_coco + In Collection: Weight Standardization + Config: configs/gn%2Bws/faster_rcnn_r50_fpn_gn_ws-all_1x_coco.py + Metadata: + Training Memory (GB): 5.9 + inference time (ms/im): + - value: 85.47 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 39.7 + Weights: https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/faster_rcnn_r50_fpn_gn_ws-all_1x_coco/faster_rcnn_r50_fpn_gn_ws-all_1x_coco_20200130-613d9fe2.pth + + - Name: faster_rcnn_r101_fpn_gn_ws-all_1x_coco + In Collection: Weight Standardization + Config: configs/gn%2Bws/faster_rcnn_r101_fpn_gn_ws-all_1x_coco.py + Metadata: + Training Memory (GB): 8.9 + inference time (ms/im): + - value: 111.11 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.7 + Weights: https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/faster_rcnn_r101_fpn_gn_ws-all_1x_coco/faster_rcnn_r101_fpn_gn_ws-all_1x_coco_20200205-a93b0d75.pth + + - Name: faster_rcnn_x50_32x4d_fpn_gn_ws-all_1x_coco + In Collection: Weight Standardization + Config: configs/gn%2Bws/faster_rcnn_x50_32x4d_fpn_gn_ws-all_1x_coco.py + Metadata: + Training Memory (GB): 7.0 + inference time (ms/im): + - value: 97.09 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 40.7 + Weights: 
https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/faster_rcnn_x50_32x4d_fpn_gn_ws-all_1x_coco/faster_rcnn_x50_32x4d_fpn_gn_ws-all_1x_coco_20200203-839c5d9d.pth + + - Name: faster_rcnn_x101_32x4d_fpn_gn_ws-all_1x_coco + In Collection: Weight Standardization + Config: configs/gn%2Bws/faster_rcnn_x101_32x4d_fpn_gn_ws-all_1x_coco.py + Metadata: + Training Memory (GB): 10.8 + inference time (ms/im): + - value: 131.58 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 42.1 + Weights: https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/faster_rcnn_x101_32x4d_fpn_gn_ws-all_1x_coco/faster_rcnn_x101_32x4d_fpn_gn_ws-all_1x_coco_20200212-27da1bc2.pth + + - Name: mask_rcnn_r50_fpn_gn_ws-all_2x_coco + In Collection: Weight Standardization + Config: configs/gn%2Bws/mask_rcnn_r50_fpn_gn_ws-all_2x_coco.py + Metadata: + Training Memory (GB): 7.3 + inference time (ms/im): + - value: 95.24 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 40.6 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 36.6 + Weights: https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_r50_fpn_gn_ws-all_2x_coco/mask_rcnn_r50_fpn_gn_ws-all_2x_coco_20200226-16acb762.pth + + - Name: mask_rcnn_r101_fpn_gn_ws-all_2x_coco + In Collection: Weight Standardization + Config: configs/gn%2Bws/mask_rcnn_r101_fpn_gn_ws-all_2x_coco.py + Metadata: + Training Memory (GB): 10.3 + inference time (ms/im): + - value: 116.28 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 42.0 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 37.7 + Weights: 
https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_r101_fpn_gn_ws-all_2x_coco/mask_rcnn_r101_fpn_gn_ws-all_2x_coco_20200212-ea357cd9.pth + + - Name: mask_rcnn_x50_32x4d_fpn_gn_ws-all_2x_coco + In Collection: Weight Standardization + Config: configs/gn%2Bws/mask_rcnn_x50_32x4d_fpn_gn_ws-all_2x_coco.py + Metadata: + Training Memory (GB): 8.4 + inference time (ms/im): + - value: 107.53 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.1 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 37.0 + Weights: https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_x50_32x4d_fpn_gn_ws-all_2x_coco/mask_rcnn_x50_32x4d_fpn_gn_ws-all_2x_coco_20200216-649fdb6f.pth + + - Name: mask_rcnn_x101_32x4d_fpn_gn_ws-all_2x_coco + In Collection: Weight Standardization + Config: configs/gn%2Bws/mask_rcnn_x101_32x4d_fpn_gn_ws-all_2x_coco.py + Metadata: + Training Memory (GB): 12.2 + inference time (ms/im): + - value: 140.85 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 42.1 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 37.9 + Weights: https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_x101_32x4d_fpn_gn_ws-all_2x_coco/mask_rcnn_x101_32x4d_fpn_gn_ws-all_2x_coco_20200319-33fb95b5.pth + + - Name: mask_rcnn_r50_fpn_gn_ws-all_20_23_24e_coco + In Collection: Weight Standardization + Config: configs/gn%2Bws/mask_rcnn_r50_fpn_gn_ws-all_20_23_24e_coco.py + Metadata: + Training Memory (GB): 7.3 + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.1 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 37.1 + Weights: 
https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_r50_fpn_gn_ws-all_20_23_24e_coco/mask_rcnn_r50_fpn_gn_ws-all_20_23_24e_coco_20200213-487d1283.pth + + - Name: mask_rcnn_r101_fpn_gn_ws-all_20_23_24e_coco + In Collection: Weight Standardization + Config: configs/gn%2Bws/mask_rcnn_r101_fpn_gn_ws-all_20_23_24e_coco.py + Metadata: + Training Memory (GB): 10.3 + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 43.1 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 38.6 + Weights: https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_r101_fpn_gn_ws-all_20_23_24e_coco/mask_rcnn_r101_fpn_gn_ws-all_20_23_24e_coco_20200213-57b5a50f.pth + + - Name: mask_rcnn_x50_32x4d_fpn_gn_ws-all_20_23_24e_coco + In Collection: Weight Standardization + Config: configs/gn%2Bws/mask_rcnn_x50_32x4d_fpn_gn_ws-all_20_23_24e_coco.py + Metadata: + Training Memory (GB): 8.4 + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 42.1 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 38.0 + Weights: https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_x50_32x4d_fpn_gn_ws-all_20_23_24e_coco/mask_rcnn_x50_32x4d_fpn_gn_ws-all_20_23_24e_coco_20200226-969bcb2c.pth + + - Name: mask_rcnn_x101_32x4d_fpn_gn_ws-all_20_23_24e_coco + In Collection: Weight Standardization + Config: configs/gn%2Bws/mask_rcnn_x101_32x4d_fpn_gn_ws-all_20_23_24e_coco.py + Metadata: + Training Memory (GB): 12.2 + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 42.7 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 38.5 + Weights: https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_x101_32x4d_fpn_gn_ws-all_20_23_24e_coco/mask_rcnn_x101_32x4d_fpn_gn_ws-all_20_23_24e_coco_20200316-e6cd35ef.pth diff --git a/configs/gn/README.md b/configs/gn/README.md new file mode 100644 index 0000000..5c6e41f --- /dev/null +++ 
b/configs/gn/README.md @@ -0,0 +1,31 @@ +# Group Normalization + +## Introduction + + + +```latex +@inproceedings{wu2018group, + title={Group Normalization}, + author={Wu, Yuxin and He, Kaiming}, + booktitle={Proceedings of the European Conference on Computer Vision (ECCV)}, + year={2018} +} +``` + +## Results and Models + +| Backbone | model | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +|:-------------:|:----------:|:-------:|:--------:|:--------------:|:------:|:-------:|:------:|:--------:| +| R-50-FPN (d) | Mask R-CNN | 2x | 7.1 | 11.0 | 40.2 | 36.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn/mask_rcnn_r50_fpn_gn-all_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r50_fpn_gn-all_2x_coco/mask_rcnn_r50_fpn_gn-all_2x_coco_20200206-8eee02a6.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r50_fpn_gn-all_2x_coco/mask_rcnn_r50_fpn_gn-all_2x_coco_20200206_050355.log.json) | +| R-50-FPN (d) | Mask R-CNN | 3x | 7.1 | - | 40.5 | 36.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn/mask_rcnn_r50_fpn_gn-all_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r50_fpn_gn-all_3x_coco/mask_rcnn_r50_fpn_gn-all_3x_coco_20200214-8b23b1e5.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r50_fpn_gn-all_3x_coco/mask_rcnn_r50_fpn_gn-all_3x_coco_20200214_063512.log.json) | +| R-101-FPN (d) | Mask R-CNN | 2x | 9.9 | 9.0 | 41.9 | 37.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn/mask_rcnn_r101_fpn_gn-all_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r101_fpn_gn-all_2x_coco/mask_rcnn_r101_fpn_gn-all_2x_coco_20200205-d96b1b50.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r101_fpn_gn-all_2x_coco/mask_rcnn_r101_fpn_gn-all_2x_coco_20200205_234402.log.json) | +| R-101-FPN (d) | Mask R-CNN | 
3x | 9.9 | - | 42.1 | 38.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn/mask_rcnn_r101_fpn_gn-all_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r101_fpn_gn-all_3x_coco/mask_rcnn_r101_fpn_gn-all_3x_coco_20200513_181609-0df864f4.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r101_fpn_gn-all_3x_coco/mask_rcnn_r101_fpn_gn-all_3x_coco_20200513_181609.log.json) | +| R-50-FPN (c) | Mask R-CNN | 2x | 7.1 | 10.9 | 40.0 | 36.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn/mask_rcnn_r50_fpn_gn-all_contrib_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r50_fpn_gn-all_contrib_2x_coco/mask_rcnn_r50_fpn_gn-all_contrib_2x_coco_20200207-20d3e849.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r50_fpn_gn-all_contrib_2x_coco/mask_rcnn_r50_fpn_gn-all_contrib_2x_coco_20200207_225832.log.json) | +| R-50-FPN (c) | Mask R-CNN | 3x | 7.1 | - | 40.1 | 36.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gn/mask_rcnn_r50_fpn_gn-all_contrib_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r50_fpn_gn-all_contrib_3x_coco/mask_rcnn_r50_fpn_gn-all_contrib_3x_coco_20200225-542aefbc.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r50_fpn_gn-all_contrib_3x_coco/mask_rcnn_r50_fpn_gn-all_contrib_3x_coco_20200225_235135.log.json) | + +**Notes:** + +- (d) means pretrained model converted from Detectron, and (c) means the contributed model pretrained by [@thangvubk](https://github.com/thangvubk). +- The `3x` schedule decays the learning rate at epochs 28 and 34 and stops training at epoch 36. 
+- **Memory and train/inference time figures are outdated.** diff --git a/configs/gn/mask_rcnn_r101_fpn_gn-all_2x_coco.py b/configs/gn/mask_rcnn_r101_fpn_gn-all_2x_coco.py new file mode 100644 index 0000000..a505ba0 --- /dev/null +++ b/configs/gn/mask_rcnn_r101_fpn_gn-all_2x_coco.py @@ -0,0 +1,7 @@ +_base_ = './mask_rcnn_r50_fpn_gn-all_2x_coco.py' +model = dict( + backbone=dict( + depth=101, + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://detectron/resnet101_gn'))) diff --git a/configs/gn/mask_rcnn_r101_fpn_gn-all_3x_coco.py b/configs/gn/mask_rcnn_r101_fpn_gn-all_3x_coco.py new file mode 100644 index 0000000..12a9d17 --- /dev/null +++ b/configs/gn/mask_rcnn_r101_fpn_gn-all_3x_coco.py @@ -0,0 +1,5 @@ +_base_ = './mask_rcnn_r101_fpn_gn-all_2x_coco.py' + +# learning policy +lr_config = dict(step=[28, 34]) +runner = dict(type='EpochBasedRunner', max_epochs=36) diff --git a/configs/gn/mask_rcnn_r50_fpn_gn-all_2x_coco.py b/configs/gn/mask_rcnn_r50_fpn_gn-all_2x_coco.py new file mode 100644 index 0000000..1de7d98 --- /dev/null +++ b/configs/gn/mask_rcnn_r50_fpn_gn-all_2x_coco.py @@ -0,0 +1,49 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' +norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) +model = dict( + backbone=dict( + norm_cfg=norm_cfg, + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://detectron/resnet50_gn')), + neck=dict(norm_cfg=norm_cfg), + roi_head=dict( + bbox_head=dict( + type='Shared4Conv1FCBBoxHead', + conv_out_channels=256, + norm_cfg=norm_cfg), + mask_head=dict(norm_cfg=norm_cfg))) +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + 
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +# learning policy +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/gn/mask_rcnn_r50_fpn_gn-all_3x_coco.py b/configs/gn/mask_rcnn_r50_fpn_gn-all_3x_coco.py new file mode 100644 index 0000000..f917719 --- /dev/null +++ b/configs/gn/mask_rcnn_r50_fpn_gn-all_3x_coco.py @@ -0,0 +1,5 @@ +_base_ = './mask_rcnn_r50_fpn_gn-all_2x_coco.py' + +# learning policy +lr_config = dict(step=[28, 34]) +runner = dict(type='EpochBasedRunner', max_epochs=36) diff --git a/configs/gn/mask_rcnn_r50_fpn_gn-all_contrib_2x_coco.py b/configs/gn/mask_rcnn_r50_fpn_gn-all_contrib_2x_coco.py new file mode 100644 index 0000000..2f430fd --- /dev/null +++ b/configs/gn/mask_rcnn_r50_fpn_gn-all_contrib_2x_coco.py @@ -0,0 +1,17 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' +norm_cfg = dict(type='GN', num_groups=32, requires_grad=True) +model = dict( + backbone=dict( + norm_cfg=norm_cfg, + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://contrib/resnet50_gn')), + neck=dict(norm_cfg=norm_cfg), + roi_head=dict( + bbox_head=dict( + type='Shared4Conv1FCBBoxHead', + conv_out_channels=256, + norm_cfg=norm_cfg), + mask_head=dict(norm_cfg=norm_cfg))) +# learning policy +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/gn/mask_rcnn_r50_fpn_gn-all_contrib_3x_coco.py 
b/configs/gn/mask_rcnn_r50_fpn_gn-all_contrib_3x_coco.py new file mode 100644 index 0000000..66834f0 --- /dev/null +++ b/configs/gn/mask_rcnn_r50_fpn_gn-all_contrib_3x_coco.py @@ -0,0 +1,5 @@ +_base_ = './mask_rcnn_r50_fpn_gn-all_contrib_2x_coco.py' + +# learning policy +lr_config = dict(step=[28, 34]) +runner = dict(type='EpochBasedRunner', max_epochs=36) diff --git a/configs/gn/metafile.yml b/configs/gn/metafile.yml new file mode 100644 index 0000000..4a1ecae --- /dev/null +++ b/configs/gn/metafile.yml @@ -0,0 +1,162 @@ +Collections: + - Name: Group Normalization + Metadata: + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - Group Normalization + Paper: + URL: https://arxiv.org/abs/1803.08494 + Title: 'Group Normalization' + README: configs/gn/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/configs/gn/mask_rcnn_r50_fpn_gn-all_2x_coco.py + Version: v2.0.0 + +Models: + - Name: mask_rcnn_r50_fpn_gn-all_2x_coco + In Collection: Group Normalization + Config: configs/gn/mask_rcnn_r50_fpn_gn-all_2x_coco.py + Metadata: + Training Memory (GB): 7.1 + inference time (ms/im): + - value: 90.91 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 40.2 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 36.4 + Weights: https://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r50_fpn_gn-all_2x_coco/mask_rcnn_r50_fpn_gn-all_2x_coco_20200206-8eee02a6.pth + + - Name: mask_rcnn_r50_fpn_gn-all_3x_coco + In Collection: Group Normalization + Config: configs/gn/mask_rcnn_r50_fpn_gn-all_3x_coco.py + Metadata: + Training Memory (GB): 7.1 + inference time (ms/im): + - value: 90.91 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 36 + Results: + - Task: Object Detection + 
Dataset: COCO + Metrics: + box AP: 40.5 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 36.7 + Weights: https://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r50_fpn_gn-all_3x_coco/mask_rcnn_r50_fpn_gn-all_3x_coco_20200214-8b23b1e5.pth + + - Name: mask_rcnn_r101_fpn_gn-all_2x_coco + In Collection: Group Normalization + Config: configs/gn/mask_rcnn_r101_fpn_gn-all_2x_coco.py + Metadata: + Training Memory (GB): 9.9 + inference time (ms/im): + - value: 111.11 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.9 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 37.6 + Weights: https://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r101_fpn_gn-all_2x_coco/mask_rcnn_r101_fpn_gn-all_2x_coco_20200205-d96b1b50.pth + + - Name: mask_rcnn_r101_fpn_gn-all_3x_coco + In Collection: Group Normalization + Config: configs/gn/mask_rcnn_r101_fpn_gn-all_3x_coco.py + Metadata: + Training Memory (GB): 9.9 + inference time (ms/im): + - value: 111.11 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 36 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 42.1 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 38.0 + Weights: https://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r101_fpn_gn-all_3x_coco/mask_rcnn_r101_fpn_gn-all_3x_coco_20200513_181609-0df864f4.pth + + - Name: mask_rcnn_r50_fpn_gn-all_contrib_2x_coco + In Collection: Group Normalization + Config: configs/gn/mask_rcnn_r50_fpn_gn-all_contrib_2x_coco.py + Metadata: + Training Memory (GB): 7.1 + inference time (ms/im): + - value: 91.74 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 40.0 + - Task: Instance Segmentation + 
Dataset: COCO + Metrics: + mask AP: 36.1 + Weights: https://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r50_fpn_gn-all_contrib_2x_coco/mask_rcnn_r50_fpn_gn-all_contrib_2x_coco_20200207-20d3e849.pth + + - Name: mask_rcnn_r50_fpn_gn-all_contrib_3x_coco + In Collection: Group Normalization + Config: configs/gn/mask_rcnn_r50_fpn_gn-all_contrib_3x_coco.py + Metadata: + Training Memory (GB): 7.1 + inference time (ms/im): + - value: 91.74 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 36 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 40.1 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 36.2 + Weights: https://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r50_fpn_gn-all_contrib_3x_coco/mask_rcnn_r50_fpn_gn-all_contrib_3x_coco_20200225-542aefbc.pth diff --git a/configs/grid_rcnn/README.md b/configs/grid_rcnn/README.md new file mode 100644 index 0000000..30cb6e0 --- /dev/null +++ b/configs/grid_rcnn/README.md @@ -0,0 +1,35 @@ +# Grid R-CNN + +## Introduction + + + +```latex +@inproceedings{lu2019grid, + title={Grid r-cnn}, + author={Lu, Xin and Li, Buyu and Yue, Yuxin and Li, Quanquan and Yan, Junjie}, + booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition}, + year={2019} +} + +@article{lu2019gridplus, + title={Grid R-CNN Plus: Faster and Better}, + author={Lu, Xin and Li, Buyu and Yue, Yuxin and Li, Quanquan and Yan, Junjie}, + journal={arXiv preprint arXiv:1906.05688}, + year={2019} +} +``` + +## Results and Models + +| Backbone | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +|:-----------:|:-------:|:--------:|:--------------:|:------:|:------:|:--------:| +| R-50 | 2x | 5.1 | 15.0 | 40.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/grid_rcnn/grid_rcnn_r50_fpn_gn-head_2x_coco.py) | 
[model](https://download.openmmlab.com/mmdetection/v2.0/grid_rcnn/grid_rcnn_r50_fpn_gn-head_2x_coco/grid_rcnn_r50_fpn_gn-head_2x_coco_20200130-6cca8223.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/grid_rcnn/grid_rcnn_r50_fpn_gn-head_2x_coco/grid_rcnn_r50_fpn_gn-head_2x_coco_20200130_221140.log.json) | +| R-101 | 2x | 7.0 | 12.6 | 41.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/grid_rcnn/grid_rcnn_r101_fpn_gn-head_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/grid_rcnn/grid_rcnn_r101_fpn_gn-head_2x_coco/grid_rcnn_r101_fpn_gn-head_2x_coco_20200309-d6eca030.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/grid_rcnn/grid_rcnn_r101_fpn_gn-head_2x_coco/grid_rcnn_r101_fpn_gn-head_2x_coco_20200309_164224.log.json) | +| X-101-32x4d | 2x | 8.3 | 10.8 | 42.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/grid_rcnn/grid_rcnn_x101_32x4d_fpn_gn-head_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/grid_rcnn/grid_rcnn_x101_32x4d_fpn_gn-head_2x_coco/grid_rcnn_x101_32x4d_fpn_gn-head_2x_coco_20200130-d8f0e3ff.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/grid_rcnn/grid_rcnn_x101_32x4d_fpn_gn-head_2x_coco/grid_rcnn_x101_32x4d_fpn_gn-head_2x_coco_20200130_215413.log.json) | +| X-101-64x4d | 2x | 11.3 | 7.7 | 43.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/grid_rcnn/grid_rcnn_x101_64x4d_fpn_gn-head_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/grid_rcnn/grid_rcnn_x101_64x4d_fpn_gn-head_2x_coco/grid_rcnn_x101_64x4d_fpn_gn-head_2x_coco_20200204-ec76a754.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/grid_rcnn/grid_rcnn_x101_64x4d_fpn_gn-head_2x_coco/grid_rcnn_x101_64x4d_fpn_gn-head_2x_coco_20200204_080641.log.json) | + +**Notes:** + +- All models are trained with 8 GPUs instead of 32 GPUs in the original paper. 
+- The warming up lasts for 1 epoch and `2x` here indicates 25 epochs. diff --git a/configs/grid_rcnn/grid_rcnn_r101_fpn_gn-head_2x_coco.py b/configs/grid_rcnn/grid_rcnn_r101_fpn_gn-head_2x_coco.py new file mode 100644 index 0000000..1bb5889 --- /dev/null +++ b/configs/grid_rcnn/grid_rcnn_r101_fpn_gn-head_2x_coco.py @@ -0,0 +1,7 @@ +_base_ = './grid_rcnn_r50_fpn_gn-head_2x_coco.py' + +model = dict( + backbone=dict( + depth=101, + init_cfg=dict(type='Pretrained', + checkpoint='torchvision://resnet101'))) diff --git a/configs/grid_rcnn/grid_rcnn_r50_fpn_gn-head_1x_coco.py b/configs/grid_rcnn/grid_rcnn_r50_fpn_gn-head_1x_coco.py new file mode 100644 index 0000000..4aa00ec --- /dev/null +++ b/configs/grid_rcnn/grid_rcnn_r50_fpn_gn-head_1x_coco.py @@ -0,0 +1,11 @@ +_base_ = ['grid_rcnn_r50_fpn_gn-head_2x_coco.py'] +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[8, 11]) +checkpoint_config = dict(interval=1) +# runtime settings +runner = dict(type='EpochBasedRunner', max_epochs=12) diff --git a/configs/grid_rcnn/grid_rcnn_r50_fpn_gn-head_2x_coco.py b/configs/grid_rcnn/grid_rcnn_r50_fpn_gn-head_2x_coco.py new file mode 100644 index 0000000..df63cd5 --- /dev/null +++ b/configs/grid_rcnn/grid_rcnn_r50_fpn_gn-head_2x_coco.py @@ -0,0 +1,131 @@ +_base_ = [ + '../_base_/datasets/coco_detection.py', '../_base_/default_runtime.py' +] +# model settings +model = dict( + type='GridRCNN', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5), + rpn_head=dict( + type='RPNHead', + in_channels=256, + feat_channels=256, + anchor_generator=dict( + type='AnchorGenerator', + scales=[8], + ratios=[0.5, 1.0, 
2.0], + strides=[4, 8, 16, 32, 64]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0]), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), + roi_head=dict( + type='GridRoIHead', + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + bbox_head=dict( + type='Shared2FCBBoxHead', + with_reg=False, + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2]), + reg_class_agnostic=False), + grid_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + grid_head=dict( + type='GridHead', + grid_points=9, + num_convs=8, + in_channels=256, + point_feat_channels=64, + norm_cfg=dict(type='GN', num_groups=36), + loss_grid=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=15))), + # model training and testing settings + train_cfg=dict( + rpn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + allowed_border=0, + pos_weight=-1, + debug=False), + rpn_proposal=dict( + nms_pre=2000, + max_per_img=2000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0), + rcnn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0.5, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True), + pos_radius=1, + 
pos_weight=-1, + max_num_grid=192, + debug=False)), + test_cfg=dict( + rpn=dict( + nms_pre=1000, + max_per_img=1000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0), + rcnn=dict( + score_thr=0.03, + nms=dict(type='nms', iou_threshold=0.3), + max_per_img=100))) +# optimizer +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=3665, + warmup_ratio=1.0 / 80, + step=[17, 23]) +runner = dict(type='EpochBasedRunner', max_epochs=25) diff --git a/configs/grid_rcnn/grid_rcnn_x101_32x4d_fpn_gn-head_2x_coco.py b/configs/grid_rcnn/grid_rcnn_x101_32x4d_fpn_gn-head_2x_coco.py new file mode 100644 index 0000000..3bc8516 --- /dev/null +++ b/configs/grid_rcnn/grid_rcnn_x101_32x4d_fpn_gn-head_2x_coco.py @@ -0,0 +1,24 @@ +_base_ = './grid_rcnn_r50_fpn_gn-head_2x_coco.py' +model = dict( + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d'))) +# optimizer +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=3665, + warmup_ratio=1.0 / 80, + step=[17, 23]) +runner = dict(type='EpochBasedRunner', max_epochs=25) diff --git a/configs/grid_rcnn/grid_rcnn_x101_64x4d_fpn_gn-head_2x_coco.py b/configs/grid_rcnn/grid_rcnn_x101_64x4d_fpn_gn-head_2x_coco.py new file mode 100644 index 0000000..c78f8f6 --- /dev/null +++ b/configs/grid_rcnn/grid_rcnn_x101_64x4d_fpn_gn-head_2x_coco.py @@ -0,0 +1,13 @@ +_base_ = './grid_rcnn_x101_32x4d_fpn_gn-head_2x_coco.py' +model = dict( + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + 
frozen_stages=1, + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d'))) diff --git a/configs/grid_rcnn/metafile.yml b/configs/grid_rcnn/metafile.yml new file mode 100644 index 0000000..d1aa851 --- /dev/null +++ b/configs/grid_rcnn/metafile.yml @@ -0,0 +1,101 @@ +Collections: + - Name: Grid R-CNN + Metadata: + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - RPN + - Dilated Convolution + - ResNet + - RoIAlign + Paper: + URL: https://arxiv.org/abs/1906.05688 + Title: 'Grid R-CNN' + README: configs/grid_rcnn/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/detectors/grid_rcnn.py#L6 + Version: v2.0.0 + +Models: + - Name: grid_rcnn_r50_fpn_gn-head_2x_coco + In Collection: Grid R-CNN + Config: configs/grid_rcnn/grid_rcnn_r50_fpn_gn-head_2x_coco.py + Metadata: + Training Memory (GB): 5.1 + inference time (ms/im): + - value: 66.67 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 40.4 + Weights: https://download.openmmlab.com/mmdetection/v2.0/grid_rcnn/grid_rcnn_r50_fpn_gn-head_2x_coco/grid_rcnn_r50_fpn_gn-head_2x_coco_20200130-6cca8223.pth + + - Name: grid_rcnn_r101_fpn_gn-head_2x_coco + In Collection: Grid R-CNN + Config: configs/grid_rcnn/grid_rcnn_r101_fpn_gn-head_2x_coco.py + Metadata: + Training Memory (GB): 7.0 + inference time (ms/im): + - value: 79.37 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.5 + Weights: https://download.openmmlab.com/mmdetection/v2.0/grid_rcnn/grid_rcnn_r101_fpn_gn-head_2x_coco/grid_rcnn_r101_fpn_gn-head_2x_coco_20200309-d6eca030.pth + + - Name: grid_rcnn_x101_32x4d_fpn_gn-head_2x_coco + In Collection: 
Grid R-CNN + Config: configs/grid_rcnn/grid_rcnn_x101_32x4d_fpn_gn-head_2x_coco.py + Metadata: + Training Memory (GB): 8.3 + inference time (ms/im): + - value: 92.59 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 42.9 + Weights: https://download.openmmlab.com/mmdetection/v2.0/grid_rcnn/grid_rcnn_x101_32x4d_fpn_gn-head_2x_coco/grid_rcnn_x101_32x4d_fpn_gn-head_2x_coco_20200130-d8f0e3ff.pth + + - Name: grid_rcnn_x101_64x4d_fpn_gn-head_2x_coco + In Collection: Grid R-CNN + Config: configs/grid_rcnn/grid_rcnn_x101_64x4d_fpn_gn-head_2x_coco.py + Metadata: + Training Memory (GB): 11.3 + inference time (ms/im): + - value: 129.87 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 43.0 + Weights: https://download.openmmlab.com/mmdetection/v2.0/grid_rcnn/grid_rcnn_x101_64x4d_fpn_gn-head_2x_coco/grid_rcnn_x101_64x4d_fpn_gn-head_2x_coco_20200204-ec76a754.pth diff --git a/configs/groie/README.md b/configs/groie/README.md new file mode 100644 index 0000000..42d4b9f --- /dev/null +++ b/configs/groie/README.md @@ -0,0 +1,62 @@ +# GRoIE + +## A novel Region of Interest Extraction Layer for Instance Segmentation + +By Leonardo Rossi, Akbar Karimi and Andrea Prati from +[IMPLab](http://implab.ce.unipr.it/). + +We provide configs to reproduce the results in the paper for +"*A novel Region of Interest Extraction Layer for Instance Segmentation*" +on COCO object detection. + +## Introduction + + + +This paper is motivated by the need to overcome the limitations of existing +RoI extractors which select only one (the best) layer from FPN. + +Our intuition is that all the layers of FPN retain useful information. 
+ +Therefore, the proposed layer (called Generic RoI Extractor - **GRoIE**) +introduces non-local building blocks and attention mechanisms to boost the +performance. + +## Results and models + +The results on COCO 2017 minival (5k images) are shown in the below table. + +### Application of GRoIE to different architectures + +| Backbone | Method | Lr schd | box AP | mask AP | Config | Download| +| :-------: | :--------------: | :-----: | :----: | :-----: | :-------:| :--------:| +| R-50-FPN | Faster Original | 1x | 37.4 | | [config](../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130_204655.log.json) | +| R-50-FPN | + GRoIE | 1x | 38.3 | | [config](./faster_rcnn_r50_fpn_groie_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/groie/faster_rcnn_r50_fpn_groie_1x_coco/faster_rcnn_r50_fpn_groie_1x_coco_20200604_211715-66ee9516.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/groie/faster_rcnn_r50_fpn_groie_1x_coco/faster_rcnn_r50_fpn_groie_1x_coco_20200604_211715.log.json) | +| R-50-FPN | Grid R-CNN | 1x | 39.1 | | [config](./grid_rcnn_r50_fpn_gn-head_1x_coco.py)| [model](https://download.openmmlab.com/mmdetection/v2.0/groie/grid_rcnn_r50_fpn_gn-head_1x_coco/grid_rcnn_r50_fpn_gn-head_1x_coco_20200605_202059-64f00ee8.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/groie/grid_rcnn_r50_fpn_gn-head_1x_coco/grid_rcnn_r50_fpn_gn-head_1x_coco_20200605_202059.log.json) | +| R-50-FPN | + GRoIE | 1x | | | [config](./grid_rcnn_r50_fpn_gn-head_groie_1x_coco.py)|| +| R-50-FPN | Mask R-CNN | 1x | 38.2 | 34.7 | [config](../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py)| 
[model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_1x_coco/mask_rcnn_r50_fpn_1x_coco_20200205-d4b0c5d6.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_1x_coco/mask_rcnn_r50_fpn_1x_coco_20200205_050542.log.json) | +| R-50-FPN | + GRoIE | 1x | 39.0 | 36.0 | [config](./mask_rcnn_r50_fpn_groie_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/groie/mask_rcnn_r50_fpn_groie_1x_coco/mask_rcnn_r50_fpn_groie_1x_coco_20200604_211715-50d90c74.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/groie/mask_rcnn_r50_fpn_groie_1x_coco/mask_rcnn_r50_fpn_groie_1x_coco_20200604_211715.log.json) | +| R-50-FPN | GC-Net | 1x | 40.7 | 36.5 | [config](../gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200202-50b90e5c.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200202_085547.log.json) | +| R-50-FPN | + GRoIE | 1x | 41.0 | 37.8 | [config](./mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco.py) |[model](https://download.openmmlab.com/mmdetection/v2.0/groie/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco_20200604_211715-42eb79e1.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/groie/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco_20200604_211715.log.json) | +| R-101-FPN | GC-Net | 1x | 42.2 | 37.8 | [config](../gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py) | 
[model](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200206-8407a3f0.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200206_142508.log.json) | +| R-101-FPN | + GRoIE | 1x | 42.6 | 38.7 | [config](./mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco.py)| [model](https://download.openmmlab.com/mmdetection/v2.0/groie/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco_20200607_224507-8daae01c.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/groie/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco_20200607_224507.log.json) | + +## Citation + +If you use this work or benchmark in your research, please cite this project. + +```latex +@inproceedings{rossi2021novel, + title={A novel region of interest extraction layer for instance segmentation}, + author={Rossi, Leonardo and Karimi, Akbar and Prati, Andrea}, + booktitle={2020 25th International Conference on Pattern Recognition (ICPR)}, + pages={2203--2209}, + year={2021}, + organization={IEEE} +} +``` + +## Contact + +The implementation of GRoIE is currently maintained by +[Leonardo Rossi](https://github.com/hachreak/). 
diff --git a/configs/groie/faster_rcnn_r50_fpn_groie_1x_coco.py b/configs/groie/faster_rcnn_r50_fpn_groie_1x_coco.py new file mode 100644 index 0000000..0fc528b --- /dev/null +++ b/configs/groie/faster_rcnn_r50_fpn_groie_1x_coco.py @@ -0,0 +1,25 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' +# model settings +model = dict( + roi_head=dict( + bbox_roi_extractor=dict( + type='GenericRoIExtractor', + aggregation='sum', + roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=2), + out_channels=256, + featmap_strides=[4, 8, 16, 32], + pre_cfg=dict( + type='ConvModule', + in_channels=256, + out_channels=256, + kernel_size=5, + padding=2, + inplace=False, + ), + post_cfg=dict( + type='GeneralizedAttention', + in_channels=256, + spatial_range=-1, + num_heads=6, + attention_type='0100', + kv_stride=2)))) diff --git a/configs/groie/grid_rcnn_r50_fpn_gn-head_groie_1x_coco.py b/configs/groie/grid_rcnn_r50_fpn_gn-head_groie_1x_coco.py new file mode 100644 index 0000000..8e4b4ab --- /dev/null +++ b/configs/groie/grid_rcnn_r50_fpn_gn-head_groie_1x_coco.py @@ -0,0 +1,45 @@ +_base_ = '../grid_rcnn/grid_rcnn_r50_fpn_gn-head_1x_coco.py' +# model settings +model = dict( + roi_head=dict( + bbox_roi_extractor=dict( + type='GenericRoIExtractor', + aggregation='sum', + roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=2), + out_channels=256, + featmap_strides=[4, 8, 16, 32], + pre_cfg=dict( + type='ConvModule', + in_channels=256, + out_channels=256, + kernel_size=5, + padding=2, + inplace=False, + ), + post_cfg=dict( + type='GeneralizedAttention', + in_channels=256, + spatial_range=-1, + num_heads=6, + attention_type='0100', + kv_stride=2)), + grid_roi_extractor=dict( + type='GenericRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=2), + out_channels=256, + featmap_strides=[4, 8, 16, 32], + pre_cfg=dict( + type='ConvModule', + in_channels=256, + out_channels=256, + kernel_size=5, + padding=2, + inplace=False, + ), + 
post_cfg=dict( + type='GeneralizedAttention', + in_channels=256, + spatial_range=-1, + num_heads=6, + attention_type='0100', + kv_stride=2)))) diff --git a/configs/groie/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco.py b/configs/groie/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco.py new file mode 100644 index 0000000..8b83722 --- /dev/null +++ b/configs/groie/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco.py @@ -0,0 +1,45 @@ +_base_ = '../gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py' +# model settings +model = dict( + roi_head=dict( + bbox_roi_extractor=dict( + type='GenericRoIExtractor', + aggregation='sum', + roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=2), + out_channels=256, + featmap_strides=[4, 8, 16, 32], + pre_cfg=dict( + type='ConvModule', + in_channels=256, + out_channels=256, + kernel_size=5, + padding=2, + inplace=False, + ), + post_cfg=dict( + type='GeneralizedAttention', + in_channels=256, + spatial_range=-1, + num_heads=6, + attention_type='0100', + kv_stride=2)), + mask_roi_extractor=dict( + type='GenericRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=2), + out_channels=256, + featmap_strides=[4, 8, 16, 32], + pre_cfg=dict( + type='ConvModule', + in_channels=256, + out_channels=256, + kernel_size=5, + padding=2, + inplace=False, + ), + post_cfg=dict( + type='GeneralizedAttention', + in_channels=256, + spatial_range=-1, + num_heads=6, + attention_type='0100', + kv_stride=2)))) diff --git a/configs/groie/mask_rcnn_r50_fpn_groie_1x_coco.py b/configs/groie/mask_rcnn_r50_fpn_groie_1x_coco.py new file mode 100644 index 0000000..81dfb48 --- /dev/null +++ b/configs/groie/mask_rcnn_r50_fpn_groie_1x_coco.py @@ -0,0 +1,45 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' +# model settings +model = dict( + roi_head=dict( + bbox_roi_extractor=dict( + type='GenericRoIExtractor', + aggregation='sum', + roi_layer=dict(type='RoIAlign', 
output_size=7, sampling_ratio=2), + out_channels=256, + featmap_strides=[4, 8, 16, 32], + pre_cfg=dict( + type='ConvModule', + in_channels=256, + out_channels=256, + kernel_size=5, + padding=2, + inplace=False, + ), + post_cfg=dict( + type='GeneralizedAttention', + in_channels=256, + spatial_range=-1, + num_heads=6, + attention_type='0100', + kv_stride=2)), + mask_roi_extractor=dict( + type='GenericRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=2), + out_channels=256, + featmap_strides=[4, 8, 16, 32], + pre_cfg=dict( + type='ConvModule', + in_channels=256, + out_channels=256, + kernel_size=5, + padding=2, + inplace=False, + ), + post_cfg=dict( + type='GeneralizedAttention', + in_channels=256, + spatial_range=-1, + num_heads=6, + attention_type='0100', + kv_stride=2)))) diff --git a/configs/groie/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco.py b/configs/groie/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco.py new file mode 100644 index 0000000..852c5ca --- /dev/null +++ b/configs/groie/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco.py @@ -0,0 +1,45 @@ +_base_ = '../gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py' +# model settings +model = dict( + roi_head=dict( + bbox_roi_extractor=dict( + type='GenericRoIExtractor', + aggregation='sum', + roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=2), + out_channels=256, + featmap_strides=[4, 8, 16, 32], + pre_cfg=dict( + type='ConvModule', + in_channels=256, + out_channels=256, + kernel_size=5, + padding=2, + inplace=False, + ), + post_cfg=dict( + type='GeneralizedAttention', + in_channels=256, + spatial_range=-1, + num_heads=6, + attention_type='0100', + kv_stride=2)), + mask_roi_extractor=dict( + type='GenericRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=2), + out_channels=256, + featmap_strides=[4, 8, 16, 32], + pre_cfg=dict( + type='ConvModule', + in_channels=256, + out_channels=256, 
+ kernel_size=5, + padding=2, + inplace=False, + ), + post_cfg=dict( + type='GeneralizedAttention', + in_channels=256, + spatial_range=-1, + num_heads=6, + attention_type='0100', + kv_stride=2)))) diff --git a/configs/groie/metafile.yml b/configs/groie/metafile.yml new file mode 100644 index 0000000..269cb39 --- /dev/null +++ b/configs/groie/metafile.yml @@ -0,0 +1,93 @@ +Collections: + - Name: GRoIE + Metadata: + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - Generic RoI Extractor + - FPN + - RPN + - ResNet + - RoIAlign + Paper: + URL: https://arxiv.org/abs/2004.13665 + Title: 'A novel Region of Interest Extraction Layer for Instance Segmentation' + README: configs/groie/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/roi_heads/roi_extractors/groie.py#L15 + Version: v2.1.0 + +Models: + - Name: faster_rcnn_r50_fpn_groie_1x_coco + In Collection: GRoIE + Config: configs/groie/faster_rcnn_r50_fpn_groie_1x_coco.py + Metadata: + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 38.3 + Weights: https://download.openmmlab.com/mmdetection/v2.0/groie/faster_rcnn_r50_fpn_groie_1x_coco/faster_rcnn_r50_fpn_groie_1x_coco_20200604_211715-66ee9516.pth + + - Name: grid_rcnn_r50_fpn_gn-head_groie_1x_coco + In Collection: GRoIE + Config: configs/groie/grid_rcnn_r50_fpn_gn-head_groie_1x_coco.py + Metadata: + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 39.1 + + - Name: mask_rcnn_r50_fpn_groie_1x_coco + In Collection: GRoIE + Config: configs/groie/mask_rcnn_r50_fpn_groie_1x_coco.py + Metadata: + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 39.0 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 36.0 + Weights: 
https://download.openmmlab.com/mmdetection/v2.0/groie/mask_rcnn_r50_fpn_groie_1x_coco/mask_rcnn_r50_fpn_groie_1x_coco_20200604_211715-50d90c74.pth + + - Name: mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco + In Collection: GRoIE + Config: configs/groie/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco.py + Metadata: + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.0 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 37.8 + Weights: https://download.openmmlab.com/mmdetection/v2.0/groie/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco_20200604_211715-42eb79e1.pth + + - Name: mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco + In Collection: GRoIE + Config: configs/groie/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco.py + Metadata: + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 42.6 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 38.7 + Weights: https://download.openmmlab.com/mmdetection/v2.0/groie/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco_20200607_224507-8daae01c.pth diff --git a/configs/guided_anchoring/README.md b/configs/guided_anchoring/README.md new file mode 100644 index 0000000..34d6b0d --- /dev/null +++ b/configs/guided_anchoring/README.md @@ -0,0 +1,49 @@ +# Region Proposal by Guided Anchoring + +## Introduction + + + +We provide config files to reproduce the results in the CVPR 2019 paper for [Region Proposal by Guided Anchoring](https://arxiv.org/abs/1901.03278). 
+ +```latex +@inproceedings{wang2019region, + title={Region Proposal by Guided Anchoring}, + author={Jiaqi Wang and Kai Chen and Shuo Yang and Chen Change Loy and Dahua Lin}, + booktitle={IEEE Conference on Computer Vision and Pattern Recognition}, + year={2019} +} +``` + +## Results and Models + +The results on COCO 2017 val are shown in the table below. (results on test-dev are usually slightly higher than val). + +| Method | Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | AR 1000 | Config | Download | +| :----: | :-------------: | :-----: | :-----: | :------: | :------------: | :-----: | :------: | :--------: | +| GA-RPN | R-50-FPN | caffe | 1x | 5.3 | 15.8 | 68.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_rpn_r50_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_rpn_r50_caffe_fpn_1x_coco/ga_rpn_r50_caffe_fpn_1x_coco_20200531-899008a6.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_rpn_r50_caffe_fpn_1x_coco/ga_rpn_r50_caffe_fpn_1x_coco_20200531_011819.log.json) | +| GA-RPN | R-101-FPN | caffe | 1x | 7.3 | 13.0 | 69.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_rpn_r101_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_rpn_r101_caffe_fpn_1x_coco/ga_rpn_r101_caffe_fpn_1x_coco_20200531-ca9ba8fb.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_rpn_r101_caffe_fpn_1x_coco/ga_rpn_r101_caffe_fpn_1x_coco_20200531_011812.log.json) | +| GA-RPN | X-101-32x4d-FPN | pytorch | 1x | 8.5 | 10.0 | 70.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_rpn_x101_32x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_rpn_x101_32x4d_fpn_1x_coco/ga_rpn_x101_32x4d_fpn_1x_coco_20200220-c28d1b18.pth) | 
[log](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_rpn_x101_32x4d_fpn_1x_coco/ga_rpn_x101_32x4d_fpn_1x_coco_20200220_221326.log.json) | +| GA-RPN | X-101-64x4d-FPN | pytorch | 1x | 7.1 | 7.5 | 71.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_rpn_x101_64x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_rpn_x101_64x4d_fpn_1x_coco/ga_rpn_x101_64x4d_fpn_1x_coco_20200225-3c6e1aa2.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_rpn_x101_64x4d_fpn_1x_coco/ga_rpn_x101_64x4d_fpn_1x_coco_20200225_152704.log.json) | + +| Method | Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :------------: | :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :------: | :--------: | +| GA-Faster RCNN | R-50-FPN | caffe | 1x | 5.5 | | 39.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_faster_r50_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_faster_r50_caffe_fpn_1x_coco/ga_faster_r50_caffe_fpn_1x_coco_20200702_000718-a11ccfe6.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_faster_r50_caffe_fpn_1x_coco/ga_faster_r50_caffe_fpn_1x_coco_20200702_000718.log.json) | +| GA-Faster RCNN | R-101-FPN | caffe | 1x | 7.5 | | 41.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_faster_r101_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_faster_r101_caffe_fpn_1x_coco/ga_faster_r101_caffe_fpn_1x_coco_bbox_mAP-0.415_20200505_115528-fb82e499.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_faster_r101_caffe_fpn_1x_coco/ga_faster_r101_caffe_fpn_1x_coco_20200505_115528.log.json) | +| GA-Faster RCNN | X-101-32x4d-FPN | pytorch | 1x | 8.7 | 
9.7 | 43.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_faster_x101_32x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_faster_x101_32x4d_fpn_1x_coco/ga_faster_x101_32x4d_fpn_1x_coco_20200215-1ded9da3.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_faster_x101_32x4d_fpn_1x_coco/ga_faster_x101_32x4d_fpn_1x_coco_20200215_184547.log.json) | +| GA-Faster RCNN | X-101-64x4d-FPN | pytorch | 1x | 11.8 | 7.3 | 43.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_faster_x101_64x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_faster_x101_64x4d_fpn_1x_coco/ga_faster_x101_64x4d_fpn_1x_coco_20200215-0fa7bde7.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_faster_x101_64x4d_fpn_1x_coco/ga_faster_x101_64x4d_fpn_1x_coco_20200215_104455.log.json) | +| GA-RetinaNet | R-50-FPN | caffe | 1x | 3.5 | 16.8 | 36.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_retinanet_r50_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_retinanet_r50_caffe_fpn_1x_coco/ga_retinanet_r50_caffe_fpn_1x_coco_20201020-39581c6f.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_retinanet_r50_caffe_fpn_1x_coco/ga_retinanet_r50_caffe_fpn_1x_coco_20201020_225450.log.json) | +| GA-RetinaNet | R-101-FPN | caffe | 1x | 5.5 | 12.9 | 39.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_retinanet_r101_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_retinanet_r101_caffe_fpn_1x_coco/ga_retinanet_r101_caffe_fpn_1x_coco_20200531-6266453c.pth) | 
[log](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_retinanet_r101_caffe_fpn_1x_coco/ga_retinanet_r101_caffe_fpn_1x_coco_20200531_012847.log.json) | +| GA-RetinaNet | X-101-32x4d-FPN | pytorch | 1x | 6.9 | 10.6 | 40.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_retinanet_x101_32x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_retinanet_x101_32x4d_fpn_1x_coco/ga_retinanet_x101_32x4d_fpn_1x_coco_20200219-40c56caa.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_retinanet_x101_32x4d_fpn_1x_coco/ga_retinanet_x101_32x4d_fpn_1x_coco_20200219_223025.log.json) | +| GA-RetinaNet | X-101-64x4d-FPN | pytorch | 1x | 9.9 | 7.7 | 41.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/guided_anchoring/ga_retinanet_x101_64x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_retinanet_x101_64x4d_fpn_1x_coco/ga_retinanet_x101_64x4d_fpn_1x_coco_20200226-ef9f7f1f.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_retinanet_x101_64x4d_fpn_1x_coco/ga_retinanet_x101_64x4d_fpn_1x_coco_20200226_221123.log.json) | + +- In the Guided Anchoring paper, `score_thr` is set to 0.001 in Fast/Faster RCNN and 0.05 in RetinaNet for both baselines and Guided Anchoring. + +- Performance on the COCO test-dev benchmark is shown as follows. 
+ +| Method | Backbone | Style | Lr schd | Aug Train | Score thr | AP | AP_50 | AP_75 | AP_small | AP_medium | AP_large | Download | +| :------------: | :-------: | :---: | :-----: | :-------: | :-------: | :---: | :---: | :---: | :------: | :-------: | :------: | :------: | +| GA-Faster RCNN | R-101-FPN | caffe | 1x | F | 0.05 | | | | | | | | +| GA-Faster RCNN | R-101-FPN | caffe | 1x | F | 0.001 | | | | | | | | +| GA-RetinaNet | R-101-FPN | caffe | 1x | F | 0.05 | | | | | | | | +| GA-RetinaNet | R-101-FPN | caffe | 2x | T | 0.05 | | | | | | | | diff --git a/configs/guided_anchoring/ga_fast_r50_caffe_fpn_1x_coco.py b/configs/guided_anchoring/ga_fast_r50_caffe_fpn_1x_coco.py new file mode 100644 index 0000000..8fc203c --- /dev/null +++ b/configs/guided_anchoring/ga_fast_r50_caffe_fpn_1x_coco.py @@ -0,0 +1,65 @@ +_base_ = '../fast_rcnn/fast_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=False), + norm_eval=True, + style='caffe', + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://detectron2/resnet50_caffe')), + roi_head=dict( + bbox_head=dict(bbox_coder=dict(target_stds=[0.05, 0.05, 0.1, 0.1]))), + # model training and testing settings + train_cfg=dict( + rcnn=dict( + assigner=dict(pos_iou_thr=0.6, neg_iou_thr=0.6, min_pos_iou=0.6), + sampler=dict(num=256))), + test_cfg=dict(rcnn=dict(score_thr=1e-3))) +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadProposals', num_max_proposals=300), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + 
dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'proposals', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadProposals', num_max_proposals=None), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img', 'proposals']), + ]) +] +data = dict( + train=dict( + proposal_file=data_root + 'proposals/ga_rpn_r50_fpn_1x_train2017.pkl', + pipeline=train_pipeline), + val=dict( + proposal_file=data_root + 'proposals/ga_rpn_r50_fpn_1x_val2017.pkl', + pipeline=test_pipeline), + test=dict( + proposal_file=data_root + 'proposals/ga_rpn_r50_fpn_1x_val2017.pkl', + pipeline=test_pipeline)) +optimizer_config = dict( + _delete_=True, grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/guided_anchoring/ga_faster_r101_caffe_fpn_1x_coco.py b/configs/guided_anchoring/ga_faster_r101_caffe_fpn_1x_coco.py new file mode 100644 index 0000000..a40e7c6 --- /dev/null +++ b/configs/guided_anchoring/ga_faster_r101_caffe_fpn_1x_coco.py @@ -0,0 +1,7 @@ +_base_ = './ga_faster_r50_caffe_fpn_1x_coco.py' +model = dict( + backbone=dict( + depth=101, + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://detectron2/resnet101_caffe'))) diff --git a/configs/guided_anchoring/ga_faster_r50_caffe_fpn_1x_coco.py b/configs/guided_anchoring/ga_faster_r50_caffe_fpn_1x_coco.py new file mode 100644 index 0000000..b0add92 --- /dev/null +++ b/configs/guided_anchoring/ga_faster_r50_caffe_fpn_1x_coco.py @@ -0,0 +1,65 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_caffe_fpn_1x_coco.py' +model = dict( + rpn_head=dict( + _delete_=True, + type='GARPNHead', + in_channels=256, + feat_channels=256, + approx_anchor_generator=dict( + type='AnchorGenerator', + octave_base_scale=8, + 
scales_per_octave=3, + ratios=[0.5, 1.0, 2.0], + strides=[4, 8, 16, 32, 64]), + square_anchor_generator=dict( + type='AnchorGenerator', + ratios=[1.0], + scales=[8], + strides=[4, 8, 16, 32, 64]), + anchor_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[0.07, 0.07, 0.14, 0.14]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[0.07, 0.07, 0.11, 0.11]), + loc_filter_thr=0.01, + loss_loc=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_shape=dict(type='BoundedIoULoss', beta=0.2, loss_weight=1.0), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), + roi_head=dict( + bbox_head=dict(bbox_coder=dict(target_stds=[0.05, 0.05, 0.1, 0.1]))), + # model training and testing settings + train_cfg=dict( + rpn=dict( + ga_assigner=dict( + type='ApproxMaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + ignore_iof_thr=-1), + ga_sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + allowed_border=-1, + center_ratio=0.2, + ignore_ratio=0.5), + rpn_proposal=dict(nms_post=1000, max_per_img=300), + rcnn=dict( + assigner=dict(pos_iou_thr=0.6, neg_iou_thr=0.6, min_pos_iou=0.6), + sampler=dict(type='RandomSampler', num=256))), + test_cfg=dict( + rpn=dict(nms_post=1000, max_per_img=300), rcnn=dict(score_thr=1e-3))) +optimizer_config = dict( + _delete_=True, grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/guided_anchoring/ga_faster_r50_fpn_1x_coco.py b/configs/guided_anchoring/ga_faster_r50_fpn_1x_coco.py new file mode 100644 index 0000000..e3d8238 --- /dev/null +++ b/configs/guided_anchoring/ga_faster_r50_fpn_1x_coco.py @@ -0,0 +1,65 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + rpn_head=dict( + _delete_=True, + type='GARPNHead', 
+ in_channels=256, + feat_channels=256, + approx_anchor_generator=dict( + type='AnchorGenerator', + octave_base_scale=8, + scales_per_octave=3, + ratios=[0.5, 1.0, 2.0], + strides=[4, 8, 16, 32, 64]), + square_anchor_generator=dict( + type='AnchorGenerator', + ratios=[1.0], + scales=[8], + strides=[4, 8, 16, 32, 64]), + anchor_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[0.07, 0.07, 0.14, 0.14]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[0.07, 0.07, 0.11, 0.11]), + loc_filter_thr=0.01, + loss_loc=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_shape=dict(type='BoundedIoULoss', beta=0.2, loss_weight=1.0), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), + roi_head=dict( + bbox_head=dict(bbox_coder=dict(target_stds=[0.05, 0.05, 0.1, 0.1]))), + # model training and testing settings + train_cfg=dict( + rpn=dict( + ga_assigner=dict( + type='ApproxMaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + ignore_iof_thr=-1), + ga_sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + allowed_border=-1, + center_ratio=0.2, + ignore_ratio=0.5), + rpn_proposal=dict(nms_post=1000, max_per_img=300), + rcnn=dict( + assigner=dict(pos_iou_thr=0.6, neg_iou_thr=0.6, min_pos_iou=0.6), + sampler=dict(type='RandomSampler', num=256))), + test_cfg=dict( + rpn=dict(nms_post=1000, max_per_img=300), rcnn=dict(score_thr=1e-3))) +optimizer_config = dict( + _delete_=True, grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/guided_anchoring/ga_faster_x101_32x4d_fpn_1x_coco.py b/configs/guided_anchoring/ga_faster_x101_32x4d_fpn_1x_coco.py new file mode 100644 index 0000000..f1dda94 --- /dev/null +++ b/configs/guided_anchoring/ga_faster_x101_32x4d_fpn_1x_coco.py 
@@ -0,0 +1,14 @@ +_base_ = './ga_faster_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d'))) diff --git a/configs/guided_anchoring/ga_faster_x101_64x4d_fpn_1x_coco.py b/configs/guided_anchoring/ga_faster_x101_64x4d_fpn_1x_coco.py new file mode 100644 index 0000000..fb9e2af --- /dev/null +++ b/configs/guided_anchoring/ga_faster_x101_64x4d_fpn_1x_coco.py @@ -0,0 +1,14 @@ +_base_ = './ga_faster_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d'))) diff --git a/configs/guided_anchoring/ga_retinanet_r101_caffe_fpn_1x_coco.py b/configs/guided_anchoring/ga_retinanet_r101_caffe_fpn_1x_coco.py new file mode 100644 index 0000000..1b1cccd --- /dev/null +++ b/configs/guided_anchoring/ga_retinanet_r101_caffe_fpn_1x_coco.py @@ -0,0 +1,7 @@ +_base_ = './ga_retinanet_r50_caffe_fpn_1x_coco.py' +model = dict( + backbone=dict( + depth=101, + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://detectron2/resnet101_caffe'))) diff --git a/configs/guided_anchoring/ga_retinanet_r101_caffe_fpn_mstrain_2x.py b/configs/guided_anchoring/ga_retinanet_r101_caffe_fpn_mstrain_2x.py new file mode 100644 index 0000000..260895b --- /dev/null +++ b/configs/guided_anchoring/ga_retinanet_r101_caffe_fpn_mstrain_2x.py @@ -0,0 +1,169 @@ +_base_ = '../_base_/default_runtime.py' + +# model settings +model = dict( + type='RetinaNet', + backbone=dict( + type='ResNet', + depth=101, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + 
norm_cfg=dict(type='BN', requires_grad=False), + norm_eval=True, + style='caffe', + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://detectron2/resnet101_caffe')), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + start_level=1, + add_extra_convs=True, + num_outs=5), + bbox_head=dict( + type='GARetinaHead', + num_classes=80, + in_channels=256, + stacked_convs=4, + feat_channels=256, + approx_anchor_generator=dict( + type='AnchorGenerator', + octave_base_scale=4, + scales_per_octave=3, + ratios=[0.5, 1.0, 2.0], + strides=[8, 16, 32, 64, 128]), + square_anchor_generator=dict( + type='AnchorGenerator', + ratios=[1.0], + scales=[4], + strides=[8, 16, 32, 64, 128]), + anchor_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0]), + loc_filter_thr=0.01, + loss_loc=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_shape=dict(type='BoundedIoULoss', beta=0.2, loss_weight=1.0), + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=0.04, loss_weight=1.0))) +# training and testing settings +train_cfg = dict( + ga_assigner=dict( + type='ApproxMaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.4, + min_pos_iou=0.4, + ignore_iof_thr=-1), + ga_sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0.0, + ignore_iof_thr=-1), + allowed_border=-1, + pos_weight=-1, + center_ratio=0.2, + ignore_ratio=0.5, + debug=False) +test_cfg = dict( + nms_pre=1000, + min_bbox_size=0, + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.5), + max_per_img=100) +# dataset settings 
+dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', + img_scale=[(1333, 480), (1333, 960)], + keep_ratio=True, + multiscale_mode='range'), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline)) +evaluation = dict(interval=1, metric='bbox') +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=1.0 / 3, + step=[16, 22]) +checkpoint_config = dict(interval=1) +# yapf:disable +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + # 
dict(type='TensorboardLoggerHook') + ]) +# yapf:enable +# runtime settings +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/guided_anchoring/ga_retinanet_r50_caffe_fpn_1x_coco.py b/configs/guided_anchoring/ga_retinanet_r50_caffe_fpn_1x_coco.py new file mode 100644 index 0000000..3351201 --- /dev/null +++ b/configs/guided_anchoring/ga_retinanet_r50_caffe_fpn_1x_coco.py @@ -0,0 +1,62 @@ +_base_ = '../retinanet/retinanet_r50_caffe_fpn_1x_coco.py' +model = dict( + bbox_head=dict( + _delete_=True, + type='GARetinaHead', + num_classes=80, + in_channels=256, + stacked_convs=4, + feat_channels=256, + approx_anchor_generator=dict( + type='AnchorGenerator', + octave_base_scale=4, + scales_per_octave=3, + ratios=[0.5, 1.0, 2.0], + strides=[8, 16, 32, 64, 128]), + square_anchor_generator=dict( + type='AnchorGenerator', + ratios=[1.0], + scales=[4], + strides=[8, 16, 32, 64, 128]), + anchor_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0]), + loc_filter_thr=0.01, + loss_loc=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_shape=dict(type='BoundedIoULoss', beta=0.2, loss_weight=1.0), + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=0.04, loss_weight=1.0)), + # training and testing settings + train_cfg=dict( + ga_assigner=dict( + type='ApproxMaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.4, + min_pos_iou=0.4, + ignore_iof_thr=-1), + ga_sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + assigner=dict(neg_iou_thr=0.5, min_pos_iou=0.0), + center_ratio=0.2, + ignore_ratio=0.5)) +optimizer_config = dict( + _delete_=True, grad_clip=dict(max_norm=35, norm_type=2)) diff 
--git a/configs/guided_anchoring/ga_retinanet_r50_fpn_1x_coco.py b/configs/guided_anchoring/ga_retinanet_r50_fpn_1x_coco.py new file mode 100644 index 0000000..7694723 --- /dev/null +++ b/configs/guided_anchoring/ga_retinanet_r50_fpn_1x_coco.py @@ -0,0 +1,62 @@ +_base_ = '../retinanet/retinanet_r50_fpn_1x_coco.py' +model = dict( + bbox_head=dict( + _delete_=True, + type='GARetinaHead', + num_classes=80, + in_channels=256, + stacked_convs=4, + feat_channels=256, + approx_anchor_generator=dict( + type='AnchorGenerator', + octave_base_scale=4, + scales_per_octave=3, + ratios=[0.5, 1.0, 2.0], + strides=[8, 16, 32, 64, 128]), + square_anchor_generator=dict( + type='AnchorGenerator', + ratios=[1.0], + scales=[4], + strides=[8, 16, 32, 64, 128]), + anchor_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0]), + loc_filter_thr=0.01, + loss_loc=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_shape=dict(type='BoundedIoULoss', beta=0.2, loss_weight=1.0), + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=0.04, loss_weight=1.0)), + # training and testing settings + train_cfg=dict( + ga_assigner=dict( + type='ApproxMaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.4, + min_pos_iou=0.4, + ignore_iof_thr=-1), + ga_sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + assigner=dict(neg_iou_thr=0.5, min_pos_iou=0.0), + center_ratio=0.2, + ignore_ratio=0.5)) +optimizer_config = dict( + _delete_=True, grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/guided_anchoring/ga_retinanet_x101_32x4d_fpn_1x_coco.py b/configs/guided_anchoring/ga_retinanet_x101_32x4d_fpn_1x_coco.py new file mode 100644 
index 0000000..c5eb34f --- /dev/null +++ b/configs/guided_anchoring/ga_retinanet_x101_32x4d_fpn_1x_coco.py @@ -0,0 +1,14 @@ +_base_ = './ga_retinanet_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d'))) diff --git a/configs/guided_anchoring/ga_retinanet_x101_64x4d_fpn_1x_coco.py b/configs/guided_anchoring/ga_retinanet_x101_64x4d_fpn_1x_coco.py new file mode 100644 index 0000000..5c69a6f --- /dev/null +++ b/configs/guided_anchoring/ga_retinanet_x101_64x4d_fpn_1x_coco.py @@ -0,0 +1,14 @@ +_base_ = './ga_retinanet_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d'))) diff --git a/configs/guided_anchoring/ga_rpn_r101_caffe_fpn_1x_coco.py b/configs/guided_anchoring/ga_rpn_r101_caffe_fpn_1x_coco.py new file mode 100644 index 0000000..039703e --- /dev/null +++ b/configs/guided_anchoring/ga_rpn_r101_caffe_fpn_1x_coco.py @@ -0,0 +1,8 @@ +_base_ = './ga_rpn_r50_caffe_fpn_1x_coco.py' +# model settings +model = dict( + backbone=dict( + depth=101, + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://detectron2/resnet101_caffe'))) diff --git a/configs/guided_anchoring/ga_rpn_r50_caffe_fpn_1x_coco.py b/configs/guided_anchoring/ga_rpn_r50_caffe_fpn_1x_coco.py new file mode 100644 index 0000000..7830894 --- /dev/null +++ b/configs/guided_anchoring/ga_rpn_r50_caffe_fpn_1x_coco.py @@ -0,0 +1,58 @@ +_base_ = '../rpn/rpn_r50_caffe_fpn_1x_coco.py' +model = dict( + rpn_head=dict( + _delete_=True, + type='GARPNHead', + in_channels=256, + 
feat_channels=256, + approx_anchor_generator=dict( + type='AnchorGenerator', + octave_base_scale=8, + scales_per_octave=3, + ratios=[0.5, 1.0, 2.0], + strides=[4, 8, 16, 32, 64]), + square_anchor_generator=dict( + type='AnchorGenerator', + ratios=[1.0], + scales=[8], + strides=[4, 8, 16, 32, 64]), + anchor_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[0.07, 0.07, 0.14, 0.14]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[0.07, 0.07, 0.11, 0.11]), + loc_filter_thr=0.01, + loss_loc=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_shape=dict(type='BoundedIoULoss', beta=0.2, loss_weight=1.0), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict( + rpn=dict( + ga_assigner=dict( + type='ApproxMaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + ignore_iof_thr=-1), + ga_sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + allowed_border=-1, + center_ratio=0.2, + ignore_ratio=0.5)), + test_cfg=dict(rpn=dict(nms_post=1000))) +optimizer_config = dict( + _delete_=True, grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/guided_anchoring/ga_rpn_r50_fpn_1x_coco.py b/configs/guided_anchoring/ga_rpn_r50_fpn_1x_coco.py new file mode 100644 index 0000000..27ab3e7 --- /dev/null +++ b/configs/guided_anchoring/ga_rpn_r50_fpn_1x_coco.py @@ -0,0 +1,58 @@ +_base_ = '../rpn/rpn_r50_fpn_1x_coco.py' +model = dict( + rpn_head=dict( + _delete_=True, + type='GARPNHead', + in_channels=256, + feat_channels=256, + approx_anchor_generator=dict( + type='AnchorGenerator', + octave_base_scale=8, + scales_per_octave=3, + ratios=[0.5, 1.0, 2.0], + strides=[4, 8, 16, 32, 64]), + square_anchor_generator=dict( + 
type='AnchorGenerator', + ratios=[1.0], + scales=[8], + strides=[4, 8, 16, 32, 64]), + anchor_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[0.07, 0.07, 0.14, 0.14]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[0.07, 0.07, 0.11, 0.11]), + loc_filter_thr=0.01, + loss_loc=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_shape=dict(type='BoundedIoULoss', beta=0.2, loss_weight=1.0), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict( + rpn=dict( + ga_assigner=dict( + type='ApproxMaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + ignore_iof_thr=-1), + ga_sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + allowed_border=-1, + center_ratio=0.2, + ignore_ratio=0.5)), + test_cfg=dict(rpn=dict(nms_post=1000))) +optimizer_config = dict( + _delete_=True, grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/guided_anchoring/ga_rpn_x101_32x4d_fpn_1x_coco.py b/configs/guided_anchoring/ga_rpn_x101_32x4d_fpn_1x_coco.py new file mode 100644 index 0000000..cccc985 --- /dev/null +++ b/configs/guided_anchoring/ga_rpn_x101_32x4d_fpn_1x_coco.py @@ -0,0 +1,14 @@ +_base_ = './ga_rpn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d'))) diff --git a/configs/guided_anchoring/ga_rpn_x101_64x4d_fpn_1x_coco.py b/configs/guided_anchoring/ga_rpn_x101_64x4d_fpn_1x_coco.py new file mode 100644 index 0000000..4e134d2 --- /dev/null +++ 
b/configs/guided_anchoring/ga_rpn_x101_64x4d_fpn_1x_coco.py @@ -0,0 +1,14 @@ +_base_ = './ga_rpn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d'))) diff --git a/configs/guided_anchoring/metafile.yml b/configs/guided_anchoring/metafile.yml new file mode 100644 index 0000000..3019d4a --- /dev/null +++ b/configs/guided_anchoring/metafile.yml @@ -0,0 +1,246 @@ +Collections: + - Name: Guided Anchoring + Metadata: + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - FPN + - Guided Anchoring + - ResNet + Paper: + URL: https://arxiv.org/abs/1901.03278 + Title: 'Region Proposal by Guided Anchoring' + README: configs/guided_anchoring/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/dense_heads/ga_retina_head.py#L10 + Version: v2.0.0 + +Models: + - Name: ga_rpn_r50_caffe_fpn_1x_coco + In Collection: Guided Anchoring + Config: configs/guided_anchoring/ga_rpn_r50_caffe_fpn_1x_coco.py + Metadata: + Training Memory (GB): 5.3 + inference time (ms/im): + - value: 63.29 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Region Proposal + Dataset: COCO + Metrics: + AR@1000: 68.4 + Weights: https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_rpn_r50_caffe_fpn_1x_coco/ga_rpn_r50_caffe_fpn_1x_coco_20200531-899008a6.pth + + - Name: ga_rpn_r101_caffe_fpn_1x_coco + In Collection: Guided Anchoring + Config: configs/guided_anchoring/ga_rpn_r101_caffe_fpn_1x_coco.py + Metadata: + Training Memory (GB): 7.3 + inference time (ms/im): + - value: 76.92 + hardware: V100 + backend: PyTorch + batch size: 1 + 
mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Region Proposal + Dataset: COCO + Metrics: + AR@1000: 69.5 + Weights: https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_rpn_r101_caffe_fpn_1x_coco/ga_rpn_r101_caffe_fpn_1x_coco_20200531-ca9ba8fb.pth + + - Name: ga_rpn_x101_32x4d_fpn_1x_coco + In Collection: Guided Anchoring + Config: configs/guided_anchoring/ga_rpn_x101_32x4d_fpn_1x_coco.py + Metadata: + Training Memory (GB): 8.5 + inference time (ms/im): + - value: 100 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Region Proposal + Dataset: COCO + Metrics: + AR@1000: 70.6 + Weights: https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_rpn_x101_32x4d_fpn_1x_coco/ga_rpn_x101_32x4d_fpn_1x_coco_20200220-c28d1b18.pth + + - Name: ga_rpn_x101_64x4d_fpn_1x_coco + In Collection: Guided Anchoring + Config: configs/guided_anchoring/ga_rpn_x101_64x4d_fpn_1x_coco.py + Metadata: + Training Memory (GB): 7.1 + inference time (ms/im): + - value: 133.33 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Region Proposal + Dataset: COCO + Metrics: + AR@1000: 71.2 + Weights: https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_rpn_x101_64x4d_fpn_1x_coco/ga_rpn_x101_64x4d_fpn_1x_coco_20200225-3c6e1aa2.pth + + - Name: ga_faster_r50_caffe_fpn_1x_coco + In Collection: Guided Anchoring + Config: configs/guided_anchoring/ga_faster_r50_caffe_fpn_1x_coco.py + Metadata: + Training Memory (GB): 5.5 + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 39.6 + Weights: https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_faster_r50_caffe_fpn_1x_coco/ga_faster_r50_caffe_fpn_1x_coco_20200702_000718-a11ccfe6.pth + + - Name: ga_faster_r101_caffe_fpn_1x_coco + In Collection: Guided Anchoring + Config: 
configs/guided_anchoring/ga_faster_r101_caffe_fpn_1x_coco.py + Metadata: + Training Memory (GB): 7.5 + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.5 + Weights: https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_faster_r101_caffe_fpn_1x_coco/ga_faster_r101_caffe_fpn_1x_coco_bbox_mAP-0.415_20200505_115528-fb82e499.pth + + - Name: ga_faster_x101_32x4d_fpn_1x_coco + In Collection: Guided Anchoring + Config: configs/guided_anchoring/ga_faster_x101_32x4d_fpn_1x_coco.py + Metadata: + Training Memory (GB): 8.7 + inference time (ms/im): + - value: 103.09 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 43.0 + Weights: https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_faster_x101_32x4d_fpn_1x_coco/ga_faster_x101_32x4d_fpn_1x_coco_20200215-1ded9da3.pth + + - Name: ga_faster_x101_64x4d_fpn_1x_coco + In Collection: Guided Anchoring + Config: configs/guided_anchoring/ga_faster_x101_64x4d_fpn_1x_coco.py + Metadata: + Training Memory (GB): 11.8 + inference time (ms/im): + - value: 136.99 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 43.9 + Weights: https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_faster_x101_64x4d_fpn_1x_coco/ga_faster_x101_64x4d_fpn_1x_coco_20200215-0fa7bde7.pth + + - Name: ga_retinanet_r50_caffe_fpn_1x_coco + In Collection: Guided Anchoring + Config: configs/guided_anchoring/ga_retinanet_r50_caffe_fpn_1x_coco.py + Metadata: + Training Memory (GB): 3.5 + inference time (ms/im): + - value: 59.52 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 36.9 + Weights: 
https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_retinanet_r50_caffe_fpn_1x_coco/ga_retinanet_r50_caffe_fpn_1x_coco_20201020-39581c6f.pth + + - Name: ga_retinanet_r101_caffe_fpn_1x_coco + In Collection: Guided Anchoring + Config: configs/guided_anchoring/ga_retinanet_r101_caffe_fpn_1x_coco.py + Metadata: + Training Memory (GB): 5.5 + inference time (ms/im): + - value: 77.52 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 39.0 + Weights: https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_retinanet_r101_caffe_fpn_1x_coco/ga_retinanet_r101_caffe_fpn_1x_coco_20200531-6266453c.pth + + - Name: ga_retinanet_x101_32x4d_fpn_1x_coco + In Collection: Guided Anchoring + Config: configs/guided_anchoring/ga_retinanet_x101_32x4d_fpn_1x_coco.py + Metadata: + Training Memory (GB): 6.9 + inference time (ms/im): + - value: 94.34 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 40.5 + Weights: https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_retinanet_x101_32x4d_fpn_1x_coco/ga_retinanet_x101_32x4d_fpn_1x_coco_20200219-40c56caa.pth + + - Name: ga_retinanet_x101_64x4d_fpn_1x_coco + In Collection: Guided Anchoring + Config: configs/guided_anchoring/ga_retinanet_x101_64x4d_fpn_1x_coco.py + Metadata: + Training Memory (GB): 9.9 + inference time (ms/im): + - value: 129.87 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.3 + Weights: https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_retinanet_x101_64x4d_fpn_1x_coco/ga_retinanet_x101_64x4d_fpn_1x_coco_20200226-ef9f7f1f.pth diff --git a/configs/hrnet/README.md b/configs/hrnet/README.md 
new file mode 100644 index 0000000..bff5ce0 --- /dev/null +++ b/configs/hrnet/README.md @@ -0,0 +1,88 @@ +# High-resolution networks (HRNets) for object detection + +## Introduction + + + +```latex +@inproceedings{SunXLW19, + title={Deep High-Resolution Representation Learning for Human Pose Estimation}, + author={Ke Sun and Bin Xiao and Dong Liu and Jingdong Wang}, + booktitle={CVPR}, + year={2019} +} + +@article{SunZJCXLMWLW19, + title={High-Resolution Representations for Labeling Pixels and Regions}, + author={Ke Sun and Yang Zhao and Borui Jiang and Tianheng Cheng and Bin Xiao + and Dong Liu and Yadong Mu and Xinggang Wang and Wenyu Liu and Jingdong Wang}, + journal = {CoRR}, + volume = {abs/1904.04514}, + year={2019} +} +``` + +## Results and Models + +### Faster R-CNN + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :-------------:|:------:| :------:| :--------:| +| HRNetV2p-W18 | pytorch | 1x | 6.6 | 13.4 | 36.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/faster_rcnn_hrnetv2p_w18_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w18_1x_coco/faster_rcnn_hrnetv2p_w18_1x_coco_20200130-56651a6d.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w18_1x_coco/faster_rcnn_hrnetv2p_w18_1x_coco_20200130_211246.log.json) | +| HRNetV2p-W18 | pytorch | 2x | 6.6 | - | 38.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/faster_rcnn_hrnetv2p_w18_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w18_2x_coco/faster_rcnn_hrnetv2p_w18_2x_coco_20200702_085731-a4ec0611.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w18_2x_coco/faster_rcnn_hrnetv2p_w18_2x_coco_20200702_085731.log.json) | +| HRNetV2p-W32 | pytorch | 1x | 9.0 | 12.4 | 40.2 | 
[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/faster_rcnn_hrnetv2p_w32_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w32_1x_coco/faster_rcnn_hrnetv2p_w32_1x_coco_20200130-6e286425.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w32_1x_coco/faster_rcnn_hrnetv2p_w32_1x_coco_20200130_204442.log.json) | +| HRNetV2p-W32 | pytorch | 2x | 9.0 | - | 41.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/faster_rcnn_hrnetv2p_w32_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w32_2x_coco/faster_rcnn_hrnetv2p_w32_2x_coco_20200529_015927-976a9c15.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w32_2x_coco/faster_rcnn_hrnetv2p_w32_2x_coco_20200529_015927.log.json) | +| HRNetV2p-W40 | pytorch | 1x | 10.4 | 10.5 | 41.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/faster_rcnn_hrnetv2p_w40_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w40_1x_coco/faster_rcnn_hrnetv2p_w40_1x_coco_20200210-95c1f5ce.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w40_1x_coco/faster_rcnn_hrnetv2p_w40_1x_coco_20200210_125315.log.json) | +| HRNetV2p-W40 | pytorch | 2x | 10.4 | - | 42.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/faster_rcnn_hrnetv2p_w40_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w40_2x_coco/faster_rcnn_hrnetv2p_w40_2x_coco_20200512_161033-0f236ef4.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w40_2x_coco/faster_rcnn_hrnetv2p_w40_2x_coco_20200512_161033.log.json) | + +### Mask R-CNN + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :-------------: 
| :-----: | :-----: | :------: | :-------------:|:------:| :------:|:------:|:--------:| +| HRNetV2p-W18 | pytorch | 1x | 7.0 | 11.7 | 37.7 | 34.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/mask_rcnn_hrnetv2p_w18_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w18_1x_coco/mask_rcnn_hrnetv2p_w18_1x_coco_20200205-1c3d78ed.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w18_1x_coco/mask_rcnn_hrnetv2p_w18_1x_coco_20200205_232523.log.json) | +| HRNetV2p-W18 | pytorch | 2x | 7.0 | - | 39.8 | 36.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/mask_rcnn_hrnetv2p_w18_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w18_2x_coco/mask_rcnn_hrnetv2p_w18_2x_coco_20200212-b3c825b1.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w18_2x_coco/mask_rcnn_hrnetv2p_w18_2x_coco_20200212_134222.log.json) | +| HRNetV2p-W32 | pytorch | 1x | 9.4 | 11.3 | 41.2 | 37.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/mask_rcnn_hrnetv2p_w32_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w32_1x_coco/mask_rcnn_hrnetv2p_w32_1x_coco_20200207-b29f616e.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w32_1x_coco/mask_rcnn_hrnetv2p_w32_1x_coco_20200207_055017.log.json) | +| HRNetV2p-W32 | pytorch | 2x | 9.4 | - | 42.5 | 37.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/mask_rcnn_hrnetv2p_w32_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w32_2x_coco/mask_rcnn_hrnetv2p_w32_2x_coco_20200213-45b75b4d.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w32_2x_coco/mask_rcnn_hrnetv2p_w32_2x_coco_20200213_150518.log.json) | +| HRNetV2p-W40 | 
pytorch | 1x | 10.9 | | 42.1 | 37.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/mask_rcnn_hrnetv2p_w40_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w40_1x_coco/mask_rcnn_hrnetv2p_w40_1x_coco_20200511_015646-66738b35.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w40_1x_coco/mask_rcnn_hrnetv2p_w40_1x_coco_20200511_015646.log.json) | +| HRNetV2p-W40 | pytorch | 2x | 10.9 | | 42.8 | 38.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/mask_rcnn_hrnetv2p_w40_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w40_2x_coco/mask_rcnn_hrnetv2p_w40_2x_coco_20200512_163732-aed5e4ab.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w40_2x_coco/mask_rcnn_hrnetv2p_w40_2x_coco_20200512_163732.log.json) | + +### Cascade R-CNN + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :-------------:|:------:| :------: | :--------: | +| HRNetV2p-W18 | pytorch | 20e | 7.0 | 11.0 | 41.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/cascade_rcnn_hrnetv2p_w18_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_rcnn_hrnetv2p_w18_20e_coco/cascade_rcnn_hrnetv2p_w18_20e_coco_20200210-434be9d7.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_rcnn_hrnetv2p_w18_20e_coco/cascade_rcnn_hrnetv2p_w18_20e_coco_20200210_105632.log.json) | +| HRNetV2p-W32 | pytorch | 20e | 9.4 | 11.0 | 43.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/cascade_rcnn_hrnetv2p_w32_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_rcnn_hrnetv2p_w32_20e_coco/cascade_rcnn_hrnetv2p_w32_20e_coco_20200208-928455a4.pth) | 
[log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_rcnn_hrnetv2p_w32_20e_coco/cascade_rcnn_hrnetv2p_w32_20e_coco_20200208_160511.log.json) | +| HRNetV2p-W40 | pytorch | 20e | 10.8 | | 43.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/cascade_rcnn_hrnetv2p_w40_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_rcnn_hrnetv2p_w40_20e_coco/cascade_rcnn_hrnetv2p_w40_20e_coco_20200512_161112-75e47b04.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_rcnn_hrnetv2p_w40_20e_coco/cascade_rcnn_hrnetv2p_w40_20e_coco_20200512_161112.log.json) | + +### Cascade Mask R-CNN + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :-------------:|:------:| :------:|:------:|:--------:| +| HRNetV2p-W18 | pytorch | 20e | 8.5 | 8.5 |41.6 |36.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/cascade_mask_rcnn_hrnetv2p_w18_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_mask_rcnn_hrnetv2p_w18_20e_coco/cascade_mask_rcnn_hrnetv2p_w18_20e_coco_20200210-b543cd2b.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_mask_rcnn_hrnetv2p_w18_20e_coco/cascade_mask_rcnn_hrnetv2p_w18_20e_coco_20200210_093149.log.json) | +| HRNetV2p-W32 | pytorch | 20e | | 8.3 |44.3 |38.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/cascade_mask_rcnn_hrnetv2p_w32_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_mask_rcnn_hrnetv2p_w32_20e_coco/cascade_mask_rcnn_hrnetv2p_w32_20e_coco_20200512_154043-39d9cf7b.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_mask_rcnn_hrnetv2p_w32_20e_coco/cascade_mask_rcnn_hrnetv2p_w32_20e_coco_20200512_154043.log.json) | +| HRNetV2p-W40 | pytorch | 20e | 12.5 | |45.1 |39.3 | 
[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/cascade_mask_rcnn_hrnetv2p_w40_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_mask_rcnn_hrnetv2p_w40_20e_coco/cascade_mask_rcnn_hrnetv2p_w40_20e_coco_20200527_204922-969c4610.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_mask_rcnn_hrnetv2p_w40_20e_coco/cascade_mask_rcnn_hrnetv2p_w40_20e_coco_20200527_204922.log.json) | + +### Hybrid Task Cascade (HTC) + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :-------------:|:------:| :------:|:------:|:--------:| +| HRNetV2p-W18 | pytorch | 20e | 10.8 | 4.7 | 42.8 | 37.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/htc_hrnetv2p_w18_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/htc_hrnetv2p_w18_20e_coco/htc_hrnetv2p_w18_20e_coco_20200210-b266988c.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/htc_hrnetv2p_w18_20e_coco/htc_hrnetv2p_w18_20e_coco_20200210_182735.log.json) | +| HRNetV2p-W32 | pytorch | 20e | 13.1 | 4.9 | 45.4 | 39.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/htc_hrnetv2p_w32_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/htc_hrnetv2p_w32_20e_coco/htc_hrnetv2p_w32_20e_coco_20200207-7639fa12.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/htc_hrnetv2p_w32_20e_coco/htc_hrnetv2p_w32_20e_coco_20200207_193153.log.json) | +| HRNetV2p-W40 | pytorch | 20e | 14.6 | | 46.4 | 40.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/htc_hrnetv2p_w40_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/htc_hrnetv2p_w40_20e_coco/htc_hrnetv2p_w40_20e_coco_20200529_183411-417c4d5b.pth) | 
[log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/htc_hrnetv2p_w40_20e_coco/htc_hrnetv2p_w40_20e_coco_20200529_183411.log.json) | + +### FCOS + +| Backbone | Style | GN | MS train | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +|:---------:|:-------:|:-------:|:--------:|:-------:|:------:|:------:|:------:|:------:|:--------:| +|HRNetV2p-W18| pytorch | Y | N | 1x | 13.0 | 12.9 | 35.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/fcos_hrnetv2p_w18_gn-head_4x4_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w18_gn-head_4x4_1x_coco/fcos_hrnetv2p_w18_gn-head_4x4_1x_coco_20201212_100710-4ad151de.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w18_gn-head_4x4_1x_coco/fcos_hrnetv2p_w18_gn-head_4x4_1x_coco_20201212_100710.log.json) | +|HRNetV2p-W18| pytorch | Y | N | 2x | 13.0 | - | 38.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/fcos_hrnetv2p_w18_gn-head_4x4_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w18_gn-head_4x4_2x_coco/fcos_hrnetv2p_w18_gn-head_4x4_2x_coco_20201212_101110-5c575fa5.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w18_gn-head_4x4_2x_coco/fcos_hrnetv2p_w18_gn-head_4x4_2x_coco_20201212_101110.log.json) | +|HRNetV2p-W32| pytorch | Y | N | 1x | 17.5 | 12.9 | 39.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/fcos_hrnetv2p_w32_gn-head_4x4_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w32_gn-head_4x4_1x_coco/fcos_hrnetv2p_w32_gn-head_4x4_1x_coco_20201211_134730-cb8055c0.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w32_gn-head_4x4_1x_coco/fcos_hrnetv2p_w32_gn-head_4x4_1x_coco_20201211_134730.log.json) | +|HRNetV2p-W32| pytorch | Y | N | 2x | 17.5 | - | 40.8 | 
[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/fcos_hrnetv2p_w32_gn-head_4x4_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w32_gn-head_4x4_2x_coco/fcos_hrnetv2p_w32_gn-head_4x4_2x_coco_20201212_112133-77b6b9bb.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w32_gn-head_4x4_2x_coco/fcos_hrnetv2p_w32_gn-head_4x4_2x_coco_20201212_112133.log.json) | +|HRNetV2p-W18| pytorch | Y | Y | 2x | 13.0 | 12.9 | 38.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/fcos_hrnetv2p_w18_gn-head_mstrain_640-800_4x4_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w18_gn-head_mstrain_640-800_4x4_2x_coco/fcos_hrnetv2p_w18_gn-head_mstrain_640-800_4x4_2x_coco_20201212_111651-441e9d9f.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w18_gn-head_mstrain_640-800_4x4_2x_coco/fcos_hrnetv2p_w18_gn-head_mstrain_640-800_4x4_2x_coco_20201212_111651.log.json) | +|HRNetV2p-W32| pytorch | Y | Y | 2x | 17.5 | 12.4 | 41.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/fcos_hrnetv2p_w32_gn-head_mstrain_640-800_4x4_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w32_gn-head_mstrain_640-800_4x4_2x_coco/fcos_hrnetv2p_w32_gn-head_mstrain_640-800_4x4_2x_coco_20201212_090846-b6f2b49f.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w32_gn-head_mstrain_640-800_4x4_2x_coco/fcos_hrnetv2p_w32_gn-head_mstrain_640-800_4x4_2x_coco_20201212_090846.log.json) | +|HRNetV2p-W40| pytorch | Y | Y | 2x | 20.3 | 10.8 | 42.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/hrnet/fcos_hrnetv2p_w40_gn-head_mstrain_640-800_4x4_2x_coco.py) |
[model](https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w40_gn-head_mstrain_640-800_4x4_2x_coco/fcos_hrnetv2p_w40_gn-head_mstrain_640-800_4x4_2x_coco_20201212_124752-f22d2ce5.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w40_gn-head_mstrain_640-800_4x4_2x_coco/fcos_hrnetv2p_w40_gn-head_mstrain_640-800_4x4_2x_coco_20201212_124752.log.json) | + +**Note:** + +- The `28e` schedule in HTC indicates decreasing the lr at 24 and 27 epochs, with a total of 28 epochs. +- HRNetV2 ImageNet pretrained models are in [HRNets for Image Classification](https://github.com/HRNet/HRNet-Image-Classification). diff --git a/configs/hrnet/cascade_mask_rcnn_hrnetv2p_w18_20e_coco.py b/configs/hrnet/cascade_mask_rcnn_hrnetv2p_w18_20e_coco.py new file mode 100644 index 0000000..839cf3e --- /dev/null +++ b/configs/hrnet/cascade_mask_rcnn_hrnetv2p_w18_20e_coco.py @@ -0,0 +1,11 @@ +_base_ = './cascade_mask_rcnn_hrnetv2p_w32_20e_coco.py' +# model settings +model = dict( + backbone=dict( + extra=dict( + stage2=dict(num_channels=(18, 36)), + stage3=dict(num_channels=(18, 36, 72)), + stage4=dict(num_channels=(18, 36, 72, 144))), + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w18')), + neck=dict(type='HRFPN', in_channels=[18, 36, 72, 144], out_channels=256)) diff --git a/configs/hrnet/cascade_mask_rcnn_hrnetv2p_w32_20e_coco.py b/configs/hrnet/cascade_mask_rcnn_hrnetv2p_w32_20e_coco.py new file mode 100644 index 0000000..9942602 --- /dev/null +++ b/configs/hrnet/cascade_mask_rcnn_hrnetv2p_w32_20e_coco.py @@ -0,0 +1,40 @@ +_base_ = '../cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + _delete_=True, + type='HRNet', + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(32, 64)), + stage3=dict( + num_modules=4, 
+ num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(32, 64, 128)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(32, 64, 128, 256))), + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w32')), + neck=dict( + _delete_=True, + type='HRFPN', + in_channels=[32, 64, 128, 256], + out_channels=256)) +# learning policy +lr_config = dict(step=[16, 19]) +runner = dict(type='EpochBasedRunner', max_epochs=20) diff --git a/configs/hrnet/cascade_mask_rcnn_hrnetv2p_w40_20e_coco.py b/configs/hrnet/cascade_mask_rcnn_hrnetv2p_w40_20e_coco.py new file mode 100644 index 0000000..10d5e83 --- /dev/null +++ b/configs/hrnet/cascade_mask_rcnn_hrnetv2p_w40_20e_coco.py @@ -0,0 +1,12 @@ +_base_ = './cascade_mask_rcnn_hrnetv2p_w32_20e_coco.py' +# model settings +model = dict( + backbone=dict( + type='HRNet', + extra=dict( + stage2=dict(num_channels=(40, 80)), + stage3=dict(num_channels=(40, 80, 160)), + stage4=dict(num_channels=(40, 80, 160, 320))), + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w40')), + neck=dict(type='HRFPN', in_channels=[40, 80, 160, 320], out_channels=256)) diff --git a/configs/hrnet/cascade_rcnn_hrnetv2p_w18_20e_coco.py b/configs/hrnet/cascade_rcnn_hrnetv2p_w18_20e_coco.py new file mode 100644 index 0000000..ebd5e20 --- /dev/null +++ b/configs/hrnet/cascade_rcnn_hrnetv2p_w18_20e_coco.py @@ -0,0 +1,11 @@ +_base_ = './cascade_rcnn_hrnetv2p_w32_20e_coco.py' +# model settings +model = dict( + backbone=dict( + extra=dict( + stage2=dict(num_channels=(18, 36)), + stage3=dict(num_channels=(18, 36, 72)), + stage4=dict(num_channels=(18, 36, 72, 144))), + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w18')), + neck=dict(type='HRFPN', in_channels=[18, 36, 72, 144], out_channels=256)) diff --git a/configs/hrnet/cascade_rcnn_hrnetv2p_w32_20e_coco.py b/configs/hrnet/cascade_rcnn_hrnetv2p_w32_20e_coco.py new file mode 
100644 index 0000000..e7f89a9 --- /dev/null +++ b/configs/hrnet/cascade_rcnn_hrnetv2p_w32_20e_coco.py @@ -0,0 +1,40 @@ +_base_ = '../cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + _delete_=True, + type='HRNet', + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(32, 64)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(32, 64, 128)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(32, 64, 128, 256))), + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w32')), + neck=dict( + _delete_=True, + type='HRFPN', + in_channels=[32, 64, 128, 256], + out_channels=256)) +# learning policy +lr_config = dict(step=[16, 19]) +runner = dict(type='EpochBasedRunner', max_epochs=20) diff --git a/configs/hrnet/cascade_rcnn_hrnetv2p_w40_20e_coco.py b/configs/hrnet/cascade_rcnn_hrnetv2p_w40_20e_coco.py new file mode 100644 index 0000000..265e8d6 --- /dev/null +++ b/configs/hrnet/cascade_rcnn_hrnetv2p_w40_20e_coco.py @@ -0,0 +1,12 @@ +_base_ = './cascade_rcnn_hrnetv2p_w32_20e_coco.py' +# model settings +model = dict( + backbone=dict( + type='HRNet', + extra=dict( + stage2=dict(num_channels=(40, 80)), + stage3=dict(num_channels=(40, 80, 160)), + stage4=dict(num_channels=(40, 80, 160, 320))), + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w40')), + neck=dict(type='HRFPN', in_channels=[40, 80, 160, 320], out_channels=256)) diff --git a/configs/hrnet/faster_rcnn_hrnetv2p_w18_1x_coco.py b/configs/hrnet/faster_rcnn_hrnetv2p_w18_1x_coco.py new file mode 100644 index 0000000..1df2c3d --- /dev/null +++ b/configs/hrnet/faster_rcnn_hrnetv2p_w18_1x_coco.py @@ -0,0 +1,11 @@ +_base_ = 
'./faster_rcnn_hrnetv2p_w32_1x_coco.py' +# model settings +model = dict( + backbone=dict( + extra=dict( + stage2=dict(num_channels=(18, 36)), + stage3=dict(num_channels=(18, 36, 72)), + stage4=dict(num_channels=(18, 36, 72, 144))), + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w18')), + neck=dict(type='HRFPN', in_channels=[18, 36, 72, 144], out_channels=256)) diff --git a/configs/hrnet/faster_rcnn_hrnetv2p_w18_2x_coco.py b/configs/hrnet/faster_rcnn_hrnetv2p_w18_2x_coco.py new file mode 100644 index 0000000..a4b987a --- /dev/null +++ b/configs/hrnet/faster_rcnn_hrnetv2p_w18_2x_coco.py @@ -0,0 +1,5 @@ +_base_ = './faster_rcnn_hrnetv2p_w18_1x_coco.py' + +# learning policy +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/hrnet/faster_rcnn_hrnetv2p_w32_1x_coco.py b/configs/hrnet/faster_rcnn_hrnetv2p_w32_1x_coco.py new file mode 100644 index 0000000..be05809 --- /dev/null +++ b/configs/hrnet/faster_rcnn_hrnetv2p_w32_1x_coco.py @@ -0,0 +1,37 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + _delete_=True, + type='HRNet', + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(32, 64)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(32, 64, 128)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(32, 64, 128, 256))), + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w32')), + neck=dict( + _delete_=True, + type='HRFPN', + in_channels=[32, 64, 128, 256], + out_channels=256)) diff --git a/configs/hrnet/faster_rcnn_hrnetv2p_w32_2x_coco.py b/configs/hrnet/faster_rcnn_hrnetv2p_w32_2x_coco.py new file mode 100644 index 
0000000..63c8717 --- /dev/null +++ b/configs/hrnet/faster_rcnn_hrnetv2p_w32_2x_coco.py @@ -0,0 +1,4 @@ +_base_ = './faster_rcnn_hrnetv2p_w32_1x_coco.py' +# learning policy +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/hrnet/faster_rcnn_hrnetv2p_w40_1x_coco.py b/configs/hrnet/faster_rcnn_hrnetv2p_w40_1x_coco.py new file mode 100644 index 0000000..886a7c9 --- /dev/null +++ b/configs/hrnet/faster_rcnn_hrnetv2p_w40_1x_coco.py @@ -0,0 +1,11 @@ +_base_ = './faster_rcnn_hrnetv2p_w32_1x_coco.py' +model = dict( + backbone=dict( + type='HRNet', + extra=dict( + stage2=dict(num_channels=(40, 80)), + stage3=dict(num_channels=(40, 80, 160)), + stage4=dict(num_channels=(40, 80, 160, 320))), + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w40')), + neck=dict(type='HRFPN', in_channels=[40, 80, 160, 320], out_channels=256)) diff --git a/configs/hrnet/faster_rcnn_hrnetv2p_w40_2x_coco.py b/configs/hrnet/faster_rcnn_hrnetv2p_w40_2x_coco.py new file mode 100644 index 0000000..585cc2c --- /dev/null +++ b/configs/hrnet/faster_rcnn_hrnetv2p_w40_2x_coco.py @@ -0,0 +1,4 @@ +_base_ = './faster_rcnn_hrnetv2p_w40_1x_coco.py' +# learning policy +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/hrnet/fcos_hrnetv2p_w18_gn-head_4x4_1x_coco.py b/configs/hrnet/fcos_hrnetv2p_w18_gn-head_4x4_1x_coco.py new file mode 100644 index 0000000..fd662bd --- /dev/null +++ b/configs/hrnet/fcos_hrnetv2p_w18_gn-head_4x4_1x_coco.py @@ -0,0 +1,10 @@ +_base_ = './fcos_hrnetv2p_w32_gn-head_4x4_1x_coco.py' +model = dict( + backbone=dict( + extra=dict( + stage2=dict(num_channels=(18, 36)), + stage3=dict(num_channels=(18, 36, 72)), + stage4=dict(num_channels=(18, 36, 72, 144))), + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w18')), + neck=dict(type='HRFPN', in_channels=[18, 36, 72, 144], out_channels=256)) diff --git 
a/configs/hrnet/fcos_hrnetv2p_w18_gn-head_4x4_2x_coco.py b/configs/hrnet/fcos_hrnetv2p_w18_gn-head_4x4_2x_coco.py new file mode 100644 index 0000000..3497595 --- /dev/null +++ b/configs/hrnet/fcos_hrnetv2p_w18_gn-head_4x4_2x_coco.py @@ -0,0 +1,4 @@ +_base_ = './fcos_hrnetv2p_w18_gn-head_4x4_1x_coco.py' +# learning policy +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/hrnet/fcos_hrnetv2p_w18_gn-head_mstrain_640-800_4x4_2x_coco.py b/configs/hrnet/fcos_hrnetv2p_w18_gn-head_mstrain_640-800_4x4_2x_coco.py new file mode 100644 index 0000000..37bfdae --- /dev/null +++ b/configs/hrnet/fcos_hrnetv2p_w18_gn-head_mstrain_640-800_4x4_2x_coco.py @@ -0,0 +1,10 @@ +_base_ = './fcos_hrnetv2p_w32_gn-head_mstrain_640-800_4x4_2x_coco.py' +model = dict( + backbone=dict( + extra=dict( + stage2=dict(num_channels=(18, 36)), + stage3=dict(num_channels=(18, 36, 72)), + stage4=dict(num_channels=(18, 36, 72, 144))), + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w18')), + neck=dict(type='HRFPN', in_channels=[18, 36, 72, 144], out_channels=256)) diff --git a/configs/hrnet/fcos_hrnetv2p_w32_gn-head_4x4_1x_coco.py b/configs/hrnet/fcos_hrnetv2p_w32_gn-head_4x4_1x_coco.py new file mode 100644 index 0000000..10617f2 --- /dev/null +++ b/configs/hrnet/fcos_hrnetv2p_w32_gn-head_4x4_1x_coco.py @@ -0,0 +1,70 @@ +_base_ = '../fcos/fcos_r50_caffe_fpn_gn-head_4x4_1x_coco.py' +model = dict( + backbone=dict( + _delete_=True, + type='HRNet', + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(32, 64)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(32, 64, 128)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(32, 64, 
128, 256))), + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w32')), + neck=dict( + _delete_=True, + type='HRFPN', + in_channels=[32, 64, 128, 256], + out_channels=256, + stride=2, + num_outs=5)) +img_norm_cfg = dict( + mean=[103.53, 116.28, 123.675], std=[57.375, 57.12, 58.395], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/hrnet/fcos_hrnetv2p_w32_gn-head_4x4_2x_coco.py b/configs/hrnet/fcos_hrnetv2p_w32_gn-head_4x4_2x_coco.py new file mode 100644 index 0000000..7b38130 --- /dev/null +++ b/configs/hrnet/fcos_hrnetv2p_w32_gn-head_4x4_2x_coco.py @@ -0,0 +1,4 @@ +_base_ = './fcos_hrnetv2p_w32_gn-head_4x4_1x_coco.py' +# learning policy +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/hrnet/fcos_hrnetv2p_w32_gn-head_mstrain_640-800_4x4_2x_coco.py b/configs/hrnet/fcos_hrnetv2p_w32_gn-head_mstrain_640-800_4x4_2x_coco.py new file mode 100644 index 0000000..482f887 --- /dev/null +++ b/configs/hrnet/fcos_hrnetv2p_w32_gn-head_mstrain_640-800_4x4_2x_coco.py @@ -0,0 +1,39 @@ +_base_ = 
'./fcos_hrnetv2p_w32_gn-head_4x4_1x_coco.py' +img_norm_cfg = dict( + mean=[103.53, 116.28, 123.675], std=[57.375, 57.12, 58.395], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 800)], + multiscale_mode='value', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +# learning policy +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/hrnet/fcos_hrnetv2p_w40_gn-head_mstrain_640-800_4x4_2x_coco.py b/configs/hrnet/fcos_hrnetv2p_w40_gn-head_mstrain_640-800_4x4_2x_coco.py new file mode 100644 index 0000000..0ae9dbe --- /dev/null +++ b/configs/hrnet/fcos_hrnetv2p_w40_gn-head_mstrain_640-800_4x4_2x_coco.py @@ -0,0 +1,11 @@ +_base_ = './fcos_hrnetv2p_w32_gn-head_mstrain_640-800_4x4_2x_coco.py' +model = dict( + backbone=dict( + type='HRNet', + extra=dict( + stage2=dict(num_channels=(40, 80)), + stage3=dict(num_channels=(40, 80, 160)), + stage4=dict(num_channels=(40, 80, 160, 320))), + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w40')), + neck=dict(type='HRFPN', in_channels=[40, 80, 160, 320], out_channels=256)) diff --git a/configs/hrnet/htc_hrnetv2p_w18_20e_coco.py 
b/configs/hrnet/htc_hrnetv2p_w18_20e_coco.py new file mode 100644 index 0000000..3c2eb1d --- /dev/null +++ b/configs/hrnet/htc_hrnetv2p_w18_20e_coco.py @@ -0,0 +1,10 @@ +_base_ = './htc_hrnetv2p_w32_20e_coco.py' +model = dict( + backbone=dict( + extra=dict( + stage2=dict(num_channels=(18, 36)), + stage3=dict(num_channels=(18, 36, 72)), + stage4=dict(num_channels=(18, 36, 72, 144))), + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w18')), + neck=dict(type='HRFPN', in_channels=[18, 36, 72, 144], out_channels=256)) diff --git a/configs/hrnet/htc_hrnetv2p_w32_20e_coco.py b/configs/hrnet/htc_hrnetv2p_w32_20e_coco.py new file mode 100644 index 0000000..545cb83 --- /dev/null +++ b/configs/hrnet/htc_hrnetv2p_w32_20e_coco.py @@ -0,0 +1,37 @@ +_base_ = '../htc/htc_r50_fpn_20e_coco.py' +model = dict( + backbone=dict( + _delete_=True, + type='HRNet', + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(32, 64)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(32, 64, 128)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(32, 64, 128, 256))), + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w32')), + neck=dict( + _delete_=True, + type='HRFPN', + in_channels=[32, 64, 128, 256], + out_channels=256)) diff --git a/configs/hrnet/htc_hrnetv2p_w40_20e_coco.py b/configs/hrnet/htc_hrnetv2p_w40_20e_coco.py new file mode 100644 index 0000000..94bff1b --- /dev/null +++ b/configs/hrnet/htc_hrnetv2p_w40_20e_coco.py @@ -0,0 +1,11 @@ +_base_ = './htc_hrnetv2p_w32_20e_coco.py' +model = dict( + backbone=dict( + type='HRNet', + extra=dict( + stage2=dict(num_channels=(40, 80)), + stage3=dict(num_channels=(40, 80, 160)), + 
stage4=dict(num_channels=(40, 80, 160, 320))), + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w40')), + neck=dict(type='HRFPN', in_channels=[40, 80, 160, 320], out_channels=256)) diff --git a/configs/hrnet/htc_hrnetv2p_w40_28e_coco.py b/configs/hrnet/htc_hrnetv2p_w40_28e_coco.py new file mode 100644 index 0000000..7067e8b --- /dev/null +++ b/configs/hrnet/htc_hrnetv2p_w40_28e_coco.py @@ -0,0 +1,4 @@ +_base_ = './htc_hrnetv2p_w40_20e_coco.py' +# learning policy +lr_config = dict(step=[24, 27]) +runner = dict(type='EpochBasedRunner', max_epochs=28) diff --git a/configs/hrnet/htc_x101_64x4d_fpn_16x1_28e_coco.py b/configs/hrnet/htc_x101_64x4d_fpn_16x1_28e_coco.py new file mode 100644 index 0000000..815f285 --- /dev/null +++ b/configs/hrnet/htc_x101_64x4d_fpn_16x1_28e_coco.py @@ -0,0 +1,4 @@ +_base_ = '../htc/htc_x101_64x4d_fpn_16x1_20e_coco.py' +# learning policy +lr_config = dict(step=[24, 27]) +runner = dict(type='EpochBasedRunner', max_epochs=28) diff --git a/configs/hrnet/mask_rcnn_hrnetv2p_w18_1x_coco.py b/configs/hrnet/mask_rcnn_hrnetv2p_w18_1x_coco.py new file mode 100644 index 0000000..cb12200 --- /dev/null +++ b/configs/hrnet/mask_rcnn_hrnetv2p_w18_1x_coco.py @@ -0,0 +1,10 @@ +_base_ = './mask_rcnn_hrnetv2p_w32_1x_coco.py' +model = dict( + backbone=dict( + extra=dict( + stage2=dict(num_channels=(18, 36)), + stage3=dict(num_channels=(18, 36, 72)), + stage4=dict(num_channels=(18, 36, 72, 144))), + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w18')), + neck=dict(type='HRFPN', in_channels=[18, 36, 72, 144], out_channels=256)) diff --git a/configs/hrnet/mask_rcnn_hrnetv2p_w18_2x_coco.py b/configs/hrnet/mask_rcnn_hrnetv2p_w18_2x_coco.py new file mode 100644 index 0000000..ca62682 --- /dev/null +++ b/configs/hrnet/mask_rcnn_hrnetv2p_w18_2x_coco.py @@ -0,0 +1,4 @@ +_base_ = './mask_rcnn_hrnetv2p_w18_1x_coco.py' +# learning policy +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', 
max_epochs=24) diff --git a/configs/hrnet/mask_rcnn_hrnetv2p_w32_1x_coco.py b/configs/hrnet/mask_rcnn_hrnetv2p_w32_1x_coco.py new file mode 100644 index 0000000..d5f0eb5 --- /dev/null +++ b/configs/hrnet/mask_rcnn_hrnetv2p_w32_1x_coco.py @@ -0,0 +1,37 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + _delete_=True, + type='HRNet', + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(32, 64)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(32, 64, 128)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(32, 64, 128, 256))), + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w32')), + neck=dict( + _delete_=True, + type='HRFPN', + in_channels=[32, 64, 128, 256], + out_channels=256)) diff --git a/configs/hrnet/mask_rcnn_hrnetv2p_w32_2x_coco.py b/configs/hrnet/mask_rcnn_hrnetv2p_w32_2x_coco.py new file mode 100644 index 0000000..63d5d13 --- /dev/null +++ b/configs/hrnet/mask_rcnn_hrnetv2p_w32_2x_coco.py @@ -0,0 +1,4 @@ +_base_ = './mask_rcnn_hrnetv2p_w32_1x_coco.py' +# learning policy +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/hrnet/mask_rcnn_hrnetv2p_w40_1x_coco.py b/configs/hrnet/mask_rcnn_hrnetv2p_w40_1x_coco.py new file mode 100644 index 0000000..5a76f4b --- /dev/null +++ b/configs/hrnet/mask_rcnn_hrnetv2p_w40_1x_coco.py @@ -0,0 +1,11 @@ +_base_ = './mask_rcnn_hrnetv2p_w18_1x_coco.py' +model = dict( + backbone=dict( + type='HRNet', + extra=dict( + stage2=dict(num_channels=(40, 80)), + stage3=dict(num_channels=(40, 80, 160)), + stage4=dict(num_channels=(40, 80, 160, 320))), + init_cfg=dict( + type='Pretrained', 
checkpoint='open-mmlab://msra/hrnetv2_w40')), + neck=dict(type='HRFPN', in_channels=[40, 80, 160, 320], out_channels=256)) diff --git a/configs/hrnet/mask_rcnn_hrnetv2p_w40_2x_coco.py b/configs/hrnet/mask_rcnn_hrnetv2p_w40_2x_coco.py new file mode 100644 index 0000000..3a2a510 --- /dev/null +++ b/configs/hrnet/mask_rcnn_hrnetv2p_w40_2x_coco.py @@ -0,0 +1,4 @@ +_base_ = './mask_rcnn_hrnetv2p_w40_1x_coco.py' +# learning policy +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/hrnet/metafile.yml b/configs/hrnet/metafile.yml new file mode 100644 index 0000000..37703aa --- /dev/null +++ b/configs/hrnet/metafile.yml @@ -0,0 +1,604 @@ +Collections: + - Name: HRNet + Metadata: + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - HRNet + Paper: + URL: https://arxiv.org/abs/1904.04514 + Title: 'Deep High-Resolution Representation Learning for Visual Recognition' + README: configs/hrnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195 + Version: v2.0.0 + +Models: + - Name: faster_rcnn_hrnetv2p_w18_1x_coco + In Collection: HRNet + Config: configs/hrnet/faster_rcnn_hrnetv2p_w18_1x_coco.py + Metadata: + Training Memory (GB): 6.6 + inference time (ms/im): + - value: 74.63 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 36.9 + Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w18_1x_coco/faster_rcnn_hrnetv2p_w18_1x_coco_20200130-56651a6d.pth + + - Name: faster_rcnn_hrnetv2p_w18_2x_coco + In Collection: HRNet + Config: configs/hrnet/faster_rcnn_hrnetv2p_w18_2x_coco.py + Metadata: + Training Memory (GB): 6.6 + inference time (ms/im): + - value: 74.63 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: 
FP32 + resolution: (800, 1333) + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 38.9 + Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w18_2x_coco/faster_rcnn_hrnetv2p_w18_2x_coco_20200702_085731-a4ec0611.pth + + - Name: faster_rcnn_hrnetv2p_w32_1x_coco + In Collection: HRNet + Config: configs/hrnet/faster_rcnn_hrnetv2p_w32_1x_coco.py + Metadata: + Training Memory (GB): 9.0 + inference time (ms/im): + - value: 80.65 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 40.2 + Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w32_1x_coco/faster_rcnn_hrnetv2p_w32_1x_coco_20200130-6e286425.pth + + - Name: faster_rcnn_hrnetv2p_w32_2x_coco + In Collection: HRNet + Config: configs/hrnet/faster_rcnn_hrnetv2p_w32_2x_coco.py + Metadata: + Training Memory (GB): 9.0 + inference time (ms/im): + - value: 80.65 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.4 + Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w32_2x_coco/faster_rcnn_hrnetv2p_w32_2x_coco_20200529_015927-976a9c15.pth + + - Name: faster_rcnn_hrnetv2p_w40_1x_coco + In Collection: HRNet + Config: configs/hrnet/faster_rcnn_hrnetv2p_w40_1x_coco.py + Metadata: + Training Memory (GB): 10.4 + inference time (ms/im): + - value: 95.24 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.2 + Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w40_1x_coco/faster_rcnn_hrnetv2p_w40_1x_coco_20200210-95c1f5ce.pth + + - Name: faster_rcnn_hrnetv2p_w40_2x_coco + In Collection: 
HRNet + Config: configs/hrnet/faster_rcnn_hrnetv2p_w40_2x_coco.py + Metadata: + Training Memory (GB): 10.4 + inference time (ms/im): + - value: 95.24 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 42.1 + Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w40_2x_coco/faster_rcnn_hrnetv2p_w40_2x_coco_20200512_161033-0f236ef4.pth + + - Name: mask_rcnn_hrnetv2p_w18_1x_coco + In Collection: HRNet + Config: configs/hrnet/mask_rcnn_hrnetv2p_w18_1x_coco.py + Metadata: + Training Memory (GB): 7.0 + inference time (ms/im): + - value: 85.47 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 37.7 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 34.2 + Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w18_1x_coco/mask_rcnn_hrnetv2p_w18_1x_coco_20200205-1c3d78ed.pth + + - Name: mask_rcnn_hrnetv2p_w18_2x_coco + In Collection: HRNet + Config: configs/hrnet/mask_rcnn_hrnetv2p_w18_2x_coco.py + Metadata: + Training Memory (GB): 7.0 + inference time (ms/im): + - value: 85.47 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 39.8 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 36.0 + Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w18_2x_coco/mask_rcnn_hrnetv2p_w18_2x_coco_20200212-b3c825b1.pth + + - Name: mask_rcnn_hrnetv2p_w32_1x_coco + In Collection: HRNet + Config: configs/hrnet/mask_rcnn_hrnetv2p_w32_1x_coco.py + Metadata: + Training Memory (GB): 9.4 + inference time (ms/im): + - value: 88.5 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + 
resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.2 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 37.1 + Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w32_1x_coco/mask_rcnn_hrnetv2p_w32_1x_coco_20200207-b29f616e.pth + + - Name: mask_rcnn_hrnetv2p_w32_2x_coco + In Collection: HRNet + Config: configs/hrnet/mask_rcnn_hrnetv2p_w32_2x_coco.py + Metadata: + Training Memory (GB): 9.4 + inference time (ms/im): + - value: 88.5 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 42.5 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 37.8 + Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w32_2x_coco/mask_rcnn_hrnetv2p_w32_2x_coco_20200213-45b75b4d.pth + + - Name: mask_rcnn_hrnetv2p_w40_1x_coco + In Collection: HRNet + Config: configs/hrnet/mask_rcnn_hrnetv2p_w40_1x_coco.py + Metadata: + Training Memory (GB): 10.9 + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 42.1 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 37.5 + Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w40_1x_coco/mask_rcnn_hrnetv2p_w40_1x_coco_20200511_015646-66738b35.pth + + - Name: mask_rcnn_hrnetv2p_w40_2x_coco + In Collection: HRNet + Config: configs/hrnet/mask_rcnn_hrnetv2p_w40_2x_coco.py + Metadata: + Training Memory (GB): 10.9 + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 42.8 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 38.2 + Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w40_2x_coco/mask_rcnn_hrnetv2p_w40_2x_coco_20200512_163732-aed5e4ab.pth + + - Name: cascade_rcnn_hrnetv2p_w18_20e_coco + In Collection: 
HRNet + Config: configs/hrnet/cascade_rcnn_hrnetv2p_w18_20e_coco.py + Metadata: + Training Memory (GB): 7.0 + inference time (ms/im): + - value: 90.91 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 20 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.2 + Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_rcnn_hrnetv2p_w18_20e_coco/cascade_rcnn_hrnetv2p_w18_20e_coco_20200210-434be9d7.pth + + - Name: cascade_rcnn_hrnetv2p_w32_20e_coco + In Collection: HRNet + Config: configs/hrnet/cascade_rcnn_hrnetv2p_w32_20e_coco.py + Metadata: + Training Memory (GB): 9.4 + inference time (ms/im): + - value: 90.91 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 20 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 43.3 + Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_rcnn_hrnetv2p_w32_20e_coco/cascade_rcnn_hrnetv2p_w32_20e_coco_20200208-928455a4.pth + + - Name: cascade_rcnn_hrnetv2p_w40_20e_coco + In Collection: HRNet + Config: configs/hrnet/cascade_rcnn_hrnetv2p_w40_20e_coco.py + Metadata: + Training Memory (GB): 10.8 + Epochs: 20 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 43.8 + Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_rcnn_hrnetv2p_w40_20e_coco/cascade_rcnn_hrnetv2p_w40_20e_coco_20200512_161112-75e47b04.pth + + - Name: cascade_mask_rcnn_hrnetv2p_w18_20e_coco + In Collection: HRNet + Config: configs/hrnet/cascade_mask_rcnn_hrnetv2p_w18_20e_coco.py + Metadata: + Training Memory (GB): 8.5 + inference time (ms/im): + - value: 117.65 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 20 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.6 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 36.4 + Weights: 
https://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_mask_rcnn_hrnetv2p_w18_20e_coco/cascade_mask_rcnn_hrnetv2p_w18_20e_coco_20200210-b543cd2b.pth + + - Name: cascade_mask_rcnn_hrnetv2p_w32_20e_coco + In Collection: HRNet + Config: configs/hrnet/cascade_mask_rcnn_hrnetv2p_w32_20e_coco.py + Metadata: + inference time (ms/im): + - value: 120.48 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 20 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 44.3 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 38.6 + Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_mask_rcnn_hrnetv2p_w32_20e_coco/cascade_mask_rcnn_hrnetv2p_w32_20e_coco_20200512_154043-39d9cf7b.pth + + - Name: cascade_mask_rcnn_hrnetv2p_w40_20e_coco + In Collection: HRNet + Config: configs/hrnet/cascade_mask_rcnn_hrnetv2p_w40_20e_coco.py + Metadata: + Training Memory (GB): 12.5 + Epochs: 20 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 45.1 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 39.3 + Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_mask_rcnn_hrnetv2p_w40_20e_coco/cascade_mask_rcnn_hrnetv2p_w40_20e_coco_20200527_204922-969c4610.pth + + - Name: htc_hrnetv2p_w18_20e_coco + In Collection: HRNet + Config: configs/hrnet/htc_hrnetv2p_w18_20e_coco.py + Metadata: + Training Memory (GB): 10.8 + inference time (ms/im): + - value: 212.77 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 20 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 42.8 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 37.9 + Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/htc_hrnetv2p_w18_20e_coco/htc_hrnetv2p_w18_20e_coco_20200210-b266988c.pth + + - Name: htc_hrnetv2p_w32_20e_coco + In Collection: HRNet + Config: 
configs/hrnet/htc_hrnetv2p_w32_20e_coco.py + Metadata: + Training Memory (GB): 13.1 + inference time (ms/im): + - value: 204.08 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 20 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 45.4 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 39.9 + Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/htc_hrnetv2p_w32_20e_coco/htc_hrnetv2p_w32_20e_coco_20200207-7639fa12.pth + + - Name: htc_hrnetv2p_w40_20e_coco + In Collection: HRNet + Config: configs/hrnet/htc_hrnetv2p_w40_20e_coco.py + Metadata: + Training Memory (GB): 14.6 + Epochs: 20 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 46.4 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 40.8 + Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/htc_hrnetv2p_w40_20e_coco/htc_hrnetv2p_w40_20e_coco_20200529_183411-417c4d5b.pth + + - Name: fcos_hrnetv2p_w18_gn-head_4x4_1x_coco + In Collection: HRNet + Config: configs/hrnet/fcos_hrnetv2p_w18_gn-head_4x4_1x_coco.py + Metadata: + Training Resources: 4x V100 GPUs + Batch Size: 16 + Training Memory (GB): 13.0 + inference time (ms/im): + - value: 77.52 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 35.3 + Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w18_gn-head_4x4_1x_coco/fcos_hrnetv2p_w18_gn-head_4x4_1x_coco_20201212_100710-4ad151de.pth + + - Name: fcos_hrnetv2p_w18_gn-head_4x4_2x_coco + In Collection: HRNet + Config: configs/hrnet/fcos_hrnetv2p_w18_gn-head_4x4_2x_coco.py + Metadata: + Training Resources: 4x V100 GPUs + Batch Size: 16 + Training Memory (GB): 13.0 + inference time (ms/im): + - value: 77.52 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 
24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 38.2 + Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w18_gn-head_4x4_2x_coco/fcos_hrnetv2p_w18_gn-head_4x4_2x_coco_20201212_101110-5c575fa5.pth + + - Name: fcos_hrnetv2p_w32_gn-head_4x4_1x_coco + In Collection: HRNet + Config: configs/hrnet/fcos_hrnetv2p_w32_gn-head_4x4_1x_coco.py + Metadata: + Training Resources: 4x V100 GPUs + Batch Size: 16 + Training Memory (GB): 17.5 + inference time (ms/im): + - value: 77.52 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 39.5 + Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w32_gn-head_4x4_1x_coco/fcos_hrnetv2p_w32_gn-head_4x4_1x_coco_20201211_134730-cb8055c0.pth + + - Name: fcos_hrnetv2p_w32_gn-head_4x4_2x_coco + In Collection: HRNet + Config: configs/hrnet/fcos_hrnetv2p_w32_gn-head_4x4_2x_coco.py + Metadata: + Training Resources: 4x V100 GPUs + Batch Size: 16 + Training Memory (GB): 17.5 + inference time (ms/im): + - value: 77.52 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 40.8 + Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w32_gn-head_4x4_2x_coco/fcos_hrnetv2p_w32_gn-head_4x4_2x_coco_20201212_112133-77b6b9bb.pth + + - Name: fcos_hrnetv2p_w18_gn-head_mstrain_640-800_4x4_2x_coco + In Collection: HRNet + Config: configs/hrnet/fcos_hrnetv2p_w18_gn-head_mstrain_640-800_4x4_2x_coco.py + Metadata: + Training Resources: 4x V100 GPUs + Batch Size: 16 + Training Memory (GB): 13.0 + inference time (ms/im): + - value: 77.52 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 38.3 
+ Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w18_gn-head_mstrain_640-800_4x4_2x_coco/fcos_hrnetv2p_w18_gn-head_mstrain_640-800_4x4_2x_coco_20201212_111651-441e9d9f.pth + + - Name: fcos_hrnetv2p_w32_gn-head_mstrain_640-800_4x4_2x_coco + In Collection: HRNet + Config: configs/hrnet/fcos_hrnetv2p_w32_gn-head_mstrain_640-800_4x4_2x_coco.py + Metadata: + Training Resources: 4x V100 GPUs + Batch Size: 16 + Training Memory (GB): 17.5 + inference time (ms/im): + - value: 80.65 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.9 + Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w32_gn-head_mstrain_640-800_4x4_2x_coco/fcos_hrnetv2p_w32_gn-head_mstrain_640-800_4x4_2x_coco_20201212_090846-b6f2b49f.pth + + - Name: fcos_hrnetv2p_w40_gn-head_mstrain_640-800_4x4_2x_coco + In Collection: HRNet + Config: configs/hrnet/fcos_hrnetv2p_w40_gn-head_mstrain_640-800_4x4_2x_coco.py + Metadata: + Training Resources: 4x V100 GPUs + Batch Size: 16 + Training Memory (GB): 20.3 + inference time (ms/im): + - value: 92.59 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 42.7 + Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w40_gn-head_mstrain_640-800_4x4_2x_coco/fcos_hrnetv2p_w40_gn-head_mstrain_640-800_4x4_2x_coco_20201212_124752-f22d2ce5.pth diff --git a/configs/htc/README.md b/configs/htc/README.md new file mode 100644 index 0000000..1625827 --- /dev/null +++ b/configs/htc/README.md @@ -0,0 +1,57 @@ +# Hybrid Task Cascade for Instance Segmentation + +## Introduction + + + +We provide config files to reproduce the results in the CVPR 2019 paper for [Hybrid Task Cascade](https://arxiv.org/abs/1901.07518). 
+ +```latex +@inproceedings{chen2019hybrid, + title={Hybrid task cascade for instance segmentation}, + author={Chen, Kai and Pang, Jiangmiao and Wang, Jiaqi and Xiong, Yu and Li, Xiaoxiao and Sun, Shuyang and Feng, Wansen and Liu, Ziwei and Shi, Jianping and Ouyang, Wanli and Chen Change Loy and Dahua Lin}, + booktitle={IEEE Conference on Computer Vision and Pattern Recognition}, + year={2019} +} +``` + +## Dataset + +HTC requires the COCO and [COCO-stuff](http://calvin.inf.ed.ac.uk/wp-content/uploads/data/cocostuffdataset/stuffthingmaps_trainval2017.zip) datasets for training. You need to download COCO-stuff and extract it into the COCO dataset path. +The directory structure should look like this. + +```none +mmdetection +├── mmdet +├── tools +├── configs +├── data +│ ├── coco +│ │ ├── annotations +│ │ ├── train2017 +│ │ ├── val2017 +│ │ ├── test2017 +│ │ ├── stuffthingmaps +``` + +## Results and Models + +The results on COCO 2017val are shown in the table below. (Results on test-dev are usually slightly higher than on val.) + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +|:---------:|:-------:|:-------:|:--------:|:--------------:|:------:|:-------:|:------:|:--------:| +| R-50-FPN | pytorch | 1x | 8.2 | 5.8 | 42.3 | 37.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/htc/htc_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/htc/htc_r50_fpn_1x_coco/htc_r50_fpn_1x_coco_20200317-7332cf16.pth) &#124; [log](https://download.openmmlab.com/mmdetection/v2.0/htc/htc_r50_fpn_1x_coco/htc_r50_fpn_1x_coco_20200317_070435.log.json) | +| R-50-FPN | pytorch | 20e | 8.2 | - | 43.3 | 38.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/htc/htc_r50_fpn_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/htc/htc_r50_fpn_20e_coco/htc_r50_fpn_20e_coco_20200319-fe28c577.pth) &#124;
[log](https://download.openmmlab.com/mmdetection/v2.0/htc/htc_r50_fpn_20e_coco/htc_r50_fpn_20e_coco_20200319_070313.log.json) | +| R-101-FPN | pytorch | 20e | 10.2 | 5.5 | 44.8 | 39.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/htc/htc_r101_fpn_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/htc/htc_r101_fpn_20e_coco/htc_r101_fpn_20e_coco_20200317-9b41b48f.pth) &#124; [log](https://download.openmmlab.com/mmdetection/v2.0/htc/htc_r101_fpn_20e_coco/htc_r101_fpn_20e_coco_20200317_153107.log.json) | +| X-101-32x4d-FPN | pytorch |20e| 11.4 | 5.0 | 46.1 | 40.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/htc/htc_x101_32x4d_fpn_16x1_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/htc/htc_x101_32x4d_fpn_16x1_20e_coco/htc_x101_32x4d_fpn_16x1_20e_coco_20200318-de97ae01.pth) &#124; [log](https://download.openmmlab.com/mmdetection/v2.0/htc/htc_x101_32x4d_fpn_16x1_20e_coco/htc_x101_32x4d_fpn_16x1_20e_coco_20200318_034519.log.json) | +| X-101-64x4d-FPN | pytorch |20e| 14.5 | 4.4 | 47.0 | 41.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/htc/htc_x101_64x4d_fpn_16x1_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/htc/htc_x101_64x4d_fpn_16x1_20e_coco/htc_x101_64x4d_fpn_16x1_20e_coco_20200318-b181fd7a.pth) &#124; [log](https://download.openmmlab.com/mmdetection/v2.0/htc/htc_x101_64x4d_fpn_16x1_20e_coco/htc_x101_64x4d_fpn_16x1_20e_coco_20200318_081711.log.json) | + +- In the HTC paper and COCO 2018 Challenge, `score_thr` is set to 0.001 for both baselines and HTC. +- We use 8 GPUs with 2 images/GPU for R-50 and R-101 models, and 16 GPUs with 1 image/GPU for X-101 models. + If you would like to train X-101 HTC with 8 GPUs, you need to change the lr from 0.02 to 0.01. + +We also provide a powerful HTC model with DCN and multi-scale training. No testing augmentation is used.
+ +| Backbone | Style | DCN | training scales | Lr schd | box AP | mask AP | Config | Download | +|:----------------:|:-------:|:-----:|:---------------:|:-------:|:------:|:-------:|:------:|:--------:| +| X-101-64x4d-FPN | pytorch | c3-c5 | 400~1400 | 20e | 50.4 | 43.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/htc/htc_x101_64x4d_fpn_dconv_c3-c5_mstrain_400_1400_16x1_20e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/htc/htc_x101_64x4d_fpn_dconv_c3-c5_mstrain_400_1400_16x1_20e_coco/htc_x101_64x4d_fpn_dconv_c3-c5_mstrain_400_1400_16x1_20e_coco_20200312-946fd751.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/htc/htc_x101_64x4d_fpn_dconv_c3-c5_mstrain_400_1400_16x1_20e_coco/htc_x101_64x4d_fpn_dconv_c3-c5_mstrain_400_1400_16x1_20e_coco_20200312_203410.log.json) | diff --git a/configs/htc/htc_r101_fpn_20e_coco.py b/configs/htc/htc_r101_fpn_20e_coco.py new file mode 100644 index 0000000..b42297b --- /dev/null +++ b/configs/htc/htc_r101_fpn_20e_coco.py @@ -0,0 +1,9 @@ +_base_ = './htc_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + depth=101, + init_cfg=dict(type='Pretrained', + checkpoint='torchvision://resnet101'))) +# learning policy +lr_config = dict(step=[16, 19]) +runner = dict(type='EpochBasedRunner', max_epochs=20) diff --git a/configs/htc/htc_r50_fpn_1x_coco.py b/configs/htc/htc_r50_fpn_1x_coco.py new file mode 100644 index 0000000..1e8e18a --- /dev/null +++ b/configs/htc/htc_r50_fpn_1x_coco.py @@ -0,0 +1,56 @@ +_base_ = './htc_without_semantic_r50_fpn_1x_coco.py' +model = dict( + roi_head=dict( + semantic_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), + out_channels=256, + featmap_strides=[8]), + semantic_head=dict( + type='FusedSemanticHead', + num_ins=5, + fusion_level=1, + num_convs=4, + in_channels=256, + conv_out_channels=256, + num_classes=183, + loss_seg=dict( + type='CrossEntropyLoss', ignore_index=255, 
loss_weight=0.2)))) +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='LoadAnnotations', with_bbox=True, with_mask=True, with_seg=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='SegRescale', scale_factor=1 / 8), + dict(type='DefaultFormatBundle'), + dict( + type='Collect', + keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict( + seg_prefix=data_root + 'stuffthingmaps/train2017/', + pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/htc/htc_r50_fpn_20e_coco.py b/configs/htc/htc_r50_fpn_20e_coco.py new file mode 100644 index 0000000..7d2e011 --- /dev/null +++ b/configs/htc/htc_r50_fpn_20e_coco.py @@ -0,0 +1,4 @@ +_base_ = './htc_r50_fpn_1x_coco.py' +# learning policy +lr_config = dict(step=[16, 19]) +runner = dict(type='EpochBasedRunner', max_epochs=20) diff --git a/configs/htc/htc_without_semantic_r50_fpn_1x_coco.py b/configs/htc/htc_without_semantic_r50_fpn_1x_coco.py new file mode 100644 index 0000000..565104f --- /dev/null +++ b/configs/htc/htc_without_semantic_r50_fpn_1x_coco.py @@ -0,0 +1,236 @@ +_base_ = [ + '../_base_/datasets/coco_instance.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +# model settings +model = dict( + 
type='HybridTaskCascade', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5), + rpn_head=dict( + type='RPNHead', + in_channels=256, + feat_channels=256, + anchor_generator=dict( + type='AnchorGenerator', + scales=[8], + ratios=[0.5, 1.0, 2.0], + strides=[4, 8, 16, 32, 64]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0]), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), + roi_head=dict( + type='HybridTaskCascadeRoIHead', + interleaved=True, + mask_info_flow=True, + num_stages=3, + stage_loss_weights=[1, 0.5, 0.25], + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + bbox_head=[ + dict( + type='Shared2FCBBoxHead', + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2]), + reg_class_agnostic=True, + loss_cls=dict( + type='CrossEntropyLoss', + use_sigmoid=False, + loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, + loss_weight=1.0)), + dict( + type='Shared2FCBBoxHead', + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.05, 0.05, 0.1, 0.1]), + reg_class_agnostic=True, + loss_cls=dict( + type='CrossEntropyLoss', + use_sigmoid=False, + loss_weight=1.0), + 
loss_bbox=dict(type='SmoothL1Loss', beta=1.0, + loss_weight=1.0)), + dict( + type='Shared2FCBBoxHead', + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.033, 0.033, 0.067, 0.067]), + reg_class_agnostic=True, + loss_cls=dict( + type='CrossEntropyLoss', + use_sigmoid=False, + loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) + ], + mask_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + mask_head=[ + dict( + type='HTCMaskHead', + with_conv_res=False, + num_convs=4, + in_channels=256, + conv_out_channels=256, + num_classes=80, + loss_mask=dict( + type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)), + dict( + type='HTCMaskHead', + num_convs=4, + in_channels=256, + conv_out_channels=256, + num_classes=80, + loss_mask=dict( + type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)), + dict( + type='HTCMaskHead', + num_convs=4, + in_channels=256, + conv_out_channels=256, + num_classes=80, + loss_mask=dict( + type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)) + ]), + # model training and testing settings + train_cfg=dict( + rpn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + allowed_border=0, + pos_weight=-1, + debug=False), + rpn_proposal=dict( + nms_pre=2000, + max_per_img=2000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0), + rcnn=[ + dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0.5, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + 
add_gt_as_proposals=True), + mask_size=28, + pos_weight=-1, + debug=False), + dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.6, + neg_iou_thr=0.6, + min_pos_iou=0.6, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True), + mask_size=28, + pos_weight=-1, + debug=False), + dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.7, + min_pos_iou=0.7, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True), + mask_size=28, + pos_weight=-1, + debug=False) + ]), + test_cfg=dict( + rpn=dict( + nms_pre=1000, + max_per_img=1000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0), + rcnn=dict( + score_thr=0.001, + nms=dict(type='nms', iou_threshold=0.5), + max_per_img=100, + mask_thr_binary=0.5))) +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + val=dict(pipeline=test_pipeline), test=dict(pipeline=test_pipeline)) diff --git a/configs/htc/htc_x101_32x4d_fpn_16x1_20e_coco.py b/configs/htc/htc_x101_32x4d_fpn_16x1_20e_coco.py new file mode 100644 index 0000000..0c834f2 --- /dev/null +++ b/configs/htc/htc_x101_32x4d_fpn_16x1_20e_coco.py @@ -0,0 +1,19 @@ +_base_ = './htc_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + 
style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d'))) +data = dict(samples_per_gpu=1, workers_per_gpu=1) +# learning policy +lr_config = dict(step=[16, 19]) +runner = dict(type='EpochBasedRunner', max_epochs=20) diff --git a/configs/htc/htc_x101_64x4d_fpn_16x1_20e_coco.py b/configs/htc/htc_x101_64x4d_fpn_16x1_20e_coco.py new file mode 100644 index 0000000..8b0d962 --- /dev/null +++ b/configs/htc/htc_x101_64x4d_fpn_16x1_20e_coco.py @@ -0,0 +1,19 @@ +_base_ = './htc_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d'))) +data = dict(samples_per_gpu=1, workers_per_gpu=1) +# learning policy +lr_config = dict(step=[16, 19]) +runner = dict(type='EpochBasedRunner', max_epochs=20) diff --git a/configs/htc/htc_x101_64x4d_fpn_dconv_c3-c5_mstrain_400_1400_16x1_20e_coco.py b/configs/htc/htc_x101_64x4d_fpn_dconv_c3-c5_mstrain_400_1400_16x1_20e_coco.py new file mode 100644 index 0000000..c8d8703 --- /dev/null +++ b/configs/htc/htc_x101_64x4d_fpn_dconv_c3-c5_mstrain_400_1400_16x1_20e_coco.py @@ -0,0 +1,43 @@ +_base_ = './htc_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True), + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d'))) +# dataset settings +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + 
type='LoadAnnotations', with_bbox=True, with_mask=True, with_seg=True), + dict( + type='Resize', + img_scale=[(1600, 400), (1600, 1400)], + multiscale_mode='range', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='SegRescale', scale_factor=1 / 8), + dict(type='DefaultFormatBundle'), + dict( + type='Collect', + keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']), +] +data = dict( + samples_per_gpu=1, workers_per_gpu=1, train=dict(pipeline=train_pipeline)) +# learning policy +lr_config = dict(step=[16, 19]) +runner = dict(type='EpochBasedRunner', max_epochs=20) diff --git a/configs/htc/metafile.yml b/configs/htc/metafile.yml new file mode 100644 index 0000000..acd038c --- /dev/null +++ b/configs/htc/metafile.yml @@ -0,0 +1,165 @@ +Collections: + - Name: HTC + Metadata: + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - FPN + - HTC + - RPN + - ResNet + - ResNeXt + - RoIAlign + Paper: + URL: https://arxiv.org/abs/1901.07518 + Title: 'Hybrid Task Cascade for Instance Segmentation' + README: configs/htc/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/detectors/htc.py#L6 + Version: v2.0.0 + +Models: + - Name: htc_r50_fpn_1x_coco + In Collection: HTC + Config: configs/htc/htc_r50_fpn_1x_coco.py + Metadata: + Training Memory (GB): 8.2 + inference time (ms/im): + - value: 172.41 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 42.3 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 37.4 + Weights: https://download.openmmlab.com/mmdetection/v2.0/htc/htc_r50_fpn_1x_coco/htc_r50_fpn_1x_coco_20200317-7332cf16.pth + + - Name: htc_r50_fpn_20e_coco + In Collection: HTC + Config: 
configs/htc/htc_r50_fpn_20e_coco.py + Metadata: + Training Memory (GB): 8.2 + inference time (ms/im): + - value: 172.41 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 20 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 43.3 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 38.3 + Weights: https://download.openmmlab.com/mmdetection/v2.0/htc/htc_r50_fpn_20e_coco/htc_r50_fpn_20e_coco_20200319-fe28c577.pth + + - Name: htc_r101_fpn_20e_coco + In Collection: HTC + Config: configs/htc/htc_r101_fpn_20e_coco.py + Metadata: + Training Memory (GB): 10.2 + inference time (ms/im): + - value: 181.82 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 20 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 44.8 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 39.6 + Weights: https://download.openmmlab.com/mmdetection/v2.0/htc/htc_r101_fpn_20e_coco/htc_r101_fpn_20e_coco_20200317-9b41b48f.pth + + - Name: htc_x101_32x4d_fpn_16x1_20e_coco + In Collection: HTC + Config: configs/htc/htc_x101_32x4d_fpn_16x1_20e_coco.py + Metadata: + Training Resources: 16x V100 GPUs + Batch Size: 16 + Training Memory (GB): 11.4 + inference time (ms/im): + - value: 200 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 20 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 46.1 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 40.5 + Weights: https://download.openmmlab.com/mmdetection/v2.0/htc/htc_x101_32x4d_fpn_16x1_20e_coco/htc_x101_32x4d_fpn_16x1_20e_coco_20200318-de97ae01.pth + + - Name: htc_x101_64x4d_fpn_16x1_20e_coco + In Collection: HTC + Config: configs/htc/htc_x101_64x4d_fpn_16x1_20e_coco.py + Metadata: + Training Resources: 16x V100 GPUs + Batch Size: 16 + Training Memory (GB): 14.5 + inference time (ms/im): + - 
value: 227.27 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 20 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 47.0 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 41.4 + Weights: https://download.openmmlab.com/mmdetection/v2.0/htc/htc_x101_64x4d_fpn_16x1_20e_coco/htc_x101_64x4d_fpn_16x1_20e_coco_20200318-b181fd7a.pth + + - Name: htc_x101_64x4d_fpn_dconv_c3-c5_mstrain_400_1400_16x1_20e_coco + In Collection: HTC + Config: configs/htc/htc_x101_64x4d_fpn_dconv_c3-c5_mstrain_400_1400_16x1_20e_coco.py + Metadata: + Training Resources: 16x V100 GPUs + Batch Size: 16 + Epochs: 20 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 50.4 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 43.8 + Weights: https://download.openmmlab.com/mmdetection/v2.0/htc/htc_x101_64x4d_fpn_dconv_c3-c5_mstrain_400_1400_16x1_20e_coco/htc_x101_64x4d_fpn_dconv_c3-c5_mstrain_400_1400_16x1_20e_coco_20200312-946fd751.pth diff --git a/configs/instaboost/README.md b/configs/instaboost/README.md new file mode 100644 index 0000000..6d365ac --- /dev/null +++ b/configs/instaboost/README.md @@ -0,0 +1,44 @@ +# InstaBoost for MMDetection + + + +Configs in this directory is the implementation for ICCV2019 paper "InstaBoost: Boosting Instance Segmentation Via Probability Map Guided Copy-Pasting" and provided by the authors of the paper. InstaBoost is a data augmentation method for object detection and instance segmentation. The paper has been released on [`arXiv`](https://arxiv.org/abs/1908.07801). 
+ +```latex +@inproceedings{fang2019instaboost, + title={Instaboost: Boosting instance segmentation via probability map guided copy-pasting}, + author={Fang, Hao-Shu and Sun, Jianhua and Wang, Runzhong and Gou, Minghao and Li, Yong-Lu and Lu, Cewu}, + booktitle={Proceedings of the IEEE International Conference on Computer Vision}, + pages={682--691}, + year={2019} +} +``` + +## Usage + +### Requirements + +You need to install `instaboostfast` before using it. + +```shell +pip install instaboostfast +``` + +The code and more details can be found [here](https://github.com/GothicAi/Instaboost). + +### Integration with MMDetection + +InstaBoost has already been integrated into the data pipeline, so all you need to do is add or change the **InstaBoost** configuration after **LoadImageFromFile**. We have provided examples like [this](mask_rcnn_r50_fpn_instaboost_4x_coco.py). You can refer to [`InstaBoostConfig`](https://github.com/GothicAi/InstaBoost-pypi#instaboostconfig) for more details. + +## Results and Models + +- All models were trained on `coco_2017_train` and tested on `coco_2017_val` for convenience of evaluation and comparison. In the paper, the results are obtained from `test-dev`. +- To balance accuracy and training time when using InstaBoost, models released on this page are all trained for 48 epochs. Other training and testing configs strictly follow the original framework. +- For results and models in MMDetection V1.x, please refer to [Instaboost](https://github.com/GothicAi/Instaboost). 
+ +| Network | Backbone | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :-------------: | :--------: | :-----: | :------: | :------------: | :------:| :-----: | :------: | :-----------------: | +| Mask R-CNN | R-50-FPN | 4x | 4.4 | 17.5 | 40.6 | 36.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/instaboost/mask_rcnn_r50_fpn_instaboost_4x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/instaboost/mask_rcnn_r50_fpn_instaboost_4x_coco/mask_rcnn_r50_fpn_instaboost_4x_coco_20200307-d025f83a.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/instaboost/mask_rcnn_r50_fpn_instaboost_4x_coco/mask_rcnn_r50_fpn_instaboost_4x_coco_20200307_223635.log.json) | +| Mask R-CNN | R-101-FPN | 4x | 6.4 | | 42.5 | 38.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/instaboost/mask_rcnn_r101_fpn_instaboost_4x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/instaboost/mask_rcnn_r101_fpn_instaboost_4x_coco/mask_rcnn_r101_fpn_instaboost_4x_coco_20200703_235738-f23f3a5f.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/instaboost/mask_rcnn_r101_fpn_instaboost_4x_coco/mask_rcnn_r101_fpn_instaboost_4x_coco_20200703_235738.log.json) | +| Mask R-CNN | X-101-64x4d-FPN | 4x | 10.7 | | 44.7 | 39.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/instaboost/mask_rcnn_x101_64x4d_fpn_instaboost_4x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/instaboost/mask_rcnn_x101_64x4d_fpn_instaboost_4x_coco/mask_rcnn_x101_64x4d_fpn_instaboost_4x_coco_20200515_080947-8ed58c1b.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/instaboost/mask_rcnn_x101_64x4d_fpn_instaboost_4x_coco/mask_rcnn_x101_64x4d_fpn_instaboost_4x_coco_20200515_080947.log.json) | +| Cascade R-CNN | R-101-FPN | 4x | 6.0 | 12.0 | 43.7 | 38.0 | 
[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/instaboost/cascade_mask_rcnn_r50_fpn_instaboost_4x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/instaboost/cascade_mask_rcnn_r50_fpn_instaboost_4x_coco/cascade_mask_rcnn_r50_fpn_instaboost_4x_coco_20200307-c19d98d9.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/instaboost/cascade_mask_rcnn_r50_fpn_instaboost_4x_coco/cascade_mask_rcnn_r50_fpn_instaboost_4x_coco_20200307_223646.log.json) | diff --git a/configs/instaboost/cascade_mask_rcnn_r101_fpn_instaboost_4x_coco.py b/configs/instaboost/cascade_mask_rcnn_r101_fpn_instaboost_4x_coco.py new file mode 100644 index 0000000..9d0515d --- /dev/null +++ b/configs/instaboost/cascade_mask_rcnn_r101_fpn_instaboost_4x_coco.py @@ -0,0 +1,7 @@ +_base_ = './cascade_mask_rcnn_r50_fpn_instaboost_4x_coco.py' + +model = dict( + backbone=dict( + depth=101, + init_cfg=dict(type='Pretrained', + checkpoint='torchvision://resnet101'))) diff --git a/configs/instaboost/cascade_mask_rcnn_r50_fpn_instaboost_4x_coco.py b/configs/instaboost/cascade_mask_rcnn_r50_fpn_instaboost_4x_coco.py new file mode 100644 index 0000000..a89a81f --- /dev/null +++ b/configs/instaboost/cascade_mask_rcnn_r50_fpn_instaboost_4x_coco.py @@ -0,0 +1,28 @@ +_base_ = '../cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='InstaBoost', + action_candidate=('normal', 'horizontal', 'skip'), + action_prob=(1, 0, 0), + scale=(0.8, 1.2), + dx=15, + dy=15, + theta=(-1, 1), + color_prob=0.5, + hflag=False, + aug_ratio=0.5), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + 
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +data = dict(train=dict(pipeline=train_pipeline)) +# learning policy +lr_config = dict(step=[32, 44]) +runner = dict(type='EpochBasedRunner', max_epochs=48) diff --git a/configs/instaboost/cascade_mask_rcnn_x101_64x4d_fpn_instaboost_4x_coco.py b/configs/instaboost/cascade_mask_rcnn_x101_64x4d_fpn_instaboost_4x_coco.py new file mode 100644 index 0000000..d67b799 --- /dev/null +++ b/configs/instaboost/cascade_mask_rcnn_x101_64x4d_fpn_instaboost_4x_coco.py @@ -0,0 +1,14 @@ +_base_ = './cascade_mask_rcnn_r50_fpn_instaboost_4x_coco.py' +model = dict( + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d'))) diff --git a/configs/instaboost/mask_rcnn_r101_fpn_instaboost_4x_coco.py b/configs/instaboost/mask_rcnn_r101_fpn_instaboost_4x_coco.py new file mode 100644 index 0000000..ebbb43e --- /dev/null +++ b/configs/instaboost/mask_rcnn_r101_fpn_instaboost_4x_coco.py @@ -0,0 +1,6 @@ +_base_ = './mask_rcnn_r50_fpn_instaboost_4x_coco.py' +model = dict( + backbone=dict( + depth=101, + init_cfg=dict(type='Pretrained', + checkpoint='torchvision://resnet101'))) diff --git a/configs/instaboost/mask_rcnn_r50_fpn_instaboost_4x_coco.py b/configs/instaboost/mask_rcnn_r50_fpn_instaboost_4x_coco.py new file mode 100644 index 0000000..55ca62b --- /dev/null +++ b/configs/instaboost/mask_rcnn_r50_fpn_instaboost_4x_coco.py @@ -0,0 +1,28 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='InstaBoost', + action_candidate=('normal', 'horizontal', 'skip'), + action_prob=(1, 0, 0), + scale=(0.8, 1.2), + dx=15, + dy=15, + 
theta=(-1, 1), + color_prob=0.5, + hflag=False, + aug_ratio=0.5), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +data = dict(train=dict(pipeline=train_pipeline)) +# learning policy +lr_config = dict(step=[32, 44]) +runner = dict(type='EpochBasedRunner', max_epochs=48) diff --git a/configs/instaboost/mask_rcnn_x101_64x4d_fpn_instaboost_4x_coco.py b/configs/instaboost/mask_rcnn_x101_64x4d_fpn_instaboost_4x_coco.py new file mode 100644 index 0000000..2010f44 --- /dev/null +++ b/configs/instaboost/mask_rcnn_x101_64x4d_fpn_instaboost_4x_coco.py @@ -0,0 +1,14 @@ +_base_ = './mask_rcnn_r50_fpn_instaboost_4x_coco.py' +model = dict( + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d'))) diff --git a/configs/instaboost/metafile.yml b/configs/instaboost/metafile.yml new file mode 100644 index 0000000..325283d --- /dev/null +++ b/configs/instaboost/metafile.yml @@ -0,0 +1,99 @@ +Collections: + - Name: InstaBoost + Metadata: + Training Data: COCO + Training Techniques: + - InstaBoost + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Paper: + URL: https://arxiv.org/abs/1908.07801 + Title: 'Instaboost: Boosting instance segmentation via probability map guided copy-pasting' + README: configs/instaboost/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/datasets/pipelines/instaboost.py#L7 + Version: v2.0.0 + +Models: + - Name: mask_rcnn_r50_fpn_instaboost_4x_coco + In Collection: 
InstaBoost + Config: configs/instaboost/mask_rcnn_r50_fpn_instaboost_4x_coco.py + Metadata: + Training Memory (GB): 4.4 + inference time (ms/im): + - value: 57.14 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 48 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 40.6 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 36.6 + Weights: https://download.openmmlab.com/mmdetection/v2.0/instaboost/mask_rcnn_r50_fpn_instaboost_4x_coco/mask_rcnn_r50_fpn_instaboost_4x_coco_20200307-d025f83a.pth + + - Name: mask_rcnn_r101_fpn_instaboost_4x_coco + In Collection: InstaBoost + Config: configs/instaboost/mask_rcnn_r101_fpn_instaboost_4x_coco.py + Metadata: + Training Memory (GB): 6.4 + Epochs: 48 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 42.5 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 38.0 + Weights: https://download.openmmlab.com/mmdetection/v2.0/instaboost/mask_rcnn_r101_fpn_instaboost_4x_coco/mask_rcnn_r101_fpn_instaboost_4x_coco_20200703_235738-f23f3a5f.pth + + - Name: mask_rcnn_x101_64x4d_fpn_instaboost_4x_coco + In Collection: InstaBoost + Config: configs/instaboost/mask_rcnn_x101_64x4d_fpn_instaboost_4x_coco.py + Metadata: + Training Memory (GB): 10.7 + Epochs: 48 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 44.7 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 39.7 + Weights: https://download.openmmlab.com/mmdetection/v2.0/instaboost/mask_rcnn_x101_64x4d_fpn_instaboost_4x_coco/mask_rcnn_x101_64x4d_fpn_instaboost_4x_coco_20200515_080947-8ed58c1b.pth + + - Name: cascade_mask_rcnn_r50_fpn_instaboost_4x_coco + In Collection: InstaBoost + Config: configs/instaboost/cascade_mask_rcnn_r50_fpn_instaboost_4x_coco.py + Metadata: + Training Memory (GB): 6.0 + inference time (ms/im): + - value: 83.33 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: 
(800, 1333) + Epochs: 48 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 43.7 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 38.0 + Weights: https://download.openmmlab.com/mmdetection/v2.0/instaboost/cascade_mask_rcnn_r50_fpn_instaboost_4x_coco/cascade_mask_rcnn_r50_fpn_instaboost_4x_coco_20200307-c19d98d9.pth diff --git a/configs/ld/README.md b/configs/ld/README.md new file mode 100644 index 0000000..0177f1e --- /dev/null +++ b/configs/ld/README.md @@ -0,0 +1,31 @@ +# Localization Distillation for Object Detection + +## Introduction + + + +```latex +@Article{zheng2021LD, + title={Localization Distillation for Object Detection}, + author= {Zhaohui Zheng, Rongguang Ye, Ping Wang, Jun Wang, Dongwei Ren, Wangmeng Zuo}, + journal={arXiv:2102.12252}, + year={2021} +} +``` + +### GFocalV1 with LD + +| Teacher | Student | Training schedule | Mini-batch size | AP (val) | AP50 (val) | AP75 (val) | Config | +| :-------: | :-----: | :---------------: | :-------------: | :------: | :--------: | :--------: | :--------------: | +| -- | R-18 | 1x | 6 | 35.8 | 53.1 | 38.2 | | +| R-101 | R-18 | 1x | 6 | 36.5 | 52.9 | 39.3 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/ld/ld_r18_gflv1_r101_fpn_coco_1x.py) | +| -- | R-34 | 1x | 6 | 38.9 | 56.6 | 42.2 | | +| R-101 | R-34 | 1x | 6 | 39.8 | 56.6 | 43.1 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/ld/ld_r34_gflv1_r101_fpn_coco_1x.py) | +| -- | R-50 | 1x | 6 | 40.1 | 58.2 | 43.1 | | +| R-101 | R-50 | 1x | 6 | 41.1 | 58.7 | 44.9 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/ld/ld_r50_gflv1_r101_fpn_coco_1x.py) | +| -- | R-101 | 2x | 6 | 44.6 | 62.9 | 48.4 | | +| R-101-DCN | R-101 | 2x | 6 | 45.4 | 63.1 | 49.5 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/ld/ld_r101_gflv1_r101dcn_fpn_coco_2x.py) | + +## Note + +- Meaning of Config name: ld_r18(student model)_gflv1(based on 
gflv1)_r101(teacher model)_fpn(neck)_coco(dataset)_1x(12 epoch).py diff --git a/configs/ld/ld_r101_gflv1_r101dcn_fpn_coco_2x.py b/configs/ld/ld_r101_gflv1_r101dcn_fpn_coco_2x.py new file mode 100644 index 0000000..1cbdb4c --- /dev/null +++ b/configs/ld/ld_r101_gflv1_r101dcn_fpn_coco_2x.py @@ -0,0 +1,44 @@ +_base_ = ['./ld_r18_gflv1_r101_fpn_coco_1x.py'] +teacher_ckpt = 'https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_r101_fpn_dconv_c3-c5_mstrain_2x_coco/gfl_r101_fpn_dconv_c3-c5_mstrain_2x_coco_20200630_102002-134b07df.pth' # noqa +model = dict( + teacher_config='configs/gfl/gfl_r101_fpn_dconv_c3-c5_mstrain_2x_coco.py', + teacher_ckpt=teacher_ckpt, + backbone=dict( + type='ResNet', + depth=101, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict(type='Pretrained', + checkpoint='torchvision://resnet101')), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + start_level=1, + add_extra_convs='on_output', + num_outs=5)) + +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) +# multi-scale training +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', + img_scale=[(1333, 480), (1333, 800)], + multiscale_mode='range', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +data = dict(train=dict(pipeline=train_pipeline)) diff --git a/configs/ld/ld_r18_gflv1_r101_fpn_coco_1x.py b/configs/ld/ld_r18_gflv1_r101_fpn_coco_1x.py new file mode 100644 index 0000000..18dce81 --- /dev/null +++ b/configs/ld/ld_r18_gflv1_r101_fpn_coco_1x.py @@ 
-0,0 +1,62 @@ +_base_ = [ + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +teacher_ckpt = 'https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_r101_fpn_mstrain_2x_coco/gfl_r101_fpn_mstrain_2x_coco_20200629_200126-dd12f847.pth' # noqa +model = dict( + type='KnowledgeDistillationSingleStageDetector', + teacher_config='configs/gfl/gfl_r101_fpn_mstrain_2x_coco.py', + teacher_ckpt=teacher_ckpt, + backbone=dict( + type='ResNet', + depth=18, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet18')), + neck=dict( + type='FPN', + in_channels=[64, 128, 256, 512], + out_channels=256, + start_level=1, + add_extra_convs='on_output', + num_outs=5), + bbox_head=dict( + type='LDHead', + num_classes=80, + in_channels=256, + stacked_convs=4, + feat_channels=256, + anchor_generator=dict( + type='AnchorGenerator', + ratios=[1.0], + octave_base_scale=8, + scales_per_octave=1, + strides=[8, 16, 32, 64, 128]), + loss_cls=dict( + type='QualityFocalLoss', + use_sigmoid=True, + beta=2.0, + loss_weight=1.0), + loss_dfl=dict(type='DistributionFocalLoss', loss_weight=0.25), + loss_ld=dict( + type='KnowledgeDistillationKLDivLoss', loss_weight=0.25, T=10), + reg_max=16, + loss_bbox=dict(type='GIoULoss', loss_weight=2.0)), + # training and testing settings + train_cfg=dict( + assigner=dict(type='ATSSAssigner', topk=9), + allowed_border=-1, + pos_weight=-1, + debug=False), + test_cfg=dict( + nms_pre=1000, + min_bbox_size=0, + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.6), + max_per_img=100)) + +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) diff --git a/configs/ld/ld_r34_gflv1_r101_fpn_coco_1x.py b/configs/ld/ld_r34_gflv1_r101_fpn_coco_1x.py new file mode 100644 index 0000000..3b6996d --- /dev/null +++ 
b/configs/ld/ld_r34_gflv1_r101_fpn_coco_1x.py @@ -0,0 +1,19 @@ +_base_ = ['./ld_r18_gflv1_r101_fpn_coco_1x.py'] +model = dict( + backbone=dict( + type='ResNet', + depth=34, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet34')), + neck=dict( + type='FPN', + in_channels=[64, 128, 256, 512], + out_channels=256, + start_level=1, + add_extra_convs='on_output', + num_outs=5)) diff --git a/configs/ld/ld_r50_gflv1_r101_fpn_coco_1x.py b/configs/ld/ld_r50_gflv1_r101_fpn_coco_1x.py new file mode 100644 index 0000000..2b18785 --- /dev/null +++ b/configs/ld/ld_r50_gflv1_r101_fpn_coco_1x.py @@ -0,0 +1,19 @@ +_base_ = ['./ld_r18_gflv1_r101_fpn_coco_1x.py'] +model = dict( + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + start_level=1, + add_extra_convs='on_output', + num_outs=5)) diff --git a/configs/ld/metafile.yml b/configs/ld/metafile.yml new file mode 100644 index 0000000..cd833bf --- /dev/null +++ b/configs/ld/metafile.yml @@ -0,0 +1,72 @@ +Collections: + - Name: Localization Distillation + Metadata: + Training Data: COCO + Training Techniques: + - Localization Distillation + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - FPN + - ResNet + Paper: + URL: https://arxiv.org/abs/2102.12252 + Title: 'Localization Distillation for Object Detection' + README: configs/ld/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.11.0/mmdet/models/dense_heads/ld_head.py#L11 + Version: v2.11.0 + +Models: + - Name: ld_r18_gflv1_r101_fpn_coco_1x + In Collection: 
Localization Distillation + Config: configs/ld/ld_r18_gflv1_r101_fpn_coco_1x.py + Metadata: + Teacher: R-101 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 36.5 + box AP@0.5: 52.9 + box AP@0.75: 39.3 + + - Name: ld_r34_gflv1_r101_fpn_coco_1x + In Collection: Localization Distillation + Config: configs/ld/ld_r34_gflv1_r101_fpn_coco_1x.py + Metadata: + Teacher: R-101 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 39.8 + box AP@0.5: 56.6 + box AP@0.75: 43.1 + + - Name: ld_r50_gflv1_r101_fpn_coco_1x + In Collection: Localization Distillation + Config: configs/ld/ld_r50_gflv1_r101_fpn_coco_1x.py + Metadata: + Teacher: R-101 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.1 + box AP@0.5: 58.7 + box AP@0.75: 44.9 + + - Name: ld_r101_gflv1_r101dcn_fpn_coco_2x + In Collection: Localization Distillation + Config: configs/ld/ld_r101_gflv1_r101dcn_fpn_coco_2x.py + Metadata: + Teacher: R-101-DCN + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 45.4 + box AP@0.5: 63.1 + box AP@0.75: 49.5 diff --git a/configs/legacy_1.x/README.md b/configs/legacy_1.x/README.md new file mode 100644 index 0000000..38a2a0e --- /dev/null +++ b/configs/legacy_1.x/README.md @@ -0,0 +1,53 @@ +# Legacy Configs in MMDetection V1.x + + + +Configs in this directory implement the legacy configs used by MMDetection V1.x and its model zoos. + +To help users convert their models from V1.x to MMDetection V2.0, we provide v1.x configs to run inference with the converted v1.x models. +Due to the BC-breaking changes in MMDetection V2.0 from MMDetection V1.x, running inference with the same model weights in these two versions will produce different results. The resulting difference is within 1% absolute AP, as can be found in the following table. + +## Usage + +To upgrade the model version, the users need to do the following steps. + +### 1. 
Convert model weights + +There are three main differences in the model weights between V1.x and V2.0 codebases. + +1. Since the class order in all the detector's classification branch is reordered, all the legacy model weights need to go through the conversion process. +2. The regression and segmentation head no longer contain the background channel. Weights in these background channels should be removed to fit in the current codebase. +3. For two-stage detectors, their weights need to be upgraded since MMDetection V2.0 refactors all the two-stage detectors with `RoIHead`. + +The users can do the same modification as mentioned above for the self-implemented +detectors. We provide a script `tools/model_converters/upgrade_model_version.py` to convert the model weights in the V1.x model zoo. + +```bash +python tools/model_converters/upgrade_model_version.py ${OLD_MODEL_PATH} ${NEW_MODEL_PATH} --num-classes ${NUM_CLASSES} + +``` + +- OLD_MODEL_PATH: the path to load the model weights in 1.x version. +- NEW_MODEL_PATH: the path to save the converted model weights in 2.0 version. +- NUM_CLASSES: number of classes of the original model weights. Usually it is 81 for COCO dataset, 21 for VOC dataset. + The number of classes in V2.0 models should be equal to that in V1.x models - 1. + +### 2. Use configs with legacy settings + +After converting the model weights, checkout to the v1.2 release to find the corresponding config file that uses the legacy settings. +The V1.x models usually need these three legacy modules: `LegacyAnchorGenerator`, `LegacyDeltaXYWHBBoxCoder`, and `RoIAlign(aligned=False)`. +For models using ResNet Caffe backbones, they also need to change the pretrain name and the corresponding `img_norm_cfg`. +An example is in [`retinanet_r50_caffe_fpn_1x_coco_v1.py`](retinanet_r50_caffe_fpn_1x_coco_v1.py) +Then use the config to test the model weights. For most models, the obtained results should be close to that in V1.x.
+We provide configs of some common structures in this directory. + +## Performance + +The performance changes after converting the models in this directory are listed below. +| Method | Style | Lr schd | V1.x box AP | V1.x mask AP | V2.0 box AP | V2.0 mask AP | Config | Download | +| :-------------: | :-----: | :-----: | :------:| :-----: |:------:| :-----: | :-------: |:------------------------------------------------------------------------------------------------------------------------------: | +| Mask R-CNN R-50-FPN | pytorch | 1x | 37.3 | 34.2 | 36.8 | 33.9 | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/legacy_1.x/mask_rcnn_r50_fpn_1x_coco_v1.py) | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/mask_rcnn_r50_fpn_1x_20181010-069fa190.pth)| +| RetinaNet R-50-FPN | caffe | 1x | 35.8 | - | 35.4 | - | [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/legacy_1.x/retinanet_r50_caffe_fpn_1x_coco_v1.py) | - | +| RetinaNet R-50-FPN | pytorch | 1x | 35.6 |-|35.2| -| [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/legacy_1.x/retinanet_r50_fpn_1x_coco_v1.py) | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/retinanet_r50_fpn_1x_20181125-7b0c2548.pth) | +| Cascade Mask R-CNN R-50-FPN | pytorch | 1x | 41.2 | 35.7 |40.8| 35.6| [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/legacy_1.x/cascade_mask_rcnn_r50_fpn_1x_coco_v1.py) | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/cascade_mask_rcnn_r50_fpn_1x_20181123-88b170c9.pth) | +| SSD300-VGG16 | caffe | 120e | 25.7 |-|25.4|-| [config](https://github.com/open-mmlab/mmdetection/blob/master/configs/legacy_1.x/ssd300_coco_v1.py) | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ssd300_coco_vgg16_caffe_120e_20181221-84d7110b.pth) | diff --git a/configs/legacy_1.x/cascade_mask_rcnn_r50_fpn_1x_coco_v1.py
b/configs/legacy_1.x/cascade_mask_rcnn_r50_fpn_1x_coco_v1.py new file mode 100644 index 0000000..fc9d004 --- /dev/null +++ b/configs/legacy_1.x/cascade_mask_rcnn_r50_fpn_1x_coco_v1.py @@ -0,0 +1,79 @@ +_base_ = [ + '../_base_/models/cascade_mask_rcnn_r50_fpn.py', + '../_base_/datasets/coco_instance.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +model = dict( + type='CascadeRCNN', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5), + rpn_head=dict( + anchor_generator=dict(type='LegacyAnchorGenerator', center_offset=0.5), + bbox_coder=dict( + type='LegacyDeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0])), + roi_head=dict( + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict( + type='RoIAlign', + output_size=7, + sampling_ratio=2, + aligned=False)), + bbox_head=[ + dict( + type='Shared2FCBBoxHead', + reg_class_agnostic=True, + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type='LegacyDeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2])), + dict( + type='Shared2FCBBoxHead', + reg_class_agnostic=True, + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type='LegacyDeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.05, 0.05, 0.1, 0.1])), + dict( + type='Shared2FCBBoxHead', + reg_class_agnostic=True, + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type='LegacyDeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.033, 0.033, 0.067, 0.067])), + ], 
+ mask_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict( + type='RoIAlign', + output_size=14, + sampling_ratio=2, + aligned=False)))) +dist_params = dict(backend='nccl', port=29515) diff --git a/configs/legacy_1.x/faster_rcnn_r50_fpn_1x_coco_v1.py b/configs/legacy_1.x/faster_rcnn_r50_fpn_1x_coco_v1.py new file mode 100644 index 0000000..8c573be --- /dev/null +++ b/configs/legacy_1.x/faster_rcnn_r50_fpn_1x_coco_v1.py @@ -0,0 +1,38 @@ +_base_ = [ + '../_base_/models/faster_rcnn_r50_fpn.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] + +model = dict( + type='FasterRCNN', + backbone=dict( + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), + rpn_head=dict( + type='RPNHead', + anchor_generator=dict( + type='LegacyAnchorGenerator', + center_offset=0.5, + scales=[8], + ratios=[0.5, 1.0, 2.0], + strides=[4, 8, 16, 32, 64]), + bbox_coder=dict(type='LegacyDeltaXYWHBBoxCoder'), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), + roi_head=dict( + type='StandardRoIHead', + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict( + type='RoIAlign', + output_size=7, + sampling_ratio=2, + aligned=False), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + bbox_head=dict( + bbox_coder=dict(type='LegacyDeltaXYWHBBoxCoder'), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))), + # model training and testing settings + train_cfg=dict( + rpn_proposal=dict(max_per_img=2000), + rcnn=dict(assigner=dict(match_low_quality=True)))) diff --git a/configs/legacy_1.x/mask_rcnn_r50_fpn_1x_coco_v1.py b/configs/legacy_1.x/mask_rcnn_r50_fpn_1x_coco_v1.py new file mode 100644 index 0000000..04581bb --- /dev/null +++ b/configs/legacy_1.x/mask_rcnn_r50_fpn_1x_coco_v1.py @@ -0,0 +1,34 @@ +_base_ = [ + '../_base_/models/mask_rcnn_r50_fpn.py', + '../_base_/datasets/coco_instance.py', + '../_base_/schedules/schedule_1x.py', 
'../_base_/default_runtime.py' +] + +model = dict( + rpn_head=dict( + anchor_generator=dict(type='LegacyAnchorGenerator', center_offset=0.5), + bbox_coder=dict(type='LegacyDeltaXYWHBBoxCoder'), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), + roi_head=dict( + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict( + type='RoIAlign', + output_size=7, + sampling_ratio=2, + aligned=False)), + mask_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict( + type='RoIAlign', + output_size=14, + sampling_ratio=2, + aligned=False)), + bbox_head=dict( + bbox_coder=dict(type='LegacyDeltaXYWHBBoxCoder'), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))), + + # model training and testing settings + train_cfg=dict( + rpn_proposal=dict(max_per_img=2000), + rcnn=dict(assigner=dict(match_low_quality=True)))) diff --git a/configs/legacy_1.x/retinanet_r50_caffe_fpn_1x_coco_v1.py b/configs/legacy_1.x/retinanet_r50_caffe_fpn_1x_coco_v1.py new file mode 100644 index 0000000..a63d248 --- /dev/null +++ b/configs/legacy_1.x/retinanet_r50_caffe_fpn_1x_coco_v1.py @@ -0,0 +1,41 @@ +_base_ = './retinanet_r50_fpn_1x_coco_v1.py' +model = dict( + backbone=dict( + norm_cfg=dict(requires_grad=False), + norm_eval=True, + style='caffe', + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://detectron/resnet50_caffe'))) +# use caffe img_norm +img_norm_cfg = dict( + mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + 
img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/legacy_1.x/retinanet_r50_fpn_1x_coco_v1.py b/configs/legacy_1.x/retinanet_r50_fpn_1x_coco_v1.py new file mode 100644 index 0000000..6198b97 --- /dev/null +++ b/configs/legacy_1.x/retinanet_r50_fpn_1x_coco_v1.py @@ -0,0 +1,17 @@ +_base_ = [ + '../_base_/models/retinanet_r50_fpn.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +model = dict( + bbox_head=dict( + type='RetinaHead', + anchor_generator=dict( + type='LegacyAnchorGenerator', + center_offset=0.5, + octave_base_scale=4, + scales_per_octave=3, + ratios=[0.5, 1.0, 2.0], + strides=[8, 16, 32, 64, 128]), + bbox_coder=dict(type='LegacyDeltaXYWHBBoxCoder'), + loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0))) diff --git a/configs/legacy_1.x/ssd300_coco_v1.py b/configs/legacy_1.x/ssd300_coco_v1.py new file mode 100644 index 0000000..b194e76 --- /dev/null +++ b/configs/legacy_1.x/ssd300_coco_v1.py @@ -0,0 +1,79 @@ +_base_ = [ + '../_base_/models/ssd300.py', '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_2x.py', '../_base_/default_runtime.py' +] +# model settings +input_size = 300 +model = dict( + bbox_head=dict( + type='SSDHead', + anchor_generator=dict( + type='LegacySSDAnchorGenerator', + scale_major=False, + input_size=input_size, + basesize_ratio_range=(0.15, 0.9), + strides=[8, 16, 32, 64, 100, 300], + ratios=[[2], [2, 3], [2, 3], [2, 3], [2], [2]]), + bbox_coder=dict( + type='LegacyDeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[0.1, 0.1, 0.2, 0.2]))) 
+# dataset settings +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile', to_float32=True), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='PhotoMetricDistortion', + brightness_delta=32, + contrast_range=(0.5, 1.5), + saturation_range=(0.5, 1.5), + hue_delta=18), + dict( + type='Expand', + mean=img_norm_cfg['mean'], + to_rgb=img_norm_cfg['to_rgb'], + ratio_range=(1, 4)), + dict( + type='MinIoURandomCrop', + min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), + min_crop_size=0.3), + dict(type='Resize', img_scale=(300, 300), keep_ratio=False), + dict(type='Normalize', **img_norm_cfg), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(300, 300), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=False), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=8, + workers_per_gpu=3, + train=dict( + _delete_=True, + type='RepeatDataset', + times=5, + dataset=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + pipeline=train_pipeline)), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +# optimizer +optimizer = dict(type='SGD', lr=2e-3, momentum=0.9, weight_decay=5e-4) +optimizer_config = dict(_delete_=True) +dist_params = dict(backend='nccl', port=29555) diff --git a/configs/libra_rcnn/README.md b/configs/libra_rcnn/README.md new file mode 100644 index 0000000..8b78af4 --- /dev/null +++ b/configs/libra_rcnn/README.md @@ -0,0 +1,41 @@ +# Libra R-CNN: Towards Balanced Learning for Object Detection + +## Introduction + + + +We 
provide config files to reproduce the results in the CVPR 2019 paper [Libra R-CNN](https://arxiv.org/pdf/1904.02701.pdf). + +The extended version of [Libra R-CNN](https://arxiv.org/pdf/2108.10175.pdf) has been accepted by IJCV. + +``` +@inproceedings{pang2019libra, + title={Libra R-CNN: Towards Balanced Learning for Object Detection}, + author={Pang, Jiangmiao and Chen, Kai and Shi, Jianping and Feng, Huajun and Ouyang, Wanli and Dahua Lin}, + booktitle={IEEE Conference on Computer Vision and Pattern Recognition}, + year={2019} +} + +@article{pang2021towards, + title={Towards Balanced Learning for Instance Recognition}, + author={Pang, Jiangmiao and Chen, Kai and Li, Qi and Xu, Zhihai and Feng, Huajun and Shi, Jianping and Ouyang, Wanli and Lin, Dahua}, + journal={International Journal of Computer Vision}, + volume={129}, + number={5}, + pages={1376--1393}, + year={2021}, + publisher={Springer} +} +``` + +## Results and models + +The results on COCO 2017val are shown in the table below. (results on test-dev are usually slightly higher than val) + +| Architecture | Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +|:------------:|:---------------:|:-------:|:-------:|:--------:|:--------------:|:------:|:------:|:--------:| +| Faster R-CNN | R-50-FPN | pytorch | 1x | 4.6 | 19.0 | 38.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/libra_rcnn/libra_faster_rcnn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/libra_rcnn/libra_faster_rcnn_r50_fpn_1x_coco/libra_faster_rcnn_r50_fpn_1x_coco_20200130-3afee3a9.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/libra_rcnn/libra_faster_rcnn_r50_fpn_1x_coco/libra_faster_rcnn_r50_fpn_1x_coco_20200130_204655.log.json) | +| Fast R-CNN | R-50-FPN | pytorch | 1x | | | | | +| Faster R-CNN | R-101-FPN | pytorch | 1x | 6.5 | 14.4 | 40.1 |
[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/libra_rcnn/libra_faster_rcnn_r101_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/libra_rcnn/libra_faster_rcnn_r101_fpn_1x_coco/libra_faster_rcnn_r101_fpn_1x_coco_20200203-8dba6a5a.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/libra_rcnn/libra_faster_rcnn_r101_fpn_1x_coco/libra_faster_rcnn_r101_fpn_1x_coco_20200203_001405.log.json) | +| Faster R-CNN | X-101-64x4d-FPN | pytorch | 1x | 10.8 | 8.5 | 42.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/libra_rcnn/libra_faster_rcnn_x101_64x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/libra_rcnn/libra_faster_rcnn_x101_64x4d_fpn_1x_coco/libra_faster_rcnn_x101_64x4d_fpn_1x_coco_20200315-3a7d0488.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/libra_rcnn/libra_faster_rcnn_x101_64x4d_fpn_1x_coco/libra_faster_rcnn_x101_64x4d_fpn_1x_coco_20200315_231625.log.json) | +| RetinaNet | R-50-FPN | pytorch | 1x | 4.2 | 17.7 | 37.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/libra_rcnn/libra_retinanet_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/libra_rcnn/libra_retinanet_r50_fpn_1x_coco/libra_retinanet_r50_fpn_1x_coco_20200205-804d94ce.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/libra_rcnn/libra_retinanet_r50_fpn_1x_coco/libra_retinanet_r50_fpn_1x_coco_20200205_112757.log.json) | diff --git a/configs/libra_rcnn/libra_fast_rcnn_r50_fpn_1x_coco.py b/configs/libra_rcnn/libra_fast_rcnn_r50_fpn_1x_coco.py new file mode 100644 index 0000000..efbedc8 --- /dev/null +++ b/configs/libra_rcnn/libra_fast_rcnn_r50_fpn_1x_coco.py @@ -0,0 +1,50 @@ +_base_ = '../fast_rcnn/fast_rcnn_r50_fpn_1x_coco.py' +# model settings +model = dict( + neck=[ + dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5), + dict( + type='BFP', + in_channels=256, + num_levels=5, + 
refine_level=2, + refine_type='non_local') + ], + roi_head=dict( + bbox_head=dict( + loss_bbox=dict( + _delete_=True, + type='BalancedL1Loss', + alpha=0.5, + gamma=1.5, + beta=1.0, + loss_weight=1.0))), + # model training and testing settings + train_cfg=dict( + rcnn=dict( + sampler=dict( + _delete_=True, + type='CombinedSampler', + num=512, + pos_fraction=0.25, + add_gt_as_proposals=True, + pos_sampler=dict(type='InstanceBalancedPosSampler'), + neg_sampler=dict( + type='IoUBalancedNegSampler', + floor_thr=-1, + floor_fraction=0, + num_bins=3))))) +# dataset settings +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +data = dict( + train=dict(proposal_file=data_root + + 'libra_proposals/rpn_r50_fpn_1x_train2017.pkl'), + val=dict(proposal_file=data_root + + 'libra_proposals/rpn_r50_fpn_1x_val2017.pkl'), + test=dict(proposal_file=data_root + + 'libra_proposals/rpn_r50_fpn_1x_val2017.pkl')) diff --git a/configs/libra_rcnn/libra_faster_rcnn_r101_fpn_1x_coco.py b/configs/libra_rcnn/libra_faster_rcnn_r101_fpn_1x_coco.py new file mode 100644 index 0000000..e899706 --- /dev/null +++ b/configs/libra_rcnn/libra_faster_rcnn_r101_fpn_1x_coco.py @@ -0,0 +1,6 @@ +_base_ = './libra_faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + depth=101, + init_cfg=dict(type='Pretrained', + checkpoint='torchvision://resnet101'))) diff --git a/configs/libra_rcnn/libra_faster_rcnn_r50_fpn_1x_coco.py b/configs/libra_rcnn/libra_faster_rcnn_r50_fpn_1x_coco.py new file mode 100644 index 0000000..89a0d7b --- /dev/null +++ b/configs/libra_rcnn/libra_faster_rcnn_r50_fpn_1x_coco.py @@ -0,0 +1,41 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' +# model settings +model = dict( + neck=[ + dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5), + dict( + type='BFP', + in_channels=256, + num_levels=5, + refine_level=2, + refine_type='non_local') + ], + roi_head=dict( + bbox_head=dict( + loss_bbox=dict( + _delete_=True, + 
type='BalancedL1Loss', + alpha=0.5, + gamma=1.5, + beta=1.0, + loss_weight=1.0))), + # model training and testing settings + train_cfg=dict( + rpn=dict(sampler=dict(neg_pos_ub=5), allowed_border=-1), + rcnn=dict( + sampler=dict( + _delete_=True, + type='CombinedSampler', + num=512, + pos_fraction=0.25, + add_gt_as_proposals=True, + pos_sampler=dict(type='InstanceBalancedPosSampler'), + neg_sampler=dict( + type='IoUBalancedNegSampler', + floor_thr=-1, + floor_fraction=0, + num_bins=3))))) diff --git a/configs/libra_rcnn/libra_faster_rcnn_x101_64x4d_fpn_1x_coco.py b/configs/libra_rcnn/libra_faster_rcnn_x101_64x4d_fpn_1x_coco.py new file mode 100644 index 0000000..06740a7 --- /dev/null +++ b/configs/libra_rcnn/libra_faster_rcnn_x101_64x4d_fpn_1x_coco.py @@ -0,0 +1,14 @@ +_base_ = './libra_faster_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d'))) diff --git a/configs/libra_rcnn/libra_retinanet_r50_fpn_1x_coco.py b/configs/libra_rcnn/libra_retinanet_r50_fpn_1x_coco.py new file mode 100644 index 0000000..be27420 --- /dev/null +++ b/configs/libra_rcnn/libra_retinanet_r50_fpn_1x_coco.py @@ -0,0 +1,26 @@ +_base_ = '../retinanet/retinanet_r50_fpn_1x_coco.py' +# model settings +model = dict( + neck=[ + dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + start_level=1, + add_extra_convs='on_input', + num_outs=5), + dict( + type='BFP', + in_channels=256, + num_levels=5, + refine_level=1, + refine_type='non_local') + ], + bbox_head=dict( + loss_bbox=dict( + _delete_=True, + type='BalancedL1Loss', + alpha=0.5, + gamma=1.5, + beta=0.11, + loss_weight=1.0))) diff --git a/configs/libra_rcnn/metafile.yml b/configs/libra_rcnn/metafile.yml new file mode 100644 index 0000000..8c32795 
--- /dev/null +++ b/configs/libra_rcnn/metafile.yml @@ -0,0 +1,99 @@ +Collections: + - Name: Libra R-CNN + Metadata: + Training Data: COCO + Training Techniques: + - IoU-Balanced Sampling + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - Balanced Feature Pyramid + Paper: + URL: https://arxiv.org/abs/1904.02701 + Title: 'Libra R-CNN: Towards Balanced Learning for Object Detection' + README: configs/libra_rcnn/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/necks/bfp.py#L10 + Version: v2.0.0 + +Models: + - Name: libra_faster_rcnn_r50_fpn_1x_coco + In Collection: Libra R-CNN + Config: configs/libra_rcnn/libra_faster_rcnn_r50_fpn_1x_coco.py + Metadata: + Training Memory (GB): 4.6 + inference time (ms/im): + - value: 52.63 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 38.3 + Weights: https://download.openmmlab.com/mmdetection/v2.0/libra_rcnn/libra_faster_rcnn_r50_fpn_1x_coco/libra_faster_rcnn_r50_fpn_1x_coco_20200130-3afee3a9.pth + + - Name: libra_faster_rcnn_r101_fpn_1x_coco + In Collection: Libra R-CNN + Config: configs/libra_rcnn/libra_faster_rcnn_r101_fpn_1x_coco.py + Metadata: + Training Memory (GB): 6.5 + inference time (ms/im): + - value: 69.44 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 40.1 + Weights: https://download.openmmlab.com/mmdetection/v2.0/libra_rcnn/libra_faster_rcnn_r101_fpn_1x_coco/libra_faster_rcnn_r101_fpn_1x_coco_20200203-8dba6a5a.pth + + - Name: libra_faster_rcnn_x101_64x4d_fpn_1x_coco + In Collection: Libra R-CNN + Config: configs/libra_rcnn/libra_faster_rcnn_x101_64x4d_fpn_1x_coco.py + Metadata: + Training Memory (GB): 10.8 + inference time (ms/im): + - value: 117.65 + hardware: V100 + backend: 
PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 42.7 + Weights: https://download.openmmlab.com/mmdetection/v2.0/libra_rcnn/libra_faster_rcnn_x101_64x4d_fpn_1x_coco/libra_faster_rcnn_x101_64x4d_fpn_1x_coco_20200315-3a7d0488.pth + + - Name: libra_retinanet_r50_fpn_1x_coco + In Collection: Libra R-CNN + Config: configs/libra_rcnn/libra_retinanet_r50_fpn_1x_coco.py + Metadata: + Training Memory (GB): 4.2 + inference time (ms/im): + - value: 56.5 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 37.6 + Weights: https://download.openmmlab.com/mmdetection/v2.0/libra_rcnn/libra_retinanet_r50_fpn_1x_coco/libra_retinanet_r50_fpn_1x_coco_20200205-804d94ce.pth diff --git a/configs/lvis/README.md b/configs/lvis/README.md new file mode 100644 index 0000000..f613de8 --- /dev/null +++ b/configs/lvis/README.md @@ -0,0 +1,44 @@ +# LVIS dataset + +## Introduction + + + +```latex +@inproceedings{gupta2019lvis, + title={{LVIS}: A Dataset for Large Vocabulary Instance Segmentation}, + author={Gupta, Agrim and Dollar, Piotr and Girshick, Ross}, + booktitle={Proceedings of the {IEEE} Conference on Computer Vision and Pattern Recognition}, + year={2019} +} +``` + +## Common Setting + +* Please follow [install guide](../../docs/get_started.md#install-mmdetection) to install open-mmlab forked cocoapi first. +* Run following scripts to install our forked lvis-api. + + ```shell + pip install git+https://github.com/lvis-dataset/lvis-api.git + ``` + +* All experiments use oversample strategy [here](../../docs/tutorials/customize_dataset.md#class-balanced-dataset) with oversample threshold `1e-3`. +* The size of LVIS v0.5 is half of COCO, so schedule `2x` in LVIS is roughly the same iterations as `1x` in COCO. 
+ +## Results and models of LVIS v0.5 + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :------: |:--------: | +| R-50-FPN | pytorch | 2x | - | - | 26.1 | 25.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/lvis/mask_rcnn_r50_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_r50_fpn_sample1e-3_mstrain_2x_lvis/mask_rcnn_r50_fpn_sample1e-3_mstrain_2x_lvis-dbd06831.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_r50_fpn_sample1e-3_mstrain_2x_lvis/mask_rcnn_r50_fpn_sample1e-3_mstrain_2x_lvis_20200531_160435.log.json) | +| R-101-FPN | pytorch | 2x | - | - | 27.1 | 27.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/lvis/mask_rcnn_r101_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_r101_fpn_sample1e-3_mstrain_2x_lvis/mask_rcnn_r101_fpn_sample1e-3_mstrain_2x_lvis-54582ee2.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_r101_fpn_sample1e-3_mstrain_2x_lvis/mask_rcnn_r101_fpn_sample1e-3_mstrain_2x_lvis_20200601_134748.log.json) | +| X-101-32x4d-FPN | pytorch | 2x | - | - | 26.7 | 26.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/lvis/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_2x_lvis/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_2x_lvis-3cf55ea2.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_2x_lvis/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_2x_lvis_20200531_221749.log.json) | +| X-101-64x4d-FPN | pytorch | 2x | - | - | 26.4 | 26.0 | 
[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/lvis/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_2x_lvis/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_2x_lvis-1c99a5ad.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_2x_lvis/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_2x_lvis_20200601_194651.log.json) | + +## Results and models of LVIS v1 + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :------: | :--------: | +| R-50-FPN | pytorch | 1x | 9.1 | - | 22.5 | 21.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/lvis/mask_rcnn_r50_fpn_sample1e-3_mstrain_1x_lvis_v1.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_r50_fpn_sample1e-3_mstrain_1x_lvis_v1/mask_rcnn_r50_fpn_sample1e-3_mstrain_1x_lvis_v1-aa78ac3d.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_r50_fpn_sample1e-3_mstrain_1x_lvis_v1/mask_rcnn_r50_fpn_sample1e-3_mstrain_1x_lvis_v1-20200829_061305.log.json) | +| R-101-FPN | pytorch | 1x | 10.8 | - | 24.6 | 23.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/lvis/mask_rcnn_r101_fpn_sample1e-3_mstrain_1x_lvis_v1.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_r101_fpn_sample1e-3_mstrain_1x_lvis_v1/mask_rcnn_r101_fpn_sample1e-3_mstrain_1x_lvis_v1-ec55ce32.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_r101_fpn_sample1e-3_mstrain_1x_lvis_v1/mask_rcnn_r101_fpn_sample1e-3_mstrain_1x_lvis_v1-20200829_070959.log.json) | +| X-101-32x4d-FPN | pytorch | 1x | 11.8 | - | 26.7 | 25.5 | 
[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/lvis/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_1x_lvis_v1.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_1x_lvis_v1/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_1x_lvis_v1-ebbc5c81.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_1x_lvis_v1/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_1x_lvis_v1-20200829_071317.log.json) | +| X-101-64x4d-FPN | pytorch | 1x | 14.6 | - | 27.2 | 25.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/lvis/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_1x_lvis_v1.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_1x_lvis_v1/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_1x_lvis_v1-43d9edfe.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/lvis/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_1x_lvis_v1/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_1x_lvis_v1-20200830_060206.log.json) | diff --git a/configs/lvis/mask_rcnn_r101_fpn_sample1e-3_mstrain_1x_lvis_v1.py b/configs/lvis/mask_rcnn_r101_fpn_sample1e-3_mstrain_1x_lvis_v1.py new file mode 100644 index 0000000..0f017f5 --- /dev/null +++ b/configs/lvis/mask_rcnn_r101_fpn_sample1e-3_mstrain_1x_lvis_v1.py @@ -0,0 +1,6 @@ +_base_ = './mask_rcnn_r50_fpn_sample1e-3_mstrain_1x_lvis_v1.py' +model = dict( + backbone=dict( + depth=101, + init_cfg=dict(type='Pretrained', + checkpoint='torchvision://resnet101'))) diff --git a/configs/lvis/mask_rcnn_r101_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py b/configs/lvis/mask_rcnn_r101_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py new file mode 100644 index 0000000..637f4a6 --- /dev/null +++ b/configs/lvis/mask_rcnn_r101_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py @@ -0,0 +1,6 @@ +_base_ = './mask_rcnn_r50_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py' +model = dict( + backbone=dict( + depth=101, + 
init_cfg=dict(type='Pretrained', + checkpoint='torchvision://resnet101'))) diff --git a/configs/lvis/mask_rcnn_r50_fpn_sample1e-3_mstrain_1x_lvis_v1.py b/configs/lvis/mask_rcnn_r50_fpn_sample1e-3_mstrain_1x_lvis_v1.py new file mode 100644 index 0000000..92ddb52 --- /dev/null +++ b/configs/lvis/mask_rcnn_r50_fpn_sample1e-3_mstrain_1x_lvis_v1.py @@ -0,0 +1,31 @@ +_base_ = [ + '../_base_/models/mask_rcnn_r50_fpn.py', + '../_base_/datasets/lvis_v1_instance.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +model = dict( + roi_head=dict( + bbox_head=dict(num_classes=1203), mask_head=dict(num_classes=1203)), + test_cfg=dict( + rcnn=dict( + score_thr=0.0001, + # LVIS allows up to 300 + max_per_img=300))) +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), + (1333, 768), (1333, 800)], + multiscale_mode='value', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +data = dict(train=dict(dataset=dict(pipeline=train_pipeline))) diff --git a/configs/lvis/mask_rcnn_r50_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py b/configs/lvis/mask_rcnn_r50_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py new file mode 100644 index 0000000..d53c5dc --- /dev/null +++ b/configs/lvis/mask_rcnn_r50_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py @@ -0,0 +1,31 @@ +_base_ = [ + '../_base_/models/mask_rcnn_r50_fpn.py', + '../_base_/datasets/lvis_v0.5_instance.py', + '../_base_/schedules/schedule_2x.py', '../_base_/default_runtime.py' +] +model = dict( + roi_head=dict( + bbox_head=dict(num_classes=1230), mask_head=dict(num_classes=1230)), + 
test_cfg=dict( + rcnn=dict( + score_thr=0.0001, + # LVIS allows up to 300 + max_per_img=300))) +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), + (1333, 768), (1333, 800)], + multiscale_mode='value', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +data = dict(train=dict(dataset=dict(pipeline=train_pipeline))) diff --git a/configs/lvis/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_1x_lvis_v1.py b/configs/lvis/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_1x_lvis_v1.py new file mode 100644 index 0000000..a6115c1 --- /dev/null +++ b/configs/lvis/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_1x_lvis_v1.py @@ -0,0 +1,14 @@ +_base_ = './mask_rcnn_r50_fpn_sample1e-3_mstrain_1x_lvis_v1.py' +model = dict( + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d'))) diff --git a/configs/lvis/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py b/configs/lvis/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py new file mode 100644 index 0000000..96b6252 --- /dev/null +++ b/configs/lvis/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py @@ -0,0 +1,14 @@ +_base_ = './mask_rcnn_r50_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py' +model = dict( + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + 
norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d'))) diff --git a/configs/lvis/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_1x_lvis_v1.py b/configs/lvis/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_1x_lvis_v1.py new file mode 100644 index 0000000..0f95a73 --- /dev/null +++ b/configs/lvis/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_1x_lvis_v1.py @@ -0,0 +1,14 @@ +_base_ = './mask_rcnn_r50_fpn_sample1e-3_mstrain_1x_lvis_v1.py' +model = dict( + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d'))) diff --git a/configs/lvis/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py b/configs/lvis/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py new file mode 100644 index 0000000..986acda --- /dev/null +++ b/configs/lvis/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py @@ -0,0 +1,14 @@ +_base_ = './mask_rcnn_r50_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py' +model = dict( + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d'))) diff --git a/configs/mask_rcnn/README.md b/configs/mask_rcnn/README.md new file mode 100644 index 0000000..77cfabe --- /dev/null +++ b/configs/mask_rcnn/README.md @@ -0,0 +1,48 @@ +# Mask R-CNN + +## Introduction + + + +```latex +@article{He_2017, + title={Mask R-CNN}, + journal={2017 IEEE International Conference on Computer Vision (ICCV)}, + publisher={IEEE}, + author={He, Kaiming and Gkioxari, Georgia and Dollar, Piotr and Girshick, Ross}, + year={2017}, + month={Oct} +} 
+``` + +## Results and models + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :------: | :--------: | +| R-50-FPN | caffe | 1x | 4.3 | | 38.0 | 34.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_caffe_fpn_1x_coco/mask_rcnn_r50_caffe_fpn_1x_coco_bbox_mAP-0.38__segm_mAP-0.344_20200504_231812-0ebd1859.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_caffe_fpn_1x_coco/mask_rcnn_r50_caffe_fpn_1x_coco_20200504_231812.log.json) | +| R-50-FPN | pytorch | 1x | 4.4 | 16.1 | 38.2 | 34.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_1x_coco/mask_rcnn_r50_fpn_1x_coco_20200205-d4b0c5d6.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_1x_coco/mask_rcnn_r50_fpn_1x_coco_20200205_050542.log.json) | +| R-50-FPN | pytorch | 2x | - | - | 39.2 | 35.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_r50_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_2x_coco/mask_rcnn_r50_fpn_2x_coco_bbox_mAP-0.392__segm_mAP-0.354_20200505_003907-3e542a40.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_2x_coco/mask_rcnn_r50_fpn_2x_coco_20200505_003907.log.json) | +| R-101-FPN | caffe | 1x | | | 40.4 | 36.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_r101_caffe_fpn_1x_coco.py) | 
[model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r101_caffe_fpn_1x_coco/mask_rcnn_r101_caffe_fpn_1x_coco_20200601_095758-805e06c1.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r101_caffe_fpn_1x_coco/mask_rcnn_r101_caffe_fpn_1x_coco_20200601_095758.log.json)| +| R-101-FPN | pytorch | 1x | 6.4 | 13.5 | 40.0 | 36.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_r101_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r101_fpn_1x_coco/mask_rcnn_r101_fpn_1x_coco_20200204-1efe0ed5.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r101_fpn_1x_coco/mask_rcnn_r101_fpn_1x_coco_20200204_144809.log.json) | +| R-101-FPN | pytorch | 2x | - | - | 40.8 | 36.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_r101_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r101_fpn_2x_coco/mask_rcnn_r101_fpn_2x_coco_bbox_mAP-0.408__segm_mAP-0.366_20200505_071027-14b391c7.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r101_fpn_2x_coco/mask_rcnn_r101_fpn_2x_coco_20200505_071027.log.json) | +| X-101-32x4d-FPN | pytorch | 1x | 7.6 | 11.3 | 41.9 | 37.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco/mask_rcnn_x101_32x4d_fpn_1x_coco_20200205-478d0b67.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco/mask_rcnn_x101_32x4d_fpn_1x_coco_20200205_034906.log.json) | +| X-101-32x4d-FPN | pytorch | 2x | - | - | 42.2 | 37.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_x101_32x4d_fpn_2x_coco.py) | 
[model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_32x4d_fpn_2x_coco/mask_rcnn_x101_32x4d_fpn_2x_coco_bbox_mAP-0.422__segm_mAP-0.378_20200506_004702-faef898c.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_32x4d_fpn_2x_coco/mask_rcnn_x101_32x4d_fpn_2x_coco_20200506_004702.log.json) | +| X-101-64x4d-FPN | pytorch | 1x | 10.7 | 8.0 | 42.8 | 38.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_x101_64x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_64x4d_fpn_1x_coco/mask_rcnn_x101_64x4d_fpn_1x_coco_20200201-9352eb0d.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_64x4d_fpn_1x_coco/mask_rcnn_x101_64x4d_fpn_1x_coco_20200201_124310.log.json) | +| X-101-64x4d-FPN | pytorch | 2x | - | - | 42.7 | 38.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_x101_64x4d_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_64x4d_fpn_2x_coco/mask_rcnn_x101_64x4d_fpn_2x_coco_20200509_224208-39d6f70c.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_64x4d_fpn_2x_coco/mask_rcnn_x101_64x4d_fpn_2x_coco_20200509_224208.log.json)| +| X-101-32x8d-FPN | pytorch | 1x | - | - | 42.8 | 38.3 | | + +## Pre-trained Models + +We also train some models with longer schedules and multi-scale training. Users can fine-tune them for downstream tasks. 
+ +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :------: | :--------: | +| [R-50-FPN](./mask_rcnn_r50_caffe_fpn_mstrain-poly_2x_coco.py) | caffe | 2x | 4.3 | | 40.3 | 36.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_2x_coco/mask_rcnn_r50_caffe_fpn_mstrain-poly_2x_coco_bbox_mAP-0.403__segm_mAP-0.365_20200504_231822-a75c98ce.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_2x_coco/mask_rcnn_r50_caffe_fpn_mstrain-poly_2x_coco_20200504_231822.log.json) +| [R-50-FPN](./mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco.py) | caffe | 3x | 4.3 | | 40.8 | 37.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco_bbox_mAP-0.408__segm_mAP-0.37_20200504_163245-42aa3d00.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco_20200504_163245.log.json) +| [R-50-FPN](./mask_rcnn_r50_fpn_mstrain-poly_3x_coco.py) | pytorch| 3x | 4.1 | | 40.9 | 37.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_r50_fpn_mstrain-poly_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_mstrain-poly_3x_coco/mask_rcnn_r50_fpn_mstrain-poly_3x_coco_20210524_201154-21b550bb.pth) | 
[log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_mstrain-poly_3x_coco/mask_rcnn_r50_fpn_mstrain-poly_3x_coco_20210524_201154.log.json) +| [R-101-FPN](./mask_rcnn_r101_caffe_fpn_mstrain-poly_3x_coco.py) | caffe | 3x | 5.9 | | 42.9 | 38.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_r101_caffe_fpn_mstrain-poly_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r101_caffe_fpn_mstrain-poly_3x_coco/mask_rcnn_r101_caffe_fpn_mstrain-poly_3x_coco_20210526_132339-3c33ce02.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r101_caffe_fpn_mstrain-poly_3x_coco/mask_rcnn_r101_caffe_fpn_mstrain-poly_3x_coco_20210526_132339.log.json) +| [R-101-FPN](./mask_rcnn_r101_fpn_mstrain-poly_3x_coco.py) | pytorch| 3x | 6.1 | | 42.7 | 38.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_r101_fpn_mstrain-poly_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r101_fpn_mstrain-poly_3x_coco/mask_rcnn_r101_fpn_mstrain-poly_3x_coco_20210524_200244-5675c317.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r101_fpn_mstrain-poly_3x_coco/mask_rcnn_r101_fpn_mstrain-poly_3x_coco_20210524_200244.log.json) +| [X-101-32x4d-FPN](./mask_rcnn_x101_32x4d_fpn_mstrain-poly_3x_coco.py) | pytorch| 3x | 7.3 | | 43.6 | 39.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_x101_32x4d_fpn_mstrain-poly_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_32x4d_fpn_mstrain-poly_3x_coco/mask_rcnn_x101_32x4d_fpn_mstrain-poly_3x_coco_20210524_201410-abcd7859.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_32x4d_fpn_mstrain-poly_3x_coco/mask_rcnn_x101_32x4d_fpn_mstrain-poly_3x_coco_20210524_201410.log.json) +| 
[X-101-32x8d-FPN](./mask_rcnn_x101_32x8d_fpn_mstrain-poly_1x_coco.py) | pytorch | 1x | - | | 43.6 | 39.0 | +| [X-101-32x8d-FPN](./mask_rcnn_x101_32x8d_fpn_mstrain-poly_3x_coco.py) | pytorch | 3x | 10.3 | | 44.3 | 39.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_x101_32x8d_fpn_mstrain-poly_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_32x8d_fpn_mstrain-poly_3x_coco/mask_rcnn_x101_32x8d_fpn_mstrain-poly_3x_coco_20210607_161042-8bd2c639.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_32x8d_fpn_mstrain-poly_3x_coco/mask_rcnn_x101_32x8d_fpn_mstrain-poly_3x_coco_20210607_161042.log.json) +| [X-101-64x4d-FPN](./mask_rcnn_x101_64x4d_fpn_mstrain-poly_3x_coco.py) | pytorch | 3x | 10.4 | | 44.5 | 39.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_x101_64x4d_fpn_mstrain-poly_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_64x4d_fpn_mstrain-poly_3x_coco/mask_rcnn_x101_64x4d_fpn_mstrain-poly_3x_coco_20210526_120447-c376f129.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_64x4d_fpn_mstrain-poly_3x_coco/mask_rcnn_x101_64x4d_fpn_mstrain-poly_3x_coco_20210526_120447.log.json) diff --git a/configs/mask_rcnn/mask_rcnn_r101_caffe_fpn_1x_coco.py b/configs/mask_rcnn/mask_rcnn_r101_caffe_fpn_1x_coco.py new file mode 100644 index 0000000..95b324f --- /dev/null +++ b/configs/mask_rcnn/mask_rcnn_r101_caffe_fpn_1x_coco.py @@ -0,0 +1,7 @@ +_base_ = './mask_rcnn_r50_caffe_fpn_1x_coco.py' +model = dict( + backbone=dict( + depth=101, + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://detectron2/resnet101_caffe'))) diff --git a/configs/mask_rcnn/mask_rcnn_r101_caffe_fpn_mstrain-poly_3x_coco.py b/configs/mask_rcnn/mask_rcnn_r101_caffe_fpn_mstrain-poly_3x_coco.py new file mode 100644 index 0000000..e39781d --- /dev/null 
+++ b/configs/mask_rcnn/mask_rcnn_r101_caffe_fpn_mstrain-poly_3x_coco.py @@ -0,0 +1,55 @@ +_base_ = [ + '../common/mstrain-poly_3x_coco_instance.py', + '../_base_/models/mask_rcnn_r50_fpn.py' +] + +model = dict( + backbone=dict( + depth=101, + norm_cfg=dict(requires_grad=False), + norm_eval=True, + style='caffe', + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://detectron2/resnet101_caffe'))) +# use caffe img_norm +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='LoadAnnotations', + with_bbox=True, + with_mask=True, + poly2mask=False), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 800)], + multiscale_mode='range', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] + +data = dict( + train=dict(dataset=dict(pipeline=train_pipeline)), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/mask_rcnn/mask_rcnn_r101_fpn_1x_coco.py b/configs/mask_rcnn/mask_rcnn_r101_fpn_1x_coco.py new file mode 100644 index 0000000..b7986e8 --- /dev/null +++ b/configs/mask_rcnn/mask_rcnn_r101_fpn_1x_coco.py @@ -0,0 +1,6 @@ +_base_ = './mask_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + depth=101, + init_cfg=dict(type='Pretrained', + checkpoint='torchvision://resnet101'))) diff --git 
a/configs/mask_rcnn/mask_rcnn_r101_fpn_2x_coco.py b/configs/mask_rcnn/mask_rcnn_r101_fpn_2x_coco.py new file mode 100644 index 0000000..c9059d5 --- /dev/null +++ b/configs/mask_rcnn/mask_rcnn_r101_fpn_2x_coco.py @@ -0,0 +1,6 @@ +_base_ = './mask_rcnn_r50_fpn_2x_coco.py' +model = dict( + backbone=dict( + depth=101, + init_cfg=dict(type='Pretrained', + checkpoint='torchvision://resnet101'))) diff --git a/configs/mask_rcnn/mask_rcnn_r101_fpn_mstrain-poly_3x_coco.py b/configs/mask_rcnn/mask_rcnn_r101_fpn_mstrain-poly_3x_coco.py new file mode 100644 index 0000000..0696cbe --- /dev/null +++ b/configs/mask_rcnn/mask_rcnn_r101_fpn_mstrain-poly_3x_coco.py @@ -0,0 +1,10 @@ +_base_ = [ + '../common/mstrain-poly_3x_coco_instance.py', + '../_base_/models/mask_rcnn_r50_fpn.py' +] + +model = dict( + backbone=dict( + depth=101, + init_cfg=dict(type='Pretrained', + checkpoint='torchvision://resnet101'))) diff --git a/configs/mask_rcnn/mask_rcnn_r50_caffe_c4_1x_coco.py b/configs/mask_rcnn/mask_rcnn_r50_caffe_c4_1x_coco.py new file mode 100644 index 0000000..a44c018 --- /dev/null +++ b/configs/mask_rcnn/mask_rcnn_r50_caffe_c4_1x_coco.py @@ -0,0 +1,39 @@ +_base_ = [ + '../_base_/models/mask_rcnn_r50_caffe_c4.py', + '../_base_/datasets/coco_instance.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +# use caffe img_norm +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + 
transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) diff --git a/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_1x_coco.py b/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_1x_coco.py new file mode 100644 index 0000000..5a23f8c --- /dev/null +++ b/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_1x_coco.py @@ -0,0 +1,40 @@ +_base_ = './mask_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + norm_cfg=dict(requires_grad=False), + style='caffe', + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://detectron2/resnet50_caffe'))) +# use caffe img_norm +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git 
a/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_1x_coco.py b/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_1x_coco.py new file mode 100644 index 0000000..6308e40 --- /dev/null +++ b/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_1x_coco.py @@ -0,0 +1,49 @@ +_base_ = './mask_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + norm_cfg=dict(requires_grad=False), + style='caffe', + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://detectron2/resnet50_caffe'))) +# use caffe img_norm +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='LoadAnnotations', + with_bbox=True, + with_mask=True, + poly2mask=False), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), + (1333, 768), (1333, 800)], + multiscale_mode='value', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_2x_coco.py b/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_2x_coco.py new file mode 100644 index 0000000..4f7150c --- /dev/null +++ b/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_2x_coco.py @@ -0,0 +1,4 @@ +_base_ = 
'./mask_rcnn_r50_caffe_fpn_mstrain-poly_1x_coco.py' +# learning policy +lr_config = dict(step=[16, 23]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco.py b/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco.py new file mode 100644 index 0000000..1b48a21 --- /dev/null +++ b/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco.py @@ -0,0 +1,4 @@ +_base_ = './mask_rcnn_r50_caffe_fpn_mstrain-poly_1x_coco.py' +# learning policy +lr_config = dict(step=[28, 34]) +runner = dict(type='EpochBasedRunner', max_epochs=36) diff --git a/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain_1x_coco.py b/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain_1x_coco.py new file mode 100644 index 0000000..bebbaaa --- /dev/null +++ b/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain_1x_coco.py @@ -0,0 +1,45 @@ +_base_ = './mask_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + norm_cfg=dict(requires_grad=False), + style='caffe', + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://detectron2/resnet50_caffe'))) +# use caffe img_norm +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), + (1333, 768), (1333, 800)], + multiscale_mode='value', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', 
**img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_poly_1x_coco_v1.py b/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_poly_1x_coco_v1.py new file mode 100644 index 0000000..3f8079d --- /dev/null +++ b/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_poly_1x_coco_v1.py @@ -0,0 +1,61 @@ +_base_ = './mask_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + norm_cfg=dict(requires_grad=False), + style='caffe', + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://detectron2/resnet50_caffe')), + rpn_head=dict( + loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), + roi_head=dict( + bbox_roi_extractor=dict( + roi_layer=dict( + type='RoIAlign', + output_size=7, + sampling_ratio=2, + aligned=False)), + bbox_head=dict( + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), + mask_roi_extractor=dict( + roi_layer=dict( + type='RoIAlign', + output_size=14, + sampling_ratio=2, + aligned=False)))) +# use caffe img_norm +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='LoadAnnotations', + with_bbox=True, + with_mask=True, + poly2mask=False), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + 
dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py b/configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py new file mode 100644 index 0000000..6a6c924 --- /dev/null +++ b/configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py @@ -0,0 +1,5 @@ +_base_ = [ + '../_base_/models/mask_rcnn_r50_fpn.py', + '../_base_/datasets/coco_instance.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] diff --git a/configs/mask_rcnn/mask_rcnn_r50_fpn_2x_coco.py b/configs/mask_rcnn/mask_rcnn_r50_fpn_2x_coco.py new file mode 100644 index 0000000..932b1f9 --- /dev/null +++ b/configs/mask_rcnn/mask_rcnn_r50_fpn_2x_coco.py @@ -0,0 +1,5 @@ +_base_ = [ + '../_base_/models/mask_rcnn_r50_fpn.py', + '../_base_/datasets/coco_instance.py', + '../_base_/schedules/schedule_2x.py', '../_base_/default_runtime.py' +] diff --git a/configs/mask_rcnn/mask_rcnn_r50_fpn_mstrain-poly_3x_coco.py b/configs/mask_rcnn/mask_rcnn_r50_fpn_mstrain-poly_3x_coco.py new file mode 100644 index 0000000..b3d9242 --- /dev/null +++ b/configs/mask_rcnn/mask_rcnn_r50_fpn_mstrain-poly_3x_coco.py @@ -0,0 +1,4 @@ +_base_ = [ + '../common/mstrain-poly_3x_coco_instance.py', + '../_base_/models/mask_rcnn_r50_fpn.py' +] diff --git a/configs/mask_rcnn/mask_rcnn_r50_fpn_poly_1x_coco.py b/configs/mask_rcnn/mask_rcnn_r50_fpn_poly_1x_coco.py new file mode 100644 index 0000000..9eb6d57 --- /dev/null +++ b/configs/mask_rcnn/mask_rcnn_r50_fpn_poly_1x_coco.py @@ -0,0 +1,23 @@ +_base_ = [ + '../_base_/models/mask_rcnn_r50_fpn.py', + '../_base_/datasets/coco_instance.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] + +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], 
to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='LoadAnnotations', + with_bbox=True, + with_mask=True, + poly2mask=False), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +data = dict(train=dict(pipeline=train_pipeline)) diff --git a/configs/mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco.py b/configs/mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco.py new file mode 100644 index 0000000..a8b3799 --- /dev/null +++ b/configs/mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco.py @@ -0,0 +1,14 @@ +_base_ = './mask_rcnn_r101_fpn_1x_coco.py' +model = dict( + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d'))) diff --git a/configs/mask_rcnn/mask_rcnn_x101_32x4d_fpn_2x_coco.py b/configs/mask_rcnn/mask_rcnn_x101_32x4d_fpn_2x_coco.py new file mode 100644 index 0000000..2cd3cee --- /dev/null +++ b/configs/mask_rcnn/mask_rcnn_x101_32x4d_fpn_2x_coco.py @@ -0,0 +1,14 @@ +_base_ = './mask_rcnn_r101_fpn_2x_coco.py' +model = dict( + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d'))) diff --git a/configs/mask_rcnn/mask_rcnn_x101_32x4d_fpn_mstrain-poly_3x_coco.py b/configs/mask_rcnn/mask_rcnn_x101_32x4d_fpn_mstrain-poly_3x_coco.py new file mode 100644 index 0000000..b698a7d --- /dev/null +++ 
b/configs/mask_rcnn/mask_rcnn_x101_32x4d_fpn_mstrain-poly_3x_coco.py @@ -0,0 +1,18 @@ +_base_ = [ + '../common/mstrain-poly_3x_coco_instance.py', + '../_base_/models/mask_rcnn_r50_fpn.py' +] + +model = dict( + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d'))) diff --git a/configs/mask_rcnn/mask_rcnn_x101_32x8d_fpn_1x_coco.py b/configs/mask_rcnn/mask_rcnn_x101_32x8d_fpn_1x_coco.py new file mode 100644 index 0000000..108ea4e --- /dev/null +++ b/configs/mask_rcnn/mask_rcnn_x101_32x8d_fpn_1x_coco.py @@ -0,0 +1,65 @@ +_base_ = './mask_rcnn_r101_fpn_1x_coco.py' +model = dict( + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=8, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=False), + style='pytorch', + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://detectron2/resnext101_32x8d'))) + +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], + std=[57.375, 57.120, 58.395], + to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + 
dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline)) diff --git a/configs/mask_rcnn/mask_rcnn_x101_32x8d_fpn_mstrain-poly_1x_coco.py b/configs/mask_rcnn/mask_rcnn_x101_32x8d_fpn_mstrain-poly_1x_coco.py new file mode 100644 index 0000000..6b912f6 --- /dev/null +++ b/configs/mask_rcnn/mask_rcnn_x101_32x8d_fpn_mstrain-poly_1x_coco.py @@ -0,0 +1,60 @@ +_base_ = './mask_rcnn_r101_fpn_1x_coco.py' +model = dict( + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=8, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=False), + style='pytorch', + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://detectron2/resnext101_32x8d'))) + +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], + std=[57.375, 57.120, 58.395], + to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='LoadAnnotations', + with_bbox=True, + with_mask=True, + poly2mask=False), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), + (1333, 768), (1333, 800)], + multiscale_mode='value', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] 
+test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/mask_rcnn/mask_rcnn_x101_32x8d_fpn_mstrain-poly_3x_coco.py b/configs/mask_rcnn/mask_rcnn_x101_32x8d_fpn_mstrain-poly_3x_coco.py new file mode 100644 index 0000000..8ba0e9c --- /dev/null +++ b/configs/mask_rcnn/mask_rcnn_x101_32x8d_fpn_mstrain-poly_3x_coco.py @@ -0,0 +1,85 @@ +_base_ = [ + '../common/mstrain-poly_3x_coco_instance.py', + '../_base_/models/mask_rcnn_r50_fpn.py' +] + +model = dict( + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=8, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=False), + style='pytorch', + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://detectron2/resnext101_32x8d'))) + +dataset_type = 'CocoDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], + std=[57.375, 57.120, 58.395], + to_rgb=False) + +# In mstrain 3x config, img_scale=[(1333, 640), (1333, 800)], +# multiscale_mode='range' +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='LoadAnnotations', + with_bbox=True, + with_mask=True, + poly2mask=False), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 800)], + multiscale_mode='range', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + 
dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] + +# Use RepeatDataset to speed up training +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type='RepeatDataset', + times=3, + dataset=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline)) diff --git a/configs/mask_rcnn/mask_rcnn_x101_64x4d_fpn_1x_coco.py b/configs/mask_rcnn/mask_rcnn_x101_64x4d_fpn_1x_coco.py new file mode 100644 index 0000000..2333b03 --- /dev/null +++ b/configs/mask_rcnn/mask_rcnn_x101_64x4d_fpn_1x_coco.py @@ -0,0 +1,14 @@ +_base_ = './mask_rcnn_x101_32x4d_fpn_1x_coco.py' +model = dict( + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d'))) diff --git a/configs/mask_rcnn/mask_rcnn_x101_64x4d_fpn_2x_coco.py b/configs/mask_rcnn/mask_rcnn_x101_64x4d_fpn_2x_coco.py new file mode 100644 index 0000000..6074cca --- /dev/null +++ b/configs/mask_rcnn/mask_rcnn_x101_64x4d_fpn_2x_coco.py @@ -0,0 +1,14 @@ +_base_ = './mask_rcnn_x101_32x4d_fpn_2x_coco.py' +model = dict( + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + 
base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d'))) diff --git a/configs/mask_rcnn/mask_rcnn_x101_64x4d_fpn_mstrain-poly_3x_coco.py b/configs/mask_rcnn/mask_rcnn_x101_64x4d_fpn_mstrain-poly_3x_coco.py new file mode 100644 index 0000000..9f9cb1c --- /dev/null +++ b/configs/mask_rcnn/mask_rcnn_x101_64x4d_fpn_mstrain-poly_3x_coco.py @@ -0,0 +1,18 @@ +_base_ = [ + '../common/mstrain-poly_3x_coco_instance.py', + '../_base_/models/mask_rcnn_r50_fpn.py' +] + +model = dict( + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d'))) diff --git a/configs/mask_rcnn/metafile.yml b/configs/mask_rcnn/metafile.yml new file mode 100644 index 0000000..747a99a --- /dev/null +++ b/configs/mask_rcnn/metafile.yml @@ -0,0 +1,419 @@ +Collections: + - Name: Mask R-CNN + Metadata: + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - Softmax + - RPN + - Convolution + - Dense Connections + - FPN + - ResNet + - RoIAlign + Paper: + URL: https://arxiv.org/abs/1703.06870v3 + Title: 'Mask R-CNN' + README: configs/mask_rcnn/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/detectors/mask_rcnn.py#L6 + Version: v2.0.0 + +Models: + - Name: mask_rcnn_r50_caffe_fpn_1x_coco + In Collection: Mask R-CNN + Config: configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_1x_coco.py + Metadata: + Training Memory (GB): 4.3 + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 38.0 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 34.4 + Weights: 
https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_caffe_fpn_1x_coco/mask_rcnn_r50_caffe_fpn_1x_coco_bbox_mAP-0.38__segm_mAP-0.344_20200504_231812-0ebd1859.pth + + - Name: mask_rcnn_r50_fpn_1x_coco + In Collection: Mask R-CNN + Config: configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py + Metadata: + Training Memory (GB): 4.4 + inference time (ms/im): + - value: 62.11 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 38.2 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 34.7 + Weights: https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_1x_coco/mask_rcnn_r50_fpn_1x_coco_20200205-d4b0c5d6.pth + + - Name: mask_rcnn_r50_fpn_2x_coco + In Collection: Mask R-CNN + Config: configs/mask_rcnn/mask_rcnn_r50_fpn_2x_coco.py + Metadata: + Training Memory (GB): 4.4 + inference time (ms/im): + - value: 62.11 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 39.2 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 35.4 + Weights: https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_2x_coco/mask_rcnn_r50_fpn_2x_coco_bbox_mAP-0.392__segm_mAP-0.354_20200505_003907-3e542a40.pth + + - Name: mask_rcnn_r101_caffe_fpn_1x_coco + In Collection: Mask R-CNN + Config: configs/mask_rcnn/mask_rcnn_r101_caffe_fpn_1x_coco.py + Metadata: + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 40.4 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 36.4 + Weights: https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r101_caffe_fpn_1x_coco/mask_rcnn_r101_caffe_fpn_1x_coco_20200601_095758-805e06c1.pth + + - Name: mask_rcnn_r101_fpn_1x_coco + In Collection: Mask R-CNN + Config: 
configs/mask_rcnn/mask_rcnn_r101_fpn_1x_coco.py + Metadata: + Training Memory (GB): 6.4 + inference time (ms/im): + - value: 74.07 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 40.0 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 36.1 + Weights: https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r101_fpn_1x_coco/mask_rcnn_r101_fpn_1x_coco_20200204-1efe0ed5.pth + + - Name: mask_rcnn_r101_fpn_2x_coco + In Collection: Mask R-CNN + Config: configs/mask_rcnn/mask_rcnn_r101_fpn_2x_coco.py + Metadata: + Training Memory (GB): 6.4 + inference time (ms/im): + - value: 74.07 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 40.8 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 36.6 + Weights: https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r101_fpn_2x_coco/mask_rcnn_r101_fpn_2x_coco_bbox_mAP-0.408__segm_mAP-0.366_20200505_071027-14b391c7.pth + + - Name: mask_rcnn_x101_32x4d_fpn_1x_coco + In Collection: Mask R-CNN + Config: configs/mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco.py + Metadata: + Training Memory (GB): 7.6 + inference time (ms/im): + - value: 88.5 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.9 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 37.5 + Weights: https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco/mask_rcnn_x101_32x4d_fpn_1x_coco_20200205-478d0b67.pth + + - Name: mask_rcnn_x101_32x4d_fpn_2x_coco + In Collection: Mask R-CNN + Config: configs/mask_rcnn/mask_rcnn_x101_32x4d_fpn_2x_coco.py + Metadata: + Training Memory (GB): 7.6 + 
inference time (ms/im): + - value: 88.5 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 42.2 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 37.8 + Weights: https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_32x4d_fpn_2x_coco/mask_rcnn_x101_32x4d_fpn_2x_coco_bbox_mAP-0.422__segm_mAP-0.378_20200506_004702-faef898c.pth + + - Name: mask_rcnn_x101_64x4d_fpn_1x_coco + In Collection: Mask R-CNN + Config: configs/mask_rcnn/mask_rcnn_x101_64x4d_fpn_1x_coco.py + Metadata: + Training Memory (GB): 10.7 + inference time (ms/im): + - value: 125 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 42.8 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 38.4 + Weights: https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_64x4d_fpn_1x_coco/mask_rcnn_x101_64x4d_fpn_1x_coco_20200201-9352eb0d.pth + + - Name: mask_rcnn_x101_64x4d_fpn_2x_coco + In Collection: Mask R-CNN + Config: configs/mask_rcnn/mask_rcnn_x101_64x4d_fpn_2x_coco.py + Metadata: + Training Memory (GB): 10.7 + inference time (ms/im): + - value: 125 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 42.7 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 38.1 + Weights: https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_64x4d_fpn_2x_coco/mask_rcnn_x101_64x4d_fpn_2x_coco_20200509_224208-39d6f70c.pth + + - Name: mask_rcnn_x101_32x8d_fpn_1x_coco + In Collection: Mask R-CNN + Config: configs/mask_rcnn/mask_rcnn_x101_32x8d_fpn_1x_coco.py + Metadata: + Training Memory (GB): 10.7 + inference time (ms/im): + - value: 125 + 
hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 42.8 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 38.3 + + - Name: mask_rcnn_r50_caffe_fpn_mstrain-poly_2x_coco + In Collection: Mask R-CNN + Config: configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_2x_coco.py + Metadata: + Training Memory (GB): 4.3 + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 40.3 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 36.5 + Weights: https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_2x_coco/mask_rcnn_r50_caffe_fpn_mstrain-poly_2x_coco_bbox_mAP-0.403__segm_mAP-0.365_20200504_231822-a75c98ce.pth + + - Name: mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco + In Collection: Mask R-CNN + Config: configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco.py + Metadata: + Training Memory (GB): 4.3 + Epochs: 36 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 40.8 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 37.0 + Weights: https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco_bbox_mAP-0.408__segm_mAP-0.37_20200504_163245-42aa3d00.pth + + - Name: mask_rcnn_r50_fpn_mstrain-poly_3x_coco + In Collection: Mask R-CNN + Config: configs/mask_rcnn/mask_rcnn_r50_fpn_mstrain-poly_3x_coco.py + Metadata: + Training Memory (GB): 4.1 + Epochs: 36 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 40.9 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 37.1 + Weights: https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_mstrain-poly_3x_coco/mask_rcnn_r50_fpn_mstrain-poly_3x_coco_20210524_201154-21b550bb.pth + + - Name: 
mask_rcnn_r101_fpn_mstrain-poly_3x_coco + In Collection: Mask R-CNN + Config: configs/mask_rcnn/mask_rcnn_r101_fpn_mstrain-poly_3x_coco.py + Metadata: + Training Memory (GB): 6.1 + Epochs: 36 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 42.7 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 38.5 + Weights: https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r101_fpn_mstrain-poly_3x_coco/mask_rcnn_r101_fpn_mstrain-poly_3x_coco_20210524_200244-5675c317.pth + + - Name: mask_rcnn_r101_caffe_fpn_mstrain-poly_3x_coco + In Collection: Mask R-CNN + Config: configs/mask_rcnn/mask_rcnn_r101_caffe_fpn_mstrain-poly_3x_coco.py + Metadata: + Training Memory (GB): 5.9 + Epochs: 36 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 42.9 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 38.5 + Weights: https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r101_caffe_fpn_mstrain-poly_3x_coco/mask_rcnn_r101_caffe_fpn_mstrain-poly_3x_coco_20210526_132339-3c33ce02.pth + + - Name: mask_rcnn_x101_32x4d_fpn_mstrain-poly_3x_coco + In Collection: Mask R-CNN + Config: configs/mask_rcnn/mask_rcnn_x101_32x4d_fpn_mstrain-poly_3x_coco.py + Metadata: + Training Memory (GB): 7.3 + Epochs: 36 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 43.6 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 39.0 + Weights: https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_32x4d_fpn_mstrain-poly_3x_coco/mask_rcnn_x101_32x4d_fpn_mstrain-poly_3x_coco_20210524_201410-abcd7859.pth + + - Name: mask_rcnn_x101_32x8d_fpn_mstrain-poly_1x_coco + In Collection: Mask R-CNN + Config: configs/mask_rcnn/mask_rcnn_x101_32x8d_fpn_mstrain-poly_1x_coco.py + Metadata: + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 43.6 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 39.0 + + - 
Name: mask_rcnn_x101_32x8d_fpn_mstrain-poly_3x_coco + In Collection: Mask R-CNN + Config: configs/mask_rcnn/mask_rcnn_x101_32x8d_fpn_mstrain-poly_3x_coco.py + Metadata: + Training Memory (GB): 10.3 + Epochs: 36 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 44.3 + Weights: https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_32x8d_fpn_mstrain-poly_3x_coco/mask_rcnn_x101_32x8d_fpn_mstrain-poly_3x_coco_20210607_161042-8bd2c639.pth + + - Name: mask_rcnn_x101_64x4d_fpn_mstrain-poly_3x_coco + In Collection: Mask R-CNN + Config: configs/mask_rcnn/mask_rcnn_x101_64x4d_fpn_mstrain-poly_3x_coco.py + Metadata: + Epochs: 36 + Training Memory (GB): 10.4 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 44.5 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 39.7 + Weights: https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_64x4d_fpn_mstrain-poly_3x_coco/mask_rcnn_x101_64x4d_fpn_mstrain-poly_3x_coco_20210526_120447-c376f129.pth diff --git a/configs/ms_rcnn/README.md b/configs/ms_rcnn/README.md new file mode 100644 index 0000000..f36b64c --- /dev/null +++ b/configs/ms_rcnn/README.md @@ -0,0 +1,26 @@ +# Mask Scoring R-CNN + +## Introduction + + + +``` +@inproceedings{huang2019msrcnn, + title={Mask Scoring R-CNN}, + author={Zhaojin Huang and Lichao Huang and Yongchao Gong and Chang Huang and Xinggang Wang}, + booktitle={IEEE Conference on Computer Vision and Pattern Recognition}, + year={2019}, +} +``` + +## Results and Models + +| Backbone | style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +|:-------------:|:----------:|:-------:|:--------:|:--------------:|:------:|:-------:|:------:|:--------:| +| R-50-FPN | caffe | 1x | 4.5 | | 38.2 | 36.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ms_rcnn/ms_rcnn_r50_caffe_fpn_1x_coco.py) |
[model](https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_r50_caffe_fpn_1x_coco/ms_rcnn_r50_caffe_fpn_1x_coco_20200702_180848-61c9355e.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_r50_caffe_fpn_1x_coco/ms_rcnn_r50_caffe_fpn_1x_coco_20200702_180848.log.json) | +| R-50-FPN | caffe | 2x | - | - | 38.8 | 36.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ms_rcnn/ms_rcnn_r50_caffe_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_r50_caffe_fpn_2x_coco/ms_rcnn_r50_caffe_fpn_2x_coco_bbox_mAP-0.388__segm_mAP-0.363_20200506_004738-ee87b137.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_r50_caffe_fpn_2x_coco/ms_rcnn_r50_caffe_fpn_2x_coco_20200506_004738.log.json) | +| R-101-FPN | caffe | 1x | 6.5 | | 40.4 | 37.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ms_rcnn/ms_rcnn_r101_caffe_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_r101_caffe_fpn_1x_coco/ms_rcnn_r101_caffe_fpn_1x_coco_bbox_mAP-0.404__segm_mAP-0.376_20200506_004755-b9b12a37.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_r101_caffe_fpn_1x_coco/ms_rcnn_r101_caffe_fpn_1x_coco_20200506_004755.log.json) | +| R-101-FPN | caffe | 2x | - | - | 41.1 | 38.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ms_rcnn/ms_rcnn_r101_caffe_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_r101_caffe_fpn_2x_coco/ms_rcnn_r101_caffe_fpn_2x_coco_bbox_mAP-0.411__segm_mAP-0.381_20200506_011134-5f3cc74f.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_r101_caffe_fpn_2x_coco/ms_rcnn_r101_caffe_fpn_2x_coco_20200506_011134.log.json) | +| R-X101-32x4d | pytorch | 1x | 7.9 | 11.0 | 41.8 | 38.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ms_rcnn/ms_rcnn_x101_32x4d_fpn_1x_coco.py) |
[model](https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_x101_32x4d_fpn_1x_coco/ms_rcnn_x101_32x4d_fpn_1x_coco_20200206-81fd1740.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_x101_32x4d_fpn_1x_coco/ms_rcnn_x101_32x4d_fpn_1x_coco_20200206_100113.log.json) | +| R-X101-64x4d | pytorch | 1x | 11.0 | 8.0 | 43.0 | 39.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ms_rcnn/ms_rcnn_x101_64x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_x101_64x4d_fpn_1x_coco/ms_rcnn_x101_64x4d_fpn_1x_coco_20200206-86ba88d2.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_x101_64x4d_fpn_1x_coco/ms_rcnn_x101_64x4d_fpn_1x_coco_20200206_091744.log.json) | +| R-X101-64x4d | pytorch | 2x | 11.0 | 8.0 | 42.6 | 39.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ms_rcnn/ms_rcnn_x101_64x4d_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_x101_64x4d_fpn_2x_coco/ms_rcnn_x101_64x4d_fpn_2x_coco_20200308-02a445e2.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_x101_64x4d_fpn_2x_coco/ms_rcnn_x101_64x4d_fpn_2x_coco_20200308_012247.log.json) | diff --git a/configs/ms_rcnn/metafile.yml b/configs/ms_rcnn/metafile.yml new file mode 100644 index 0000000..a6c7dc5 --- /dev/null +++ b/configs/ms_rcnn/metafile.yml @@ -0,0 +1,159 @@ +Collections: + - Name: Mask Scoring R-CNN + Metadata: + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - RPN + - FPN + - ResNet + - RoIAlign + Paper: + URL: https://arxiv.org/abs/1903.00241 + Title: 'Mask Scoring R-CNN' + README: configs/ms_rcnn/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/detectors/mask_scoring_rcnn.py#L6 + Version: v2.0.0 + +Models: + - Name: ms_rcnn_r50_caffe_fpn_1x_coco + In Collection: Mask 
Scoring R-CNN + Config: configs/ms_rcnn/ms_rcnn_r50_caffe_fpn_1x_coco.py + Metadata: + Training Memory (GB): 4.5 + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 38.2 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 36.0 + Weights: https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_r50_caffe_fpn_1x_coco/ms_rcnn_r50_caffe_fpn_1x_coco_20200702_180848-61c9355e.pth + + - Name: ms_rcnn_r50_caffe_fpn_2x_coco + In Collection: Mask Scoring R-CNN + Config: configs/ms_rcnn/ms_rcnn_r50_caffe_fpn_2x_coco.py + Metadata: + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 38.8 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 36.3 + Weights: https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_r50_caffe_fpn_2x_coco/ms_rcnn_r50_caffe_fpn_2x_coco_bbox_mAP-0.388__segm_mAP-0.363_20200506_004738-ee87b137.pth + + - Name: ms_rcnn_r101_caffe_fpn_1x_coco + In Collection: Mask Scoring R-CNN + Config: configs/ms_rcnn/ms_rcnn_r101_caffe_fpn_1x_coco.py + Metadata: + Training Memory (GB): 6.5 + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 40.4 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 37.6 + Weights: https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_r101_caffe_fpn_1x_coco/ms_rcnn_r101_caffe_fpn_1x_coco_bbox_mAP-0.404__segm_mAP-0.376_20200506_004755-b9b12a37.pth + + - Name: ms_rcnn_r101_caffe_fpn_2x_coco + In Collection: Mask Scoring R-CNN + Config: configs/ms_rcnn/ms_rcnn_r101_caffe_fpn_2x_coco.py + Metadata: + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.1 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 38.1 + Weights: https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_r101_caffe_fpn_2x_coco/ms_rcnn_r101_caffe_fpn_2x_coco_bbox_mAP-0.411__segm_mAP-0.381_20200506_011134-5f3cc74f.pth + + - Name: 
ms_rcnn_x101_32x4d_fpn_1x_coco + In Collection: Mask Scoring R-CNN + Config: configs/ms_rcnn/ms_rcnn_x101_32x4d_fpn_1x_coco.py + Metadata: + Training Memory (GB): 7.9 + inference time (ms/im): + - value: 90.91 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.8 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 38.7 + Weights: https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_x101_32x4d_fpn_1x_coco/ms_rcnn_x101_32x4d_fpn_1x_coco_20200206-81fd1740.pth + + - Name: ms_rcnn_x101_64x4d_fpn_1x_coco + In Collection: Mask Scoring R-CNN + Config: configs/ms_rcnn/ms_rcnn_x101_64x4d_fpn_1x_coco.py + Metadata: + Training Memory (GB): 11.0 + inference time (ms/im): + - value: 125 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 43.0 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 39.5 + Weights: https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_x101_64x4d_fpn_1x_coco/ms_rcnn_x101_64x4d_fpn_1x_coco_20200206-86ba88d2.pth + + - Name: ms_rcnn_x101_64x4d_fpn_2x_coco + In Collection: Mask Scoring R-CNN + Config: configs/ms_rcnn/ms_rcnn_x101_64x4d_fpn_2x_coco.py + Metadata: + Training Memory (GB): 11.0 + inference time (ms/im): + - value: 125 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 42.6 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 39.5 + Weights: https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_x101_64x4d_fpn_2x_coco/ms_rcnn_x101_64x4d_fpn_2x_coco_20200308-02a445e2.pth diff --git a/configs/ms_rcnn/ms_rcnn_r101_caffe_fpn_1x_coco.py b/configs/ms_rcnn/ms_rcnn_r101_caffe_fpn_1x_coco.py 
new file mode 100644 index 0000000..9b7dcbb --- /dev/null +++ b/configs/ms_rcnn/ms_rcnn_r101_caffe_fpn_1x_coco.py @@ -0,0 +1,7 @@ +_base_ = './ms_rcnn_r50_caffe_fpn_1x_coco.py' +model = dict( + backbone=dict( + depth=101, + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://detectron2/resnet101_caffe'))) diff --git a/configs/ms_rcnn/ms_rcnn_r101_caffe_fpn_2x_coco.py b/configs/ms_rcnn/ms_rcnn_r101_caffe_fpn_2x_coco.py new file mode 100644 index 0000000..202bcce --- /dev/null +++ b/configs/ms_rcnn/ms_rcnn_r101_caffe_fpn_2x_coco.py @@ -0,0 +1,4 @@ +_base_ = './ms_rcnn_r101_caffe_fpn_1x_coco.py' +# learning policy +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/ms_rcnn/ms_rcnn_r50_caffe_fpn_1x_coco.py b/configs/ms_rcnn/ms_rcnn_r50_caffe_fpn_1x_coco.py new file mode 100644 index 0000000..5845125 --- /dev/null +++ b/configs/ms_rcnn/ms_rcnn_r50_caffe_fpn_1x_coco.py @@ -0,0 +1,16 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_caffe_fpn_1x_coco.py' +model = dict( + type='MaskScoringRCNN', + roi_head=dict( + type='MaskScoringRoIHead', + mask_iou_head=dict( + type='MaskIoUHead', + num_convs=4, + num_fcs=2, + roi_feat_size=14, + in_channels=256, + conv_out_channels=256, + fc_out_channels=1024, + num_classes=80)), + # model training and testing settings + train_cfg=dict(rcnn=dict(mask_thr_binary=0.5))) diff --git a/configs/ms_rcnn/ms_rcnn_r50_caffe_fpn_2x_coco.py b/configs/ms_rcnn/ms_rcnn_r50_caffe_fpn_2x_coco.py new file mode 100644 index 0000000..008a70a --- /dev/null +++ b/configs/ms_rcnn/ms_rcnn_r50_caffe_fpn_2x_coco.py @@ -0,0 +1,4 @@ +_base_ = './ms_rcnn_r50_caffe_fpn_1x_coco.py' +# learning policy +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/ms_rcnn/ms_rcnn_r50_fpn_1x_coco.py b/configs/ms_rcnn/ms_rcnn_r50_fpn_1x_coco.py new file mode 100644 index 0000000..0a163ce --- /dev/null +++ b/configs/ms_rcnn/ms_rcnn_r50_fpn_1x_coco.py @@ -0,0 +1,16 @@ 
+_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' +model = dict( + type='MaskScoringRCNN', + roi_head=dict( + type='MaskScoringRoIHead', + mask_iou_head=dict( + type='MaskIoUHead', + num_convs=4, + num_fcs=2, + roi_feat_size=14, + in_channels=256, + conv_out_channels=256, + fc_out_channels=1024, + num_classes=80)), + # model training and testing settings + train_cfg=dict(rcnn=dict(mask_thr_binary=0.5))) diff --git a/configs/ms_rcnn/ms_rcnn_x101_32x4d_fpn_1x_coco.py b/configs/ms_rcnn/ms_rcnn_x101_32x4d_fpn_1x_coco.py new file mode 100644 index 0000000..20479bb --- /dev/null +++ b/configs/ms_rcnn/ms_rcnn_x101_32x4d_fpn_1x_coco.py @@ -0,0 +1,14 @@ +_base_ = './ms_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + type='ResNeXt', + depth=101, + groups=32, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d'))) diff --git a/configs/ms_rcnn/ms_rcnn_x101_64x4d_fpn_1x_coco.py b/configs/ms_rcnn/ms_rcnn_x101_64x4d_fpn_1x_coco.py new file mode 100644 index 0000000..ee5b734 --- /dev/null +++ b/configs/ms_rcnn/ms_rcnn_x101_64x4d_fpn_1x_coco.py @@ -0,0 +1,14 @@ +_base_ = './ms_rcnn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + type='ResNeXt', + depth=101, + groups=64, + base_width=4, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d'))) diff --git a/configs/ms_rcnn/ms_rcnn_x101_64x4d_fpn_2x_coco.py b/configs/ms_rcnn/ms_rcnn_x101_64x4d_fpn_2x_coco.py new file mode 100644 index 0000000..54c605b --- /dev/null +++ b/configs/ms_rcnn/ms_rcnn_x101_64x4d_fpn_2x_coco.py @@ -0,0 +1,4 @@ +_base_ = './ms_rcnn_x101_64x4d_fpn_1x_coco.py' +# learning policy +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) 
diff --git a/configs/nas_fcos/README.md b/configs/nas_fcos/README.md new file mode 100644 index 0000000..21a2b22 --- /dev/null +++ b/configs/nas_fcos/README.md @@ -0,0 +1,25 @@ +# NAS-FCOS: Fast Neural Architecture Search for Object Detection + +## Introduction + + + +```latex +@article{wang2019fcos, + title={Nas-fcos: Fast neural architecture search for object detection}, + author={Wang, Ning and Gao, Yang and Chen, Hao and Wang, Peng and Tian, Zhi and Shen, Chunhua}, + journal={arXiv preprint arXiv:1906.04423}, + year={2019} +} +``` + +## Results and Models + +| Head | Backbone | Style | GN-head | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +|:---------:|:---------:|:-------:|:-------:|:-------:|:--------:|:--------------:|:------:|:------:|:--------:| +| NAS-FCOSHead | R-50 | caffe | Y | 1x | | | 39.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/nas_fcos/nas_fcos_nashead_r50_caffe_fpn_gn-head_4x4_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/nas_fcos/nas_fcos_nashead_r50_caffe_fpn_gn-head_4x4_1x_coco/nas_fcos_nashead_r50_caffe_fpn_gn-head_4x4_1x_coco_20200520-1bdba3ce.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/nas_fcos/nas_fcos_nashead_r50_caffe_fpn_gn-head_4x4_1x_coco/nas_fcos_nashead_r50_caffe_fpn_gn-head_4x4_1x_coco_20200520.log.json) | +| FCOSHead | R-50 | caffe | Y | 1x | | | 38.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/nas_fcos/nas_fcos_fcoshead_r50_caffe_fpn_gn-head_4x4_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/nas_fcos/nas_fcos_fcoshead_r50_caffe_fpn_gn-head_4x4_1x_coco/nas_fcos_fcoshead_r50_caffe_fpn_gn-head_4x4_1x_coco_20200521-7fdcbce0.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/nas_fcos/nas_fcos_fcoshead_r50_caffe_fpn_gn-head_4x4_1x_coco/nas_fcos_fcoshead_r50_caffe_fpn_gn-head_4x4_1x_coco_20200521.log.json) | + +**Notes:** + +- To be consistent with the author's implementation, we 
use 4 GPUs with 4 images/GPU. diff --git a/configs/nas_fcos/metafile.yml b/configs/nas_fcos/metafile.yml new file mode 100644 index 0000000..1ea28cf --- /dev/null +++ b/configs/nas_fcos/metafile.yml @@ -0,0 +1,44 @@ +Collections: + - Name: NAS-FCOS + Metadata: + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 4x V100 GPUs + Architecture: + - FPN + - NAS-FCOS + - ResNet + Paper: + URL: https://arxiv.org/abs/1906.04423 + Title: 'NAS-FCOS: Fast Neural Architecture Search for Object Detection' + README: configs/nas_fcos/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/detectors/nasfcos.py#L6 + Version: v2.1.0 + +Models: + - Name: nas_fcos_nashead_r50_caffe_fpn_gn-head_4x4_1x_coco + In Collection: NAS-FCOS + Config: configs/nas_fcos/nas_fcos_nashead_r50_caffe_fpn_gn-head_4x4_1x_coco.py + Metadata: + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 39.4 + Weights: https://download.openmmlab.com/mmdetection/v2.0/nas_fcos/nas_fcos_nashead_r50_caffe_fpn_gn-head_4x4_1x_coco/nas_fcos_nashead_r50_caffe_fpn_gn-head_4x4_1x_coco_20200520-1bdba3ce.pth + + - Name: nas_fcos_fcoshead_r50_caffe_fpn_gn-head_4x4_1x_coco + In Collection: NAS-FCOS + Config: configs/nas_fcos/nas_fcos_fcoshead_r50_caffe_fpn_gn-head_4x4_1x_coco.py + Metadata: + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 38.5 + Weights: https://download.openmmlab.com/mmdetection/v2.0/nas_fcos/nas_fcos_fcoshead_r50_caffe_fpn_gn-head_4x4_1x_coco/nas_fcos_fcoshead_r50_caffe_fpn_gn-head_4x4_1x_coco_20200521-7fdcbce0.pth diff --git a/configs/nas_fcos/nas_fcos_fcoshead_r50_caffe_fpn_gn-head_4x4_1x_coco.py b/configs/nas_fcos/nas_fcos_fcoshead_r50_caffe_fpn_gn-head_4x4_1x_coco.py new file mode 100644 index 0000000..a455c92 --- /dev/null +++ b/configs/nas_fcos/nas_fcos_fcoshead_r50_caffe_fpn_gn-head_4x4_1x_coco.py @@ -0,0 +1,100 @@ +_base_ = [ + 
'../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] + +model = dict( + type='NASFCOS', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=False, eps=0), + style='caffe', + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://detectron2/resnet50_caffe')), + neck=dict( + type='NASFCOS_FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + start_level=1, + add_extra_convs=True, + num_outs=5, + norm_cfg=dict(type='BN'), + conv_cfg=dict(type='DCNv2', deform_groups=2)), + bbox_head=dict( + type='FCOSHead', + num_classes=80, + in_channels=256, + stacked_convs=4, + feat_channels=256, + strides=[8, 16, 32, 64, 128], + norm_cfg=dict(type='GN', num_groups=32), + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_bbox=dict(type='IoULoss', loss_weight=1.0), + loss_centerness=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)), + train_cfg=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.4, + min_pos_iou=0, + ignore_iof_thr=-1), + allowed_border=-1, + pos_weight=-1, + debug=False), + test_cfg=dict( + nms_pre=1000, + min_bbox_size=0, + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.6), + max_per_img=100)) + +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] + +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + 
img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] + +data = dict( + samples_per_gpu=4, + workers_per_gpu=2, + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) + +optimizer = dict( + lr=0.01, paramwise_cfg=dict(bias_lr_mult=2., bias_decay_mult=0.)) diff --git a/configs/nas_fcos/nas_fcos_nashead_r50_caffe_fpn_gn-head_4x4_1x_coco.py b/configs/nas_fcos/nas_fcos_nashead_r50_caffe_fpn_gn-head_4x4_1x_coco.py new file mode 100644 index 0000000..b779492 --- /dev/null +++ b/configs/nas_fcos/nas_fcos_nashead_r50_caffe_fpn_gn-head_4x4_1x_coco.py @@ -0,0 +1,99 @@ +_base_ = [ + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] + +model = dict( + type='NASFCOS', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=False, eps=0), + style='caffe', + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://detectron2/resnet50_caffe')), + neck=dict( + type='NASFCOS_FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + start_level=1, + add_extra_convs=True, + num_outs=5, + norm_cfg=dict(type='BN'), + conv_cfg=dict(type='DCNv2', deform_groups=2)), + bbox_head=dict( + type='NASFCOSHead', + num_classes=80, + in_channels=256, + feat_channels=256, + strides=[8, 16, 32, 64, 128], + norm_cfg=dict(type='GN', num_groups=32), + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_bbox=dict(type='IoULoss', loss_weight=1.0), + loss_centerness=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)), + train_cfg=dict( + assigner=dict( + type='MaxIoUAssigner', + 
pos_iou_thr=0.5, + neg_iou_thr=0.4, + min_pos_iou=0, + ignore_iof_thr=-1), + allowed_border=-1, + pos_weight=-1, + debug=False), + test_cfg=dict( + nms_pre=1000, + min_bbox_size=0, + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.6), + max_per_img=100)) + +img_norm_cfg = dict( + mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] + +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] + +data = dict( + samples_per_gpu=4, + workers_per_gpu=2, + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) + +optimizer = dict( + lr=0.01, paramwise_cfg=dict(bias_lr_mult=2., bias_decay_mult=0.)) diff --git a/configs/nas_fpn/README.md b/configs/nas_fpn/README.md new file mode 100644 index 0000000..9862b9a --- /dev/null +++ b/configs/nas_fpn/README.md @@ -0,0 +1,26 @@ +# NAS-FPN: Learning Scalable Feature Pyramid Architecture for Object Detection + +## Introduction + + + +```latex +@inproceedings{ghiasi2019fpn, + title={Nas-fpn: Learning scalable feature pyramid architecture for object detection}, + author={Ghiasi, Golnaz and Lin, Tsung-Yi and Le, Quoc V}, + booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition}, + pages={7036--7045}, + year={2019} +} +``` + +## 
Results and Models + +We benchmark the new training schedule (crop training, large batch, unfrozen BN, 50 epochs) introduced in NAS-FPN. RetinaNet is used in the paper. + +| Backbone | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +|:-----------:|:-------:|:--------:|:--------------:|:------:|:------:|:--------:| +| R-50-FPN | 50e | 12.9 | 22.9 | 37.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/nas_fpn/retinanet_r50_fpn_crop640_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/nas_fpn/retinanet_r50_fpn_crop640_50e_coco/retinanet_r50_fpn_crop640_50e_coco-9b953d76.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/nas_fpn/retinanet_r50_fpn_crop640_50e_coco/retinanet_r50_fpn_crop640_50e_coco_20200529_095329.log.json) | +| R-50-NASFPN | 50e | 13.2 | 23.0 | 40.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/nas_fpn/retinanet_r50_nasfpn_crop640_50e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/nas_fpn/retinanet_r50_nasfpn_crop640_50e_coco/retinanet_r50_nasfpn_crop640_50e_coco-0ad1f644.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/nas_fpn/retinanet_r50_nasfpn_crop640_50e_coco/retinanet_r50_nasfpn_crop640_50e_coco_20200528_230008.log.json) | + +**Note**: We find that it is unstable to train NAS-FPN and there is a small chance that results can be 3% mAP lower. 
diff --git a/configs/nas_fpn/metafile.yml b/configs/nas_fpn/metafile.yml new file mode 100644 index 0000000..ab8d649 --- /dev/null +++ b/configs/nas_fpn/metafile.yml @@ -0,0 +1,59 @@ +Collections: + - Name: NAS-FPN + Metadata: + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - NAS-FPN + - ResNet + Paper: + URL: https://arxiv.org/abs/1904.07392 + Title: 'NAS-FPN: Learning Scalable Feature Pyramid Architecture for Object Detection' + README: configs/nas_fpn/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/necks/nas_fpn.py#L67 + Version: v2.0.0 + +Models: + - Name: retinanet_r50_fpn_crop640_50e_coco + In Collection: NAS-FPN + Config: configs/nas_fpn/retinanet_r50_fpn_crop640_50e_coco.py + Metadata: + Training Memory (GB): 12.9 + inference time (ms/im): + - value: 43.67 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 50 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 37.9 + Weights: https://download.openmmlab.com/mmdetection/v2.0/nas_fpn/retinanet_r50_fpn_crop640_50e_coco/retinanet_r50_fpn_crop640_50e_coco-9b953d76.pth + + - Name: retinanet_r50_nasfpn_crop640_50e_coco + In Collection: NAS-FPN + Config: configs/nas_fpn/retinanet_r50_nasfpn_crop640_50e_coco.py + Metadata: + Training Memory (GB): 13.2 + inference time (ms/im): + - value: 43.48 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 50 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 40.5 + Weights: https://download.openmmlab.com/mmdetection/v2.0/nas_fpn/retinanet_r50_nasfpn_crop640_50e_coco/retinanet_r50_nasfpn_crop640_50e_coco-0ad1f644.pth diff --git a/configs/nas_fpn/retinanet_r50_fpn_crop640_50e_coco.py b/configs/nas_fpn/retinanet_r50_fpn_crop640_50e_coco.py new file mode 100644 index 0000000..6ea44a0 --- /dev/null +++ 
b/configs/nas_fpn/retinanet_r50_fpn_crop640_50e_coco.py @@ -0,0 +1,80 @@ +_base_ = [ + '../_base_/models/retinanet_r50_fpn.py', + '../_base_/datasets/coco_detection.py', '../_base_/default_runtime.py' +] +cudnn_benchmark = True +norm_cfg = dict(type='BN', requires_grad=True) +model = dict( + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), + neck=dict( + relu_before_extra_convs=True, + no_norm_on_lateral=True, + norm_cfg=norm_cfg), + bbox_head=dict(type='RetinaSepBNHead', num_ins=5, norm_cfg=norm_cfg), + # training and testing settings + train_cfg=dict(assigner=dict(neg_iou_thr=0.5))) +# dataset settings +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', + img_scale=(640, 640), + ratio_range=(0.8, 1.2), + keep_ratio=True), + dict(type='RandomCrop', crop_size=(640, 640)), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=(640, 640)), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(640, 640), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=64), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=8, + workers_per_gpu=4, + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +# optimizer +optimizer = dict( + type='SGD', + lr=0.08, + momentum=0.9, + 
weight_decay=0.0001, + paramwise_cfg=dict(norm_decay_mult=0, bypass_duplicate=True)) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=1000, + warmup_ratio=0.1, + step=[30, 40]) +# runtime settings +runner = dict(type='EpochBasedRunner', max_epochs=50) diff --git a/configs/nas_fpn/retinanet_r50_nasfpn_crop640_50e_coco.py b/configs/nas_fpn/retinanet_r50_nasfpn_crop640_50e_coco.py new file mode 100644 index 0000000..3e03919 --- /dev/null +++ b/configs/nas_fpn/retinanet_r50_nasfpn_crop640_50e_coco.py @@ -0,0 +1,79 @@ +_base_ = [ + '../_base_/models/retinanet_r50_fpn.py', + '../_base_/datasets/coco_detection.py', '../_base_/default_runtime.py' +] +cudnn_benchmark = True +# model settings +norm_cfg = dict(type='BN', requires_grad=True) +model = dict( + type='RetinaNet', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), + neck=dict(type='NASFPN', stack_times=7, norm_cfg=norm_cfg), + bbox_head=dict(type='RetinaSepBNHead', num_ins=5, norm_cfg=norm_cfg), + # training and testing settings + train_cfg=dict(assigner=dict(neg_iou_thr=0.5))) +# dataset settings +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', + img_scale=(640, 640), + ratio_range=(0.8, 1.2), + keep_ratio=True), + dict(type='RandomCrop', crop_size=(640, 640)), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=(640, 640)), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + 
img_scale=(640, 640), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=128), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=8, + workers_per_gpu=4, + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +# optimizer +optimizer = dict( + type='SGD', + lr=0.08, + momentum=0.9, + weight_decay=0.0001, + paramwise_cfg=dict(norm_decay_mult=0, bypass_duplicate=True)) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=1000, + warmup_ratio=0.1, + step=[30, 40]) +# runtime settings +runner = dict(type='EpochBasedRunner', max_epochs=50) diff --git a/configs/paa/README.md b/configs/paa/README.md new file mode 100644 index 0000000..3e9b6e3 --- /dev/null +++ b/configs/paa/README.md @@ -0,0 +1,35 @@ +# Probabilistic Anchor Assignment with IoU Prediction for Object Detection + + + +```latex +@inproceedings{paa-eccv2020, + title={Probabilistic Anchor Assignment with IoU Prediction for Object Detection}, + author={Kim, Kang and Lee, Hee Seok}, + booktitle = {ECCV}, + year={2020} +} +``` + +## Results and Models + +We provide config files to reproduce the object detection results in the +ECCV 2020 paper for Probabilistic Anchor Assignment with IoU +Prediction for Object Detection. 
+ +| Backbone | Lr schd | Mem (GB) | Score voting | box AP | Config | Download | +|:-----------:|:-------:|:--------:|:------------:|:------:|:------:|:--------:| +| R-50-FPN | 12e | 3.7 | True | 40.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/paa/paa_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r50_fpn_1x_coco/paa_r50_fpn_1x_coco_20200821-936edec3.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r50_fpn_1x_coco/paa_r50_fpn_1x_coco_20200821-936edec3.log.json) | +| R-50-FPN | 12e | 3.7 | False | 40.2 | - | +| R-50-FPN | 18e | 3.7 | True | 41.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/paa/paa_r50_fpn_1.5x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r50_fpn_1.5x_coco/paa_r50_fpn_1.5x_coco_20200823-805d6078.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r50_fpn_1.5x_coco/paa_r50_fpn_1.5x_coco_20200823-805d6078.log.json) | +| R-50-FPN | 18e | 3.7 | False | 41.2 | - | +| R-50-FPN | 24e | 3.7 | True | 41.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/paa/paa_r50_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r50_fpn_2x_coco/paa_r50_fpn_2x_coco_20200821-c98bfc4e.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r50_fpn_2x_coco/paa_r50_fpn_2x_coco_20200821-c98bfc4e.log.json) | +| R-50-FPN | 36e | 3.7 | True | 43.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/paa/paa_r50_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r50_fpn_mstrain_3x_coco/paa_r50_fpn_mstrain_3x_coco_20210121_145722-06a6880b.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r50_fpn_mstrain_3x_coco/paa_r50_fpn_mstrain_3x_coco_20210121_145722.log.json) | +| R-101-FPN | 12e | 6.2 | True | 42.6 | 
[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/paa/paa_r101_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r101_fpn_1x_coco/paa_r101_fpn_1x_coco_20200821-0a1825a4.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r101_fpn_1x_coco/paa_r101_fpn_1x_coco_20200821-0a1825a4.log.json) | +| R-101-FPN | 12e | 6.2 | False | 42.4 | - | +| R-101-FPN | 24e | 6.2 | True | 43.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/paa/paa_r101_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r101_fpn_2x_coco/paa_r101_fpn_2x_coco_20200821-6829f96b.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r101_fpn_2x_coco/paa_r101_fpn_2x_coco_20200821-6829f96b.log.json) | +| R-101-FPN | 36e | 6.2 | True | 45.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/paa/paa_r101_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r101_fpn_mstrain_3x_coco/paa_r101_fpn_mstrain_3x_coco_20210122_084202-83250d22.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r101_fpn_mstrain_3x_coco/paa_r101_fpn_mstrain_3x_coco_20210122_084202.log.json) | + +**Note**: + +1. We find that the performance is unstable with 1x setting and may fluctuate by about 0.2 mAP. We report the best results. 
diff --git a/configs/paa/metafile.yml b/configs/paa/metafile.yml new file mode 100644 index 0000000..e08b663 --- /dev/null +++ b/configs/paa/metafile.yml @@ -0,0 +1,104 @@ +Collections: + - Name: PAA + Metadata: + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - FPN + - Probabilistic Anchor Assignment + - ResNet + Paper: + URL: https://arxiv.org/abs/2007.08103 + Title: 'Probabilistic Anchor Assignment with IoU Prediction for Object Detection' + README: configs/paa/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.4.0/mmdet/models/detectors/paa.py#L6 + Version: v2.4.0 + +Models: + - Name: paa_r50_fpn_1x_coco + In Collection: PAA + Config: configs/paa/paa_r50_fpn_1x_coco.py + Metadata: + Training Memory (GB): 3.7 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 40.4 + Weights: https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r50_fpn_1x_coco/paa_r50_fpn_1x_coco_20200821-936edec3.pth + + - Name: paa_r50_fpn_1.5x_coco + In Collection: PAA + Config: configs/paa/paa_r50_fpn_1.5x_coco.py + Metadata: + Training Memory (GB): 3.7 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.4 + Weights: https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r50_fpn_1.5x_coco/paa_r50_fpn_1.5x_coco_20200823-805d6078.pth + + - Name: paa_r50_fpn_2x_coco + In Collection: PAA + Config: configs/paa/paa_r50_fpn_2x_coco.py + Metadata: + Training Memory (GB): 3.7 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.6 + Weights: https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r50_fpn_2x_coco/paa_r50_fpn_2x_coco_20200821-c98bfc4e.pth + + - Name: paa_r50_fpn_mstrain_3x_coco + In Collection: PAA + Config: configs/paa/paa_r50_fpn_mstrain_3x_coco.py + Metadata: + Training Memory (GB): 3.7 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 43.3 + Weights: 
https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r50_fpn_mstrain_3x_coco/paa_r50_fpn_mstrain_3x_coco_20210121_145722-06a6880b.pth + + - Name: paa_r101_fpn_1x_coco + In Collection: PAA + Config: configs/paa/paa_r101_fpn_1x_coco.py + Metadata: + Training Memory (GB): 6.2 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 42.6 + Weights: https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r101_fpn_1x_coco/paa_r101_fpn_1x_coco_20200821-0a1825a4.pth + + - Name: paa_r101_fpn_2x_coco + In Collection: PAA + Config: configs/paa/paa_r101_fpn_2x_coco.py + Metadata: + Training Memory (GB): 6.2 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 43.5 + Weights: https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r101_fpn_2x_coco/paa_r101_fpn_2x_coco_20200821-6829f96b.pth + + - Name: paa_r101_fpn_mstrain_3x_coco + In Collection: PAA + Config: configs/paa/paa_r101_fpn_mstrain_3x_coco.py + Metadata: + Training Memory (GB): 6.2 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 45.1 + Weights: https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r101_fpn_mstrain_3x_coco/paa_r101_fpn_mstrain_3x_coco_20210122_084202-83250d22.pth diff --git a/configs/paa/paa_r101_fpn_1x_coco.py b/configs/paa/paa_r101_fpn_1x_coco.py new file mode 100644 index 0000000..94f1c27 --- /dev/null +++ b/configs/paa/paa_r101_fpn_1x_coco.py @@ -0,0 +1,6 @@ +_base_ = './paa_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + depth=101, + init_cfg=dict(type='Pretrained', + checkpoint='torchvision://resnet101'))) diff --git a/configs/paa/paa_r101_fpn_2x_coco.py b/configs/paa/paa_r101_fpn_2x_coco.py new file mode 100644 index 0000000..641ef76 --- /dev/null +++ b/configs/paa/paa_r101_fpn_2x_coco.py @@ -0,0 +1,3 @@ +_base_ = './paa_r101_fpn_1x_coco.py' +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/paa/paa_r101_fpn_mstrain_3x_coco.py 
b/configs/paa/paa_r101_fpn_mstrain_3x_coco.py new file mode 100644 index 0000000..71858ed --- /dev/null +++ b/configs/paa/paa_r101_fpn_mstrain_3x_coco.py @@ -0,0 +1,6 @@ +_base_ = './paa_r50_fpn_mstrain_3x_coco.py' +model = dict( + backbone=dict( + depth=101, + init_cfg=dict(type='Pretrained', + checkpoint='torchvision://resnet101'))) diff --git a/configs/paa/paa_r50_fpn_1.5x_coco.py b/configs/paa/paa_r50_fpn_1.5x_coco.py new file mode 100644 index 0000000..aabce4a --- /dev/null +++ b/configs/paa/paa_r50_fpn_1.5x_coco.py @@ -0,0 +1,3 @@ +_base_ = './paa_r50_fpn_1x_coco.py' +lr_config = dict(step=[12, 16]) +runner = dict(type='EpochBasedRunner', max_epochs=18) diff --git a/configs/paa/paa_r50_fpn_1x_coco.py b/configs/paa/paa_r50_fpn_1x_coco.py new file mode 100644 index 0000000..4c9c4aa --- /dev/null +++ b/configs/paa/paa_r50_fpn_1x_coco.py @@ -0,0 +1,70 @@ +_base_ = [ + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +model = dict( + type='PAA', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + start_level=1, + add_extra_convs='on_output', + num_outs=5), + bbox_head=dict( + type='PAAHead', + reg_decoded_bbox=True, + score_voting=True, + topk=9, + num_classes=80, + in_channels=256, + stacked_convs=4, + feat_channels=256, + anchor_generator=dict( + type='AnchorGenerator', + ratios=[1.0], + octave_base_scale=8, + scales_per_octave=1, + strides=[8, 16, 32, 64, 128]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[0.1, 0.1, 0.2, 0.2]), + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + 
loss_bbox=dict(type='GIoULoss', loss_weight=1.3), + loss_centerness=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.5)), + # training and testing settings + train_cfg=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.1, + neg_iou_thr=0.1, + min_pos_iou=0, + ignore_iof_thr=-1), + allowed_border=-1, + pos_weight=-1, + debug=False), + test_cfg=dict( + nms_pre=1000, + min_bbox_size=0, + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.6), + max_per_img=100)) +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) diff --git a/configs/paa/paa_r50_fpn_2x_coco.py b/configs/paa/paa_r50_fpn_2x_coco.py new file mode 100644 index 0000000..663d2c0 --- /dev/null +++ b/configs/paa/paa_r50_fpn_2x_coco.py @@ -0,0 +1,3 @@ +_base_ = './paa_r50_fpn_1x_coco.py' +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/paa/paa_r50_fpn_mstrain_3x_coco.py b/configs/paa/paa_r50_fpn_mstrain_3x_coco.py new file mode 100644 index 0000000..91fa28c --- /dev/null +++ b/configs/paa/paa_r50_fpn_mstrain_3x_coco.py @@ -0,0 +1,20 @@ +_base_ = './paa_r50_fpn_1x_coco.py' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 800)], + multiscale_mode='range', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +data = dict(train=dict(pipeline=train_pipeline)) +lr_config = dict(step=[28, 34]) +runner = dict(type='EpochBasedRunner', max_epochs=36) diff --git a/configs/pafpn/README.md b/configs/pafpn/README.md new file mode 100644 index 0000000..9602c6d --- /dev/null +++ b/configs/pafpn/README.md @@ 
-0,0 +1,26 @@ +# Path Aggregation Network for Instance Segmentation + +## Introduction + + + +``` +@inproceedings{liu2018path, + author = {Shu Liu and + Lu Qi and + Haifang Qin and + Jianping Shi and + Jiaya Jia}, + title = {Path Aggregation Network for Instance Segmentation}, + booktitle = {Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, + year = {2018} +} +``` + +## Results and Models + +## Results and Models + +| Backbone | style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +|:-------------:|:----------:|:-------:|:--------:|:--------------:|:------:|:-------:|:------:|:--------:| +| R-50-FPN | pytorch | 1x | 4.0 | 17.2 | 37.5 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pafpn/faster_rcnn_r50_pafpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pafpn/faster_rcnn_r50_pafpn_1x_coco/faster_rcnn_r50_pafpn_1x_coco_bbox_mAP-0.375_20200503_105836-b7b4b9bd.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/pafpn/faster_rcnn_r50_pafpn_1x_coco/faster_rcnn_r50_pafpn_1x_coco_20200503_105836.log.json) | diff --git a/configs/pafpn/faster_rcnn_r50_pafpn_1x_coco.py b/configs/pafpn/faster_rcnn_r50_pafpn_1x_coco.py new file mode 100644 index 0000000..b2fdef9 --- /dev/null +++ b/configs/pafpn/faster_rcnn_r50_pafpn_1x_coco.py @@ -0,0 +1,8 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' + +model = dict( + neck=dict( + type='PAFPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5)) diff --git a/configs/pafpn/metafile.yml b/configs/pafpn/metafile.yml new file mode 100644 index 0000000..f9cf97c --- /dev/null +++ b/configs/pafpn/metafile.yml @@ -0,0 +1,38 @@ +Collections: + - Name: PAFPN + Metadata: + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - PAFPN + Paper: + URL: https://arxiv.org/abs/1803.01534 + Title: 'Path Aggregation 
Network for Instance Segmentation' + README: configs/pafpn/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/necks/pafpn.py#L11 + Version: v2.0.0 + +Models: + - Name: faster_rcnn_r50_pafpn_1x_coco + In Collection: PAFPN + Config: configs/pafpn/faster_rcnn_r50_pafpn_1x_coco.py + Metadata: + Training Memory (GB): 4.0 + inference time (ms/im): + - value: 58.14 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (800, 1333) + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 37.5 + Weights: https://download.openmmlab.com/mmdetection/v2.0/pafpn/faster_rcnn_r50_pafpn_1x_coco/faster_rcnn_r50_pafpn_1x_coco_bbox_mAP-0.375_20200503_105836-b7b4b9bd.pth diff --git a/configs/panoptic_fpn/README.md b/configs/panoptic_fpn/README.md new file mode 100644 index 0000000..d209183 --- /dev/null +++ b/configs/panoptic_fpn/README.md @@ -0,0 +1,50 @@ +# Panoptic feature pyramid networks +## Introduction + + +The base method for panoptic segmentation task. + +``` +@inproceedings{kirillov2018panopticfpn, + author = { + Alexander Kirillov, + Ross Girshick, + Kaiming He, + Piotr Dollar, + }, + title = {Panoptic Feature Pyramid Networks}, + booktitle = {Proceedings of IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, + year = {2019} +} +``` + +## Dataset + +PanopticFPN requires COCO and [COCO-panoptic](http://images.cocodataset.org/annotations/panoptic_annotations_trainval2017.zip) dataset for training and evaluation. You need to download and extract it in the COCO dataset path. +The directory should be like this. 
+ +```none +mmdetection +├── mmdet +├── tools +├── configs +├── data +│ ├── coco +│ │ ├── annotations +│ │ │ ├── panoptic_train2017.json +│ │ │ ├── panoptic_train2017 +│ │ │ ├── panoptic_val2017.json +│ │ │ ├── panoptic_val2017 +│ │ ├── train2017 +│ │ ├── val2017 +│ │ ├── test2017 +``` + +## Results and Models + +| Backbone | style | Lr schd | Mem (GB) | Inf time (fps) | PQ | SQ | RQ | PQ_th | SQ_th | RQ_th | PQ_st | SQ_st | RQ_st | Config | Download | +|:-------------:|:----------:|:-------:|:--------:|:--------------:|:----:|:----:|:----:|:-----:|:-----:|:-----:|:-----:|:-----:|:-----:|:------:|:--------:| +| R-50-FPN | pytorch | 1x | 4.7 | | 40.2 | 77.8 | 49.3 | 47.8 | 80.9 | 57.5 | 28.9 | 73.1 | 37.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/panoptic_fpn/panoptic_fpn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/panoptic_fpn/panoptic_fpn_r50_fpn_1x_coco/panoptic_fpn_r50_fpn_1x_coco_20210821_101153-9668fd13.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/panoptic_fpn/panoptic_fpn_r50_fpn_1x_coco/panoptic_fpn_r50_fpn_1x_coco_20210821_101153.log.json) | +| R-50-FPN | pytorch | 3x | - | - | 42.5 | 78.1 | 51.7 | 50.3 | 81.5 | 60.3 | 30.7 | 73.0 | 38.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/panoptic_fpn/panoptic_fpn_r50_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/panoptic_fpn/panoptic_fpn_r50_fpn_mstrain_3x_coco/panoptic_fpn_r50_fpn_mstrain_3x_coco_20210824_171155-5650f98b.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/panoptic_fpn/panoptic_fpn_r50_fpn_mstrain_3x_coco/panoptic_fpn_r50_fpn_mstrain_3x_coco_20210824_171155.log.json) | +| R-101-FPN | pytorch | 1x | 6.7 | | 42.2 | 78.3 | 51.4 | 50.1 | 81.4 | 59.9 | 30.3 | 73.6 | 38.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/panoptic_fpn/panoptic_fpn_r101_fpn_1x_coco.py) | 
[model](https://download.openmmlab.com/mmdetection/v2.0/panoptic_fpn/panoptic_fpn_r101_fpn_1x_coco/panoptic_fpn_r101_fpn_1x_coco_20210820_193950-ab9157a2.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/panoptic_fpn/panoptic_fpn_r101_fpn_1x_coco/panoptic_fpn_r101_fpn_1x_coco_20210820_193950.log.json) | +| R-101-FPN | pytorch | 3x | - | - | 44.1 | 78.9 | 53.6 | 52.1 | 81.7 | 62.3 | 32.0 | 74.6 | 40.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/panoptic_fpn/panoptic_fpn_r101_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/panoptic_fpn/panoptic_fpn_r101_fpn_mstrain_3x_coco/panoptic_fpn_r101_fpn_mstrain_3x_coco_20210823_114712-9c99acc4.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/panoptic_fpn/panoptic_fpn_r101_fpn_mstrain_3x_coco/panoptic_fpn_r101_fpn_mstrain_3x_coco_20210823_114712.log.json) | diff --git a/configs/panoptic_fpn/metafile.yml b/configs/panoptic_fpn/metafile.yml new file mode 100644 index 0000000..8c9d39d --- /dev/null +++ b/configs/panoptic_fpn/metafile.yml @@ -0,0 +1,70 @@ +Collections: + - Name: PanopticFPN + Metadata: + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - PanopticFPN + Paper: + URL: https://arxiv.org/pdf/1901.02446 + Title: 'Panoptic feature pyramid networks' + README: configs/panoptic_fpn/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.16.0/mmdet/models/detectors/panoptic_fpn.py#L7 + Version: v2.16.0 + +Models: + - Name: panoptic_fpn_r50_fpn_1x_coco + In Collection: PanopticFPN + Config: configs/panoptic_fpn/panoptic_fpn_r50_fpn_1x_coco.py + Metadata: + Training Memory (GB): 4.6 + Epochs: 12 + Results: + - Task: Panoptic Segmentation + Dataset: COCO + Metrics: + PQ: 40.2 + Weights: https://download.openmmlab.com/mmdetection/v2.0/panoptic_fpn/panoptic_fpn_r50_fpn_1x_coco/panoptic_fpn_r50_fpn_1x_coco_20210821_101153-9668fd13.pth + + - 
Name: panoptic_fpn_r50_fpn_mstrain_3x_coco + In Collection: PanopticFPN + Config: configs/panoptic_fpn/panoptic_fpn_r50_fpn_mstrain_3x_coco.py + Metadata: + Training Memory (GB): 4.6 + Epochs: 36 + Results: + - Task: Panoptic Segmentation + Dataset: COCO + Metrics: + PQ: 42.5 + Weights: https://download.openmmlab.com/mmdetection/v2.0/panoptic_fpn/panoptic_fpn_r50_fpn_mstrain_3x_coco/panoptic_fpn_r50_fpn_mstrain_3x_coco_20210824_171155-5650f98b.pth + + - Name: panoptic_fpn_r101_fpn_1x_coco + In Collection: PanopticFPN + Config: configs/panoptic_fpn/panoptic_fpn_r101_fpn_1x_coco.py + Metadata: + Training Memory (GB): 6.5 + Epochs: 12 + Results: + - Task: Panoptic Segmentation + Dataset: COCO + Metrics: + PQ: 42.2 + Weights: https://download.openmmlab.com/mmdetection/v2.0/panoptic_fpn/panoptic_fpn_r101_fpn_1x_coco/panoptic_fpn_r101_fpn_1x_coco_20210820_193950-ab9157a2.pth + + - Name: panoptic_fpn_r101_fpn_mstrain_3x_coco + In Collection: PanopticFPN + Config: configs/panoptic_fpn/panoptic_fpn_r101_fpn_mstrain_3x_coco.py + Metadata: + Training Memory (GB): 6.5 + Epochs: 36 + Results: + - Task: Panoptic Segmentation + Dataset: COCO + Metrics: + PQ: 44.1 + Weights: https://download.openmmlab.com/mmdetection/v2.0/panoptic_fpn/panoptic_fpn_r101_fpn_mstrain_3x_coco/panoptic_fpn_r101_fpn_mstrain_3x_coco_20210823_114712-9c99acc4.pth diff --git a/configs/panoptic_fpn/panoptic_fpn_r101_fpn_1x_coco.py b/configs/panoptic_fpn/panoptic_fpn_r101_fpn_1x_coco.py new file mode 100644 index 0000000..78b8079 --- /dev/null +++ b/configs/panoptic_fpn/panoptic_fpn_r101_fpn_1x_coco.py @@ -0,0 +1,6 @@ +_base_ = './panoptic_fpn_r50_fpn_1x_coco.py' +model = dict( + backbone=dict( + depth=101, + init_cfg=dict(type='Pretrained', + checkpoint='torchvision://resnet101'))) diff --git a/configs/panoptic_fpn/panoptic_fpn_r101_fpn_mstrain_3x_coco.py b/configs/panoptic_fpn/panoptic_fpn_r101_fpn_mstrain_3x_coco.py new file mode 100644 index 0000000..057e481 --- /dev/null +++ 
b/configs/panoptic_fpn/panoptic_fpn_r101_fpn_mstrain_3x_coco.py @@ -0,0 +1,6 @@ +_base_ = './panoptic_fpn_r50_fpn_mstrain_3x_coco.py' +model = dict( + backbone=dict( + depth=101, + init_cfg=dict(type='Pretrained', + checkpoint='torchvision://resnet101'))) diff --git a/configs/panoptic_fpn/panoptic_fpn_r50_fpn_1x_coco.py b/configs/panoptic_fpn/panoptic_fpn_r50_fpn_1x_coco.py new file mode 100644 index 0000000..2995524 --- /dev/null +++ b/configs/panoptic_fpn/panoptic_fpn_r50_fpn_1x_coco.py @@ -0,0 +1,33 @@ +_base_ = [ + '../_base_/models/mask_rcnn_r50_fpn.py', + '../_base_/datasets/coco_panoptic.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +model = dict( + type='PanopticFPN', + semantic_head=dict( + type='PanopticFPNHead', + num_things_classes=80, + num_stuff_classes=53, + in_channels=256, + inner_channels=128, + start_level=0, + end_level=4, + norm_cfg=dict(type='GN', num_groups=32, requires_grad=True), + conv_cfg=None, + loss_seg=dict( + type='CrossEntropyLoss', ignore_index=255, loss_weight=0.5)), + panoptic_fusion_head=dict( + type='HeuristicFusionHead', + num_things_classes=80, + num_stuff_classes=53), + test_cfg=dict( + panoptic=dict( + score_thr=0.6, + max_per_img=100, + mask_thr_binary=0.5, + mask_overlap=0.5, + nms=dict(type='nms', iou_threshold=0.5, class_agnostic=True), + stuff_area_limit=4096))) + +custom_hooks = [] diff --git a/configs/panoptic_fpn/panoptic_fpn_r50_fpn_mstrain_3x_coco.py b/configs/panoptic_fpn/panoptic_fpn_r50_fpn_mstrain_3x_coco.py new file mode 100644 index 0000000..b510935 --- /dev/null +++ b/configs/panoptic_fpn/panoptic_fpn_r50_fpn_mstrain_3x_coco.py @@ -0,0 +1,61 @@ +_base_ = './panoptic_fpn_r50_fpn_1x_coco.py' + +# dataset settings +dataset_type = 'CocoPanopticDataset' +data_root = 'data/coco/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) + +# In mstrain 3x config, img_scale=[(1333, 640), (1333, 800)], +# multiscale_mode='range' +train_pipeline 
= [ + dict(type='LoadImageFromFile'), + dict( + type='LoadPanopticAnnotations', + with_bbox=True, + with_mask=True, + with_seg=True), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 800)], + multiscale_mode='range', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='SegRescale', scale_factor=1 / 4), + dict(type='DefaultFormatBundle'), + dict( + type='Collect', + keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] + +# Use RepeatDataset to speed up training +data = dict( + train=dict( + _delete_=True, + type='RepeatDataset', + times=3, + dataset=dict( + type=dataset_type, + ann_file=data_root + 'annotations/panoptic_train2017.json', + img_prefix=data_root + 'train2017/', + seg_prefix=data_root + 'annotations/panoptic_train2017/', + pipeline=train_pipeline)), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/pascal_voc/README.md b/configs/pascal_voc/README.md new file mode 100644 index 0000000..69e0220 --- /dev/null +++ b/configs/pascal_voc/README.md @@ -0,0 +1,23 @@ +# PASCAL VOC Dataset + + + +``` +@Article{Everingham10, + author = "Everingham, M. and Van~Gool, L. and Williams, C. K. I. and Winn, J. 
and Zisserman, A.", + title = "The Pascal Visual Object Classes (VOC) Challenge", + journal = "International Journal of Computer Vision", + volume = "88", + year = "2010", + number = "2", + month = jun, + pages = "303--338", +} +``` + +## Results and Models + +| Architecture | Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +|:------------:|:---------:|:-------:|:-------:|:--------:|:--------------:|:------:|:------:|:--------:| +| Faster R-CNN | R-50 | pytorch | 1x | 2.6 | - | 79.5 |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712/faster_rcnn_r50_fpn_1x_voc0712_20200624-c9895d40.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712/20200623_015208.log.json) | +| Retinanet | R-50 | pytorch | 1x | 2.1 | - | 77.3 |[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pascal_voc/retinanet_r50_fpn_1x_voc0712.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pascal_voc/retinanet_r50_fpn_1x_voc0712/retinanet_r50_fpn_1x_voc0712_20200617-47cbdd0e.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/pascal_voc/retinanet_r50_fpn_1x_voc0712/retinanet_r50_fpn_1x_voc0712_20200616_014642.log.json) | diff --git a/configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712.py b/configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712.py new file mode 100644 index 0000000..7866ace --- /dev/null +++ b/configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712.py @@ -0,0 +1,14 @@ +_base_ = [ + '../_base_/models/faster_rcnn_r50_fpn.py', '../_base_/datasets/voc0712.py', + '../_base_/default_runtime.py' +] +model = dict(roi_head=dict(bbox_head=dict(num_classes=20))) +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=None) +# learning policy +# actual epoch 
= 3 * 3 = 9 +lr_config = dict(policy='step', step=[3]) +# runtime settings +runner = dict( + type='EpochBasedRunner', max_epochs=4) # actual epoch = 4 * 3 = 12 diff --git a/configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712_cocofmt.py b/configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712_cocofmt.py new file mode 100644 index 0000000..12eee2c --- /dev/null +++ b/configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712_cocofmt.py @@ -0,0 +1,75 @@ +_base_ = [ + '../_base_/models/faster_rcnn_r50_fpn.py', '../_base_/datasets/voc0712.py', + '../_base_/default_runtime.py' +] +model = dict(roi_head=dict(bbox_head=dict(num_classes=20))) + +CLASSES = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', + 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', + 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor') + +# dataset settings +dataset_type = 'CocoDataset' +data_root = 'data/VOCdevkit/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1000, 600), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1000, 600), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type='RepeatDataset', + times=3, + dataset=dict( + type=dataset_type, + ann_file='data/voc0712_trainval.json', + 
img_prefix='data/VOCdevkit', + pipeline=train_pipeline, + classes=CLASSES)), + val=dict( + type=dataset_type, + ann_file='data/voc07_test.json', + img_prefix='data/VOCdevkit', + pipeline=test_pipeline, + classes=CLASSES), + test=dict( + type=dataset_type, + ann_file='data/voc07_test.json', + img_prefix='data/VOCdevkit', + pipeline=test_pipeline, + classes=CLASSES)) +evaluation = dict(interval=1, metric='bbox') + +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=None) +# learning policy +# actual epoch = 3 * 3 = 9 +lr_config = dict(policy='step', step=[3]) +# runtime settings +runner = dict( + type='EpochBasedRunner', max_epochs=4) # actual epoch = 4 * 3 = 12 diff --git a/configs/pascal_voc/retinanet_r50_fpn_1x_voc0712.py b/configs/pascal_voc/retinanet_r50_fpn_1x_voc0712.py new file mode 100644 index 0000000..b4b050d --- /dev/null +++ b/configs/pascal_voc/retinanet_r50_fpn_1x_voc0712.py @@ -0,0 +1,14 @@ +_base_ = [ + '../_base_/models/retinanet_r50_fpn.py', '../_base_/datasets/voc0712.py', + '../_base_/default_runtime.py' +] +model = dict(bbox_head=dict(num_classes=20)) +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=None) +# learning policy +# actual epoch = 3 * 3 = 9 +lr_config = dict(policy='step', step=[3]) +# runtime settings +runner = dict( + type='EpochBasedRunner', max_epochs=4) # actual epoch = 4 * 3 = 12 diff --git a/configs/pascal_voc/ssd300_voc0712.py b/configs/pascal_voc/ssd300_voc0712.py new file mode 100644 index 0000000..271ebe3 --- /dev/null +++ b/configs/pascal_voc/ssd300_voc0712.py @@ -0,0 +1,69 @@ +_base_ = [ + '../_base_/models/ssd300.py', '../_base_/datasets/voc0712.py', + '../_base_/default_runtime.py' +] +model = dict( + bbox_head=dict( + num_classes=20, anchor_generator=dict(basesize_ratio_range=(0.2, + 0.9)))) +# dataset settings +dataset_type = 'VOCDataset' +data_root = 'data/VOCdevkit/' 
+img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile', to_float32=True), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='PhotoMetricDistortion', + brightness_delta=32, + contrast_range=(0.5, 1.5), + saturation_range=(0.5, 1.5), + hue_delta=18), + dict( + type='Expand', + mean=img_norm_cfg['mean'], + to_rgb=img_norm_cfg['to_rgb'], + ratio_range=(1, 4)), + dict( + type='MinIoURandomCrop', + min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), + min_crop_size=0.3), + dict(type='Resize', img_scale=(300, 300), keep_ratio=False), + dict(type='Normalize', **img_norm_cfg), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(300, 300), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=False), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=8, + workers_per_gpu=3, + train=dict( + type='RepeatDataset', times=10, dataset=dict(pipeline=train_pipeline)), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +# optimizer +optimizer = dict(type='SGD', lr=1e-3, momentum=0.9, weight_decay=5e-4) +optimizer_config = dict() +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[16, 20]) +checkpoint_config = dict(interval=1) +# runtime settings +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/pascal_voc/ssd512_voc0712.py b/configs/pascal_voc/ssd512_voc0712.py new file mode 100644 index 0000000..ea2b69f --- /dev/null +++ b/configs/pascal_voc/ssd512_voc0712.py @@ -0,0 +1,52 @@ +_base_ = 'ssd300_voc0712.py' +input_size = 512 +model = dict( + bbox_head=dict( + in_channels=(512, 1024, 
512, 256, 256, 256, 256), + anchor_generator=dict( + input_size=input_size, + strides=[8, 16, 32, 64, 128, 256, 512], + basesize_ratio_range=(0.15, 0.9), + ratios=([2], [2, 3], [2, 3], [2, 3], [2, 3], [2], [2])))) +img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile', to_float32=True), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='PhotoMetricDistortion', + brightness_delta=32, + contrast_range=(0.5, 1.5), + saturation_range=(0.5, 1.5), + hue_delta=18), + dict( + type='Expand', + mean=img_norm_cfg['mean'], + to_rgb=img_norm_cfg['to_rgb'], + ratio_range=(1, 4)), + dict( + type='MinIoURandomCrop', + min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), + min_crop_size=0.3), + dict(type='Resize', img_scale=(512, 512), keep_ratio=False), + dict(type='Normalize', **img_norm_cfg), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=False), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(dataset=dict(pipeline=train_pipeline)), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/configs/pisa/README.md b/configs/pisa/README.md new file mode 100644 index 0000000..3ae3392 --- /dev/null +++ b/configs/pisa/README.md @@ -0,0 +1,40 @@ +# Prime Sample Attention in Object Detection + +## Introduction + + + +```latex +@inproceedings{cao2019prime, + title={Prime sample attention in object detection}, + author={Cao, Yuhang and Chen, Kai and Loy, Chen Change and Lin, Dahua}, + booktitle={IEEE Conference on Computer Vision and Pattern Recognition}, + year={2020} +} +``` + +## Results and models + +| PISA 
| Network | Backbone | Lr schd | box AP | mask AP | Config | Download | +|:----:|:-------:|:-------------------:|:-------:|:------:|:-------:|:------:|:--------:| +| × | Faster R-CNN | R-50-FPN | 1x | 36.4 | | - | +| √ | Faster R-CNN | R-50-FPN | 1x | 38.4 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pisa/pisa_faster_rcnn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_faster_rcnn_r50_fpn_1x_coco/pisa_faster_rcnn_r50_fpn_1x_coco-dea93523.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_faster_rcnn_r50_fpn_1x_coco/pisa_faster_rcnn_r50_fpn_1x_coco_20200506_185619.log.json) | +| × | Faster R-CNN | X101-32x4d-FPN | 1x | 40.1 | | - | +| √ | Faster R-CNN | X101-32x4d-FPN | 1x | 41.9 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pisa/pisa_faster_rcnn_x101_32x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_faster_rcnn_x101_32x4d_fpn_1x_coco/pisa_faster_rcnn_x101_32x4d_fpn_1x_coco-e4accec4.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_faster_rcnn_x101_32x4d_fpn_1x_coco/pisa_faster_rcnn_x101_32x4d_fpn_1x_coco_20200505_181503.log.json) | +| × | Mask R-CNN | R-50-FPN | 1x | 37.3 | 34.2 | - | +| √ | Mask R-CNN | R-50-FPN | 1x | 39.1 | 35.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pisa/pisa_mask_rcnn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_mask_rcnn_r50_fpn_1x_coco/pisa_mask_rcnn_r50_fpn_1x_coco-dfcedba6.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_mask_rcnn_r50_fpn_1x_coco/pisa_mask_rcnn_r50_fpn_1x_coco_20200508_150500.log.json) | +| × | Mask R-CNN | X101-32x4d-FPN | 1x | 41.1 | 37.1 | - | +| √ | Mask R-CNN | X101-32x4d-FPN | 1x | | | | +| × | RetinaNet | R-50-FPN | 1x | 35.6 | | - | +| √ | RetinaNet | R-50-FPN | 1x | 36.9 | | 
[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pisa/pisa_retinanet_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_retinanet_r50_fpn_1x_coco/pisa_retinanet_r50_fpn_1x_coco-76409952.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_retinanet_r50_fpn_1x_coco/pisa_retinanet_r50_fpn_1x_coco_20200504_014311.log.json) | +| × | RetinaNet | X101-32x4d-FPN | 1x | 39.0 | | - | +| √ | RetinaNet | X101-32x4d-FPN | 1x | 40.7 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pisa/pisa_retinanet_x101_32x4d_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_retinanet_x101_32x4d_fpn_1x_coco/pisa_retinanet_x101_32x4d_fpn_1x_coco-a0c13c73.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_retinanet_x101_32x4d_fpn_1x_coco/pisa_retinanet_x101_32x4d_fpn_1x_coco_20200505_001404.log.json) | +| × | SSD300 | VGG16 | 1x | 25.6 | | - | +| √ | SSD300 | VGG16 | 1x | 27.6 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pisa/pisa_ssd300_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_ssd300_coco/pisa_ssd300_coco-710e3ac9.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_ssd300_coco/pisa_ssd300_coco_20200504_144325.log.json) | +| × | SSD512 | VGG16 | 1x | 29.3 | | - | +| √ | SSD512 | VGG16 | 1x | 31.8 | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pisa/pisa_ssd512_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_ssd512_coco/pisa_ssd512_coco-247addee.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_ssd512_coco/pisa_ssd512_coco_20200508_131030.log.json) | + +**Notes:** + +- In the original paper, all models are trained and tested on mmdet v1.x, thus results may not be exactly the same as this release on v2.0. 
+- It is noted PISA only modifies the training pipeline so the inference time remains the same with the baseline. diff --git a/configs/pisa/metafile.yml b/configs/pisa/metafile.yml new file mode 100644 index 0000000..cd43afb --- /dev/null +++ b/configs/pisa/metafile.yml @@ -0,0 +1,110 @@ +Collections: + - Name: PISA + Metadata: + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - FPN + - PISA + - RPN + - ResNet + - RoIPool + Paper: + URL: https://arxiv.org/abs/1904.04821 + Title: 'Prime Sample Attention in Object Detection' + README: configs/pisa/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/roi_heads/pisa_roi_head.py#L8 + Version: v2.1.0 + +Models: + - Name: pisa_faster_rcnn_r50_fpn_1x_coco + In Collection: PISA + Config: configs/pisa/pisa_faster_rcnn_r50_fpn_1x_coco.py + Metadata: + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 38.4 + Weights: https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_faster_rcnn_r50_fpn_1x_coco/pisa_faster_rcnn_r50_fpn_1x_coco-dea93523.pth + + - Name: pisa_faster_rcnn_x101_32x4d_fpn_1x_coco + In Collection: PISA + Config: configs/pisa/pisa_faster_rcnn_x101_32x4d_fpn_1x_coco.py + Metadata: + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.9 + Weights: https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_faster_rcnn_x101_32x4d_fpn_1x_coco/pisa_faster_rcnn_x101_32x4d_fpn_1x_coco-e4accec4.pth + + - Name: pisa_mask_rcnn_r50_fpn_1x_coco + In Collection: PISA + Config: configs/pisa/pisa_mask_rcnn_r50_fpn_1x_coco.py + Metadata: + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 39.1 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 35.2 + Weights: 
https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_mask_rcnn_r50_fpn_1x_coco/pisa_mask_rcnn_r50_fpn_1x_coco-dfcedba6.pth + + - Name: pisa_retinanet_r50_fpn_1x_coco + In Collection: PISA + Config: configs/pisa/pisa_retinanet_r50_fpn_1x_coco.py + Metadata: + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 36.9 + Weights: https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_retinanet_r50_fpn_1x_coco/pisa_retinanet_r50_fpn_1x_coco-76409952.pth + + - Name: pisa_retinanet_x101_32x4d_fpn_1x_coco + In Collection: PISA + Config: configs/pisa/pisa_retinanet_x101_32x4d_fpn_1x_coco.py + Metadata: + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 40.7 + Weights: https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_retinanet_x101_32x4d_fpn_1x_coco/pisa_retinanet_x101_32x4d_fpn_1x_coco-a0c13c73.pth + + - Name: pisa_ssd300_coco + In Collection: PISA + Config: configs/pisa/pisa_ssd300_coco.py + Metadata: + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 27.6 + Weights: https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_ssd300_coco/pisa_ssd300_coco-710e3ac9.pth + + - Name: pisa_ssd512_coco + In Collection: PISA + Config: configs/pisa/pisa_ssd512_coco.py + Metadata: + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 31.8 + Weights: https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_ssd512_coco/pisa_ssd512_coco-247addee.pth diff --git a/configs/pisa/pisa_faster_rcnn_r50_fpn_1x_coco.py b/configs/pisa/pisa_faster_rcnn_r50_fpn_1x_coco.py new file mode 100644 index 0000000..71e65b0 --- /dev/null +++ b/configs/pisa/pisa_faster_rcnn_r50_fpn_1x_coco.py @@ -0,0 +1,30 @@ +_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' + +model = dict( + roi_head=dict( + type='PISARoIHead', + bbox_head=dict( + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))), + train_cfg=dict( + rpn_proposal=dict( + 
nms_pre=2000, + max_per_img=2000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0), + rcnn=dict( + sampler=dict( + type='ScoreHLRSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True, + k=0.5, + bias=0.), + isr=dict(k=2, bias=0), + carl=dict(k=1, bias=0.2))), + test_cfg=dict( + rpn=dict( + nms_pre=2000, + max_per_img=2000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0))) diff --git a/configs/pisa/pisa_faster_rcnn_x101_32x4d_fpn_1x_coco.py b/configs/pisa/pisa_faster_rcnn_x101_32x4d_fpn_1x_coco.py new file mode 100644 index 0000000..16edd99 --- /dev/null +++ b/configs/pisa/pisa_faster_rcnn_x101_32x4d_fpn_1x_coco.py @@ -0,0 +1,30 @@ +_base_ = '../faster_rcnn/faster_rcnn_x101_32x4d_fpn_1x_coco.py' + +model = dict( + roi_head=dict( + type='PISARoIHead', + bbox_head=dict( + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))), + train_cfg=dict( + rpn_proposal=dict( + nms_pre=2000, + max_per_img=2000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0), + rcnn=dict( + sampler=dict( + type='ScoreHLRSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True, + k=0.5, + bias=0.), + isr=dict(k=2, bias=0), + carl=dict(k=1, bias=0.2))), + test_cfg=dict( + rpn=dict( + nms_pre=2000, + max_per_img=2000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0))) diff --git a/configs/pisa/pisa_mask_rcnn_r50_fpn_1x_coco.py b/configs/pisa/pisa_mask_rcnn_r50_fpn_1x_coco.py new file mode 100644 index 0000000..047a293 --- /dev/null +++ b/configs/pisa/pisa_mask_rcnn_r50_fpn_1x_coco.py @@ -0,0 +1,30 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' + +model = dict( + roi_head=dict( + type='PISARoIHead', + bbox_head=dict( + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))), + train_cfg=dict( + rpn_proposal=dict( + nms_pre=2000, + max_per_img=2000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0), + rcnn=dict( + sampler=dict( + 
type='ScoreHLRSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True, + k=0.5, + bias=0.), + isr=dict(k=2, bias=0), + carl=dict(k=1, bias=0.2))), + test_cfg=dict( + rpn=dict( + nms_pre=2000, + max_per_img=2000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0))) diff --git a/configs/pisa/pisa_mask_rcnn_x101_32x4d_fpn_1x_coco.py b/configs/pisa/pisa_mask_rcnn_x101_32x4d_fpn_1x_coco.py new file mode 100644 index 0000000..2186a8f --- /dev/null +++ b/configs/pisa/pisa_mask_rcnn_x101_32x4d_fpn_1x_coco.py @@ -0,0 +1,30 @@ +_base_ = '../mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco.py' + +model = dict( + roi_head=dict( + type='PISARoIHead', + bbox_head=dict( + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))), + train_cfg=dict( + rpn_proposal=dict( + nms_pre=2000, + max_per_img=2000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0), + rcnn=dict( + sampler=dict( + type='ScoreHLRSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True, + k=0.5, + bias=0.), + isr=dict(k=2, bias=0), + carl=dict(k=1, bias=0.2))), + test_cfg=dict( + rpn=dict( + nms_pre=2000, + max_per_img=2000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0))) diff --git a/configs/pisa/pisa_retinanet_r50_fpn_1x_coco.py b/configs/pisa/pisa_retinanet_r50_fpn_1x_coco.py new file mode 100644 index 0000000..70f89e2 --- /dev/null +++ b/configs/pisa/pisa_retinanet_r50_fpn_1x_coco.py @@ -0,0 +1,7 @@ +_base_ = '../retinanet/retinanet_r50_fpn_1x_coco.py' + +model = dict( + bbox_head=dict( + type='PISARetinaHead', + loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0)), + train_cfg=dict(isr=dict(k=2., bias=0.), carl=dict(k=1., bias=0.2))) diff --git a/configs/pisa/pisa_retinanet_x101_32x4d_fpn_1x_coco.py b/configs/pisa/pisa_retinanet_x101_32x4d_fpn_1x_coco.py new file mode 100644 index 0000000..b97b672 --- /dev/null +++ b/configs/pisa/pisa_retinanet_x101_32x4d_fpn_1x_coco.py @@ -0,0 +1,7 @@ +_base_ = 
'../retinanet/retinanet_x101_32x4d_fpn_1x_coco.py' + +model = dict( + bbox_head=dict( + type='PISARetinaHead', + loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0)), + train_cfg=dict(isr=dict(k=2., bias=0.), carl=dict(k=1., bias=0.2))) diff --git a/configs/pisa/pisa_ssd300_coco.py b/configs/pisa/pisa_ssd300_coco.py new file mode 100644 index 0000000..b5cc006 --- /dev/null +++ b/configs/pisa/pisa_ssd300_coco.py @@ -0,0 +1,8 @@ +_base_ = '../ssd/ssd300_coco.py' + +model = dict( + bbox_head=dict(type='PISASSDHead'), + train_cfg=dict(isr=dict(k=2., bias=0.), carl=dict(k=1., bias=0.2))) + +optimizer_config = dict( + _delete_=True, grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/pisa/pisa_ssd512_coco.py b/configs/pisa/pisa_ssd512_coco.py new file mode 100644 index 0000000..3219d6d --- /dev/null +++ b/configs/pisa/pisa_ssd512_coco.py @@ -0,0 +1,8 @@ +_base_ = '../ssd/ssd512_coco.py' + +model = dict( + bbox_head=dict(type='PISASSDHead'), + train_cfg=dict(isr=dict(k=2., bias=0.), carl=dict(k=1., bias=0.2))) + +optimizer_config = dict( + _delete_=True, grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/pix2seq/README.md b/configs/pix2seq/README.md new file mode 100644 index 0000000..c6b3f20 --- /dev/null +++ b/configs/pix2seq/README.md @@ -0,0 +1,21 @@ +# Pix2seq + +## Introduction + + + +We provide the config files for Pix2seq: [Pix2seq: A Language Modeling Framework for Object Detection](https://arxiv.org/abs/2109.10852). 
+ +```BibTeX +@article{chen2021pix2seq, + title={Pix2seq: A language modeling framework for object detection}, + author={Chen, Ting and Saxena, Saurabh and Li, Lala and Fleet, David J and Hinton, Geoffrey}, + journal={arXiv preprint arXiv:2109.10852}, + year={2021} +} +``` + +## Results and Models + +| Backbone | Model | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +|:------:|:--------:|:-------:|:--------:|:--------------:|:------:|:------:|:--------:| diff --git a/configs/pix2seq/metafile.yml b/configs/pix2seq/metafile.yml new file mode 100644 index 0000000..31c7be9 --- /dev/null +++ b/configs/pix2seq/metafile.yml @@ -0,0 +1,33 @@ +Collections: + - Name: Pix2seq + Metadata: + Training Data: COCO + Training Techniques: + - AdamW + - Multi Scale Train + - Gradient Clip + Training Resources: 8x V100 GPUs + Architecture: + - ResNet + - Transformer + Paper: + URL: https://arxiv.org/abs/2109.10852 + Title: 'Pix2seq: A Language Modeling Framework for Object Detection' + README: configs/pix2seq/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.7.0/mmdet/models/detectors/pix2seq.py + Version: v1.0.0 + +Models: + - Name: pix2seq_r50_8x4_300e_coco + In Collection: Pix2seq + Config: configs/pix2seq/pix2seq_r50_8x4_300e_coco.py + Metadata: + Training Memory (GB): 7.9 + Epochs: 300 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 37.0 + Weights: https://drive.google.com/file/d/1Ku8ZORiLtMs66uleS3aXId7pxlJrTK9d/view?usp=sharing diff --git a/configs/pix2seq/pix2seq_r50_8x4_300e_coco.py b/configs/pix2seq/pix2seq_r50_8x4_300e_coco.py new file mode 100644 index 0000000..cfdd142 --- /dev/null +++ b/configs/pix2seq/pix2seq_r50_8x4_300e_coco.py @@ -0,0 +1,156 @@ +_base_ = [ + '../_base_/datasets/coco_detection_pix2seq.py', '../_base_/default_runtime.py' +] +model = dict( + type='Pix2seq', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(3, ), + frozen_stages=0, + norm_cfg=dict(type='BN', 
requires_grad=False), + norm_eval=True, # Frozen BN weight and bias + style='pytorch', + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), + bbox_head=dict( + type='Pix2seqHead', + num_classes=91, + in_channels=2048, + num_vocal=2094, + rand_target=True, + drop_token=True, + random_token=True, + split_loss=False, + transformer=dict( + type='Pix2seqTransformer', + encoder=dict( + type='DetrTransformerEncoder', + num_layers=6, + transformerlayers=dict( + type='BaseTransformerLayer', + attn_cfgs=[ + dict( + type='MultiheadAttention', + embed_dims=256, + num_heads=8, + dropout=0.1) + ], + feedforward_channels=1024, + ffn_dropout=0.1, + operation_order=('self_attn', 'norm', 'ffn', 'norm'))), + decoder=dict( + type='Pix2seqTransformerDecoder', + num_layers=6, + post_norm_cfg=dict(type='LN'), + transformerlayers=dict( + type='Pix2seqTransformerDecoderLayer', + attn_cfgs=[ + dict( + type='Pix2seqAttention', + embed_dims=256, + num_heads=8, + dropout=0.1, + self_attn_dropout=0.1), + dict( + type='MultiheadAttention', + embed_dims=256, + num_heads=8, + dropout=0.1) + ], + feedforward_channels=1024, + ffn_dropout=0.1, + operation_order=('self_attn', 'norm', 'cross_attn', 'norm', + 'ffn', 'norm')), + ), + ), + positional_encoding=dict( + type='SinePositionalEncoding', num_feats=128, normalize=False))) +# augment +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile', to_float32=True), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='RandomDistortion', + brightness=0.5, + contrast=0.5, + saturation=0.5, + hue=0.5), + dict(type='RandomFlip', flip_ratio=0.5), + dict( + type='LargeScaleJitter', + desired_size=1333, + ratio_range=(0.3, 2.0), + keep_ratio=True, + allow_negative_crop=True), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=1), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 
'gt_labels']) +] +# test_pipeline, NOTE the Pad's size_divisor is different from the default +# setting (size_divisor=32). While there is little effect on the performance +# whether we use the default setting or use size_divisor=1. +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 1333), + flip=False, + transforms=[ + dict( + type='LargeScaleJitter', + desired_size=1333, + ratio_range=(1.0, 1.0), + keep_ratio=True, + allow_negative_crop=True), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=1), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] +# dataset +dataset_type = 'Pix2seqCocoDataset' +data_root = 'data/coco/' +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_train2017.json', + img_prefix=data_root + 'train2017/', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + ann_file=data_root + 'annotations/instances_val2017.json', + img_prefix=data_root + 'val2017/', + pipeline=test_pipeline) +) +# optimizer +optimizer = dict( + type='AdamW', + lr=1e-3, + weight_decay=0.05, + paramwise_cfg=dict( + custom_keys={'backbone': dict(lr_mult=0.1, decay_mult=1.0)})) +optimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2)) +runner = dict(type='EpochBasedRunner', max_epochs=300) +fp16 = dict(loss_scale='dynamic') +checkpoint_config = dict(interval=50) +lr_config = dict( + policy='LinearlyDecay', + warmup='linear', + by_epoch=True, + warmup_by_epoch=True, + warmup_ratio=0.01, + warmup_iters=10, # 10 epoch + min_lr_ratio=0.01) +evaluation = dict(interval=10, metric='bbox') \ No newline at end of file diff --git a/configs/point_rend/README.md b/configs/point_rend/README.md new file mode 100644 
index 0000000..fafb29c --- /dev/null +++ b/configs/point_rend/README.md @@ -0,0 +1,23 @@ +# PointRend + +## Introduction + + + +```latex +@InProceedings{kirillov2019pointrend, + title={{PointRend}: Image Segmentation as Rendering}, + author={Alexander Kirillov and Yuxin Wu and Kaiming He and Ross Girshick}, + journal={ArXiv:1912.08193}, + year={2019} +} +``` + +## Results and models + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :------: | :--------: | +| R-50-FPN | caffe | 1x | 4.6 | | 38.4 | 36.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/point_rend/point_rend_r50_caffe_fpn_mstrain_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/point_rend/point_rend_r50_caffe_fpn_mstrain_1x_coco/point_rend_r50_caffe_fpn_mstrain_1x_coco-1bcb5fb4.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/point_rend/point_rend_r50_caffe_fpn_mstrain_1x_coco/point_rend_r50_caffe_fpn_mstrain_1x_coco_20200612_161407.log.json) | +| R-50-FPN | caffe | 3x | 4.6 | | 41.0 | 38.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/point_rend/point_rend_r50_caffe_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/point_rend/point_rend_r50_caffe_fpn_mstrain_3x_coco/point_rend_r50_caffe_fpn_mstrain_3x_coco-e0ebb6b7.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/point_rend/point_rend_r50_caffe_fpn_mstrain_3x_coco/point_rend_r50_caffe_fpn_mstrain_3x_coco_20200614_002632.log.json) | + +Note: All models are trained with multi-scale, the input image shorter side is randomly scaled to one of (640, 672, 704, 736, 768, 800). 
diff --git a/configs/point_rend/metafile.yml b/configs/point_rend/metafile.yml new file mode 100644 index 0000000..82aea05 --- /dev/null +++ b/configs/point_rend/metafile.yml @@ -0,0 +1,54 @@ +Collections: + - Name: PointRend + Metadata: + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - PointRend + - FPN + - ResNet + Paper: + URL: https://arxiv.org/abs/1912.08193 + Title: 'PointRend: Image Segmentation as Rendering' + README: configs/point_rend/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.2.0/mmdet/models/detectors/point_rend.py#L6 + Version: v2.2.0 + +Models: + - Name: point_rend_r50_caffe_fpn_mstrain_1x_coco + In Collection: PointRend + Config: configs/point_rend/point_rend_r50_caffe_fpn_mstrain_1x_coco.py + Metadata: + Training Memory (GB): 4.6 + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 38.4 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 36.3 + Weights: https://download.openmmlab.com/mmdetection/v2.0/point_rend/point_rend_r50_caffe_fpn_mstrain_1x_coco/point_rend_r50_caffe_fpn_mstrain_1x_coco-1bcb5fb4.pth + + - Name: point_rend_r50_caffe_fpn_mstrain_3x_coco + In Collection: PointRend + Config: configs/point_rend/point_rend_r50_caffe_fpn_mstrain_3x_coco.py + Metadata: + Training Memory (GB): 4.6 + Epochs: 36 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.0 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 38.0 + Weights: https://download.openmmlab.com/mmdetection/v2.0/point_rend/point_rend_r50_caffe_fpn_mstrain_3x_coco/point_rend_r50_caffe_fpn_mstrain_3x_coco-e0ebb6b7.pth diff --git a/configs/point_rend/point_rend_r50_caffe_fpn_mstrain_1x_coco.py b/configs/point_rend/point_rend_r50_caffe_fpn_mstrain_1x_coco.py new file mode 100644 index 0000000..0c0e563 --- /dev/null +++ 
b/configs/point_rend/point_rend_r50_caffe_fpn_mstrain_1x_coco.py @@ -0,0 +1,44 @@ +_base_ = '../mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain_1x_coco.py' +# model settings +model = dict( + type='PointRend', + roi_head=dict( + type='PointRendRoIHead', + mask_roi_extractor=dict( + type='GenericRoIExtractor', + aggregation='concat', + roi_layer=dict( + _delete_=True, type='SimpleRoIAlign', output_size=14), + out_channels=256, + featmap_strides=[4]), + mask_head=dict( + _delete_=True, + type='CoarseMaskHead', + num_fcs=2, + in_channels=256, + conv_out_channels=256, + fc_out_channels=1024, + num_classes=80, + loss_mask=dict( + type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)), + point_head=dict( + type='MaskPointHead', + num_fcs=3, + in_channels=256, + fc_channels=256, + num_classes=80, + coarse_pred_each_layer=True, + loss_point=dict( + type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), + # model training and testing settings + train_cfg=dict( + rcnn=dict( + mask_size=7, + num_points=14 * 14, + oversample_ratio=3, + importance_sample_ratio=0.75)), + test_cfg=dict( + rcnn=dict( + subdivision_steps=5, + subdivision_num_points=28 * 28, + scale_factor=2))) diff --git a/configs/point_rend/point_rend_r50_caffe_fpn_mstrain_3x_coco.py b/configs/point_rend/point_rend_r50_caffe_fpn_mstrain_3x_coco.py new file mode 100644 index 0000000..169278e --- /dev/null +++ b/configs/point_rend/point_rend_r50_caffe_fpn_mstrain_3x_coco.py @@ -0,0 +1,4 @@ +_base_ = './point_rend_r50_caffe_fpn_mstrain_1x_coco.py' +# learning policy +lr_config = dict(step=[28, 34]) +runner = dict(type='EpochBasedRunner', max_epochs=36) diff --git a/configs/pvt/README.md b/configs/pvt/README.md new file mode 100644 index 0000000..99ace87 --- /dev/null +++ b/configs/pvt/README.md @@ -0,0 +1,43 @@ +# Pyramid vision transformer: A versatile backbone for dense prediction without convolutions + +## Introduction + + + +```latex +@article{wang2021pyramid, + title={Pyramid vision transformer: A versatile 
backbone for dense prediction without convolutions}, + author={Wang, Wenhai and Xie, Enze and Li, Xiang and Fan, Deng-Ping and Song, Kaitao and Liang, Ding and Lu, Tong and Luo, Ping and Shao, Ling}, + journal={arXiv preprint arXiv:2102.12122}, + year={2021} +} +``` + +```latex +@article{wang2021pvtv2, + title={PVTv2: Improved Baselines with Pyramid Vision Transformer}, + author={Wang, Wenhai and Xie, Enze and Li, Xiang and Fan, Deng-Ping and Song, Kaitao and Liang, Ding and Lu, Tong and Luo, Ping and Shao, Ling}, + journal={arXiv preprint arXiv:2106.13797}, + year={2021} +} +``` +## Results and Models + +### RetinaNet (PVTv1) + +| Backbone | Lr schd | Mem (GB) | box AP | Config | Download | +|:-----------:|:-------:|:--------:|:------:|:------:|:--------:| +| PVT-Tiny | 12e |8.5 |36.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pvt/retinanet_pvt-t_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvt-t_fpn_1x_coco/retinanet_pvt-t_fpn_1x_coco_20210831_103110-17b566bd.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvt-t_fpn_1x_coco/retinanet_pvt-t_fpn_1x_coco_20210831_103110.log.json) | +| PVT-Small | 12e |14.5 |40.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pvt/retinanet_pvt-s_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvt-s_fpn_1x_coco/retinanet_pvt-s_fpn_1x_coco_20210906_142921-b6c94a5b.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvt-s_fpn_1x_coco/retinanet_pvt-s_fpn_1x_coco_20210906_142921.log.json) | +| PVT-Medium | 12e |20.9 |41.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pvt/retinanet_pvt-m_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvt-m_fpn_1x_coco/retinanet_pvt-m_fpn_1x_coco_20210831_103243-55effa1b.pth) | 
[log](https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvt-m_fpn_1x_coco/retinanet_pvt-m_fpn_1x_coco_20210831_103243.log.json) | + +### RetinaNet (PVTv2) + +| Backbone | Lr schd | Mem (GB) | box AP | Config | Download | +|:-----------:|:-------:|:--------:|:------:|:------:|:--------:| +| PVTv2-B0 | 12e |7.4 |37.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pvt/retinanet_pvtv2-b0_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvtv2-b0_fpn_1x_coco/retinanet_pvtv2-b0_fpn_1x_coco_20210831_103157-13e9aabe.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvtv2-b0_fpn_1x_coco/retinanet_pvtv2-b0_fpn_1x_coco_20210831_103157.log.json) | +| PVTv2-B1 | 12e |9.5 |41.2 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pvt/retinanet_pvtv2-b1_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvtv2-b1_fpn_1x_coco/retinanet_pvtv2-b1_fpn_1x_coco_20210831_103318-7e169a7d.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvtv2-b1_fpn_1x_coco/retinanet_pvtv2-b1_fpn_1x_coco_20210831_103318.log.json) | +| PVTv2-B2 | 12e |16.2 |44.6 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pvt/retinanet_pvtv2-b2_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvtv2-b2_fpn_1x_coco/retinanet_pvtv2-b2_fpn_1x_coco_20210901_174843-529f0b9a.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvtv2-b2_fpn_1x_coco/retinanet_pvtv2-b2_fpn_1x_coco_20210901_174843.log.json) | +| PVTv2-B3 | 12e |23.0 |46.0 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pvt/retinanet_pvtv2-b3_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvtv2-b3_fpn_1x_coco/retinanet_pvtv2-b3_fpn_1x_coco_20210903_151512-8357deff.pth) | 
[log](https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvtv2-b3_fpn_1x_coco/retinanet_pvtv2-b3_fpn_1x_coco_20210903_151512.log.json) | +| PVTv2-B4 | 12e |17.0 |46.3 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pvt/retinanet_pvtv2-b4_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvtv2-b4_fpn_1x_coco/retinanet_pvtv2-b4_fpn_1x_coco_20210901_170151-83795c86.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvtv2-b4_fpn_1x_coco/retinanet_pvtv2-b4_fpn_1x_coco_20210901_170151.log.json) | +| PVTv2-B5 | 12e |18.7 |46.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/pvt/retinanet_pvtv2-b5_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvtv2-b5_fpn_1x_coco/retinanet_pvtv2-b5_fpn_1x_coco_20210902_201800-3420eb57.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvtv2-b5_fpn_1x_coco/retinanet_pvtv2-b5_fpn_1x_coco_20210902_201800.log.json) | diff --git a/configs/pvt/metafile.yml b/configs/pvt/metafile.yml new file mode 100644 index 0000000..48a0e2c --- /dev/null +++ b/configs/pvt/metafile.yml @@ -0,0 +1,136 @@ +Collections: + - Name: PVT + Metadata: + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x NVIDIA V100 GPUs + Architecture: + - RetinaNet + - PyramidVisionTransformer + - FPN + Paper: https://arxiv.org/abs/2102.12122 + README: configs/pvt/README.md + - Name: PVT-v2 + Metadata: + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x NVIDIA V100 GPUs + Architecture: + - RetinaNet + - PyramidVisionTransformerV2 + - FPN + Paper: https://arxiv.org/abs/2106.13797 + README: configs/pvt/README.md +Models: + - Name: retinanet_pvt-t_fpn_1x_coco + In Collection: PVT + Config: configs/pvt/retinanet_pvt-t_fpn_1x_coco.py + Metadata: + Training Memory (GB): 8.5 + Epochs: 12 + 
Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 36.6 + Weights: https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvt-t_fpn_1x_coco/retinanet_pvt-t_fpn_1x_coco_20210831_103110-17b566bd.pth + - Name: retinanet_pvt-s_fpn_1x_coco + In Collection: PVT + Config: configs/pvt/retinanet_pvt-s_fpn_1x_coco.py + Metadata: + Training Memory (GB): 14.5 + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 40.4 + Weights: https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvt-s_fpn_1x_coco/retinanet_pvt-s_fpn_1x_coco_20210906_142921-b6c94a5b.pth + - Name: retinanet_pvt-m_fpn_1x_coco + In Collection: PVT + Config: configs/pvt/retinanet_pvt-m_fpn_1x_coco.py + Metadata: + Training Memory (GB): 20.9 + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.7 + Weights: https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvt-m_fpn_1x_coco/retinanet_pvt-m_fpn_1x_coco_20210831_103243-55effa1b.pth + - Name: retinanet_pvtv2-b0_fpn_1x_coco + In Collection: PVT-v2 + Config: configs/pvt/retinanet_pvtv2-b0_fpn_1x_coco.py + Metadata: + Training Memory (GB): 7.4 + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 37.1 + Weights: https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvtv2-b0_fpn_1x_coco/retinanet_pvtv2-b0_fpn_1x_coco_20210831_103157-13e9aabe.pth + - Name: retinanet_pvtv2-b1_fpn_1x_coco + In Collection: PVT-v2 + Config: configs/pvt/retinanet_pvtv2-b1_fpn_1x_coco.py + Metadata: + Training Memory (GB): 9.5 + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.2 + Weights: https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvtv2-b1_fpn_1x_coco/retinanet_pvtv2-b1_fpn_1x_coco_20210831_103318-7e169a7d.pth + - Name: retinanet_pvtv2-b2_fpn_1x_coco + In Collection: PVT-v2 + Config: configs/pvt/retinanet_pvtv2-b2_fpn_1x_coco.py + Metadata: + Training Memory (GB): 16.2 + 
Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 44.6 + Weights: https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvtv2-b2_fpn_1x_coco/retinanet_pvtv2-b2_fpn_1x_coco_20210901_174843-529f0b9a.pth + - Name: retinanet_pvtv2-b3_fpn_1x_coco + In Collection: PVT-v2 + Config: configs/pvt/retinanet_pvtv2-b3_fpn_1x_coco.py + Metadata: + Training Memory (GB): 23.0 + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 46.0 + Weights: https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvtv2-b3_fpn_1x_coco/retinanet_pvtv2-b3_fpn_1x_coco_20210903_151512-8357deff.pth + - Name: retinanet_pvtv2-b4_fpn_1x_coco + In Collection: PVT-v2 + Config: configs/pvt/retinanet_pvtv2-b4_fpn_1x_coco.py + Metadata: + Training Memory (GB): 17.0 + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 46.3 + Weights: https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvtv2-b4_fpn_1x_coco/retinanet_pvtv2-b4_fpn_1x_coco_20210901_170151-83795c86.pth + - Name: retinanet_pvtv2-b5_fpn_1x_coco + In Collection: PVT-v2 + Config: configs/pvt/retinanet_pvtv2-b5_fpn_1x_coco.py + Metadata: + Training Memory (GB): 18.7 + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 46.1 + Weights: https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvtv2-b5_fpn_1x_coco/retinanet_pvtv2-b5_fpn_1x_coco_20210902_201800-3420eb57.pth diff --git a/configs/pvt/retinanet_pvt-l_fpn_1x_coco.py b/configs/pvt/retinanet_pvt-l_fpn_1x_coco.py new file mode 100644 index 0000000..e299f2a --- /dev/null +++ b/configs/pvt/retinanet_pvt-l_fpn_1x_coco.py @@ -0,0 +1,7 @@ +_base_ = 'retinanet_pvt-t_fpn_1x_coco.py' +model = dict( + backbone=dict( + num_layers=[3, 8, 27, 3], + init_cfg=dict(checkpoint='https://github.com/whai362/PVT/' + 'releases/download/v2/pvt_large.pth'))) +fp16 = dict(loss_scale=dict(init_scale=512)) diff --git 
a/configs/pvt/retinanet_pvt-m_fpn_1x_coco.py b/configs/pvt/retinanet_pvt-m_fpn_1x_coco.py new file mode 100644 index 0000000..b888f78 --- /dev/null +++ b/configs/pvt/retinanet_pvt-m_fpn_1x_coco.py @@ -0,0 +1,6 @@ +_base_ = 'retinanet_pvt-t_fpn_1x_coco.py' +model = dict( + backbone=dict( + num_layers=[3, 4, 18, 3], + init_cfg=dict(checkpoint='https://github.com/whai362/PVT/' + 'releases/download/v2/pvt_medium.pth'))) diff --git a/configs/pvt/retinanet_pvt-s_fpn_1x_coco.py b/configs/pvt/retinanet_pvt-s_fpn_1x_coco.py new file mode 100644 index 0000000..4660348 --- /dev/null +++ b/configs/pvt/retinanet_pvt-s_fpn_1x_coco.py @@ -0,0 +1,6 @@ +_base_ = 'retinanet_pvt-t_fpn_1x_coco.py' +model = dict( + backbone=dict( + num_layers=[3, 4, 6, 3], + init_cfg=dict(checkpoint='https://github.com/whai362/PVT/' + 'releases/download/v2/pvt_small.pth'))) diff --git a/configs/pvt/retinanet_pvt-t_fpn_1x_coco.py b/configs/pvt/retinanet_pvt-t_fpn_1x_coco.py new file mode 100644 index 0000000..a6cff7d --- /dev/null +++ b/configs/pvt/retinanet_pvt-t_fpn_1x_coco.py @@ -0,0 +1,16 @@ +_base_ = [ + '../_base_/models/retinanet_r50_fpn.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +model = dict( + type='RetinaNet', + backbone=dict( + _delete_=True, + type='PyramidVisionTransformer', + num_layers=[2, 2, 2, 2], + init_cfg=dict(checkpoint='https://github.com/whai362/PVT/' + 'releases/download/v2/pvt_tiny.pth')), + neck=dict(in_channels=[64, 128, 320, 512])) +# optimizer +optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, weight_decay=0.0001) diff --git a/configs/pvt/retinanet_pvtv2-b0_fpn_1x_coco.py b/configs/pvt/retinanet_pvtv2-b0_fpn_1x_coco.py new file mode 100644 index 0000000..cbe2295 --- /dev/null +++ b/configs/pvt/retinanet_pvtv2-b0_fpn_1x_coco.py @@ -0,0 +1,17 @@ +_base_ = [ + '../_base_/models/retinanet_r50_fpn.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', 
'../_base_/default_runtime.py' +] +model = dict( + type='RetinaNet', + backbone=dict( + _delete_=True, + type='PyramidVisionTransformerV2', + embed_dims=32, + num_layers=[2, 2, 2, 2], + init_cfg=dict(checkpoint='https://github.com/whai362/PVT/' + 'releases/download/v2/pvt_v2_b0.pth')), + neck=dict(in_channels=[32, 64, 160, 256])) +# optimizer +optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, weight_decay=0.0001) diff --git a/configs/pvt/retinanet_pvtv2-b1_fpn_1x_coco.py b/configs/pvt/retinanet_pvtv2-b1_fpn_1x_coco.py new file mode 100644 index 0000000..5374c50 --- /dev/null +++ b/configs/pvt/retinanet_pvtv2-b1_fpn_1x_coco.py @@ -0,0 +1,7 @@ +_base_ = 'retinanet_pvtv2-b0_fpn_1x_coco.py' +model = dict( + backbone=dict( + embed_dims=64, + init_cfg=dict(checkpoint='https://github.com/whai362/PVT/' + 'releases/download/v2/pvt_v2_b1.pth')), + neck=dict(in_channels=[64, 128, 320, 512])) diff --git a/configs/pvt/retinanet_pvtv2-b2_fpn_1x_coco.py b/configs/pvt/retinanet_pvtv2-b2_fpn_1x_coco.py new file mode 100644 index 0000000..cf9a18d --- /dev/null +++ b/configs/pvt/retinanet_pvtv2-b2_fpn_1x_coco.py @@ -0,0 +1,8 @@ +_base_ = 'retinanet_pvtv2-b0_fpn_1x_coco.py' +model = dict( + backbone=dict( + embed_dims=64, + num_layers=[3, 4, 6, 3], + init_cfg=dict(checkpoint='https://github.com/whai362/PVT/' + 'releases/download/v2/pvt_v2_b2.pth')), + neck=dict(in_channels=[64, 128, 320, 512])) diff --git a/configs/pvt/retinanet_pvtv2-b3_fpn_1x_coco.py b/configs/pvt/retinanet_pvtv2-b3_fpn_1x_coco.py new file mode 100644 index 0000000..7a47f82 --- /dev/null +++ b/configs/pvt/retinanet_pvtv2-b3_fpn_1x_coco.py @@ -0,0 +1,8 @@ +_base_ = 'retinanet_pvtv2-b0_fpn_1x_coco.py' +model = dict( + backbone=dict( + embed_dims=64, + num_layers=[3, 4, 18, 3], + init_cfg=dict(checkpoint='https://github.com/whai362/PVT/' + 'releases/download/v2/pvt_v2_b3.pth')), + neck=dict(in_channels=[64, 128, 320, 512])) diff --git a/configs/pvt/retinanet_pvtv2-b4_fpn_1x_coco.py 
b/configs/pvt/retinanet_pvtv2-b4_fpn_1x_coco.py new file mode 100644 index 0000000..ec9103b --- /dev/null +++ b/configs/pvt/retinanet_pvtv2-b4_fpn_1x_coco.py @@ -0,0 +1,13 @@ +_base_ = 'retinanet_pvtv2-b0_fpn_1x_coco.py' +model = dict( + backbone=dict( + embed_dims=64, + num_layers=[3, 8, 27, 3], + init_cfg=dict(checkpoint='https://github.com/whai362/PVT/' + 'releases/download/v2/pvt_v2_b4.pth')), + neck=dict(in_channels=[64, 128, 320, 512])) +# optimizer +optimizer = dict( + _delete_=True, type='AdamW', lr=0.0001 / 1.4, weight_decay=0.0001) +# dataset settings +data = dict(samples_per_gpu=1, workers_per_gpu=1) diff --git a/configs/pvt/retinanet_pvtv2-b5_fpn_1x_coco.py b/configs/pvt/retinanet_pvtv2-b5_fpn_1x_coco.py new file mode 100644 index 0000000..d8e6d23 --- /dev/null +++ b/configs/pvt/retinanet_pvtv2-b5_fpn_1x_coco.py @@ -0,0 +1,14 @@ +_base_ = 'retinanet_pvtv2-b0_fpn_1x_coco.py' +model = dict( + backbone=dict( + embed_dims=64, + num_layers=[3, 6, 40, 3], + mlp_ratios=(4, 4, 4, 4), + init_cfg=dict(checkpoint='https://github.com/whai362/PVT/' + 'releases/download/v2/pvt_v2_b5.pth')), + neck=dict(in_channels=[64, 128, 320, 512])) +# optimizer +optimizer = dict( + _delete_=True, type='AdamW', lr=0.0001 / 1.4, weight_decay=0.0001) +# dataset settings +data = dict(samples_per_gpu=1, workers_per_gpu=1) diff --git a/configs/queryinst/README.md b/configs/queryinst/README.md new file mode 100644 index 0000000..2051c5c --- /dev/null +++ b/configs/queryinst/README.md @@ -0,0 +1,26 @@ +# Instances as Queries + +## Introduction + + + +``` +@InProceedings{Fang_2021_ICCV, + author = {Fang, Yuxin and Yang, Shusheng and Wang, Xinggang and Li, Yu and Fang, Chen and Shan, Ying and Feng, Bin and Liu, Wenyu}, + title = {Instances As Queries}, + booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, + month = {October}, + year = {2021}, + pages = {6910-6919} +} +``` + +## Results and Models + +| Model | Backbone | Style | Lr schd | Number of 
Proposals |Multi-Scale| RandomCrop | box AP | mask AP | Config | Download | +|:------------:|:---------:|:-------:|:-------:|:-------: |:-------: |:---------:|:------:|:------:|:------:|:--------:| +| QueryInst | R-50-FPN | pytorch | 1x | 100 | False | False | 42.0 | 37.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/queryinst/queryinst_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/queryinst/queryinst_r50_fpn_1x_coco/queryinst_r50_fpn_1x_coco_20210907_084916-5a8f1998.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/queryinst/queryinst_r50_fpn_1x_coco/queryinst_r50_fpn_1x_coco_20210907_084916.log.json) | +| QueryInst | R-50-FPN | pytorch | 3x | 100 | True | False | 44.8 | 39.8 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/queryinst/queryinst_r50_fpn_mstrain_480-800_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/queryinst/queryinst_r50_fpn_mstrain_480-800_3x_coco/queryinst_r50_fpn_mstrain_480-800_3x_coco_20210901_103643-7837af86.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/queryinst/queryinst_r50_fpn_mstrain_480-800_3x_coco/queryinst_r50_fpn_mstrain_480-800_3x_coco_20210901_103643.log.json) | +| QueryInst | R-50-FPN | pytorch | 3x | 300 | True | True | 47.5 | 41.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/queryinst/queryinst_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/queryinst/queryinst_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco/queryinst_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco_20210904_101802-85cffbd8.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/queryinst/queryinst_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco/queryinst_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco_20210904_101802.log.json) | +| QueryInst | R-101-FPN | pytorch | 3x | 100 | True | False | 46.4 | 41.0 | 
[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/queryinst/queryinst_r101_fpn_mstrain_480-800_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/queryinst/queryinst_r101_fpn_mstrain_480-800_3x_coco/queryinst_r101_fpn_mstrain_480-800_3x_coco_20210904_104048-91f9995b.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/queryinst/queryinst_r101_fpn_mstrain_480-800_3x_coco/queryinst_r101_fpn_mstrain_480-800_3x_coco_20210904_104048.log.json) | +| QueryInst | R-101-FPN | pytorch | 3x | 300 | True | True | 49.0 | 42.9 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/queryinst/queryinst_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/queryinst/queryinst_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco/queryinst_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco_20210904_153621-76cce59f.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/queryinst/queryinst_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco/queryinst_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco_20210904_153621.log.json) | diff --git a/configs/queryinst/metafile.yml b/configs/queryinst/metafile.yml new file mode 100644 index 0000000..da7f0a7 --- /dev/null +++ b/configs/queryinst/metafile.yml @@ -0,0 +1,100 @@ +Collections: + - Name: QueryInst + Metadata: + Training Data: COCO + Training Techniques: + - AdamW + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - FPN + - ResNet + - QueryInst + Paper: + URL: https://openaccess.thecvf.com/content/ICCV2021/papers/Fang_Instances_As_Queries_ICCV_2021_paper.pdf + Title: 'Instances as Queries' + README: configs/queryinst/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/master/mmdet/models/detectors/queryinst.py + Version: v2.18.0 + +Models: + - Name: queryinst_r50_fpn_1x_coco + In Collection: QueryInst + Config: configs/queryinst/queryinst_r50_fpn_1x_coco.py + Metadata: 
+ Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 42.0 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 37.5 + Weights: https://download.openmmlab.com/mmdetection/v2.0/queryinst/queryinst_r50_fpn_1x_coco/queryinst_r50_fpn_1x_coco_20210907_084916-5a8f1998.pth + + - Name: queryinst_r50_fpn_mstrain_480-800_3x_coco + In Collection: QueryInst + Config: configs/queryinst/queryinst_r50_fpn_mstrain_480-800_3x_coco.py + Metadata: + Epochs: 36 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 44.8 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 39.8 + Weights: https://download.openmmlab.com/mmdetection/v2.0/queryinst/queryinst_r50_fpn_mstrain_480-800_3x_coco/queryinst_r50_fpn_mstrain_480-800_3x_coco_20210901_103643-7837af86.pth + + - Name: queryinst_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco + In Collection: QueryInst + Config: configs/queryinst/queryinst_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco.py + Metadata: + Epochs: 36 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 47.5 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 41.7 + Weights: https://download.openmmlab.com/mmdetection/v2.0/queryinst/queryinst_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco/queryinst_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco_20210904_101802-85cffbd8.pth + + - Name: queryinst_r101_fpn_mstrain_480-800_3x_coco + In Collection: QueryInst + Config: configs/queryinst/queryinst_r101_fpn_mstrain_480-800_3x_coco.py + Metadata: + Epochs: 36 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 46.4 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 41.0 + Weights: https://download.openmmlab.com/mmdetection/v2.0/queryinst/queryinst_r101_fpn_mstrain_480-800_3x_coco/queryinst_r101_fpn_mstrain_480-800_3x_coco_20210904_104048-91f9995b.pth + + - Name: 
queryinst_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco + In Collection: QueryInst + Config: configs/queryinst/queryinst_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco.py + Metadata: + Epochs: 36 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 49.0 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 42.9 + Weights: https://download.openmmlab.com/mmdetection/v2.0/queryinst/queryinst_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco/queryinst_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco_20210904_153621-76cce59f.pth diff --git a/configs/queryinst/queryinst_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco.py b/configs/queryinst/queryinst_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco.py new file mode 100644 index 0000000..fd138f5 --- /dev/null +++ b/configs/queryinst/queryinst_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco.py @@ -0,0 +1,7 @@ +_base_ = './queryinst_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco.py' + +model = dict( + backbone=dict( + depth=101, + init_cfg=dict(type='Pretrained', + checkpoint='torchvision://resnet101'))) diff --git a/configs/queryinst/queryinst_r101_fpn_mstrain_480-800_3x_coco.py b/configs/queryinst/queryinst_r101_fpn_mstrain_480-800_3x_coco.py new file mode 100644 index 0000000..07cae19 --- /dev/null +++ b/configs/queryinst/queryinst_r101_fpn_mstrain_480-800_3x_coco.py @@ -0,0 +1,7 @@ +_base_ = './queryinst_r50_fpn_mstrain_480-800_3x_coco.py' + +model = dict( + backbone=dict( + depth=101, + init_cfg=dict(type='Pretrained', + checkpoint='torchvision://resnet101'))) diff --git a/configs/queryinst/queryinst_r50_fpn_1x_coco.py b/configs/queryinst/queryinst_r50_fpn_1x_coco.py new file mode 100644 index 0000000..48f5773 --- /dev/null +++ b/configs/queryinst/queryinst_r50_fpn_1x_coco.py @@ -0,0 +1,138 @@ +_base_ = [ + '../_base_/datasets/coco_instance.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +num_stages = 6 +num_proposals = 
100 +model = dict( + type='QueryInst', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + start_level=0, + add_extra_convs='on_input', + num_outs=4), + rpn_head=dict( + type='EmbeddingRPNHead', + num_proposals=num_proposals, + proposal_feature_channel=256), + roi_head=dict( + type='SparseRoIHead', + num_stages=num_stages, + stage_loss_weights=[1] * num_stages, + proposal_feature_channel=256, + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=2), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + mask_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=2), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + bbox_head=[ + dict( + type='DIIHead', + num_classes=80, + num_ffn_fcs=2, + num_heads=8, + num_cls_fcs=1, + num_reg_fcs=3, + feedforward_channels=2048, + in_channels=256, + dropout=0.0, + ffn_act_cfg=dict(type='ReLU', inplace=True), + dynamic_conv_cfg=dict( + type='DynamicConv', + in_channels=256, + feat_channels=64, + out_channels=256, + input_feat_shape=7, + act_cfg=dict(type='ReLU', inplace=True), + norm_cfg=dict(type='LN')), + loss_bbox=dict(type='L1Loss', loss_weight=5.0), + loss_iou=dict(type='GIoULoss', loss_weight=2.0), + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=2.0), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + clip_border=False, + target_means=[0., 0., 0., 0.], + target_stds=[0.5, 0.5, 1., 1.])) for _ in range(num_stages) + ], + mask_head=[ + dict( + type='DynamicMaskHead', + dynamic_conv_cfg=dict( + type='DynamicConv', + in_channels=256, + feat_channels=64, 
+ out_channels=256, + input_feat_shape=14, + with_proj=False, + act_cfg=dict(type='ReLU', inplace=True), + norm_cfg=dict(type='LN')), + num_convs=4, + num_classes=80, + roi_feat_size=14, + in_channels=256, + conv_kernel_size=3, + conv_out_channels=256, + class_agnostic=False, + norm_cfg=dict(type='BN'), + upsample_cfg=dict(type='deconv', scale_factor=2), + loss_mask=dict( + type='DiceLoss', + loss_weight=8.0, + use_sigmoid=True, + activate=False, + eps=1e-5)) for _ in range(num_stages) + ]), + # training and testing settings + train_cfg=dict( + rpn=None, + rcnn=[ + dict( + assigner=dict( + type='HungarianAssigner', + cls_cost=dict(type='FocalLossCost', weight=2.0), + reg_cost=dict(type='BBoxL1Cost', weight=5.0), + iou_cost=dict(type='IoUCost', iou_mode='giou', + weight=2.0)), + sampler=dict(type='PseudoSampler'), + pos_weight=1, + mask_size=28, + ) for _ in range(num_stages) + ]), + test_cfg=dict( + rpn=None, rcnn=dict(max_per_img=num_proposals, mask_thr_binary=0.5))) + +# optimizer +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.0001, + weight_decay=0.0001, + paramwise_cfg=dict( + custom_keys={'backbone': dict(lr_mult=0.1, decay_mult=1.0)})) +optimizer_config = dict( + _delete_=True, grad_clip=dict(max_norm=0.1, norm_type=2)) +# learning policy +lr_config = dict(policy='step', step=[8, 11], warmup_iters=1000) +runner = dict(type='EpochBasedRunner', max_epochs=12) diff --git a/configs/queryinst/queryinst_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco.py b/configs/queryinst/queryinst_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco.py new file mode 100644 index 0000000..3089b3c --- /dev/null +++ b/configs/queryinst/queryinst_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco.py @@ -0,0 +1,54 @@ +_base_ = './queryinst_r50_fpn_mstrain_480-800_3x_coco.py' +num_proposals = 300 +model = dict( + rpn_head=dict(num_proposals=num_proposals), + test_cfg=dict( + _delete_=True, + rpn=None, + rcnn=dict(max_per_img=num_proposals, mask_thr_binary=0.5))) 
+img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) + +# augmentation strategy originates from DETR. +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict( + type='AutoAugment', + policies=[[ + dict( + type='Resize', + img_scale=[(480, 1333), (512, 1333), (544, 1333), (576, 1333), + (608, 1333), (640, 1333), (672, 1333), (704, 1333), + (736, 1333), (768, 1333), (800, 1333)], + multiscale_mode='value', + keep_ratio=True) + ], + [ + dict( + type='Resize', + img_scale=[(400, 1333), (500, 1333), (600, 1333)], + multiscale_mode='value', + keep_ratio=True), + dict( + type='RandomCrop', + crop_type='absolute_range', + crop_size=(384, 600), + allow_negative_crop=True), + dict( + type='Resize', + img_scale=[(480, 1333), (512, 1333), (544, 1333), + (576, 1333), (608, 1333), (640, 1333), + (672, 1333), (704, 1333), (736, 1333), + (768, 1333), (800, 1333)], + multiscale_mode='value', + override=True, + keep_ratio=True) + ]]), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']) +] +data = dict(train=dict(pipeline=train_pipeline)) diff --git a/configs/queryinst/queryinst_r50_fpn_mstrain_480-800_3x_coco.py b/configs/queryinst/queryinst_r50_fpn_mstrain_480-800_3x_coco.py new file mode 100644 index 0000000..89e2cd1 --- /dev/null +++ b/configs/queryinst/queryinst_r50_fpn_mstrain_480-800_3x_coco.py @@ -0,0 +1,23 @@ +_base_ = './queryinst_r50_fpn_1x_coco.py' + +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +min_values = (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict( + type='Resize', + img_scale=[(1333, value) 
for value in min_values], + multiscale_mode='value', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']) +] + +data = dict(train=dict(pipeline=train_pipeline)) +lr_config = dict(policy='step', step=[27, 33]) +runner = dict(type='EpochBasedRunner', max_epochs=36) diff --git a/configs/regnet/README.md b/configs/regnet/README.md new file mode 100644 index 0000000..79ee258 --- /dev/null +++ b/configs/regnet/README.md @@ -0,0 +1,110 @@ +# Designing Network Design Spaces + +## Introduction + +[BACKBONE] + +We implement RegNetX and RegNetY models in detection systems and provide their first results on Mask R-CNN, Faster R-CNN and RetinaNet. + +The pre-trained models are converted from the [model zoo of pycls](https://github.com/facebookresearch/pycls/blob/master/MODEL_ZOO.md). + +```latex +@article{radosavovic2020designing, + title={Designing Network Design Spaces}, + author={Ilija Radosavovic and Raj Prateek Kosaraju and Ross Girshick and Kaiming He and Piotr Dollár}, + year={2020}, + eprint={2003.13678}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} +``` + +## Usage + +To use a regnet model, there are two steps to do: + +1. Convert the model to ResNet-style supported by MMDetection +2. Modify backbone and neck in config accordingly + +### Convert model + +We have already prepared models with FLOPs from 400M to 12G in our model zoo. + +For more general usage, we also provide the script `regnet2mmdet.py` in the tools directory to convert the keys of models pretrained by [pycls](https://github.com/facebookresearch/pycls/) to +ResNet-style checkpoints used in MMDetection. + +```bash +python -u tools/model_converters/regnet2mmdet.py ${PRETRAIN_PATH} ${STORE_PATH} +``` + +This script converts the model from `PRETRAIN_PATH` and stores the converted model in `STORE_PATH`.
+ +### Modify config + +The users can modify the config's `depth` of backbone and corresponding keys in `arch` according to the configs in the [pycls model zoo](https://github.com/facebookresearch/pycls/blob/master/MODEL_ZOO.md). +The parameter `in_channels` in FPN can be found in Figures 15 & 16 of the paper (`wi` in the legend). +This directory already provides some configs with their performance, using RegNetX from 800MF to 12GF level. +For other pre-trained models or self-implemented regnet models, users are responsible for checking these parameters themselves. + +**Note**: Although Fig. 15 & 16 also provide `w0`, `wa`, `wm`, `group_w`, and `bot_mul` for `arch`, they are quantized and thus inaccurate; using them sometimes produces a different backbone that does not match the keys in the pre-trained model. + +## Results + +### Mask R-CNN + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :---------: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :------: | :--------: | +| [R-50-FPN](../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py)| pytorch | 1x | 4.4 | 12.0 | 38.2 | 34.7 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_1x_coco/mask_rcnn_r50_fpn_1x_coco_20200205-d4b0c5d6.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_1x_coco/mask_rcnn_r50_fpn_1x_coco_20200205_050542.log.json) | +|[RegNetX-3.2GF-FPN](./mask_rcnn_regnetx-3.2GF_fpn_1x_coco.py)| pytorch | 1x |5.0 ||40.3|36.6|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-3.2GF_fpn_1x_coco/mask_rcnn_regnetx-3.2GF_fpn_1x_coco_20200520_163141-2a9d1814.pth) |
[log](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-3.2GF_fpn_1x_coco/mask_rcnn_regnetx-3.2GF_fpn_1x_coco_20200520_163141.log.json) | +|[RegNetX-4.0GF-FPN](./mask_rcnn_regnetx-4GF_fpn_1x_coco.py)| pytorch | 1x |5.5||41.5|37.4|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/mask_rcnn_regnetx-4GF_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-4GF_fpn_1x_coco/mask_rcnn_regnetx-4GF_fpn_1x_coco_20200517_180217-32e9c92d.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-4GF_fpn_1x_coco/mask_rcnn_regnetx-4GF_fpn_1x_coco_20200517_180217.log.json) | +| [R-101-FPN](../mask_rcnn/mask_rcnn_r101_fpn_1x_coco.py)| pytorch | 1x | 6.4 | 10.3 | 40.0 | 36.1 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_r101_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r101_fpn_1x_coco/mask_rcnn_r101_fpn_1x_coco_20200204-1efe0ed5.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r101_fpn_1x_coco/mask_rcnn_r101_fpn_1x_coco_20200204_144809.log.json) | +|[RegNetX-6.4GF-FPN](./mask_rcnn_regnetx-6.4GF_fpn_1x_coco.py)| pytorch | 1x |6.1 ||41.0|37.1|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/mask_rcnn_regnetx-6.4GF_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-6.4GF_fpn_1x_coco/mask_rcnn_regnetx-6.4GF_fpn_1x_coco_20200517_180439-3a7aae83.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-6.4GF_fpn_1x_coco/mask_rcnn_regnetx-6.4GF_fpn_1x_coco_20200517_180439.log.json) | +| [X-101-32x4d-FPN](../mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco.py) | pytorch | 1x | 7.6 | 9.4 | 41.9 | 37.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco.py) | 
[model](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco/mask_rcnn_x101_32x4d_fpn_1x_coco_20200205-478d0b67.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco/mask_rcnn_x101_32x4d_fpn_1x_coco_20200205_034906.log.json) | +|[RegNetX-8.0GF-FPN](./mask_rcnn_regnetx-8GF_fpn_1x_coco.py)| pytorch | 1x |6.4 ||41.7|37.5|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/mask_rcnn_regnetx-8GF_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-8GF_fpn_1x_coco/mask_rcnn_regnetx-8GF_fpn_1x_coco_20200517_180515-09daa87e.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-8GF_fpn_1x_coco/mask_rcnn_regnetx-8GF_fpn_1x_coco_20200517_180515.log.json) | +|[RegNetX-12GF-FPN](./mask_rcnn_regnetx-12GF_fpn_1x_coco.py)| pytorch | 1x |7.4 ||42.2|38|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/mask_rcnn_regnetx-12GF_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-12GF_fpn_1x_coco/mask_rcnn_regnetx-12GF_fpn_1x_coco_20200517_180552-b538bd8b.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-12GF_fpn_1x_coco/mask_rcnn_regnetx-12GF_fpn_1x_coco_20200517_180552.log.json) | +|[RegNetX-3.2GF-FPN-DCN-C3-C5](./mask_rcnn_regnetx-3.2GF_fpn_mdconv_c3-c5_1x_coco.py)| pytorch | 1x |5.0 ||40.3|36.6|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_mdconv_c3-c5_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-3.2GF_fpn_mdconv_c3-c5_1x_coco/mask_rcnn_regnetx-3.2GF_fpn_mdconv_c3-c5_1x_coco_20200520_172726-75f40794.pth) | 
[log](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-3.2GF_fpn_mdconv_c3-c5_1x_coco/mask_rcnn_regnetx-3.2GF_fpn_mdconv_c3-c5_1x_coco_20200520_172726.log.json) | + +### Faster R-CNN + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :---------: | :-----: | :-----: | :------: | :------------: | :----: | :------: | :--------: | +| [R-50-FPN](../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py)| pytorch | 1x | 4.0 | 18.2 | 37.4 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130_204655.log.json) | +|[RegNetX-3.2GF-FPN](./faster_rcnn_regnetx-3.2GF_fpn_1x_coco.py)| pytorch | 1x | 4.5||39.9|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/faster_rcnn_regnetx-3.2GF_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-3.2GF_fpn_1x_coco/faster_rcnn_regnetx-3.2GF_fpn_1x_coco_20200517_175927-126fd9bf.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-3.2GF_fpn_1x_coco/faster_rcnn_regnetx-3.2GF_fpn_1x_coco_20200517_175927.log.json) | +|[RegNetX-3.2GF-FPN](./faster_rcnn_regnetx-3.2GF_fpn_2x_coco.py)| pytorch | 2x | 4.5||41.1|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/faster_rcnn_regnetx-3.2GF_fpn_2x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-3.2GF_fpn_2x_coco/faster_rcnn_regnetx-3.2GF_fpn_2x_coco_20200520_223955-e2081918.pth) | 
[log](https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-3.2GF_fpn_2x_coco/faster_rcnn_regnetx-3.2GF_fpn_2x_coco_20200520_223955.log.json) | + +### RetinaNet + +| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Config | Download | +| :---------: | :-----: | :-----: | :------: | :------------: | :----: | :------: | :--------: | +| [R-50-FPN](../retinanet/retinanet_r50_fpn_1x_coco.py) | pytorch | 1x | 3.8 | 16.6 | 36.5 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet/retinanet_r50_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r50_fpn_1x_coco/retinanet_r50_fpn_1x_coco_20200130-c2398f9e.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r50_fpn_1x_coco/retinanet_r50_fpn_1x_coco_20200130_002941.log.json) | +|[RegNetX-800MF-FPN](./retinanet_regnetx-800MF_fpn_1x_coco.py)| pytorch | 1x |2.5||35.6|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/retinanet_regnetx-800MF_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/retinanet_regnetx-800MF_fpn_1x_coco/retinanet_regnetx-800MF_fpn_1x_coco_20200517_191403-f6f91d10.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/retinanet_regnetx-800MF_fpn_1x_coco/retinanet_regnetx-800MF_fpn_1x_coco_20200517_191403.log.json) | +|[RegNetX-1.6GF-FPN](./retinanet_regnetx-1.6GF_fpn_1x_coco.py)| pytorch | 1x |3.3||37.3|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/retinanet_regnetx-1.6GF_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/retinanet_regnetx-1.6GF_fpn_1x_coco/retinanet_regnetx-1.6GF_fpn_1x_coco_20200517_191403-37009a9d.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/retinanet_regnetx-1.6GF_fpn_1x_coco/retinanet_regnetx-1.6GF_fpn_1x_coco_20200517_191403.log.json) | 
+|[RegNetX-3.2GF-FPN](./retinanet_regnetx-3.2GF_fpn_1x_coco.py)| pytorch | 1x |4.2 ||39.1|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/retinanet_regnetx-3.2GF_fpn_1x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/retinanet_regnetx-3.2GF_fpn_1x_coco/retinanet_regnetx-3.2GF_fpn_1x_coco_20200520_163141-cb1509e8.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/retinanet_regnetx-3.2GF_fpn_1x_coco/retinanet_regnetx-3.2GF_fpn_1x_coco_20200520_163141.log.json) | + +### Pre-trained models + +We also train some models with longer schedules and multi-scale training. The users could finetune them for downstream tasks. + +| Method | Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | mask AP | Config | Download | +| :-----: | :-----: | :-----: | :-----: | :------: | :------------: | :----: | :-----: | :------: | :--------: | +|Faster RCNN |[RegNetX-400MF-FPN](./faster_rcnn_regnetx-400MF_fpn_mstrain_3x_coco.py)| pytorch | 3x |2.3 ||37.1|-|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/faster_rcnn_regnetx-400MF_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-400MF_fpn_mstrain_3x_coco/faster_rcnn_regnetx-400MF_fpn_mstrain_3x_coco_20210526_095112-e1967c37.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-400MF_fpn_mstrain_3x_coco/faster_rcnn_regnetx-400MF_fpn_mstrain_3x_coco_20210526_095112.log.json) | +|Faster RCNN |[RegNetX-800MF-FPN](./faster_rcnn_regnetx-800MF_fpn_mstrain_3x_coco.py)| pytorch | 3x |2.8 ||38.8|-|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/faster_rcnn_regnetx-800MF_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-800MF_fpn_mstrain_3x_coco/faster_rcnn_regnetx-800MF_fpn_mstrain_3x_coco_20210526_095118-a2c70b20.pth) | 
[log](https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-800MF_fpn_mstrain_3x_coco/faster_rcnn_regnetx-800MF_fpn_mstrain_3x_coco_20210526_095118.log.json) | +|Faster RCNN |[RegNetX-1.6GF-FPN](./faster_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco.py)| pytorch | 3x |3.4 ||40.5|-|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/faster_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco/faster_rcnn_regnetx-1_20210526_095325-94aa46cc.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco/faster_rcnn_regnetx-1_20210526_095325.log.json) | +|Faster RCNN |[RegNetX-3.2GF-FPN](./faster_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py)| pytorch | 3x |4.4 ||42.3|-|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/faster_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco/faster_rcnn_regnetx-3_20210526_095152-e16a5227.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco/faster_rcnn_regnetx-3_20210526_095152.log.json) | +|Faster RCNN |[RegNetX-4GF-FPN](./faster_rcnn_regnetx-4GF_fpn_mstrain_3x_coco.py)| pytorch | 3x |4.9 ||42.8|-|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/faster_rcnn_regnetx-4GF_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-4GF_fpn_mstrain_3x_coco/faster_rcnn_regnetx-4GF_fpn_mstrain_3x_coco_20210526_095201-65eaf841.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-4GF_fpn_mstrain_3x_coco/faster_rcnn_regnetx-4GF_fpn_mstrain_3x_coco_20210526_095201.log.json) | +|Mask RCNN |[RegNetX-3.2GF-FPN](./mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py)| 
pytorch | 3x |5.0 ||43.1|38.7|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco_20200521_202221-99879813.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco_20200521_202221.log.json) | +|Mask RCNN |[RegNetX-400MF-FPN](./mask_rcnn_regnetx-400MF_fpn_mstrain-poly_3x_coco.py)| pytorch | 3x |2.5 ||37.6|34.4|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/mask_rcnn_regnetx-400MF_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-400MF_fpn_mstrain-poly_3x_coco/mask_rcnn_regnetx-400MF_fpn_mstrain-poly_3x_coco_20210601_235443-8aac57a4.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-400MF_fpn_mstrain-poly_3x_coco/mask_rcnn_regnetx-400MF_fpn_mstrain-poly_3x_coco_20210601_235443.log.json) | +|Mask RCNN |[RegNetX-800MF-FPN](./mask_rcnn_regnetx-800MF_fpn_mstrain-poly_3x_coco.py)| pytorch | 3x |2.9 ||39.5|36.1|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/mask_rcnn_regnetx-800MF_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-800MF_fpn_mstrain-poly_3x_coco/mask_rcnn_regnetx-800MF_fpn_mstrain-poly_3x_coco_20210602_210641-715d51f5.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-800MF_fpn_mstrain-poly_3x_coco/mask_rcnn_regnetx-800MF_fpn_mstrain-poly_3x_coco_20210602_210641.log.json) | +|Mask RCNN |[RegNetX-1.6GF-FPN](./mask_rcnn_regnetx-1.6GF_fpn_mstrain-poly_3x_coco.py)| pytorch | 3x |3.6 
||40.9|37.5|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/mask_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-1.6GF_fpn_mstrain-poly_3x_coco/mask_rcnn_regnetx-1_20210602_210641-6764cff5.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-1.6GF_fpn_mstrain-poly_3x_coco/mask_rcnn_regnetx-1_20210602_210641.log.json) | +|Mask RCNN |[RegNetX-3.2GF-FPN](./mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py) | pytorch | 3x |5.0 ||43.1|38.7|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco_20200521_202221-99879813.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco_20200521_202221.log.json) | +|Mask RCNN |[RegNetX-4GF-FPN](./mask_rcnn_regnetx-4GF_fpn_mstrain-poly_3x_coco.py) | pytorch | 3x |5.1 ||43.4|39.2|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/mask_rcnn_regnetx-4GF_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-4GF_fpn_mstrain-poly_3x_coco/mask_rcnn_regnetx-4GF_fpn_mstrain-poly_3x_coco_20210602_032621-00f0331c.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-4GF_fpn_mstrain-poly_3x_coco/mask_rcnn_regnetx-4GF_fpn_mstrain-poly_3x_coco_20210602_032621.log.json) | +|Cascade Mask RCNN |[RegNetX-400MF-FPN](./cascade_mask_rcnn_regnetx-400MF_fpn_mstrain_3x_coco.py)| pytorch | 3x |4.3||41.6|36.4|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/cascade_mask_rcnn_regnetx-400MF_fpn_mstrain_3x_coco.py) | 
[model](https://download.openmmlab.com/mmdetection/v2.0/regnet/cascade_mask_rcnn_regnetx-400MF_fpn_mstrain_3x_coco/cascade_mask_rcnn_regnetx-400MF_fpn_mstrain_3x_coco_20210715_211619-5142f449.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/cascade_mask_rcnn_regnetx-400MF_fpn_mstrain_3x_coco/cascade_mask_rcnn_regnetx-400MF_fpn_mstrain_3x_coco_20210715_211619.log.json) | +|Cascade Mask RCNN |[RegNetX-800MF-FPN](./cascade_mask_rcnn_regnetx-800MF_fpn_mstrain_3x_coco.py)| pytorch | 3x |4.8||42.8|37.6|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/cascade_mask_rcnn_regnetx-800MF_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/cascade_mask_rcnn_regnetx-800MF_fpn_mstrain_3x_coco/cascade_mask_rcnn_regnetx-800MF_fpn_mstrain_3x_coco_20210715_211616-dcbd13f4.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/cascade_mask_rcnn_regnetx-800MF_fpn_mstrain_3x_coco/cascade_mask_rcnn_regnetx-800MF_fpn_mstrain_3x_coco_20210715_211616.log.json) | +|Cascade Mask RCNN |[RegNetX-1.6GF-FPN](./cascade_mask_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco.py)| pytorch | 3x |5.4||44.5|39.0|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/cascade_mask_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/cascade_mask_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco/cascade_mask_rcnn_regnetx-1_20210715_211616-75f29a61.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/cascade_mask_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco/cascade_mask_rcnn_regnetx-1_20210715_211616.log.json) | +|Cascade Mask RCNN |[RegNetX-3.2GF-FPN](./cascade_mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py)| pytorch | 3x |6.4||45.8|40.0|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/cascade_mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py) | 
[model](https://download.openmmlab.com/mmdetection/v2.0/regnet/cascade_mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco/cascade_mask_rcnn_regnetx-3_20210715_211616-b9c2c58b.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/cascade_mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco/cascade_mask_rcnn_regnetx-3_20210715_211616.log.json) | +|Cascade Mask RCNN |[RegNetX-4GF-FPN](./cascade_mask_rcnn_regnetx-4GF_fpn_mstrain_3x_coco.py) | pytorch | 3x |6.9||45.8|40.0|[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/regnet/cascade_mask_rcnn_regnetx-4GF_fpn_mstrain_3x_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/regnet/cascade_mask_rcnn_regnetx-4GF_fpn_mstrain_3x_coco/cascade_mask_rcnn_regnetx-4GF_fpn_mstrain_3x_coco_20210715_212034-cbb1be4c.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/regnet/cascade_mask_rcnn_regnetx-4GF_fpn_mstrain_3x_coco/cascade_mask_rcnn_regnetx-4GF_fpn_mstrain_3x_coco_20210715_212034.log.json) | + +### Notice + +1. The models are trained using a different weight decay, i.e., `weight_decay=5e-5` according to the setting in ImageNet training. This brings improvement of at least 0.7 AP absolute but does not improve the model using ResNet-50. +2. RetinaNets using RegNets are trained with learning rate 0.02 with gradient clip. We find that using learning rate 0.02 could improve the results by at least 0.7 AP absolute and gradient clip is necessary to stabilize the training. However, this does not improve the performance of ResNet-50-FPN RetinaNet. 
diff --git a/configs/regnet/cascade_mask_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco.py b/configs/regnet/cascade_mask_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco.py new file mode 100644 index 0000000..358d85a --- /dev/null +++ b/configs/regnet/cascade_mask_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco.py @@ -0,0 +1,17 @@ +_base_ = 'cascade_mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py' +model = dict( + backbone=dict( + type='RegNet', + arch='regnetx_1.6gf', + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://regnetx_1.6gf')), + neck=dict( + type='FPN', + in_channels=[72, 168, 408, 912], + out_channels=256, + num_outs=5)) diff --git a/configs/regnet/cascade_mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py b/configs/regnet/cascade_mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py new file mode 100644 index 0000000..8464571 --- /dev/null +++ b/configs/regnet/cascade_mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py @@ -0,0 +1,63 @@ +_base_ = [ + '../common/mstrain_3x_coco_instance.py', + '../_base_/models/cascade_mask_rcnn_r50_fpn.py' +] +model = dict( + backbone=dict( + _delete_=True, + type='RegNet', + arch='regnetx_3.2gf', + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://regnetx_3.2gf')), + neck=dict( + type='FPN', + in_channels=[96, 192, 432, 1008], + out_channels=256, + num_outs=5)) +img_norm_cfg = dict( + # The mean and std are used in PyCls when training RegNets + mean=[103.53, 116.28, 123.675], + std=[57.375, 57.12, 58.395], + to_rgb=False) +train_pipeline = [ + # Images are converted to float32 directly after loading in PyCls + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 800)], + 
multiscale_mode='range', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] + +data = dict( + train=dict(dataset=dict(pipeline=train_pipeline)), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) + +optimizer = dict(weight_decay=0.00005) diff --git a/configs/regnet/cascade_mask_rcnn_regnetx-400MF_fpn_mstrain_3x_coco.py b/configs/regnet/cascade_mask_rcnn_regnetx-400MF_fpn_mstrain_3x_coco.py new file mode 100644 index 0000000..2a8990a --- /dev/null +++ b/configs/regnet/cascade_mask_rcnn_regnetx-400MF_fpn_mstrain_3x_coco.py @@ -0,0 +1,17 @@ +_base_ = 'cascade_mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py' +model = dict( + backbone=dict( + type='RegNet', + arch='regnetx_400mf', + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://regnetx_400mf')), + neck=dict( + type='FPN', + in_channels=[32, 64, 160, 384], + out_channels=256, + num_outs=5)) diff --git a/configs/regnet/cascade_mask_rcnn_regnetx-4GF_fpn_mstrain_3x_coco.py b/configs/regnet/cascade_mask_rcnn_regnetx-4GF_fpn_mstrain_3x_coco.py new file mode 100644 index 0000000..3157863 --- /dev/null +++ b/configs/regnet/cascade_mask_rcnn_regnetx-4GF_fpn_mstrain_3x_coco.py @@ -0,0 +1,17 @@ +_base_ = 'cascade_mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py' +model = dict( + backbone=dict( 
+ type='RegNet', + arch='regnetx_4.0gf', + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://regnetx_4.0gf')), + neck=dict( + type='FPN', + in_channels=[80, 240, 560, 1360], + out_channels=256, + num_outs=5)) diff --git a/configs/regnet/cascade_mask_rcnn_regnetx-800MF_fpn_mstrain_3x_coco.py b/configs/regnet/cascade_mask_rcnn_regnetx-800MF_fpn_mstrain_3x_coco.py new file mode 100644 index 0000000..41376ad --- /dev/null +++ b/configs/regnet/cascade_mask_rcnn_regnetx-800MF_fpn_mstrain_3x_coco.py @@ -0,0 +1,17 @@ +_base_ = 'cascade_mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py' +model = dict( + backbone=dict( + type='RegNet', + arch='regnetx_800mf', + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://regnetx_800mf')), + neck=dict( + type='FPN', + in_channels=[64, 128, 288, 672], + out_channels=256, + num_outs=5)) diff --git a/configs/regnet/faster_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco.py b/configs/regnet/faster_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco.py new file mode 100644 index 0000000..385b5ca --- /dev/null +++ b/configs/regnet/faster_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco.py @@ -0,0 +1,17 @@ +_base_ = 'faster_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py' +model = dict( + backbone=dict( + type='RegNet', + arch='regnetx_1.6gf', + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://regnetx_1.6gf')), + neck=dict( + type='FPN', + in_channels=[72, 168, 408, 912], + out_channels=256, + num_outs=5)) diff --git a/configs/regnet/faster_rcnn_regnetx-3.2GF_fpn_1x_coco.py b/configs/regnet/faster_rcnn_regnetx-3.2GF_fpn_1x_coco.py new file mode 100644 index 
0000000..88d270e --- /dev/null +++ b/configs/regnet/faster_rcnn_regnetx-3.2GF_fpn_1x_coco.py @@ -0,0 +1,57 @@ +_base_ = [ + '../_base_/models/faster_rcnn_r50_fpn.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +model = dict( + backbone=dict( + _delete_=True, + type='RegNet', + arch='regnetx_3.2gf', + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://regnetx_3.2gf')), + neck=dict( + type='FPN', + in_channels=[96, 192, 432, 1008], + out_channels=256, + num_outs=5)) +img_norm_cfg = dict( + # The mean and std are used in PyCls when training RegNets + mean=[103.53, 116.28, 123.675], + std=[57.375, 57.12, 58.395], + to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.00005) diff --git a/configs/regnet/faster_rcnn_regnetx-3.2GF_fpn_2x_coco.py b/configs/regnet/faster_rcnn_regnetx-3.2GF_fpn_2x_coco.py new file mode 100644 index 0000000..612490b --- 
/dev/null +++ b/configs/regnet/faster_rcnn_regnetx-3.2GF_fpn_2x_coco.py @@ -0,0 +1,3 @@ +_base_ = './faster_rcnn_regnetx-3.2GF_fpn_1x_coco.py' +lr_config = dict(step=[16, 22]) +runner = dict(type='EpochBasedRunner', max_epochs=24) diff --git a/configs/regnet/faster_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py b/configs/regnet/faster_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py new file mode 100644 index 0000000..b7e6e1a --- /dev/null +++ b/configs/regnet/faster_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py @@ -0,0 +1,61 @@ +_base_ = [ + '../common/mstrain_3x_coco.py', '../_base_/models/faster_rcnn_r50_fpn.py' +] +model = dict( + backbone=dict( + _delete_=True, + type='RegNet', + arch='regnetx_3.2gf', + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://regnetx_3.2gf')), + neck=dict( + type='FPN', + in_channels=[96, 192, 432, 1008], + out_channels=256, + num_outs=5)) +img_norm_cfg = dict( + # The mean and std are used in PyCls when training RegNets + mean=[103.53, 116.28, 123.675], + std=[57.375, 57.12, 58.395], + to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 800)], + multiscale_mode='range', + keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] + 
+data = dict( + train=dict(dataset=dict(pipeline=train_pipeline)), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) + +optimizer = dict(weight_decay=0.00005) diff --git a/configs/regnet/faster_rcnn_regnetx-400MF_fpn_mstrain_3x_coco.py b/configs/regnet/faster_rcnn_regnetx-400MF_fpn_mstrain_3x_coco.py new file mode 100644 index 0000000..0a05f6e --- /dev/null +++ b/configs/regnet/faster_rcnn_regnetx-400MF_fpn_mstrain_3x_coco.py @@ -0,0 +1,17 @@ +_base_ = 'faster_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py' +model = dict( + backbone=dict( + type='RegNet', + arch='regnetx_400mf', + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://regnetx_400mf')), + neck=dict( + type='FPN', + in_channels=[32, 64, 160, 384], + out_channels=256, + num_outs=5)) diff --git a/configs/regnet/faster_rcnn_regnetx-4GF_fpn_mstrain_3x_coco.py b/configs/regnet/faster_rcnn_regnetx-4GF_fpn_mstrain_3x_coco.py new file mode 100644 index 0000000..98b3fc2 --- /dev/null +++ b/configs/regnet/faster_rcnn_regnetx-4GF_fpn_mstrain_3x_coco.py @@ -0,0 +1,17 @@ +_base_ = 'faster_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py' +model = dict( + backbone=dict( + type='RegNet', + arch='regnetx_4.0gf', + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://regnetx_4.0gf')), + neck=dict( + type='FPN', + in_channels=[80, 240, 560, 1360], + out_channels=256, + num_outs=5)) diff --git a/configs/regnet/faster_rcnn_regnetx-800MF_fpn_mstrain_3x_coco.py b/configs/regnet/faster_rcnn_regnetx-800MF_fpn_mstrain_3x_coco.py new file mode 100644 index 0000000..67f448b --- /dev/null +++ b/configs/regnet/faster_rcnn_regnetx-800MF_fpn_mstrain_3x_coco.py @@ -0,0 +1,17 @@ +_base_ = 'faster_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py' +model = 
dict( + backbone=dict( + type='RegNet', + arch='regnetx_800mf', + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://regnetx_800mf')), + neck=dict( + type='FPN', + in_channels=[64, 128, 288, 672], + out_channels=256, + num_outs=5)) diff --git a/configs/regnet/mask_rcnn_regnetx-1.6GF_fpn_mstrain-poly_3x_coco.py b/configs/regnet/mask_rcnn_regnetx-1.6GF_fpn_mstrain-poly_3x_coco.py new file mode 100644 index 0000000..7970c3c --- /dev/null +++ b/configs/regnet/mask_rcnn_regnetx-1.6GF_fpn_mstrain-poly_3x_coco.py @@ -0,0 +1,26 @@ +_base_ = [ + '../common/mstrain-poly_3x_coco_instance.py', + '../_base_/models/mask_rcnn_r50_fpn.py' +] + +model = dict( + backbone=dict( + _delete_=True, + type='RegNet', + arch='regnetx_1.6gf', + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://regnetx_1.6gf')), + neck=dict( + type='FPN', + in_channels=[72, 168, 408, 912], + out_channels=256, + num_outs=5)) + +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.00005) +optimizer_config = dict( + _delete_=True, grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/regnet/mask_rcnn_regnetx-12GF_fpn_1x_coco.py b/configs/regnet/mask_rcnn_regnetx-12GF_fpn_1x_coco.py new file mode 100644 index 0000000..ce3661c --- /dev/null +++ b/configs/regnet/mask_rcnn_regnetx-12GF_fpn_1x_coco.py @@ -0,0 +1,17 @@ +_base_ = './mask_rcnn_regnetx-3.2GF_fpn_1x_coco.py' +model = dict( + backbone=dict( + type='RegNet', + arch='regnetx_12gf', + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://regnetx_12gf')), + neck=dict( + type='FPN', + in_channels=[224, 448, 896, 2240], + 
out_channels=256, + num_outs=5)) diff --git a/configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_1x_coco.py b/configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_1x_coco.py new file mode 100644 index 0000000..44bf0d1 --- /dev/null +++ b/configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_1x_coco.py @@ -0,0 +1,58 @@ +_base_ = [ + '../_base_/models/mask_rcnn_r50_fpn.py', + '../_base_/datasets/coco_instance.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +model = dict( + backbone=dict( + _delete_=True, + type='RegNet', + arch='regnetx_3.2gf', + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://regnetx_3.2gf')), + neck=dict( + type='FPN', + in_channels=[96, 192, 432, 1008], + out_channels=256, + num_outs=5)) +img_norm_cfg = dict( + # The mean and std are used in PyCls when training RegNets + mean=[103.53, 116.28, 123.675], + std=[57.375, 57.12, 58.395], + to_rgb=False) +train_pipeline = [ + # Images are converted to float32 directly after loading in PyCls + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + 
test=dict(pipeline=test_pipeline)) +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.00005) diff --git a/configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_mdconv_c3-c5_1x_coco.py b/configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_mdconv_c3-c5_1x_coco.py new file mode 100644 index 0000000..5b53428 --- /dev/null +++ b/configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_mdconv_c3-c5_1x_coco.py @@ -0,0 +1,7 @@ +_base_ = 'mask_rcnn_regnetx-3.2GF_fpn_1x_coco.py' +model = dict( + backbone=dict( + dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True), + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://regnetx_3.2gf'))) diff --git a/configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py b/configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py new file mode 100644 index 0000000..aca64d3 --- /dev/null +++ b/configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py @@ -0,0 +1,66 @@ +_base_ = [ + '../_base_/models/mask_rcnn_r50_fpn.py', + '../_base_/datasets/coco_instance.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +model = dict( + backbone=dict( + _delete_=True, + type='RegNet', + arch='regnetx_3.2gf', + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://regnetx_3.2gf')), + neck=dict( + type='FPN', + in_channels=[96, 192, 432, 1008], + out_channels=256, + num_outs=5)) +img_norm_cfg = dict( + # The mean and std are used in PyCls when training RegNets + mean=[103.53, 116.28, 123.675], + std=[57.375, 57.12, 58.395], + to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict( + type='Resize', + img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), + (1333, 768), (1333, 800)], + multiscale_mode='value', + keep_ratio=True), + 
dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.00005) +lr_config = dict(step=[28, 34]) +runner = dict(type='EpochBasedRunner', max_epochs=36) +optimizer_config = dict( + _delete_=True, grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/regnet/mask_rcnn_regnetx-400MF_fpn_mstrain-poly_3x_coco.py b/configs/regnet/mask_rcnn_regnetx-400MF_fpn_mstrain-poly_3x_coco.py new file mode 100644 index 0000000..c38dfa6 --- /dev/null +++ b/configs/regnet/mask_rcnn_regnetx-400MF_fpn_mstrain-poly_3x_coco.py @@ -0,0 +1,26 @@ +_base_ = [ + '../common/mstrain-poly_3x_coco_instance.py', + '../_base_/models/mask_rcnn_r50_fpn.py' +] + +model = dict( + backbone=dict( + _delete_=True, + type='RegNet', + arch='regnetx_400mf', + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://regnetx_400mf')), + neck=dict( + type='FPN', + in_channels=[32, 64, 160, 384], + out_channels=256, + num_outs=5)) + +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.00005) +optimizer_config = dict( + _delete_=True, grad_clip=dict(max_norm=35, norm_type=2)) diff --git 
a/configs/regnet/mask_rcnn_regnetx-4GF_fpn_1x_coco.py b/configs/regnet/mask_rcnn_regnetx-4GF_fpn_1x_coco.py new file mode 100644 index 0000000..874d485 --- /dev/null +++ b/configs/regnet/mask_rcnn_regnetx-4GF_fpn_1x_coco.py @@ -0,0 +1,17 @@ +_base_ = './mask_rcnn_regnetx-3.2GF_fpn_1x_coco.py' +model = dict( + backbone=dict( + type='RegNet', + arch='regnetx_4.0gf', + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://regnetx_4.0gf')), + neck=dict( + type='FPN', + in_channels=[80, 240, 560, 1360], + out_channels=256, + num_outs=5)) diff --git a/configs/regnet/mask_rcnn_regnetx-4GF_fpn_mstrain-poly_3x_coco.py b/configs/regnet/mask_rcnn_regnetx-4GF_fpn_mstrain-poly_3x_coco.py new file mode 100644 index 0000000..f0b65ea --- /dev/null +++ b/configs/regnet/mask_rcnn_regnetx-4GF_fpn_mstrain-poly_3x_coco.py @@ -0,0 +1,26 @@ +_base_ = [ + '../common/mstrain-poly_3x_coco_instance.py', + '../_base_/models/mask_rcnn_r50_fpn.py' +] + +model = dict( + backbone=dict( + _delete_=True, + type='RegNet', + arch='regnetx_4.0gf', + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://regnetx_4.0gf')), + neck=dict( + type='FPN', + in_channels=[80, 240, 560, 1360], + out_channels=256, + num_outs=5)) + +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.00005) +optimizer_config = dict( + _delete_=True, grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/regnet/mask_rcnn_regnetx-6.4GF_fpn_1x_coco.py b/configs/regnet/mask_rcnn_regnetx-6.4GF_fpn_1x_coco.py new file mode 100644 index 0000000..99387d8 --- /dev/null +++ b/configs/regnet/mask_rcnn_regnetx-6.4GF_fpn_1x_coco.py @@ -0,0 +1,17 @@ +_base_ = './mask_rcnn_regnetx-3.2GF_fpn_1x_coco.py' +model = dict( + backbone=dict( + type='RegNet', 
+ arch='regnetx_6.4gf', + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://regnetx_6.4gf')), + neck=dict( + type='FPN', + in_channels=[168, 392, 784, 1624], + out_channels=256, + num_outs=5)) diff --git a/configs/regnet/mask_rcnn_regnetx-800MF_fpn_mstrain-poly_3x_coco.py b/configs/regnet/mask_rcnn_regnetx-800MF_fpn_mstrain-poly_3x_coco.py new file mode 100644 index 0000000..335ebab --- /dev/null +++ b/configs/regnet/mask_rcnn_regnetx-800MF_fpn_mstrain-poly_3x_coco.py @@ -0,0 +1,26 @@ +_base_ = [ + '../common/mstrain-poly_3x_coco_instance.py', + '../_base_/models/mask_rcnn_r50_fpn.py' +] + +model = dict( + backbone=dict( + _delete_=True, + type='RegNet', + arch='regnetx_800mf', + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://regnetx_800mf')), + neck=dict( + type='FPN', + in_channels=[64, 128, 288, 672], + out_channels=256, + num_outs=5)) + +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.00005) +optimizer_config = dict( + _delete_=True, grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/regnet/mask_rcnn_regnetx-8GF_fpn_1x_coco.py b/configs/regnet/mask_rcnn_regnetx-8GF_fpn_1x_coco.py new file mode 100644 index 0000000..1e7832f --- /dev/null +++ b/configs/regnet/mask_rcnn_regnetx-8GF_fpn_1x_coco.py @@ -0,0 +1,17 @@ +_base_ = './mask_rcnn_regnetx-3.2GF_fpn_1x_coco.py' +model = dict( + backbone=dict( + type='RegNet', + arch='regnetx_8.0gf', + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://regnetx_8.0gf')), + neck=dict( + type='FPN', + in_channels=[80, 240, 720, 1920], + out_channels=256, + num_outs=5)) diff 
--git a/configs/regnet/metafile.yml b/configs/regnet/metafile.yml new file mode 100644 index 0000000..5390a35 --- /dev/null +++ b/configs/regnet/metafile.yml @@ -0,0 +1,437 @@ +Collections: + - Name: RegNet + Metadata: + Training Data: COCO + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x V100 GPUs + Architecture: + - RegNet + Paper: + URL: https://arxiv.org/abs/2003.13678 + Title: 'Designing Network Design Spaces' + README: configs/regnet/README.md + Code: + URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11 + Version: v2.1.0 + +Models: + - Name: mask_rcnn_regnetx-3.2GF_fpn_1x_coco + In Collection: RegNet + Config: configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_1x_coco.py + Metadata: + Training Memory (GB): 5.0 + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 40.3 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 36.6 + Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-3.2GF_fpn_1x_coco/mask_rcnn_regnetx-3.2GF_fpn_1x_coco_20200520_163141-2a9d1814.pth + + - Name: mask_rcnn_regnetx-4GF_fpn_1x_coco + In Collection: RegNet + Config: configs/regnet/mask_rcnn_regnetx-4GF_fpn_1x_coco.py + Metadata: + Training Memory (GB): 5.5 + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.5 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 37.4 + Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-4GF_fpn_1x_coco/mask_rcnn_regnetx-4GF_fpn_1x_coco_20200517_180217-32e9c92d.pth + + - Name: mask_rcnn_regnetx-6.4GF_fpn_1x_coco + In Collection: RegNet + Config: configs/regnet/mask_rcnn_regnetx-6.4GF_fpn_1x_coco.py + Metadata: + Training Memory (GB): 6.1 + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.0 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 37.1 + Weights: 
https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-6.4GF_fpn_1x_coco/mask_rcnn_regnetx-6.4GF_fpn_1x_coco_20200517_180439-3a7aae83.pth + + - Name: mask_rcnn_regnetx-8GF_fpn_1x_coco + In Collection: RegNet + Config: configs/regnet/mask_rcnn_regnetx-8GF_fpn_1x_coco.py + Metadata: + Training Memory (GB): 6.4 + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.7 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 37.5 + Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-8GF_fpn_1x_coco/mask_rcnn_regnetx-8GF_fpn_1x_coco_20200517_180515-09daa87e.pth + + - Name: mask_rcnn_regnetx-12GF_fpn_1x_coco + In Collection: RegNet + Config: configs/regnet/mask_rcnn_regnetx-12GF_fpn_1x_coco.py + Metadata: + Training Memory (GB): 7.4 + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 42.2 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 38 + Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-12GF_fpn_1x_coco/mask_rcnn_regnetx-12GF_fpn_1x_coco_20200517_180552-b538bd8b.pth + + - Name: mask_rcnn_regnetx-3.2GF_fpn_mdconv_c3-c5_1x_coco + In Collection: RegNet + Config: configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_mdconv_c3-c5_1x_coco.py + Metadata: + Training Memory (GB): 5.0 + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 40.3 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 36.6 + Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-3.2GF_fpn_mdconv_c3-c5_1x_coco/mask_rcnn_regnetx-3.2GF_fpn_mdconv_c3-c5_1x_coco_20200520_172726-75f40794.pth + + - Name: faster_rcnn_regnetx-3.2GF_fpn_1x_coco + In Collection: RegNet + Config: configs/regnet/faster_rcnn_regnetx-3.2GF_fpn_1x_coco.py + Metadata: + Training Memory (GB): 4.5 + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 39.9 
+ Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-3.2GF_fpn_1x_coco/faster_rcnn_regnetx-3.2GF_fpn_1x_coco_20200517_175927-126fd9bf.pth + + - Name: faster_rcnn_regnetx-3.2GF_fpn_2x_coco + In Collection: RegNet + Config: configs/regnet/faster_rcnn_regnetx-3.2GF_fpn_2x_coco.py + Metadata: + Training Memory (GB): 4.5 + Epochs: 24 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.1 + Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-3.2GF_fpn_2x_coco/faster_rcnn_regnetx-3.2GF_fpn_2x_coco_20200520_223955-e2081918.pth + + - Name: retinanet_regnetx-800MF_fpn_1x_coco + In Collection: RegNet + Config: configs/regnet/retinanet_regnetx-800MF_fpn_1x_coco.py + Metadata: + Training Memory (GB): 2.5 + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 35.6 + Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/retinanet_regnetx-800MF_fpn_1x_coco/retinanet_regnetx-800MF_fpn_1x_coco_20200517_191403-f6f91d10.pth + + - Name: retinanet_regnetx-1.6GF_fpn_1x_coco + In Collection: RegNet + Config: configs/regnet/retinanet_regnetx-1.6GF_fpn_1x_coco.py + Metadata: + Training Memory (GB): 3.3 + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 37.3 + Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/retinanet_regnetx-1.6GF_fpn_1x_coco/retinanet_regnetx-1.6GF_fpn_1x_coco_20200517_191403-37009a9d.pth + + - Name: retinanet_regnetx-3.2GF_fpn_1x_coco + In Collection: RegNet + Config: configs/regnet/retinanet_regnetx-3.2GF_fpn_1x_coco.py + Metadata: + Training Memory (GB): 4.2 + Epochs: 12 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 39.1 + Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/retinanet_regnetx-3.2GF_fpn_1x_coco/retinanet_regnetx-3.2GF_fpn_1x_coco_20200520_163141-cb1509e8.pth + + - Name: faster_rcnn_regnetx-400MF_fpn_mstrain_3x_coco + In Collection: 
RegNet + Config: configs/regnet/faster_rcnn_regnetx-400MF_fpn_mstrain_3x_coco.py + Metadata: + Training Memory (GB): 2.3 + Epochs: 36 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 37.1 + Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-400MF_fpn_mstrain_3x_coco/faster_rcnn_regnetx-400MF_fpn_mstrain_3x_coco_20210526_095112-e1967c37.pth + + - Name: faster_rcnn_regnetx-800MF_fpn_mstrain_3x_coco + In Collection: RegNet + Config: configs/regnet/faster_rcnn_regnetx-800MF_fpn_mstrain_3x_coco.py + Metadata: + Training Memory (GB): 2.8 + Epochs: 36 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 38.8 + Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-800MF_fpn_mstrain_3x_coco/faster_rcnn_regnetx-800MF_fpn_mstrain_3x_coco_20210526_095118-a2c70b20.pth + + - Name: faster_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco + In Collection: RegNet + Config: configs/regnet/faster_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco.py + Metadata: + Training Memory (GB): 3.4 + Epochs: 36 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 40.5 + Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco/faster_rcnn_regnetx-1_20210526_095325-94aa46cc.pth + + - Name: faster_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco + In Collection: RegNet + Config: configs/regnet/faster_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py + Metadata: + Training Memory (GB): 4.4 + Epochs: 36 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 42.3 + Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco/faster_rcnn_regnetx-3_20210526_095152-e16a5227.pth + + - Name: faster_rcnn_regnetx-4GF_fpn_mstrain_3x_coco + In Collection: RegNet + Config: configs/regnet/faster_rcnn_regnetx-4GF_fpn_mstrain_3x_coco.py + Metadata: + Training Memory (GB): 4.9 + Epochs: 36 + Results: + 
- Task: Object Detection + Dataset: COCO + Metrics: + box AP: 42.8 + Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-4GF_fpn_mstrain_3x_coco/faster_rcnn_regnetx-4GF_fpn_mstrain_3x_coco_20210526_095201-65eaf841.pth + + - Name: mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco + In Collection: RegNet + Config: configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py + Metadata: + Training Memory (GB): 5.0 + Epochs: 36 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 43.1 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 38.7 + Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco_20200521_202221-99879813.pth + + - Name: mask_rcnn_regnetx-400MF_fpn_mstrain-poly_3x_coco + In Collection: RegNet + Config: configs/regnet/mask_rcnn_regnetx-400MF_fpn_mstrain-poly_3x_coco.py + Metadata: + Training Memory (GB): 2.5 + Epochs: 36 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 37.6 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 34.4 + Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-400MF_fpn_mstrain-poly_3x_coco/mask_rcnn_regnetx-400MF_fpn_mstrain-poly_3x_coco_20210601_235443-8aac57a4.pth + + - Name: mask_rcnn_regnetx-800MF_fpn_mstrain-poly_3x_coco + In Collection: RegNet + Config: configs/regnet/mask_rcnn_regnetx-800MF_fpn_mstrain-poly_3x_coco.py + Metadata: + Training Memory (GB): 2.9 + Epochs: 36 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 39.5 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 36.1 + Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-800MF_fpn_mstrain-poly_3x_coco/mask_rcnn_regnetx-800MF_fpn_mstrain-poly_3x_coco_20210602_210641-715d51f5.pth + + - Name: mask_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco + In Collection: RegNet 
+ Config: configs/regnet/mask_rcnn_regnetx-1.6GF_fpn_mstrain-poly_3x_coco.py + Metadata: + Training Memory (GB): 3.6 + Epochs: 36 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 40.9 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 37.5 + Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-1.6GF_fpn_mstrain-poly_3x_coco/mask_rcnn_regnetx-1_20210602_210641-6764cff5.pth + + - Name: mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco + In Collection: RegNet + Config: configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py + Metadata: + Training Memory (GB): 5.0 + Epochs: 36 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 43.1 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 38.7 + Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-1.6GF_fpn_mstrain-poly_3x_coco/mask_rcnn_regnetx-1_20210602_210641-6e63e19c.pth + + - Name: mask_rcnn_regnetx-4GF_fpn_mstrain_3x_coco + In Collection: RegNet + Config: configs/regnet/mask_rcnn_regnetx-4GF_fpn_mstrain-poly_3x_coco.py + Metadata: + Training Memory (GB): 5.1 + Epochs: 36 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 43.4 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 39.2 + Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-4GF_fpn_mstrain-poly_3x_coco/mask_rcnn_regnetx-4GF_fpn_mstrain-poly_3x_coco_20210602_032621-00f0331c.pth + + - Name: cascade_mask_rcnn_regnetx-400MF_fpn_mstrain_3x_coco + In Collection: RegNet + Config: configs/regnet/cascade_mask_rcnn_regnetx-400MF_fpn_mstrain_3x_coco.py + Metadata: + Training Memory (GB): 4.3 + Epochs: 36 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.6 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 36.4 + Weights: 
https://download.openmmlab.com/mmdetection/v2.0/regnet/cascade_mask_rcnn_regnetx-400MF_fpn_mstrain_3x_coco/cascade_mask_rcnn_regnetx-400MF_fpn_mstrain_3x_coco_20210715_211619-5142f449.pth + + - Name: cascade_mask_rcnn_regnetx-800MF_fpn_mstrain_3x_coco + In Collection: RegNet + Config: configs/regnet/cascade_mask_rcnn_regnetx-800MF_fpn_mstrain_3x_coco.py + Metadata: + Training Memory (GB): 4.8 + Epochs: 36 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 42.8 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 37.6 + Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/cascade_mask_rcnn_regnetx-800MF_fpn_mstrain_3x_coco/cascade_mask_rcnn_regnetx-800MF_fpn_mstrain_3x_coco_20210715_211616-dcbd13f4.pth + + - Name: cascade_mask_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco + In Collection: RegNet + Config: configs/regnet/cascade_mask_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco.py + Metadata: + Training Memory (GB): 5.4 + Epochs: 36 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 44.5 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 39.0 + Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/cascade_mask_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco/cascade_mask_rcnn_regnetx-1_20210715_211616-75f29a61.pth + + - Name: cascade_mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco + In Collection: RegNet + Config: configs/regnet/cascade_mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py + Metadata: + Training Memory (GB): 6.4 + Epochs: 36 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 45.8 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 40.0 + Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/cascade_mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco/cascade_mask_rcnn_regnetx-3_20210715_211616-b9c2c58b.pth + + - Name: cascade_mask_rcnn_regnetx-4GF_fpn_mstrain_3x_coco + In Collection: RegNet + Config: 
configs/regnet/cascade_mask_rcnn_regnetx-4GF_fpn_mstrain_3x_coco.py + Metadata: + Training Memory (GB): 6.9 + Epochs: 36 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 45.8 + - Task: Instance Segmentation + Dataset: COCO + Metrics: + mask AP: 40.0 + Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/cascade_mask_rcnn_regnetx-4GF_fpn_mstrain_3x_coco/cascade_mask_rcnn_regnetx-4GF_fpn_mstrain_3x_coco_20210715_212034-cbb1be4c.pth diff --git a/configs/regnet/retinanet_regnetx-1.6GF_fpn_1x_coco.py b/configs/regnet/retinanet_regnetx-1.6GF_fpn_1x_coco.py new file mode 100644 index 0000000..7395c1b --- /dev/null +++ b/configs/regnet/retinanet_regnetx-1.6GF_fpn_1x_coco.py @@ -0,0 +1,17 @@ +_base_ = './retinanet_regnetx-3.2GF_fpn_1x_coco.py' +model = dict( + backbone=dict( + type='RegNet', + arch='regnetx_1.6gf', + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://regnetx_1.6gf')), + neck=dict( + type='FPN', + in_channels=[72, 168, 408, 912], + out_channels=256, + num_outs=5)) diff --git a/configs/regnet/retinanet_regnetx-3.2GF_fpn_1x_coco.py b/configs/regnet/retinanet_regnetx-3.2GF_fpn_1x_coco.py new file mode 100644 index 0000000..f05307c --- /dev/null +++ b/configs/regnet/retinanet_regnetx-3.2GF_fpn_1x_coco.py @@ -0,0 +1,59 @@ +_base_ = [ + '../_base_/models/retinanet_r50_fpn.py', + '../_base_/datasets/coco_detection.py', + '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' +] +model = dict( + backbone=dict( + _delete_=True, + type='RegNet', + arch='regnetx_3.2gf', + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://regnetx_3.2gf')), + neck=dict( + type='FPN', + in_channels=[96, 192, 432, 1008], + out_channels=256, + num_outs=5)) 
+img_norm_cfg = dict( + # The mean and std are used in PyCls when training RegNets + mean=[103.53, 116.28, 123.675], + std=[57.375, 57.12, 58.395], + to_rgb=False) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', with_bbox=True), + dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 800), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.00005) +optimizer_config = dict( + _delete_=True, grad_clip=dict(max_norm=35, norm_type=2)) diff --git a/configs/regnet/retinanet_regnetx-800MF_fpn_1x_coco.py b/configs/regnet/retinanet_regnetx-800MF_fpn_1x_coco.py new file mode 100644 index 0000000..f6f8989 --- /dev/null +++ b/configs/regnet/retinanet_regnetx-800MF_fpn_1x_coco.py @@ -0,0 +1,17 @@ +_base_ = './retinanet_regnetx-3.2GF_fpn_1x_coco.py' +model = dict( + backbone=dict( + type='RegNet', + arch='regnetx_800mf', + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://regnetx_800mf')), + neck=dict( + type='FPN', + in_channels=[64, 128, 288, 672], + out_channels=256, + num_outs=5)) diff --git a/configs/reppoints/README.md b/configs/reppoints/README.md new file mode 
100644 index 0000000..fc58915 --- /dev/null +++ b/configs/reppoints/README.md @@ -0,0 +1,54 @@ +# RepPoints: Point Set Representation for Object Detection + +By [Ze Yang](https://yangze.tech/), [Shaohui Liu](http://b1ueber2y.me/), and [Han Hu](https://ancientmooner.github.io/). + +We provide code support and configuration files to reproduce the results in the paper for +["RepPoints: Point Set Representation for Object Detection"](https://arxiv.org/abs/1904.11490) on COCO object detection. + +## Introduction + + + +**RepPoints**, initially described in [arXiv](https://arxiv.org/abs/1904.11490), is a new representation method for visual objects, on which visual understanding tasks are typically centered. Visual object representation, aiming at both geometric description and appearance feature extraction, is conventionally achieved by `bounding box + RoIPool (RoIAlign)`. The bounding box representation is convenient to use; however, it provides only a rectangular localization of objects that lacks geometric precision and may consequently degrade feature quality. Our new representation, RepPoints, models objects by a `point set` instead of a `bounding box`, whose points learn to adaptively position themselves over an object in a manner that circumscribes the object’s `spatial extent` and enables `semantically aligned feature extraction`. This richer and more flexible representation maintains the convenience of bounding boxes while facilitating various visual understanding applications. This repo demonstrates the effectiveness of RepPoints for COCO object detection. + +Another feature of this repo is the demonstration of an `anchor-free detector`, which can be as effective as state-of-the-art anchor-based detection methods. The anchor-free detector can utilize either `bounding box` or `RepPoints` as the basic object representation. + +
Learning RepPoints in Object Detection.
+(@orig`GoLu_6GX pSN&tV5 WnpD;15yn