diff --git a/README.md b/README.md index 2461f75d..cbb8594c 100644 --- a/README.md +++ b/README.md @@ -56,6 +56,7 @@ pip install ezflow - [x] [FlyingChairs](https://lmb.informatik.uni-freiburg.de/resources/datasets/FlyingChairs.en.html#flyingchairs) - [x] [HD1K](http://hci-benchmark.iwr.uni-heidelberg.de/) - [x] [KITTI](http://www.cvlibs.net/datasets/kitti/eval_scene_flow.php?benchmark=flow) +- [x] [Kubric](https://github.com/google-research/kubric) - [x] [MPI Sintel](http://sintel.is.tue.mpg.de/) - [x] [SceneFlow Monkaa](https://lmb.informatik.uni-freiburg.de/resources/datasets/SceneFlowDatasets.en.html) - [x] [SceneFlow Driving](https://lmb.informatik.uni-freiburg.de/resources/datasets/SceneFlowDatasets.en.html) diff --git a/configs/models/flownet_c.yaml b/configs/models/flownet_c.yaml index 3849a4f6..895fcec8 100644 --- a/configs/models/flownet_c.yaml +++ b/configs/models/flownet_c.yaml @@ -5,11 +5,10 @@ ENCODER: CONFIG: [64, 128, 256, 256, 512, 512, 512, 512, 1024, 1024] NORM: batch SIMILARITY: - NAME: CorrelationLayer - PAD_SIZE: 10 + NAME: IterSpatialCorrelationSampler + PAD_SIZE: 0 MAX_DISPLACEMENT: 10 DECODER: NAME: FlowNetConvDecoder IN_CHANNELS: 1024 - CONFIG: [512, 256, 128, 64] -INTERPOLATE_FLOW: True + CONFIG: [512, 256, 128, 64] \ No newline at end of file diff --git a/configs/models/pwcnet.yaml b/configs/models/pwcnet.yaml index 5f16940b..4a90eb54 100644 --- a/configs/models/pwcnet.yaml +++ b/configs/models/pwcnet.yaml @@ -3,13 +3,11 @@ ENCODER: NAME: PyramidEncoder IN_CHANNELS: 3 CONFIG: [16, 32, 64, 96, 128, 196] -SIMILARITY: - NAME: CorrelationLayer - PAD_SIZE: 4 - MAX_DISPLACEMENT: 4 DECODER: - NAME: ConvDecoder + NAME: PyramidDecoder CONFIG: [128, 128, 96, 64, 32] TO_FLOW: True -INTERPOLATE_FLOW: True -FLOW_SCALE_FACTOR: 20.0 \ No newline at end of file + SIMILARITY: + PAD_SIZE: 0 + MAX_DISPLACEMENT: 4 + FLOW_SCALE_FACTOR: 20.0 \ No newline at end of file diff --git a/configs/models/raft.yaml b/configs/models/raft.yaml index c2602334..232d089c 100644 --- a/configs/models/raft.yaml +++ b/configs/models/raft.yaml @@ -6,7 +6,7 @@ ENCODER: OUT_CHANNELS: 256 NORM: instance P_DROPOUT: 0.0 - LAYER_CONFIG: [32, 64, 96] + LAYER_CONFIG: [64, 96, 128] INTERMEDIATE_FEATURES: False CONTEXT: NAME: BasicEncoder @@ -14,7 +14,7 @@ ENCODER: OUT_CHANNELS: 256 NORM: batch P_DROPOUT: 0.0 - LAYER_CONFIG: [32, 64, 96] + LAYER_CONFIG: [64, 96, 128] INTERMEDIATE_FEATURES: False HIDDEN_DIM: 128 CONTEXT_DIM: 128 diff --git a/configs/models/raft_small.yaml b/configs/models/raft_small.yaml index 0dcb6781..74784331 100644 --- a/configs/models/raft_small.yaml +++ b/configs/models/raft_small.yaml @@ -1,10 +1,9 @@ -_BASE_: "../base.yaml" -NAME: RAFT_SMALL +NAME: RAFT ENCODER: FEATURE: - NAME: BottleneckEncoder + NAME: BasicEncoder IN_CHANNELS: 3 - OUT_CHANNELS: 256 + OUT_CHANNELS: 128 NORM: instance P_DROPOUT: 0.0 LAYER_CONFIG: [32, 64, 96] @@ -12,11 +11,20 @@ ENCODER: CONTEXT: NAME: BasicEncoder IN_CHANNELS: 3 - OUT_CHANNELS: 256 + OUT_CHANNELS: 160 NORM: batch P_DROPOUT: 0.0 LAYER_CONFIG: [32, 64, 96] INTERMEDIATE_FEATURES: False HIDDEN_DIM: 96 CONTEXT_DIM: 64 -CORR_RADIUS: 3 +SIMILARITY: + NAME: MutliScalePairwise4DCorr + NUM_LEVELS: 4 +DECODER: + NAME: RecurrentLookupUpdateBlock + INPUT_DIM: 96 +CORR_RADIUS: 4 +CORR_LEVELS: 3 +MIXED_PRECISION: False +UPDATE_ITERS: 12 \ No newline at end of file diff --git a/configs/trainers/_base_/chairs_baseline.yaml b/configs/trainers/_base_/chairs_baseline.yaml new file mode 100644 index 00000000..1e2e88c5 --- /dev/null +++ 
b/configs/trainers/_base_/chairs_baseline.yaml @@ -0,0 +1,89 @@ +DATA: + TRAIN_DATASET: + NAME: "flyingchairs" + ROOT_DIR: "./Datasets/FlyingChairs_release/data" + VAL_DATASET: + NAME: "flyingchairs" + ROOT_DIR: "./Datasets/FlyingChairs_release/data" + NUM_WORKERS: 4 + PIN_MEMORY: True + APPEND_VALID_MASK: False + SHUFFLE: True + AUGMENTATION: + # Augmentation Settings borrowed from RAFT + USE: True + PARAMS: + TRAINING: + COLOR_AUG_PARAMS: { + "enabled": True, + "asymmetric_color_aug_prob": 0.2, + "brightness": 0.4, + "contrast": 0.4, + "saturation": 0.4, + "hue": 0.15915494309189535 + } + ERASER_AUG_PARAMS: { + "enabled": True, + "aug_prob": 0.5, + "bounds": [50, 100] + } + NOISE_AUG_PARAMS: { + "enabled": False, + "aug_prob": 0.5, + "noise_std_range": 0.06 + } + FLIP_AUG_PARAMS: { + "enabled": True, + "h_flip_prob": 0.5, + "v_flip_prob": 0.1 + } + SPATIAL_AUG_PARAMS: { + "enabled": True, + "aug_prob": 0.8, + "stretch_prob": 0.8, + "min_scale": -0.1, + "max_scale": 1.0, + "max_stretch": 0.2, + } + ADVANCED_SPATIAL_AUG_PARAMS: { + "enabled": False, + "scale1": 0.0, + "scale2": 0.0, + "stretch": 0.0, + "rotate": 0.0, + "translate": 0.0, + "enable_out_of_boundary_crop": False + } + VALIDATION: + SPATIAL_AUG_PARAMS: {"enabled": False} + COLOR_AUG_PARAMS: {"enabled": False} + ERASER_AUG_PARAMS: {"enabled": False} + FLIP_AUG_PARAMS: {"enabled": False} + ADVANCED_SPATIAL_AUG_PARAMS : {"enabled": False} +OPTIMIZER: + NAME: AdamW + LR: 0.0004 + PARAMS: + weight_decay: 0.0001 + betas: [0.9, 0.999] + eps: 1.e-08 + amsgrad: False +GRAD_CLIP: + USE: True + VALUE: 1.0 +FREEZE_BATCH_NORM: False +TARGET_SCALE_FACTOR: 1.0 +MIXED_PRECISION: False +DEVICE: "0" +DISTRIBUTED: + USE: False + WORLD_SIZE: 2 + BACKEND: nccl + MASTER_ADDR: localhost + MASTER_PORT: "12355" +EPOCHS: null +NUM_STEPS: null +RESUME_TRAINING: + CONSOLIDATED_CKPT: null + EPOCHS: null + START_EPOCH: null \ No newline at end of file diff --git a/configs/trainers/_base_/kubric_baseline.yaml b/configs/trainers/_base_/kubric_baseline.yaml new file mode 100644 index 00000000..0f849cca --- /dev/null +++ b/configs/trainers/_base_/kubric_baseline.yaml @@ -0,0 +1,87 @@ +DATA: + TRAIN_DATASET: + NAME: "kubric" + ROOT_DIR: "./Datasets/KubricFlow" + VAL_DATASET: + NAME: "kubric" + ROOT_DIR: "./Datasets/KubricFlow" + NUM_WORKERS: 4 + PIN_MEMORY: True + APPEND_VALID_MASK: False + SHUFFLE: True + AUGMENTATION: + USE: True + PARAMS: + TRAINING: + COLOR_AUG_PARAMS: { + "enabled": True, + "asymmetric_color_aug_prob": 0.2, + "brightness": 0.4, + "contrast": 0.4, + "saturation": 0.4, + "hue": 0.15915494309189535 + } + ERASER_AUG_PARAMS: { + "enabled": True, + "aug_prob": 0.5, + "bounds": [50, 100] + } + NOISE_AUG_PARAMS: { + "enabled": False, + "aug_prob": 0.5, + "noise_std_range": 0.06 + } + FLIP_AUG_PARAMS: { + "enabled": True, + "h_flip_prob": 0.5, + "v_flip_prob": 0.1 + } + SPATIAL_AUG_PARAMS: { + "enabled": True, + "aug_prob": 0.8, + "stretch_prob": 0.8, + "min_scale": -0.1, + "max_scale": 1.0, + "max_stretch": 0.2, + } + ADVANCED_SPATIAL_AUG_PARAMS: { + "enabled": False, + "scale1": 0.0, + "scale2": 0.0, + "stretch": 0.0, + "rotate": 0.0, + "translate": 0.0, + "enable_out_of_boundary_crop": False + } + VALIDATION: + SPATIAL_AUG_PARAMS: {"enabled": False} + COLOR_AUG_PARAMS: {"enabled": False} + ERASER_AUG_PARAMS: {"enabled": False} + FLIP_AUG_PARAMS: {"enabled": False} + ADVANCED_SPATIAL_AUG_PARAMS : {"enabled": False} +OPTIMIZER: + NAME: AdamW + LR: 0.0004 + PARAMS: + weight_decay: 0.0001 + betas: [0.9, 0.999] + eps: 1.e-08 + amsgrad: False 
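(Editor's note, not part of the patch: the OPTIMIZER block above, together with the GRAD_CLIP block that follows, corresponds roughly to the standard PyTorch calls sketched below; `model` is a stand-in placeholder.)

```python
import torch

# Placeholder model standing in for whatever ezflow model the trainer builds.
model = torch.nn.Conv2d(3, 2, kernel_size=3, padding=1)

# OPTIMIZER: AdamW with LR 0.0004 and the PARAMS listed above.
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=4e-4,
    weight_decay=1e-4,
    betas=(0.9, 0.999),
    eps=1e-8,
    amsgrad=False,
)

# GRAD_CLIP (USE: True, VALUE: 1.0): clip the gradient norm before each step.
loss = model(torch.randn(1, 3, 8, 8)).mean()
loss.backward()
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
optimizer.step()
```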
+GRAD_CLIP: + USE: True + VALUE: 1.0 +TARGET_SCALE_FACTOR: 1.0 +MIXED_PRECISION: False +DEVICE: "0" +DISTRIBUTED: + USE: False + WORLD_SIZE: 2 + BACKEND: nccl + MASTER_ADDR: localhost + MASTER_PORT: "12355" +EPOCHS: null +NUM_STEPS: null +RESUME_TRAINING: + CONSOLIDATED_CKPT: null + EPOCHS: null + START_EPOCH: null \ No newline at end of file diff --git a/configs/trainers/_base_/kubric_improved_aug.yaml b/configs/trainers/_base_/kubric_improved_aug.yaml new file mode 100644 index 00000000..ca03ffc5 --- /dev/null +++ b/configs/trainers/_base_/kubric_improved_aug.yaml @@ -0,0 +1,89 @@ +DATA: + TRAIN_DATASET: + NAME: "kubric" + ROOT_DIR: "./Datasets/KubricFlow" + VAL_DATASET: + NAME: "kubric" + ROOT_DIR: "./Datasets/KubricFlow" + NUM_WORKERS: 4 + PIN_MEMORY: True + APPEND_VALID_MASK: False + SHUFFLE: True + AUGMENTATION: + # Spatial Augmentation Settings borrowed from AutoFlow: https://github.com/google-research/opticalflow-autoflow/blob/main/src/dataset_lib/augmentations/aug_params.py + USE: True + PARAMS: + TRAINING: + COLOR_AUG_PARAMS: { + "enabled": True, + "asymmetric_color_aug_prob": 0.2, + "brightness": 0.4, + "contrast": 0.4, + "saturation": 0.4, + "hue": 0.15915494309189535 + } + ERASER_AUG_PARAMS: { + "enabled": True, + "aug_prob": 0.5, + "bounds": [50, 100] + } + NOISE_AUG_PARAMS: { + "enabled": False, + "aug_prob": 0.5, + "noise_std_range": 0.06 + } + FLIP_AUG_PARAMS: { + "enabled": True, + "h_flip_prob": 0.5, + "v_flip_prob": 0.1 + } + SPATIAL_AUG_PARAMS: { + "enabled": False, + "aug_prob": 0.0, + "stretch_prob": 0.0, + "min_scale": 0, + "max_scale": 0, + "max_stretch": 0 + } + ADVANCED_SPATIAL_AUG_PARAMS: { + "enabled": True, + "scale1": 0.3, + "scale2": 0.1, + "rotate": 0.4, + "translate": 0.4, + "stretch": 0.3, + "enable_out_of_boundary_crop": False + } + VALIDATION: + SPATIAL_AUG_PARAMS: {"enabled": False} + COLOR_AUG_PARAMS: {"enabled": False} + ERASER_AUG_PARAMS: {"enabled": False} + FLIP_AUG_PARAMS: {"enabled": False} + ADVANCED_SPATIAL_AUG_PARAMS : {"enabled": False} +OPTIMIZER: + NAME: AdamW + LR: 0.0004 + PARAMS: + weight_decay: 0.0001 + betas: [0.9, 0.999] + eps: 1.e-08 + amsgrad: False +GRAD_CLIP: + USE: True + VALUE: 1.0 +FREEZE_BATCH_NORM: False +TARGET_SCALE_FACTOR: 1.0 +MIXED_PRECISION: False +DEVICE: "0" +DISTRIBUTED: + USE: False + WORLD_SIZE: 2 + BACKEND: nccl + MASTER_ADDR: localhost + MASTER_PORT: "12355" +EPOCHS: null +NUM_STEPS: null +RESUME_TRAINING: + CONSOLIDATED_CKPT: null + EPOCHS: null + START_EPOCH: null \ No newline at end of file diff --git a/configs/trainers/_base_/things_baseline.yaml b/configs/trainers/_base_/things_baseline.yaml new file mode 100644 index 00000000..690726bd --- /dev/null +++ b/configs/trainers/_base_/things_baseline.yaml @@ -0,0 +1,89 @@ +DATA: + TRAIN_DATASET: + NAME: "flyingthings3d" + ROOT_DIR: "./Datasets/SceneFlow/FlyingThings3D" + VAL_DATASET: + NAME: "flyingthings3d" + ROOT_DIR: "./Datasets/SceneFlow/FlyingThings3D" + NUM_WORKERS: 4 + PIN_MEMORY: True + APPEND_VALID_MASK: False + SHUFFLE: True + AUGMENTATION: + # Augmentation Settings borrowed from RAFT + USE: True + PARAMS: + TRAINING: + COLOR_AUG_PARAMS: { + "enabled": True, + "asymmetric_color_aug_prob": 0.2, + "brightness": 0.4, + "contrast": 0.4, + "saturation": 0.4, + "hue": 0.15915494309189535 + } + ERASER_AUG_PARAMS: { + "enabled": True, + "aug_prob": 0.5, + "bounds": [50, 100] + } + NOISE_AUG_PARAMS: { + "enabled": False, + "aug_prob": 0.5, + "noise_std_range": 0.06 + } + FLIP_AUG_PARAMS: { + "enabled": True, + "h_flip_prob": 0.5, + "v_flip_prob": 0.1 + } + 
SPATIAL_AUG_PARAMS: { + "enabled": True, + "aug_prob": 0.8, + "stretch_prob": 0.8, + "min_scale": -0.4, + "max_scale": 0.8, + "max_stretch": 0.2, + } + ADVANCED_SPATIAL_AUG_PARAMS: { + "enabled": False, + "scale1": 0.0, + "scale2": 0.0, + "stretch": 0.0, + "rotate": 0.0, + "translate": 0.0, + "enable_out_of_boundary_crop": False + } + VALIDATION: + SPATIAL_AUG_PARAMS: {"enabled": False} + COLOR_AUG_PARAMS: {"enabled": False} + ERASER_AUG_PARAMS: {"enabled": False} + FLIP_AUG_PARAMS: {"enabled": False} + ADVANCED_SPATIAL_AUG_PARAMS : {"enabled": False} +OPTIMIZER: + NAME: AdamW + LR: 0.000125 + PARAMS: + weight_decay: 0.0001 + betas: [0.9, 0.999] + eps: 1.e-08 + amsgrad: False +GRAD_CLIP: + USE: True + VALUE: 1.0 +FREEZE_BATCH_NORM: False +TARGET_SCALE_FACTOR: 1.0 +MIXED_PRECISION: False +DEVICE: "0" +DISTRIBUTED: + USE: False + WORLD_SIZE: 2 + BACKEND: nccl + MASTER_ADDR: localhost + MASTER_PORT: "12355" +EPOCHS: null +NUM_STEPS: null +RESUME_TRAINING: + CONSOLIDATED_CKPT: null + EPOCHS: null + START_EPOCH: null \ No newline at end of file diff --git a/configs/trainers/base.yaml b/configs/trainers/base.yaml deleted file mode 100644 index f99e7ca4..00000000 --- a/configs/trainers/base.yaml +++ /dev/null @@ -1,68 +0,0 @@ -DATA: - TRAIN_DATASET: - NAME: "flyingchairs" - ROOT_DIR: "../../../Datasets/FlyingChairs_release/data" - VAL_DATASET: - NAME: "flyingchairs" - ROOT_DIR: "../../../Datasets/FlyingChairs_release/data" - BATCH_SIZE: 16 - NUM_WORKERS: 1 - PIN_MEMORY: True - TRAIN_CROP_SIZE: [256, 256] - VAL_CROP_SIZE: [256, 256] - AUGMENTATION: - USE: True - PARAMS: - CROP_SIZE: [256, 256] - TRAINING: - SPATIAL_AUG_PARAMS: {"min_scale": -0.1, "max_scale": 1.0, "flip": True} - VALIDATION: - SPATIAL_AUG_PARAMS: {"aug_prob": 0.0} - COLOR_AUG_PARAMS: {"aug_prob": 0.0} - ERASER_AUG_PARAMS: {"aug_prob": 0.0} -OPTIMIZER: - NAME: AdamW - LR: 0.0004 - PARAMS: - weight_decay: 0.0001 - eps: 1.e-08 -SCHEDULER: - USE: True - NAME: OneCycleLR - PARAMS: - max_lr: 0.0004 - epochs: 100 - steps_per_epoch: 2300 - pct_start: 0.05 - cycle_momentum: False - anneal_strategy: linear -CRITERION: - CUSTOM: True - NAME: MultiScaleLoss - PARAMS: - weights: [1, 0.5, 0.25, 0.125, 0.0625] -GRAD_CLIP: - USE: True - VALUE: 1.0 -TARGET_SCALE_FACTOR: 1 -APPEND_VALID_MASK: False -MIXED_PRECISION: False -DEVICE: "0" -DISTRIBUTED: - USE: False - WORLD_SIZE: 2 - BACKEND: nccl - MASTER_ADDR: localhost - MASTER_PORT: "12355" -LOG_DIR: "./logs" -LOG_ITERATIONS_INTERVAL: 100 -CKPT_DIR: "./ckpts" -CKPT_INTERVAL: 1 -VALIDATE_INTERVAL: 1 -VALIDATE_ON: metric -EPOCHS: 100 -NUM_STEPS: null -RESUME_TRAINING: - CONSOLIDATED_CKPT: null - EPOCHS: 100 - START_EPOCH: null \ No newline at end of file diff --git a/configs/trainers/dicl_default.yaml b/configs/trainers/dicl_default.yaml deleted file mode 100644 index df1b5e76..00000000 --- a/configs/trainers/dicl_default.yaml +++ /dev/null @@ -1,5 +0,0 @@ -_BASE_: "./base.yaml" -CRITERION: - NAME: "MultiScaleLoss" - PARAMS: null - \ No newline at end of file diff --git a/configs/trainers/flownetc/flownetc_chairs_baseline.yaml b/configs/trainers/flownetc/flownetc_chairs_baseline.yaml new file mode 100644 index 00000000..efa87fa3 --- /dev/null +++ b/configs/trainers/flownetc/flownetc_chairs_baseline.yaml @@ -0,0 +1,41 @@ +_BASE_: "../_base_/chairs_baseline.yaml" +TARGET_SCALE_FACTOR: 20.0 +DATA: + BATCH_SIZE: 2 # Effective Batch Size = 2 x 4 GPUs = 8 + TRAIN_CROP_SIZE: [384, 448] + VAL_CROP_SIZE: [384, 448] + APPEND_VALID_MASK: False + NORM_PARAMS: {"use": True, "mean":[0.0, 0.0, 0.0], "std":[255.0, 
255.0, 255.0]} +SCHEDULER: + USE: True + NAME: OneCycleLR + PARAMS: + max_lr: 0.0004 + total_steps: 1200100 + pct_start: 0.05 + cycle_momentum: False + anneal_strategy: linear +CRITERION: + CUSTOM: True + NAME: MultiScaleLoss + PARAMS: + norm: "l2" + weights: [0.32, 0.08, 0.02, 0.01, 0.005] + average: "sum" + resize_flow: "downsample" +DEVICE: "all" +DISTRIBUTED: + USE: True + WORLD_SIZE: 4 + BACKEND: nccl + MASTER_ADDR: localhost + MASTER_PORT: "12355" + SYNC_BATCH_NORM: True +EPOCHS: null +NUM_STEPS: 1200100 +LOG_DIR: "./logs" +CKPT_DIR: "./ckpts" +LOG_ITERATIONS_INTERVAL: 100 +CKPT_INTERVAL: 100000 +VALIDATE_INTERVAL: 10000 +VALIDATE_ON: metric \ No newline at end of file diff --git a/configs/trainers/flownetc/flownetc_kubric_improved_aug.yaml b/configs/trainers/flownetc/flownetc_kubric_improved_aug.yaml new file mode 100644 index 00000000..32817e1f --- /dev/null +++ b/configs/trainers/flownetc/flownetc_kubric_improved_aug.yaml @@ -0,0 +1,41 @@ +_BASE_: "../_base_/kubric_improved_aug.yaml" +TARGET_SCALE_FACTOR: 20.0 +DATA: + BATCH_SIZE: 2 # Effective Batch Size = 2 x 4 GPUs = 8 + TRAIN_CROP_SIZE: [384, 448] + VAL_CROP_SIZE: [384, 448] + APPEND_VALID_MASK: False + NORM_PARAMS: {"use": True, "mean":[0.0, 0.0, 0.0], "std":[255.0, 255.0, 255.0]} +SCHEDULER: + USE: True + NAME: OneCycleLR + PARAMS: + max_lr: 0.0004 + total_steps: 1200100 + pct_start: 0.05 + cycle_momentum: False + anneal_strategy: linear +CRITERION: + CUSTOM: True + NAME: MultiScaleLoss + PARAMS: + norm: "l2" + weights: [0.32, 0.08, 0.02, 0.01, 0.005] + average: "sum" + resize_flow: "downsample" +DEVICE: "all" +DISTRIBUTED: + USE: True + WORLD_SIZE: 4 + BACKEND: nccl + MASTER_ADDR: localhost + MASTER_PORT: "12355" + SYNC_BATCH_NORM: True +EPOCHS: null +NUM_STEPS: 1200100 +LOG_DIR: "./logs" +CKPT_DIR: "./ckpts" +LOG_ITERATIONS_INTERVAL: 100 +CKPT_INTERVAL: 100000 +VALIDATE_INTERVAL: 10000 +VALIDATE_ON: metric \ No newline at end of file diff --git a/configs/trainers/flownetc/flownetc_things_baseline.yaml b/configs/trainers/flownetc/flownetc_things_baseline.yaml new file mode 100644 index 00000000..f4cbe016 --- /dev/null +++ b/configs/trainers/flownetc/flownetc_things_baseline.yaml @@ -0,0 +1,43 @@ +_BASE_: "../_base_/things_baseline.yaml" +TARGET_SCALE_FACTOR: 20.0 +DATA: + BATCH_SIZE: 2 # Effective Batch Size = 2 x 2 GPUs = 4 + TRAIN_CROP_SIZE: [384, 768] + VAL_CROP_SIZE: [384, 768] + APPEND_VALID_MASK: False + NORM_PARAMS: {"use": True, "mean":[0.0, 0.0, 0.0], "std":[255.0, 255.0, 255.0]} +SCHEDULER: + USE: True + NAME: OneCycleLR + PARAMS: + max_lr: 0.000125 + total_steps: 1200100 + pct_start: 0.05 + cycle_momentum: False + anneal_strategy: linear +CRITERION: + CUSTOM: True + NAME: MultiScaleLoss + PARAMS: + norm: "l2" + weights: [0.32, 0.08, 0.02, 0.01, 0.005] + average: "sum" + resize_flow: "downsample" + use_valid_range: True + valid_range: [[1000,1000],[1000,1000],[1000,1000],[1000,1000],[1000,1000]] +DEVICE: "all" +DISTRIBUTED: + USE: True + WORLD_SIZE: 2 + BACKEND: nccl + MASTER_ADDR: localhost + MASTER_PORT: "12355" + SYNC_BATCH_NORM: True +EPOCHS: null +NUM_STEPS: 1200100 +LOG_DIR: "./logs" +CKPT_DIR: "./ckpts" +LOG_ITERATIONS_INTERVAL: 100 +CKPT_INTERVAL: 100000 +VALIDATE_INTERVAL: 10000 +VALIDATE_ON: metric \ No newline at end of file diff --git a/configs/trainers/pwcnet/pwcnet_chairs_baseline.yaml b/configs/trainers/pwcnet/pwcnet_chairs_baseline.yaml new file mode 100644 index 00000000..1333a6d2 --- /dev/null +++ b/configs/trainers/pwcnet/pwcnet_chairs_baseline.yaml @@ -0,0 +1,33 @@ +_BASE_: 
"../_base_/chairs_baseline.yaml" +TARGET_SCALE_FACTOR: 20.0 +DATA: + BATCH_SIZE: 8 + TRAIN_CROP_SIZE: [384, 448] + VAL_CROP_SIZE: [384, 448] + APPEND_VALID_MASK: False + NORM_PARAMS: {"use": True, "mean":[0.0, 0.0, 0.0], "std":[255.0, 255.0, 255.0]} +SCHEDULER: + USE: True + NAME: OneCycleLR + PARAMS: + max_lr: 0.0004 + total_steps: 1200100 + pct_start: 0.05 + cycle_momentum: False + anneal_strategy: linear +CRITERION: + CUSTOM: True + NAME: MultiScaleLoss + PARAMS: + norm: "l2" + weights: [0.32, 0.08, 0.02, 0.01, 0.005] + average: "sum" + resize_flow: "downsample" +EPOCHS: null +NUM_STEPS: 1200100 +LOG_DIR: "./logs" +CKPT_DIR: "./ckpts" +LOG_ITERATIONS_INTERVAL: 100 +CKPT_INTERVAL: 100000 +VALIDATE_INTERVAL: 10000 +VALIDATE_ON: metric \ No newline at end of file diff --git a/configs/trainers/pwcnet/pwcnet_kubric_improved_aug.yaml b/configs/trainers/pwcnet/pwcnet_kubric_improved_aug.yaml new file mode 100644 index 00000000..f713504c --- /dev/null +++ b/configs/trainers/pwcnet/pwcnet_kubric_improved_aug.yaml @@ -0,0 +1,33 @@ +_BASE_: "../_base_/kubric_improved_aug.yaml" +TARGET_SCALE_FACTOR: 20.0 +DATA: + BATCH_SIZE: 8 + TRAIN_CROP_SIZE: [384, 448] + VAL_CROP_SIZE: [384, 448] + APPEND_VALID_MASK: False + NORM_PARAMS: {"use": True, "mean":[0.0, 0.0, 0.0], "std":[255.0, 255.0, 255.0]} +SCHEDULER: + USE: True + NAME: OneCycleLR + PARAMS: + max_lr: 0.0004 + total_steps: 1200100 + pct_start: 0.05 + cycle_momentum: False + anneal_strategy: linear +CRITERION: + CUSTOM: True + NAME: MultiScaleLoss + PARAMS: + norm: "l2" + weights: [0.32, 0.08, 0.02, 0.01, 0.005] + average: "sum" + resize_flow: "downsample" +EPOCHS: null +NUM_STEPS: 1200100 +LOG_DIR: "./logs" +CKPT_DIR: "./ckpts" +LOG_ITERATIONS_INTERVAL: 100 +CKPT_INTERVAL: 100000 +VALIDATE_INTERVAL: 10000 +VALIDATE_ON: metric \ No newline at end of file diff --git a/configs/trainers/pwcnet/pwcnet_things_baseline.yaml b/configs/trainers/pwcnet/pwcnet_things_baseline.yaml new file mode 100644 index 00000000..6a480b9e --- /dev/null +++ b/configs/trainers/pwcnet/pwcnet_things_baseline.yaml @@ -0,0 +1,35 @@ +_BASE_: "../_base_/things_baseline.yaml" +TARGET_SCALE_FACTOR: 20.0 +DATA: + BATCH_SIZE: 4 + TRAIN_CROP_SIZE: [384, 768] + VAL_CROP_SIZE: [384, 768] + APPEND_VALID_MASK: False + NORM_PARAMS: {"use": True, "mean":[0.0, 0.0, 0.0], "std":[255.0, 255.0, 255.0]} +SCHEDULER: + USE: True + NAME: OneCycleLR + PARAMS: + max_lr: 0.000125 + total_steps: 1200100 + pct_start: 0.05 + cycle_momentum: False + anneal_strategy: linear +CRITERION: + CUSTOM: True + NAME: MultiScaleLoss + PARAMS: + norm: "l2" + weights: [0.32, 0.08, 0.02, 0.01, 0.005] + average: "sum" + resize_flow: "downsample" + use_valid_range: True + valid_range: [[1000,1000],[1000,1000],[1000,1000],[1000,1000],[1000,1000]] +EPOCHS: null +NUM_STEPS: 1200100 +LOG_DIR: "./logs" +CKPT_DIR: "./ckpts" +LOG_ITERATIONS_INTERVAL: 100 +CKPT_INTERVAL: 100000 +VALIDATE_INTERVAL: 10000 +VALIDATE_ON: metric \ No newline at end of file diff --git a/configs/trainers/raft/raft_chairs_baseline.yaml b/configs/trainers/raft/raft_chairs_baseline.yaml new file mode 100644 index 00000000..7f1a192a --- /dev/null +++ b/configs/trainers/raft/raft_chairs_baseline.yaml @@ -0,0 +1,30 @@ +_BASE_: "../_base_/chairs_baseline.yaml" +DATA: + BATCH_SIZE: 10 + TRAIN_CROP_SIZE: [368, 496] + VAL_CROP_SIZE: [368, 496] + APPEND_VALID_MASK: True + NORM_PARAMS: {"use": True, "mean":[127.5, 127.5, 127.5], "std":[127.5, 127.5, 127.5]} +SCHEDULER: + USE: True + NAME: OneCycleLR + PARAMS: + max_lr: 0.0004 + total_steps: 100100 + pct_start: 
0.05 + cycle_momentum: False + anneal_strategy: linear +CRITERION: + CUSTOM: True + NAME: SequenceLoss + PARAMS: + gamma: 0.8 + max_flow: 400.0 +EPOCHS: null +NUM_STEPS: 100100 +LOG_DIR: "./logs" +CKPT_DIR: "./ckpts" +LOG_ITERATIONS_INTERVAL: 100 +CKPT_INTERVAL: 20000 +VALIDATE_INTERVAL: 1000 +VALIDATE_ON: metric \ No newline at end of file diff --git a/configs/trainers/raft/raft_kubric_improved_aug.yaml b/configs/trainers/raft/raft_kubric_improved_aug.yaml new file mode 100644 index 00000000..7fed4e24 --- /dev/null +++ b/configs/trainers/raft/raft_kubric_improved_aug.yaml @@ -0,0 +1,30 @@ +_BASE_: "../_base_/kubric_improved_aug.yaml" +DATA: + BATCH_SIZE: 10 + TRAIN_CROP_SIZE: [368, 496] + VAL_CROP_SIZE: [368, 496] + APPEND_VALID_MASK: True + NORM_PARAMS: {"use": True, "mean":[127.5, 127.5, 127.5], "std":[127.5, 127.5, 127.5]} +SCHEDULER: + USE: True + NAME: OneCycleLR + PARAMS: + max_lr: 0.0004 + total_steps: 100100 + pct_start: 0.05 + cycle_momentum: False + anneal_strategy: linear +CRITERION: + CUSTOM: True + NAME: SequenceLoss + PARAMS: + gamma: 0.8 + max_flow: 400.0 +EPOCHS: null +NUM_STEPS: 100100 +LOG_DIR: "./logs" +CKPT_DIR: "./ckpts" +LOG_ITERATIONS_INTERVAL: 100 +CKPT_INTERVAL: 20000 +VALIDATE_INTERVAL: 1000 +VALIDATE_ON: metric \ No newline at end of file diff --git a/configs/trainers/raft/raft_things_baseline.yaml b/configs/trainers/raft/raft_things_baseline.yaml new file mode 100644 index 00000000..b0e77f9a --- /dev/null +++ b/configs/trainers/raft/raft_things_baseline.yaml @@ -0,0 +1,31 @@ +_BASE_: "../_base_/things_baseline.yaml" +FREEZE_BATCH_NORM: True +DATA: + BATCH_SIZE: 6 + TRAIN_CROP_SIZE: [400, 720] + VAL_CROP_SIZE: [400, 720] + APPEND_VALID_MASK: True + NORM_PARAMS: {"use": True, "mean":[127.5, 127.5, 127.5], "std":[127.5, 127.5, 127.5]} +SCHEDULER: + USE: True + NAME: OneCycleLR + PARAMS: + max_lr: 0.000125 + total_steps: 100100 + pct_start: 0.05 + cycle_momentum: False + anneal_strategy: linear +CRITERION: + CUSTOM: True + NAME: SequenceLoss + PARAMS: + gamma: 0.8 + max_flow: 400.0 +EPOCHS: null +NUM_STEPS: 100100 +LOG_DIR: "./logs" +CKPT_DIR: "./ckpts" +LOG_ITERATIONS_INTERVAL: 100 +CKPT_INTERVAL: 20000 +VALIDATE_INTERVAL: 1000 +VALIDATE_ON: metric \ No newline at end of file diff --git a/configs/trainers/raft_default.yaml b/configs/trainers/raft_default.yaml deleted file mode 100644 index e41951f4..00000000 --- a/configs/trainers/raft_default.yaml +++ /dev/null @@ -1,10 +0,0 @@ -_BASE_: "./base.yaml" -CRITERION: - NAME: SequenceLoss - PARAMS: null -OPTIMIZER: - NAME: AdamW - LR: 0.0004 - PARAMS: - weight_decay: 0.0001 - eps: 1.e-08 \ No newline at end of file diff --git a/ezflow/data/dataloader/dataloader_creator.py b/ezflow/data/dataloader/dataloader_creator.py index 14162567..3234b317 100644 --- a/ezflow/data/dataloader/dataloader_creator.py +++ b/ezflow/data/dataloader/dataloader_creator.py @@ -359,6 +359,33 @@ def add_AutoFlow(self, root_dir, augment=False, **kwargs): ) ) + def add_Kubric(self, root_dir, split="training", augment=False, **kwargs): + """ + Adds the Kubric dataset to the DataloaderCreator object. 
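(Editor's note, not part of the patch: a minimal usage sketch of this method. The `add_Kubric` arguments match the signature in this diff; the import path and the `DataloaderCreator` constructor arguments are assumptions based on the module layout and the config keys above.)

```python
from ezflow.data import DataloaderCreator

# Build a training dataloader that includes the Kubric dataset.
loader_creator = DataloaderCreator(batch_size=8, num_workers=4, pin_memory=True)
loader_creator.add_Kubric(
    root_dir="./Datasets/KubricFlow",  # path used by the Kubric baseline configs
    split="training",
    augment=False,
)
train_loader = loader_creator.get_dataloader()
```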
+ + Parameters + ---------- + root_dir : str + path of the root directory for the Monkaa dataset in SceneFlow + augment : bool, default : True + If True, applies data augmentation + **kwargs + Arbitrary keyword arguments for augmentation + specifying crop_size and the probability of + color, eraser and spatial transformation + """ + self.dataset_list.append( + Kubric( + root_dir, + split=split, + init_seed=self.init_seed, + is_prediction=self.is_prediction, + append_valid_mask=self.append_valid_mask, + augment=augment, + **kwargs, + ) + ) + def get_dataloader(self, rank=0): """ Gets the Dataloader for the added datasets. diff --git a/ezflow/data/dataset/__init__.py b/ezflow/data/dataset/__init__.py index 98b0311d..95c809d8 100644 --- a/ezflow/data/dataset/__init__.py +++ b/ezflow/data/dataset/__init__.py @@ -5,5 +5,6 @@ from .flying_things3d import FlyingThings3D, FlyingThings3DSubset from .hd1k import HD1K from .kitti import Kitti +from .kubric import Kubric from .monkaa import Monkaa from .mpi_sintel import MPISintel diff --git a/ezflow/data/dataset/autoflow.py b/ezflow/data/dataset/autoflow.py index 306d4b85..003c912f 100644 --- a/ezflow/data/dataset/autoflow.py +++ b/ezflow/data/dataset/autoflow.py @@ -29,6 +29,8 @@ class AutoFlow(BaseDataset): If True, applies data augmentation aug_params : :obj:`dict`, optional The parameters for data augmentation + norm_params : :obj:`dict`, optional + The parameters for normalization """ def __init__( @@ -42,11 +44,12 @@ def __init__( crop_type="center", augment=True, aug_params={ - "color_aug_params": {"aug_prob": 0.2}, - "eraser_aug_params": {"aug_prob": 0.5}, - "spatial_aug_params": {"aug_prob": 0.8}, - "translate_params": {"aug_prob": 0.8}, - "rotate_params": {"aug_prob": 0.8}, + "eraser_aug_params": {"enabled": False}, + "noise_aug_params": {"enabled": False}, + "flip_aug_params": {"enabled": False}, + "color_aug_params": {"enabled": False}, + "spatial_aug_params": {"enabled": False}, + "advanced_spatial_aug_params": {"enabled": False}, }, norm_params={"use": False}, ): diff --git a/ezflow/data/dataset/base_dataset.py b/ezflow/data/dataset/base_dataset.py index 2b19997a..4782634b 100644 --- a/ezflow/data/dataset/base_dataset.py +++ b/ezflow/data/dataset/base_dataset.py @@ -31,7 +31,8 @@ class BaseDataset(data.Dataset): If True, applies data augmentation aug_params : :obj:`dict` The parameters for data augmentation - + norm_params : :obj:`dict`, optional + The parameters for normalization """ def __init__( @@ -44,11 +45,12 @@ def __init__( crop_type="center", augment=True, aug_params={ - "color_aug_params": {"aug_prob": 0.2}, - "eraser_aug_params": {"aug_prob": 0.5}, - "spatial_aug_params": {"aug_prob": 0.8}, - "translate_params": {"aug_prob": 0.8}, - "rotate_params": {"aug_prob": 0.8}, + "eraser_aug_params": {"enabled": False}, + "noise_aug_params": {"enabled": False}, + "flip_aug_params": {"enabled": False}, + "color_aug_params": {"enabled": False}, + "spatial_aug_params": {"enabled": False}, + "advanced_spatial_aug_params": {"enabled": False}, }, sparse_transform=False, norm_params={"use": False}, @@ -100,9 +102,7 @@ def __getitem__(self, index): img1 = read_image(self.image_list[index][0]) img2 = read_image(self.image_list[index][1]) - flow, valid = read_flow(self.flow_list[index]) - flow = np.array(flow).astype(np.float32) img1 = np.array(img1).astype(np.uint8) img2 = np.array(img2).astype(np.uint8) @@ -122,6 +122,9 @@ def __getitem__(self, index): return img1, img2 + flow, valid = read_flow(self.flow_list[index]) + flow = 
np.array(flow).astype(np.float32) + if self.augment is True and self.augmentor is not None: img1, img2, flow, valid = self.augmentor(img1, img2, flow, valid) diff --git a/ezflow/data/dataset/driving.py b/ezflow/data/dataset/driving.py index 4899e345..3f47ae0f 100644 --- a/ezflow/data/dataset/driving.py +++ b/ezflow/data/dataset/driving.py @@ -29,6 +29,8 @@ class Driving(BaseDataset): If True, applies data augmentation aug_params : :obj:`dict`, optional The parameters for data augmentation + norm_params : :obj:`dict`, optional + The parameters for normalization """ def __init__( diff --git a/ezflow/data/dataset/flying_chairs.py b/ezflow/data/dataset/flying_chairs.py index ae350a0b..955442ad 100644 --- a/ezflow/data/dataset/flying_chairs.py +++ b/ezflow/data/dataset/flying_chairs.py @@ -33,7 +33,8 @@ class FlyingChairs(BaseDataset): If True, applies data augmentation aug_params : :obj:`dict`, optional The parameters for data augmentation - + norm_params : :obj:`dict`, optional + The parameters for normalization """ def __init__( @@ -48,11 +49,12 @@ def __init__( crop_type="center", augment=True, aug_params={ - "color_aug_params": {"aug_prob": 0.2}, - "eraser_aug_params": {"aug_prob": 0.5}, - "spatial_aug_params": {"aug_prob": 0.8}, - "translate_params": {"aug_prob": 0.8}, - "rotate_params": {"aug_prob": 0.8}, + "eraser_aug_params": {"enabled": False}, + "noise_aug_params": {"enabled": False}, + "flip_aug_params": {"enabled": False}, + "color_aug_params": {"enabled": False}, + "spatial_aug_params": {"enabled": False}, + "advanced_spatial_aug_params": {"enabled": False}, }, norm_params={"use": False}, ): diff --git a/ezflow/data/dataset/flying_things3d.py b/ezflow/data/dataset/flying_things3d.py index d77dd0b1..06eaafdd 100644 --- a/ezflow/data/dataset/flying_things3d.py +++ b/ezflow/data/dataset/flying_things3d.py @@ -33,6 +33,8 @@ class FlyingThings3D(BaseDataset): If True, applies data augmentation aug_params : :obj:`dict`, optional The parameters for data augmentation + norm_params : :obj:`dict`, optional + The parameters for normalization """ def __init__( @@ -48,11 +50,12 @@ def __init__( crop_type="center", augment=True, aug_params={ - "color_aug_params": {"aug_prob": 0.2}, - "eraser_aug_params": {"aug_prob": 0.5}, - "spatial_aug_params": {"aug_prob": 0.8}, - "translate_params": {"aug_prob": 0.8}, - "rotate_params": {"aug_prob": 0.8}, + "eraser_aug_params": {"enabled": False}, + "noise_aug_params": {"enabled": False}, + "flip_aug_params": {"enabled": False}, + "color_aug_params": {"enabled": False}, + "spatial_aug_params": {"enabled": False}, + "advanced_spatial_aug_params": {"enabled": False}, }, norm_params={"use": False}, ): diff --git a/ezflow/data/dataset/hd1k.py b/ezflow/data/dataset/hd1k.py index 4c48462e..5638bb1c 100644 --- a/ezflow/data/dataset/hd1k.py +++ b/ezflow/data/dataset/hd1k.py @@ -29,6 +29,8 @@ class HD1K(BaseDataset): If True, applies data augmentation aug_params : :obj:`dict`, optional The parameters for data augmentation + norm_params : :obj:`dict`, optional + The parameters for normalization """ def __init__( @@ -42,11 +44,12 @@ def __init__( crop_type="center", augment=True, aug_params={ - "color_aug_params": {"aug_prob": 0.2}, - "eraser_aug_params": {"aug_prob": 0.5}, - "spatial_aug_params": {"aug_prob": 0.8}, - "translate_params": {"aug_prob": 0.8}, - "rotate_params": {"aug_prob": 0.8}, + "eraser_aug_params": {"enabled": False}, + "noise_aug_params": {"enabled": False}, + "flip_aug_params": {"enabled": False}, + "color_aug_params": {"enabled": False}, + 
"spatial_aug_params": {"enabled": False}, + "advanced_spatial_aug_params": {"enabled": False}, }, norm_params={"use": False}, ): diff --git a/ezflow/data/dataset/kitti.py b/ezflow/data/dataset/kitti.py index 060f95e3..42afdbba 100644 --- a/ezflow/data/dataset/kitti.py +++ b/ezflow/data/dataset/kitti.py @@ -31,7 +31,8 @@ class Kitti(BaseDataset): If True, applies data augmentation aug_params : :obj:`dict`, optional The parameters for data augmentation - + norm_params : :obj:`dict`, optional + The parameters for normalization """ def __init__( @@ -46,11 +47,12 @@ def __init__( crop_type="center", augment=True, aug_params={ - "color_aug_params": {"aug_prob": 0.2}, - "eraser_aug_params": {"aug_prob": 0.5}, - "spatial_aug_params": {"aug_prob": 0.8}, - "translate_params": {"aug_prob": 0.8}, - "rotate_params": {"aug_prob": 0.8}, + "eraser_aug_params": {"enabled": False}, + "noise_aug_params": {"enabled": False}, + "flip_aug_params": {"enabled": False}, + "color_aug_params": {"enabled": False}, + "spatial_aug_params": {"enabled": False}, + "advanced_spatial_aug_params": {"enabled": False}, }, norm_params={"use": False}, ): diff --git a/ezflow/data/dataset/kubric.py b/ezflow/data/dataset/kubric.py new file mode 100644 index 00000000..f77ecfde --- /dev/null +++ b/ezflow/data/dataset/kubric.py @@ -0,0 +1,224 @@ +import os +import os.path as osp +import random +from glob import glob + +import numpy as np +import torch +import torch.utils.data as data + +from ...functional import FlowAugmentor, Normalize, crop +from ...utils import read_flow, read_image +from .base_dataset import BaseDataset + + +class Kubric(BaseDataset): + """ + Dataset Class for preparing the Kubric 'movi-f' split of + optical flow synthetic dataset for training and validation. + https://arxiv.org/abs/2203.03570 + https://github.com/google-research/kubric/tree/main/challenges/optical_flow + + + Note that in order to use this dataset class the Kubric Dataset + must be in the Sintel directory structure. Please follow the script + provided in the repository mentioned below to convert .tfrecords to + images and flow fields arranged in the Sintel Directory structure. + https://github.com/prajnan93/kubric-flow + + The tfrecords conversion is not provided with the ezflow package + as it requires tensorflow installation. + + + Parameters + ---------- + root_dir : str + path of the root directory for the MPI Sintel datasets + split : str, default : "training" + specify the training or validation split + swap_column_to_row : bool, default : True + If True, swaps column major to row major of the flow map. + The optical flow fields were rendered in column major in the earlier versions. + Set this parameter to False if newer versions are available in row major. 
+ More info in GitHub issue:https://github.com/google-research/kubric/issues/152 + use_backward_flow : bool, default : False + returns backward optical flow field + is_prediction : bool, default : False + If True, only image data are loaded for prediction otherwise both images and flow data are loaded + init_seed : bool, default : False + If True, sets random seed to worker + append_valid_mask : bool, default : False + If True, appends the valid flow mask to the original flow mask at dim=0 + crop: bool, default : True + Whether to perform cropping + crop_size : :obj:`tuple` of :obj:`int` + The size of the image crop + crop_type : :obj:`str`, default : 'center' + The type of croppping to be performed, one of "center", "random" + augment : bool, default : True + If True, applies data augmentation + aug_params : :obj:`dict`, optional + The parameters for data augmentation + norm_params : :obj:`dict`, optional + The parameters for normalization + """ + + def __init__( + self, + root_dir, + split="training", + swap_column_to_row=True, + use_backward_flow=False, + is_prediction=False, + init_seed=False, + append_valid_mask=False, + crop=False, + crop_size=(256, 256), + crop_type="center", + augment=True, + aug_params={ + "eraser_aug_params": {"enabled": False}, + "noise_aug_params": {"enabled": False}, + "flip_aug_params": {"enabled": False}, + "color_aug_params": {"enabled": False}, + "spatial_aug_params": {"enabled": False}, + "advanced_spatial_aug_params": {"enabled": False}, + }, + norm_params={"use": False}, + ): + super(Kubric, self).__init__( + init_seed=init_seed, + is_prediction=is_prediction, + append_valid_mask=append_valid_mask, + crop=crop, + crop_size=crop_size, + crop_type=crop_type, + augment=augment, + aug_params=aug_params, + sparse_transform=False, + norm_params=norm_params, + ) + + assert ( + split.lower() == "training" or split.lower() == "validation" + ), "Incorrect split values. Accepted split values: training, validation" + + self.is_prediction = is_prediction + self.append_valid_mask = append_valid_mask + self.swap = swap_column_to_row + + if augment: + self.augmentor = FlowAugmentor(crop_size=crop_size, **aug_params) + + split = split.lower() + + image_root = osp.join(root_dir, split, "images") + + if use_backward_flow: + flow_root = osp.join(root_dir, split, "backward_flow") + else: + flow_root = osp.join(root_dir, split, "forward_flow") + + for scene in os.listdir(image_root): + image_list = sorted(glob(osp.join(image_root, scene, "*.png"))) + for i in range(len(image_list) - 1): + self.image_list += [[image_list[i], image_list[i + 1]]] + + if not self.is_prediction: + self.flow_list += sorted(glob(osp.join(flow_root, scene, "*.flo"))) + + def __getitem__(self, index): + """ + Returns the corresponding images and the flow between them. + + Parameters + ---------- + index : int + specify the index location for access to Dataset item + + Returns + ------- + tuple + A tuple consisting of ((img1, img2), flow) + + img1 and img2 of shape 3 x H x W. + flow of shape 2 x H x W if append_valid_mask is False. + flow of shape 3 x H x W if append_valid_mask is True. 
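(Editor's note, not part of the patch: a minimal sketch that makes the shapes above concrete. The import is grounded in the `__init__.py` change in this diff; the root directory is the one used by the Kubric trainer configs, and the data is assumed to already be in the Sintel-style layout described above.)

```python
from ezflow.data.dataset import Kubric

# Center-cropped, non-augmented samples with the valid mask appended to the flow.
dataset = Kubric(
    root_dir="./Datasets/KubricFlow",
    split="training",
    crop=True,
    crop_size=(256, 256),
    augment=False,
    append_valid_mask=True,
)

(img1, img2), flow = dataset[0]
print(img1.shape, img2.shape)  # torch.Size([3, 256, 256]) each
print(flow.shape)              # torch.Size([3, 256, 256]); third channel is the valid mask
```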
+ """ + + if not self.init_seed: + worker_info = torch.utils.data.get_worker_info() + if worker_info is not None: + torch.manual_seed(worker_info.id) + np.random.seed(worker_info.id) + random.seed(worker_info.id) + self.init_seed = True + + index = index % len(self.image_list) + + img1 = read_image(self.image_list[index][0]) + img2 = read_image(self.image_list[index][1]) + flow, valid = read_flow(self.flow_list[index]) + + flow = np.array(flow).astype(np.float32) + img1 = np.array(img1).astype(np.uint8) + img2 = np.array(img2).astype(np.uint8) + + if self.swap: + flow_temp = np.zeros_like(flow) + + # Swap column major to row_major + + flow_temp[..., 0] = flow[..., 1] + flow_temp[..., 1] = flow[..., 0] + + del flow + flow = flow_temp + + if len(img1.shape) == 2: # grayscale images + img1 = np.tile(img1[..., None], (1, 1, 3)) + img2 = np.tile(img2[..., None], (1, 1, 3)) + else: + img1 = img1[..., :3] + img2 = img2[..., :3] + + if self.is_prediction: + + img1 = torch.from_numpy(img1).permute(2, 0, 1).float() + img2 = torch.from_numpy(img2).permute(2, 0, 1).float() + + img1, img2 = self.normalize(img1, img2) + + return img1, img2 + + if self.augment is True and self.augmentor is not None: + img1, img2, flow, valid = self.augmentor(img1, img2, flow, valid) + + if self.crop is True: + img1, img2, flow, valid = crop( + img1, + img2, + flow, + valid=valid, + crop_size=self.crop_size, + crop_type=self.crop_type, + sparse_transform=self.sparse_transform, + ) + + img1 = torch.from_numpy(img1).permute(2, 0, 1).float() + img2 = torch.from_numpy(img2).permute(2, 0, 1).float() + flow = torch.from_numpy(flow).permute(2, 0, 1).float() + + img1, img2 = self.normalize(img1, img2) + + if self.append_valid_mask: + if valid is not None: + valid = torch.from_numpy(valid) + else: + valid = (flow[0].abs() < 1000) & (flow[1].abs() < 1000) + + valid = valid.float() + valid = torch.unsqueeze(valid, dim=0) + flow = torch.cat([flow, valid], dim=0) + + return (img1, img2), flow diff --git a/ezflow/data/dataset/monkaa.py b/ezflow/data/dataset/monkaa.py index 5b93e2c9..c3cda9c5 100644 --- a/ezflow/data/dataset/monkaa.py +++ b/ezflow/data/dataset/monkaa.py @@ -29,6 +29,8 @@ class Monkaa(BaseDataset): If True, applies data augmentation aug_params : :obj:`dict`, optional The parameters for data augmentation + norm_params : :obj:`dict`, optional + The parameters for normalization """ def __init__( @@ -42,11 +44,12 @@ def __init__( crop_type="center", augment=True, aug_params={ - "color_aug_params": {"aug_prob": 0.2}, - "eraser_aug_params": {"aug_prob": 0.5}, - "spatial_aug_params": {"aug_prob": 0.8}, - "translate_params": {"aug_prob": 0.8}, - "rotate_params": {"aug_prob": 0.8}, + "eraser_aug_params": {"enabled": False}, + "noise_aug_params": {"enabled": False}, + "flip_aug_params": {"enabled": False}, + "color_aug_params": {"enabled": False}, + "spatial_aug_params": {"enabled": False}, + "advanced_spatial_aug_params": {"enabled": False}, }, norm_params={"use": False}, ): diff --git a/ezflow/data/dataset/mpi_sintel.py b/ezflow/data/dataset/mpi_sintel.py index dce09c8b..020857d4 100644 --- a/ezflow/data/dataset/mpi_sintel.py +++ b/ezflow/data/dataset/mpi_sintel.py @@ -34,6 +34,8 @@ class MPISintel(BaseDataset): If True, applies data augmentation aug_params : :obj:`dict`, optional The parameters for data augmentation + norm_params : :obj:`dict`, optional + The parameters for normalization """ def __init__( @@ -49,11 +51,12 @@ def __init__( crop_type="center", augment=True, aug_params={ - "color_aug_params": {"aug_prob": 0.2}, - 
"eraser_aug_params": {"aug_prob": 0.5}, - "spatial_aug_params": {"aug_prob": 0.8}, - "translate_params": {"aug_prob": 0.8}, - "rotate_params": {"aug_prob": 0.8}, + "eraser_aug_params": {"enabled": False}, + "noise_aug_params": {"enabled": False}, + "flip_aug_params": {"enabled": False}, + "color_aug_params": {"enabled": False}, + "spatial_aug_params": {"enabled": False}, + "advanced_spatial_aug_params": {"enabled": False}, }, norm_params={"use": False}, ): @@ -83,6 +86,7 @@ def __init__( split = split.lower() if split == "validation": split = "test" + self.is_prediction = True image_root = osp.join(root_dir, split, dstype) flow_root = osp.join(root_dir, split, "flow") diff --git a/ezflow/decoder/__init__.py b/ezflow/decoder/__init__.py index 8a018058..a6b006cf 100644 --- a/ezflow/decoder/__init__.py +++ b/ezflow/decoder/__init__.py @@ -1,5 +1,7 @@ from .build import DECODER_REGISTRY, build_decoder +from .context import ContextNetwork from .conv_decoder import ConvDecoder, FlowNetConvDecoder from .iterative import * from .noniterative import * +from .pyramid import PyramidDecoder from .separable_conv import Butterfly4D, SeparableConv4D diff --git a/ezflow/decoder/context.py b/ezflow/decoder/context.py new file mode 100644 index 00000000..67fc6ad5 --- /dev/null +++ b/ezflow/decoder/context.py @@ -0,0 +1,63 @@ +import torch +import torch.nn as nn + +from ..config import configurable +from ..modules import conv +from .build import DECODER_REGISTRY + + +@DECODER_REGISTRY.register() +class ContextNetwork(nn.Module): + """ + PWCNet Context Network decoder + + Parameters + ---------- + in_channels: int, default: 565 + Number of input channels + config : List[int], default : [128, 128, 96, 64, 32] + List containing all output channels of the decoder. + """ + + @configurable + def __init__(self, in_channels=565, config=[128, 128, 96, 64, 32]): + super(ContextNetwork, self).__init__() + + self.context_net = nn.ModuleList( + [ + conv( + in_channels, + config[0], + kernel_size=3, + stride=1, + padding=1, + dilation=1, + ), + ] + ) + self.context_net.append( + conv(config[0], config[0], kernel_size=3, stride=1, padding=2, dilation=2) + ) + self.context_net.append( + conv(config[0], config[1], kernel_size=3, stride=1, padding=4, dilation=4) + ) + self.context_net.append( + conv(config[1], config[2], kernel_size=3, stride=1, padding=8, dilation=8) + ) + self.context_net.append( + conv(config[2], config[3], kernel_size=3, stride=1, padding=16, dilation=16) + ) + self.context_net.append( + conv(config[3], config[4], kernel_size=3, stride=1, padding=1, dilation=1) + ) + self.context_net.append( + nn.Conv2d(config[4], 2, kernel_size=3, stride=1, padding=1, bias=True) + ) + self.context_net = nn.Sequential(*self.context_net) + + @classmethod + def from_config(self, cfg): + return {"in_channels": cfg.IN_CHANNELS, "config": cfg.CONFIG} + + def forward(self, x): + return self.context_net(x) diff --git a/ezflow/decoder/pyramid.py b/ezflow/decoder/pyramid.py new file mode 100644 index 00000000..679e1c96 --- /dev/null +++ b/ezflow/decoder/pyramid.py @@ -0,0 +1,154 @@ +import torch +import torch.nn as nn + +from ..config import configurable +from ..modules import deconv +from ..similarity import IterSpatialCorrelationSampler as SpatialCorrelationSampler +from ..utils import warp +from .build import DECODER_REGISTRY +from .conv_decoder import ConvDecoder + + +@DECODER_REGISTRY.register() +class PyramidDecoder(nn.Module): + """ + Applies a 2D Convolutional decoder to regress the optical flow + from the intermediate 
outputs convolutions of the encoder. + Used in **PWCNet** (https://arxiv.org/abs/1709.02371) + + Parameters + ---------- + config : List[int], default : [128, 128, 96, 64, 32] + List containing all output channels of the decoder. + to_flow : bool, default : True + If True, regresses the flow of shape N x 2 x H x W. + max_displacement: int, default: 4 + Maximum displacement for cost volume computation. + pad_size: int, default: 0 + Pad size for cost volume computation. + flow_scale_factor: float, default: 20.0 + Scale factor for upscaling flow predictions. + """ + + @configurable + def __init__( + self, + config=[128, 128, 96, 64, 32], + to_flow=True, + max_displacement=4, + pad_size=0, + flow_scale_factor=20.0, + ): + super(PyramidDecoder, self).__init__() + self.config = config + self.flow_scale_factor = flow_scale_factor + + self.correlation_layer = SpatialCorrelationSampler( + kernel_size=1, patch_size=2 * max_displacement + 1, padding=pad_size + ) + self.leaky_relu = nn.LeakyReLU(negative_slope=0.1, inplace=False) + + search_range = (2 * max_displacement + 1) ** 2 + + self.decoder_layers = nn.ModuleList() + + self.up_feature_layers = nn.ModuleList() + self.deconv_layers = nn.ModuleList() + + for i in range(len(config)): + + if i == 0: + concat_channels = search_range + else: + concat_channels = search_range + config[i] + max_displacement + + self.decoder_layers.append( + ConvDecoder( + config=config, + to_flow=to_flow, + concat_channels=concat_channels, + ) + ) + + if i < len(config) - 1: + self.deconv_layers.append( + deconv(2, 2, kernel_size=4, stride=2, padding=1) + ) + + self.up_feature_layers.append( + deconv( + concat_channels + sum(config), + 2, + kernel_size=4, + stride=2, + padding=1, + ) + ) + + @classmethod + def from_config(self, cfg): + return { + "config": cfg.CONFIG, + "to_flow": cfg.TO_FLOW, + "max_displacement": cfg.SIMILARITY.MAX_DISPLACEMENT, + "pad_size": cfg.SIMILARITY.PAD_SIZE, + "flow_scale_factor": cfg.FLOW_SCALE_FACTOR, + } + + def _corr_relu(self, features1, features2): + + corr = self.correlation_layer(features1, features2) + corr = corr.view(corr.shape[0], -1, corr.shape[3], corr.shape[4]) + return self.leaky_relu(corr) + + def forward(self, feature_pyramid1, feature_pyramid2): + """ + Performs forward pass. 
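(Editor's note, not part of the patch: a worked illustration of how the forward pass below scales the upsampled flow before warping. With the default five-level `config` and `flow_scale_factor` of 20.0, the warp scale starts at 20 * 2^-5 = 0.625 and doubles at every finer level.)

```python
# Mirrors: up_flow_scale = flow_scale_factor * 2 ** (-len(config)), then *= 2 per level.
flow_scale_factor = 20.0
config = [128, 128, 96, 64, 32]  # default PyramidDecoder config

scale = flow_scale_factor * 2 ** (-len(config))
for level in range(1, len(config)):  # level 0 computes correlation without warping
    print(f"level {level}: warp scale = {scale}")
    scale *= 2
# level 1: warp scale = 0.625
# level 2: warp scale = 1.25
# level 3: warp scale = 2.5
# level 4: warp scale = 5.0
```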
+ + Parameters + ---------- + feature_pyramid1 : torch.Tensor + Input feature map of image 1 + + feature_pyramid2 : torch.Tensor + Input feature map of image 2 + + Returns + ------- + List[torch.Tensor] + A List containing tensors of shape N x 2 x H x W representing the flow + + List[torch.Tensor] + A List containing tensors of shape N x output_channel x H x W + """ + + up_flow, up_features = None, None + up_flow_scale = self.flow_scale_factor * 2 ** (-(len(self.config))) + + flow_preds = [] + + for i in range(len(self.decoder_layers)): + + if i == 0: + corr = self._corr_relu(feature_pyramid1[i], feature_pyramid2[i]) + concatenated_features = corr + + else: + + warped_features = warp(feature_pyramid2[i], up_flow * up_flow_scale) + up_flow_scale *= 2 + + corr = self._corr_relu(feature_pyramid1[i], warped_features) + + concatenated_features = torch.cat( + [corr, feature_pyramid1[i], up_flow, up_features], dim=1 + ) + + flow, features = self.decoder_layers[i](concatenated_features) + flow_preds.append(flow) + + if i < len(self.decoder_layers) - 1: + up_flow = self.deconv_layers[i](flow) + up_features = self.up_feature_layers[i](features) + + return flow_preds, features diff --git a/ezflow/encoder/pyramid.py b/ezflow/encoder/pyramid.py index 19952b38..e921f0b6 100644 --- a/ezflow/encoder/pyramid.py +++ b/ezflow/encoder/pyramid.py @@ -45,6 +45,19 @@ def from_config(self, cfg): } def forward(self, img): + """ + Performs forward pass. + + Parameters + ---------- + img : torch.Tensor + Input tensor + + Returns + ------- + List[torch.Tensor], + List of all the output convolutions from each encoder layer + """ feature_pyramid = [] x = img @@ -54,6 +67,4 @@ def forward(self, img): x = self.encoder[i](x) feature_pyramid.append(x) - feature_pyramid.reverse() - return feature_pyramid diff --git a/ezflow/engine/eval.py b/ezflow/engine/eval.py index 718aadff..d0e00e70 100644 --- a/ezflow/engine/eval.py +++ b/ezflow/engine/eval.py @@ -92,7 +92,7 @@ def run_inference(model, dataloader, device, metric_fn, flow_scale=1.0, pad_divi start_time = time.time() - pred = model(img1, img2) + output = model(img1, img2) if torch.cuda.is_available(): torch.cuda.synchronize() @@ -100,10 +100,10 @@ def run_inference(model, dataloader, device, metric_fn, flow_scale=1.0, pad_divi end_time = time.time() times.append(end_time - start_time) - pred = padder.unpad(pred) - flow = pred * flow_scale + pred = padder.unpad(output["flow_upsampled"]) + pred = pred * flow_scale - metric = metric_fn(flow, target) + metric = metric_fn(pred, target) metric_meter.update(metric) avg_inference_time = sum(times) / len(times) @@ -193,7 +193,7 @@ def profile_inference( start_time = time.time() with record_function(profiler.model_name): - pred = model(img1, img2) + output = model(img1, img2) if torch.cuda.is_available(): torch.cuda.synchronize() @@ -203,10 +203,10 @@ def profile_inference( prof.step() - pred = padder.unpad(pred) - flow = pred * flow_scale + pred = padder.unpad(output["flow_upsampled"]) + pred = pred * flow_scale - metric = metric_fn(flow, target) + metric = metric_fn(pred, target) metric_meter.update(metric) print( diff --git a/ezflow/engine/trainer.py b/ezflow/engine/trainer.py index 6e55e813..3c6a9797 100644 --- a/ezflow/engine/trainer.py +++ b/ezflow/engine/trainer.py @@ -60,7 +60,7 @@ def _is_main_process(self): raise NotImplementedError def _setup_training(self, rank=0, loss_fn=None, optimizer=None, scheduler=None): - if loss_fn is None: + if loss_fn is None and self.loss_fn is None: if self.cfg.CRITERION.CUSTOM: loss = 
FUNCTIONAL_REGISTRY.get(self.cfg.CRITERION.NAME) @@ -73,7 +73,9 @@ def _setup_training(self, rank=0, loss_fn=None, optimizer=None, scheduler=None): else: loss_fn = loss() - if optimizer is None: + print(f"Loss function: {self.cfg.CRITERION.NAME} is initialized!") + + if optimizer is None and self.optimizer is None: opt = optimizers.get(self.cfg.OPTIMIZER.NAME) @@ -87,7 +89,9 @@ def _setup_training(self, rank=0, loss_fn=None, optimizer=None, scheduler=None): else: optimizer = opt(self.model.parameters(), lr=self.cfg.OPTIMIZER.LR) - if scheduler is None: + print(f"Optimizer: {self.cfg.OPTIMIZER.NAME} is initialized!") + + if scheduler is None and self.scheduler is None: if self.cfg.SCHEDULER.USE: sched = schedulers.get(self.cfg.SCHEDULER.NAME) @@ -101,9 +105,16 @@ def _setup_training(self, rank=0, loss_fn=None, optimizer=None, scheduler=None): else: scheduler = sched(optimizer) - self.loss_fn = loss_fn - self.optimizer = optimizer - self.scheduler = scheduler + print(f"Scheduler: {self.cfg.SCHEDULER.NAME} is initialized!") + + if self.loss_fn is None: + self.loss_fn = loss_fn + + if self.optimizer is None: + self.optimizer = optimizer + + if self.scheduler is None: + self.scheduler = scheduler if rank == 0: """ @@ -148,6 +159,9 @@ def _epoch_trainer(self, n_epochs=None, start_epoch=None): print(f"\nEpoch {epoch+1} of {start_epoch+n_epochs}") print("-" * 80) + if self.model_parallel: + self.train_loader.sampler.set_epoch(epoch) + loss_meter.reset() for iteration, (inp, target) in enumerate(self.train_loader): @@ -165,13 +179,21 @@ def _epoch_trainer(self, n_epochs=None, start_epoch=None): "epochs_training_loss", loss_meter.sum, epoch + 1 ) - if epoch % self.cfg.VALIDATE_INTERVAL == 0: + if epoch % self.cfg.VALIDATE_INTERVAL == 0 and self._is_main_process(): self._validate_model(iter_type="Epoch", iterations=epoch + 1) if epoch % self.cfg.CKPT_INTERVAL == 0 and self._is_main_process(): self._save_checkpoints(ckpt_type="epoch", ckpt_number=epoch + 1) - self.writer.close() + # Synchronize all processes in multi gpu after validation and checkpoint + if ( + epoch % self.cfg.VALIDATE_INTERVAL == 0 + or epoch % self.cfg.CKPT_INTERVAL == 0 + ) and self.model_parallel: + dist.barrier() + + if self._is_main_process(): + self.writer.close() def _step_trainer(self, n_steps=None, start_step=None): self.model.train() @@ -187,11 +209,15 @@ def _step_trainer(self, n_steps=None, start_step=None): if start_step is not None: print(f"Resuming training from step {start_step}\n") total_steps = start_step - n_steps += start_step + n_steps += start_step - 1 else: start_step = total_steps = 1 n_steps += start_step + if self.model_parallel: + epoch = 0 + self.train_loader.sampler.set_epoch(epoch) + train_iter = iter(self.train_loader) print(f"\nStarting step {total_steps} of {n_steps}") @@ -200,6 +226,10 @@ def _step_trainer(self, n_steps=None, start_step=None): try: inp, target = next(train_iter) except: + if self.model_parallel: + epoch += 1 + self.train_loader.sampler.set_epoch(epoch) + # Handle exception if there is no data # left in train iterator to continue training. 
train_iter = iter(self.train_loader) @@ -210,20 +240,24 @@ def _step_trainer(self, n_steps=None, start_step=None): self._log_step(step, total_steps, loss_meter) - if self._is_main_process(): - self.writer.add_scalar( - "steps_training_loss", loss_meter.sum, total_steps - ) - - if step % self.cfg.VALIDATE_INTERVAL == 0: + if step % self.cfg.VALIDATE_INTERVAL == 0 and self._is_main_process(): self._validate_model(iter_type="Iteration", iterations=total_steps) + print("-" * 80) if step % self.cfg.CKPT_INTERVAL == 0 and self._is_main_process(): self._save_checkpoints(ckpt_type="step", ckpt_number=total_steps) + # Synchronize all processes in multi gpu after validation and checkpoint + if ( + step % self.cfg.VALIDATE_INTERVAL == 0 + or step % self.cfg.CKPT_INTERVAL == 0 + ) and self.model_parallel: + dist.barrier() + total_steps += 1 - self.writer.close() + if self._is_main_process(): + self.writer.close() def _run_step(self, inp, target): img1, img2 = inp @@ -232,14 +266,17 @@ def _run_step(self, inp, target): img2.to(self.device), target.to(self.device), ) - target = target / self.cfg.TARGET_SCALE_FACTOR if self._is_main_process(): start_time = time.time() with autocast(enabled=self.cfg.MIXED_PRECISION): - pred = self.model(img1, img2) - loss = self.loss_fn(pred, target) + output = self.model(img1, img2) + loss = self.loss_fn( + output["flow_preds"], target / self.cfg.TARGET_SCALE_FACTOR + ) + + del output self.optimizer.zero_grad() self.scaler.scale(loss).backward() @@ -285,22 +322,33 @@ def _validate_model(self, iter_type, iterations): img2.to(self.device), target.to(self.device), ) - target = target / self.cfg.TARGET_SCALE_FACTOR - pred = self.model(img1, img2) - loss = self.loss_fn(pred, target) + if self.model_parallel: + output = self.model.module(img1, img2) + else: + output = self.model(img1, img2) + + loss = self.loss_fn( + output["flow_preds"], target / self.cfg.TARGET_SCALE_FACTOR + ) + loss_meter.update(loss.item()) - metric = self._calculate_metric(pred, target) + + """ + Predicted upsampled flow should be scaled for EPE calculation. 
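(Editor's note, not part of the patch: the validation metric is an end-point error, and because the loss target is divided by TARGET_SCALE_FACTOR, the upsampled prediction is multiplied back before comparison with the unscaled ground truth. A minimal sketch with a hypothetical `endpoint_error` helper, assuming a dense ground-truth flow whose first two channels are the flow components:)

```python
import torch

def endpoint_error(pred_flow: torch.Tensor, target_flow: torch.Tensor) -> float:
    """Mean end-point error between N x 2 x H x W flow fields (hypothetical helper)."""
    epe = torch.norm(pred_flow - target_flow[:, :2], p=2, dim=1)
    return epe.mean().item()

# With the FlowNetC/PWCNet configs above (TARGET_SCALE_FACTOR: 20.0) this amounts to
# endpoint_error(output["flow_upsampled"] * 20.0, target); for RAFT the factor is 1.0.
```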
+ """ + metric = self._calculate_metric( + output["flow_upsampled"] * self.cfg.TARGET_SCALE_FACTOR, target + ) metric_meter.update(metric) + del output + new_avg_val_loss, new_avg_val_metric = loss_meter.avg, metric_meter.avg print("\n", "-" * 80) - if self._is_main_process(): - self.writer.add_scalar("avg_validation_loss", new_avg_val_loss, iterations) - self.writer.add_scalar( - "avg_validation_metric", new_avg_val_metric, iterations - ) + self.writer.add_scalar("avg_validation_loss", new_avg_val_loss, iterations) + self.writer.add_scalar("avg_validation_metric", new_avg_val_metric, iterations) print( f"\n{iter_type} {iterations}: Average validation loss = {new_avg_val_loss}" @@ -384,6 +432,9 @@ def _reload_trainer_states( scheduler_ckpt=None, use_cfg=False, ): + + self._setup_device() + consolidated_ckpt = ( self.cfg.RESUME_TRAINING.CONSOLIDATED_CKPT if use_cfg is True @@ -392,7 +443,7 @@ def _reload_trainer_states( if consolidated_ckpt is not None: - ckpt = torch.load(consolidated_ckpt, map_location=torch.device("cpu")) + ckpt = torch.load(consolidated_ckpt, map_location=self.device) model_state_dict = ckpt["model_state_dict"] optimizer_state_dict = ckpt["optimizer_state_dict"] @@ -401,7 +452,10 @@ def _reload_trainer_states( scheduler_state_dict = ckpt["scheduler_state_dict"] if "epochs" in ckpt.keys(): - start_epoch = ckpt["epochs"] + 1 + start_iteration = ckpt["epochs"] + 1 + + if "step" in ckpt.keys(): + start_iteration = ckpt["step"] + 1 else: @@ -409,24 +463,26 @@ def _reload_trainer_states( model_ckpt is not None and optimizer_ckpt is not None ), "Must provide a consolidated ckpt or model and optimizer ckpts separately" - model_state_dict = torch.load(model_ckpt, map_location=torch.device("cpu")) - optimizer_state_dict = torch.load( - optimizer_ckpt, map_location=torch.device("cpu") - ) + model_state_dict = torch.load(model_ckpt, map_location=self.device) + optimizer_state_dict = torch.load(optimizer_ckpt, map_location=self.device) if scheduler_ckpt is not None: scheduler_state_dict = torch.load( - scheduler_ckpt, map_location=torch.device("cpu") + scheduler_ckpt, map_location=self.device ) + self._setup_model() self.model.load_state_dict(model_state_dict) + print("Model state loaded!!") self._setup_training() self.optimizer.load_state_dict(optimizer_state_dict) + print("Optimizer state loaded!!") if self.scheduler is not None: self.scheduler.load_state_dict(scheduler_state_dict) + print("Scheduler state loaded!!") if total_iterations is None and use_cfg: total_iterations = ( @@ -485,7 +541,17 @@ def resume_training( use_cfg=use_cfg, ) - self.train(total_iterations=total_iterations, start_iteration=start_iteration) + os.makedirs(self.cfg.CKPT_DIR, exist_ok=True) + os.makedirs(self.cfg.LOG_DIR, exist_ok=True) + + print("Training config:\n") + print(self.cfg) + print("-" * 80) + + self._trainer(total_iterations, start_iteration) + + print("Training complete!") + print(f"Total training time: {str(timedelta(seconds=sum(self.times)))}") class Trainer(BaseTrainer): @@ -615,7 +681,10 @@ def __init__(self, cfg, model, train_loader_creator, val_loader_creator): self.val_loader = None self.train_loader_creator = train_loader_creator - self.val_loader_creator = val_loader_creator + + # Validate model only on the main process. 
+ val_loader_creator.distributed = False + self.val_loader = val_loader_creator.get_dataloader() self._validate_ddp_config() @@ -664,6 +733,7 @@ def _setup_device(self, rank): self.device = torch.device(rank) self.local_rank = rank torch.cuda.empty_cache() + torch.cuda.set_device(rank) def _setup_ddp(self, rank): os.environ["MASTER_ADDR"] = self.cfg.DISTRIBUTED.MASTER_ADDR @@ -679,19 +749,22 @@ def _setup_ddp(self, rank): ) print(f"{rank + 1}/{self.cfg.DISTRIBUTED.WORLD_SIZE} process initialized.") + # synchronizes all the threads to reach this point before moving on + dist.barrier() + def _is_main_process(self): return self.local_rank == 0 def _setup_model(self, rank): + if self.cfg.DISTRIBUTED.SYNC_BATCH_NORM: + self.model = nn.SyncBatchNorm.convert_sync_batchnorm(self.model) + self.model = DDP( self.model.cuda(rank), device_ids=[rank], ) - if self.cfg.DISTRIBUTED.SYNC_BATCH_NORM: - self.model = nn.SyncBatchNorm.convert_sync_batchnorm(self.model) - self.model = self.model.to(self.device) def _cleanup(self): @@ -710,7 +783,7 @@ def _main_worker( self._setup_ddp(rank) self._setup_model(rank) self.train_loader = self.train_loader_creator.get_dataloader(rank=rank) - self.val_loader = self.val_loader_creator.get_dataloader(rank=rank) + self._setup_training( rank=rank, loss_fn=loss_fn, optimizer=optimizer, scheduler=scheduler ) @@ -718,6 +791,8 @@ def _main_worker( os.makedirs(self.cfg.CKPT_DIR, exist_ok=True) os.makedirs(self.cfg.LOG_DIR, exist_ok=True) + # synchronizes all the threads to reach this point before moving on + dist.barrier() self._trainer(total_iterations, start_iteration) if self._is_main_process(): diff --git a/ezflow/functional/criterion/multiscale.py b/ezflow/functional/criterion/multiscale.py index c13d44f5..c19bb1b9 100644 --- a/ezflow/functional/criterion/multiscale.py +++ b/ezflow/functional/criterion/multiscale.py @@ -14,10 +14,22 @@ class MultiScaleLoss(nn.Module): Parameters ---------- - norm : str + norm : str, default: "l1" The norm to use for the loss. Can be either "l2", "l1" or "robust" + q : float, default: 0.4 + This parameter is used in robust loss for fine tuning. q < 1 gives less penalty to outliers + eps : float, default: 0.01 + This parameter is a small constant used in robust loss to stabilize fine tuning. weights : list The weights to use for each scale + average : str, default: "mean" + The mode to set the average of the EPE map. + If "mean", the mean of the EPE map is returned. + If "sum", the EPE map is summed and divided by the batch size. + resize_flow : str, default: "upsample" + The mode to resize flow. + If "upsample", predicted flow will be upsampled to the size of the ground truth. + If "downsample", ground truth flow will be downsampled to the size of the predicted flow. extra_mask : torch.Tensor A mask to apply to the loss. 
Useful for removing the loss on the background use_valid_range : bool @@ -30,26 +42,45 @@ class MultiScaleLoss(nn.Module): def __init__( self, norm="l1", + q=0.4, + eps=1e-2, weights=(1, 0.5, 0.25), + average="mean", + resize_flow="upsample", extra_mask=None, use_valid_range=True, valid_range=None, ): super(MultiScaleLoss, self).__init__() - self.norm = norm.lower() - assert self.norm in ("l1", "l2", "robust"), "Norm must be one of L1, L2, Robust" + assert norm.lower() in ( + "l1", + "l2", + "robust", + ), "Norm must be one of L1, L2, Robust" + assert resize_flow.lower() in ( + "upsample", + "downsample", + ), "Resize flow must be one of upsample or downsample" + assert average.lower() in ("mean", "sum"), "Average must be one of mean or sum" + self.norm = norm.lower() + self.q = q + self.eps = eps self.weights = weights self.extra_mask = extra_mask self.use_valid_range = use_valid_range self.valid_range = valid_range + self.average = average.lower() + self.resize_flow = resize_flow.lower() @classmethod def from_config(cls, cfg): return { "norm": cfg.NORM, "weights": cfg.WEIGHTS, + "average": cfg.AVERAGE, + "resize_flow": cfg.RESIZE_FLOW, "extra_mask": cfg.EXTRA_MASK, "use_valid_range": cfg.USE_VALID_RANGE, "valid_range": cfg.VALID_RANGE, @@ -59,10 +90,11 @@ def forward(self, pred, label): if label.shape[1] == 3: """Ignore valid mask for Multiscale Loss.""" + mask = label[:, 2:, :, :] label = label[:, :2, :, :] loss = 0 - h, w = label.size()[-2:] + b, c, h, w = label.size() if ( (type(pred) is not tuple) @@ -73,42 +105,58 @@ def forward(self, pred, label): for i, level_pred in enumerate(pred): - real_flow = F.interpolate( - level_pred, (h, w), mode="bilinear", align_corners=True - ) - real_flow[:, 0, :, :] = real_flow[:, 0, :, :] * (w / level_pred.shape[3]) - real_flow[:, 1, :, :] = real_flow[:, 1, :, :] * (h / level_pred.shape[2]) + if self.resize_flow.lower() == "upsample": + real_flow = F.interpolate( + level_pred, (h, w), mode="bilinear", align_corners=True + ) + real_flow[:, 0, :, :] = real_flow[:, 0, :, :] * ( + w / level_pred.shape[3] + ) + real_flow[:, 1, :, :] = real_flow[:, 1, :, :] * ( + h / level_pred.shape[2] + ) + target = label + + elif self.resize_flow.lower() == "downsample": + # down sample ground truth following irr solution + # https://github.com/visinf/irr/blob/master/losses.py#L16 + b, c, h, w = level_pred.shape + + target = F.adaptive_avg_pool2d(label, [h, w]) + real_flow = level_pred if self.norm == "l2": - loss_value = torch.norm(real_flow - label, p=2, dim=1) + loss_value = torch.norm(real_flow - target, p=2, dim=1) elif self.norm == "robust": - loss_value = (real_flow - label).abs().sum(dim=1) + 1e-8 - loss_value = loss_value**0.4 + loss_value = torch.norm(real_flow - target, p=1, dim=1) + loss_value = (loss_value + self.eps) ** self.q elif self.norm == "l1": - loss_value = (real_flow - label).abs().sum(dim=1) + loss_value = torch.norm(real_flow - target, p=1, dim=1) if self.use_valid_range and self.valid_range is not None: with torch.no_grad(): - mask = (label[:, 0, :, :].abs() <= self.valid_range[i][1]) & ( - label[:, 1, :, :].abs() <= self.valid_range[i][0] + mask = (target[:, 0, :, :].abs() <= self.valid_range[i][1]) & ( + target[:, 1, :, :].abs() <= self.valid_range[i][0] ) else: with torch.no_grad(): - mask = torch.ones(label[:, 0, :, :].shape).type_as(label) + mask = torch.ones(target[:, 0, :, :].shape).type_as(target) loss_value = loss_value * mask.float() if self.extra_mask is not None: val = self.extra_mask > 0 loss_value = loss_value[val] - level_loss = 
loss_value.mean() * self.weights[i] - else: + if self.average.lower() == "mean": level_loss = loss_value.mean() * self.weights[i] + elif self.average.lower() == "sum": + level_loss = loss_value.sum() / b * self.weights[i] + loss += level_loss loss = loss / len(pred) diff --git a/ezflow/functional/data_augmentation/augmentor.py b/ezflow/functional/data_augmentation/augmentor.py index ab3abea3..260d71fa 100644 --- a/ezflow/functional/data_augmentation/augmentor.py +++ b/ezflow/functional/data_augmentation/augmentor.py @@ -11,30 +11,73 @@ class FlowAugmentor: ---------- crop_size : int Size of the crop to be applied to the images. - color_aug_params : dict - Parameters for the color augmentation. eraser_aug_params : dict Parameters for the eraser augmentation. + noise_aug_params : dict + Parameters for the noise augmentation. + flip_aug_params : dict + Parameters for the flip augmentation. + color_aug_params : dict + Parameters for the color augmentation. spatial_aug_params : dict Parameters for the spatial augmentation. + advanced_spatial_aug_params : dict + Parameters for the spatial augmentation. + If both spatial and advanced_spatial augmentations are enabled, the advanced spatial augmentations are used. """ def __init__( self, crop_size, - color_aug_params={"aug_prob": 0.2}, - eraser_aug_params={"aug_prob": 0.5}, - spatial_aug_params={"aug_prob": 0.8}, - translate_params={"aug_prob": 0.8}, - rotate_params={"aug_prob": 0.8}, + eraser_aug_params={"enabled": False, "aug_prob": 0.5, "bounds": [50, 100]}, + noise_aug_params={"enabled": False, "aug_prob": 0.5, "noise_std_range": 0.06}, + flip_aug_params={"enabled": False, "h_flip_prob": 0.5, "v_flip_prob": 0.1}, + color_aug_params={ + "enabled": False, + "asymmetric_color_aug_prob": 0.2, + "brightness": 0.4, + "contrast": 0.4, + "saturation": 0.4, + "hue": 0.15915494309189535, + }, + spatial_aug_params={ + "enabled": False, + "aug_prob": 0.8, + "stretch_prob": 0.8, + "min_scale": -0.1, + "max_scale": 1.0, + "max_stretch": 0.2, + }, + advanced_spatial_aug_params={ + "enabled": False, + "scale1": 0.3, + "scale2": 0.1, + "rotate": 0.4, + "translate": 0.4, + "stretch": 0.3, + "enable_out_of_boundary_crop": False, + }, ): self.crop_size = crop_size self.color_aug_params = color_aug_params self.eraser_aug_params = eraser_aug_params self.spatial_aug_params = spatial_aug_params - self.translate_params = translate_params - self.rotate_params = rotate_params + self.noise_aug_params = noise_aug_params + self.flip_aug_params = flip_aug_params + + self.advanced_spatial_aug_params = advanced_spatial_aug_params + self.advanced_spatial_aug_params["h_flip_prob"] = ( + flip_aug_params["h_flip_prob"] if "h_flip_prob" in flip_aug_params else 0.0 + ) + self.advanced_spatial_transform = AdvancedSpatialTransform( + crop=self.crop_size, **self.advanced_spatial_aug_params + ) + + if self.advanced_spatial_aug_params["enabled"]: + # Disable spatial transform and horizontal flip if advanced spatial transforms are used + self.spatial_aug_params["enabled"] = False + self.flip_aug_params["h_flip_prob"] = 0.0 def __call__(self, img1, img2, flow, valid=None): """ @@ -64,14 +107,17 @@ def __call__(self, img1, img2, flow, valid=None): """ img1, img2 = color_transform(img1, img2, **self.color_aug_params) - img1, img2 = eraser_transform(img1, img2, **self.eraser_aug_params) + + img1, img2, flow = self.advanced_spatial_transform(img1, img2, flow) + img1, img2, flow = spatial_transform( img1, img2, flow, self.crop_size, **self.spatial_aug_params ) - img1, img2, flow = 
translate_transform( - img1, img2, flow, **self.translate_params - ) - img1, img2, flow = rotate_transform(img1, img2, flow, **self.rotate_params) + + img1, img2, flow = flip_transform(img1, img2, flow, **self.flip_aug_params) + + img1, img2 = noise_transform(img1, img2, **self.noise_aug_params) + img1, img2 = eraser_transform(img1, img2, **self.eraser_aug_params) img1 = np.ascontiguousarray(img1) img2 = np.ascontiguousarray(img2) diff --git a/ezflow/functional/data_augmentation/operations.py b/ezflow/functional/data_augmentation/operations.py index 1ed5325c..e81416f7 100644 --- a/ezflow/functional/data_augmentation/operations.py +++ b/ezflow/functional/data_augmentation/operations.py @@ -1,8 +1,17 @@ +from __future__ import division + +import numbers +import pdb +import random + import cv2 import numpy as np import scipy.ndimage as ndimage +import torch +import torchvision import torchvision.transforms as transforms from PIL import Image +from torch.nn import functional as F from torchvision.transforms import ColorJitter @@ -87,14 +96,15 @@ def crop( def color_transform( img1, img2, - aug_prob=0.2, + enabled=False, + asymmetric_color_aug_prob=0.2, brightness=0.4, contrast=0.4, saturation=0.4, hue=0.5 / 3.14, ): """ - Photometric augmentation + Photometric augmentation borrowed from RAFT https://github.com/princeton-vl/RAFT/blob/master/core/utils/augmentor.py Parameters ----------- @@ -102,8 +112,10 @@ def color_transform( First of the pair of images img2 : PIL Image or numpy.ndarray Second of the pair of images - aug_prob : float - Probability of applying the augmentation + enabled : bool, default: False + If True, applies color transform + asymmetric_color_aug_prob : float + Probability of applying asymetric color jitter augmentation brightness : float Brightness augmentation factor contrast : float @@ -120,12 +132,14 @@ def color_transform( img2 : PIL Image or numpy.ndarray Augmented image 2 """ + if not enabled: + return img1, img2 aug = ColorJitter( brightness=brightness, contrast=contrast, saturation=saturation, hue=hue ) - if np.random.rand() < aug_prob: + if np.random.rand() < asymmetric_color_aug_prob: img1 = np.array(aug(Image.fromarray(img1)), dtype=np.uint8) img2 = np.array(aug(Image.fromarray(img2)), dtype=np.uint8) @@ -137,15 +151,18 @@ def color_transform( return img1, img2 -def eraser_transform(img1, img2, bounds=[50, 100], aug_prob=0.5): +def eraser_transform(img1, img2, enabled=False, bounds=[50, 100], aug_prob=0.5): """ - Occlusion augmentation + Occlusion augmentation borrowed from RAFT https://github.com/princeton-vl/RAFT/blob/master/core/utils/augmentor.py + Parameters ----------- img1 : PIL Image or numpy.ndarray First of the pair of images img2 : PIL Image or numpy.ndarray Second of the pair of images + enabled : bool, default: False + If True, applies eraser transform bounds : :obj:`list` of :obj:`int` Bounds of the eraser aug_prob : float @@ -158,6 +175,8 @@ def eraser_transform(img1, img2, bounds=[50, 100], aug_prob=0.5): img2 : PIL Image or numpy.ndarray Augmented image 2 """ + if not enabled: + return img1, img2 H, W = img1.shape[:2] @@ -180,17 +199,17 @@ def spatial_transform( img2, flow, crop_size, + enabled=False, aug_prob=0.8, stretch_prob=0.8, max_stretch=0.2, min_scale=-0.2, max_scale=0.5, - flip=True, - h_flip_prob=0.5, - v_flip_prob=0.1, ): """ - Spatial augmentation + Simple set of spatial augmentation borrowed from RAFT https://github.com/princeton-vl/RAFT/blob/master/core/utils/augmentor.py + + Includes random scaling and stretch. 
Parameters ----------- @@ -202,6 +221,8 @@ def spatial_transform( Flow field crop_size : :obj:`list` of :obj:`int` Size of the crop + enabled : bool, default: False + If True, applies spatial transform aug_prob : float Probability of applying the augmentation stretch_prob : float @@ -212,12 +233,6 @@ def spatial_transform( Minimum scale factor max_scale : float Maximum scale factor - flip : bool - Whether to apply the flip transform - h_flip_prob : float - Probability of applying the horizontal flip transform - v_flip_prob : float - Probability of applying the vertical flip transform Returns ------- @@ -228,9 +243,10 @@ def spatial_transform( flow : numpy.ndarray Augmented flow field """ + if not enabled: + return img1, img2, flow H, W = img1.shape[:2] - min_scale = np.maximum((crop_size[0] + 8) / float(H), (crop_size[1] + 8) / float(W)) scale = 2 ** np.random.uniform(min_scale, max_scale) scale_x = scale @@ -240,6 +256,8 @@ def spatial_transform( scale_x *= 2 ** np.random.uniform(-max_stretch, max_stretch) scale_y *= 2 ** np.random.uniform(-max_stretch, max_stretch) + min_scale = np.maximum((crop_size[0] + 8) / float(H), (crop_size[1] + 8) / float(W)) + scale_x = np.clip(scale_x, min_scale, None) scale_y = np.clip(scale_y, min_scale, None) @@ -256,16 +274,50 @@ def spatial_transform( ) flow = flow * [scale_x, scale_y] - if flip: - if np.random.rand() < h_flip_prob: - img1 = img1[:, ::-1] - img2 = img2[:, ::-1] - flow = flow[:, ::-1] * [-1.0, 1.0] + return img1, img2, flow + - if np.random.rand() < v_flip_prob: - img1 = img1[::-1, :] - img2 = img2[::-1, :] - flow = flow[::-1, :] * [1.0, -1.0] +def flip_transform(img1, img2, flow, enabled=False, h_flip_prob=0.5, v_flip_prob=0.1): + """ + Flip augmentation borrowed from RAFT https://github.com/princeton-vl/RAFT/blob/master/core/utils/augmentor.py + + Parameters + ----------- + img1 : PIL Image or numpy.ndarray + First of the pair of images + img2 : PIL Image or numpy.ndarray + Second of the pair of images + flow : numpy.ndarray + Flow field + enabled : bool, default: False + If True, applies flip transform + h_flip_prob : float, default=0.5 + Probability of applying the horizontal flip transform + v_flip_prob : float, default=0.1 + Probability of applying the vertical flip transform + + Returns + ------- + img1 : PIL Image or numpy.ndarray + Flipped image 1 + img2 : PIL Image or numpy.ndarray + Flipped image 2 + flow : numpy.ndarray + Flipped flow field + """ + + if not enabled: + return img1, img2, flow + + if np.random.rand() < h_flip_prob: + img1 = img1[:, ::-1] + img2 = img2[:, ::-1] + flow = flow[:, ::-1] * [-1.0, 1.0] + + if np.random.rand() < v_flip_prob: + img1 = img1[::-1, :] + img2 = img2[::-1, :] + flow = flow[::-1, :] * [1.0, -1.0] return img1, img2, flow @@ -332,6 +384,7 @@ def sparse_spatial_transform( flow, valid, crop_size, + enabled=False, aug_prob=0.8, min_scale=-0.2, max_scale=0.5, @@ -377,6 +430,9 @@ def sparse_spatial_transform( valid : numpy.ndarray Valid flow field """ + if not enabled: + return img1, img2, flow, valid + H, W = img1.shape[:2] min_scale = np.maximum((crop_size[0] + 1) / float(H), (crop_size[1] + 1) / float(W)) @@ -404,74 +460,40 @@ def sparse_spatial_transform( return img1, img2, flow, valid -def translate_transform( - img1, - img2, - flow, - aug_prob=0.8, - translate=10, -): +class Normalize: """ - Translation augmentation. + A class to return Normalized Image. 
Parameters ----------- - img1 : PIL Image or numpy.ndarray - First of the pair of images - img2 : PIL Image or numpy.ndarray - Second of the pair of images - flow : numpy.ndarray - Flow field - aug_prob : float - Probability of applying the augmentation - translate : int - Pixels by which image will be translated - - Returns - ------- - img1 : PIL Image or numpy.ndarray - Augmented image 1 - img2 : PIL Image or numpy.ndarray - Augmented image 2 - flow : numpy.ndarray - Augmented flow field + use : boolean + Whether to normalize image or not + mean : list + The list of mean values to be substracted from each image channel + std : list + The list of std values with which to divide each image channel by """ - H, W = img1.shape[:2] - - max_t_x = translate - max_t_y = translate - - t_x = np.random.randint(-1 * max_t_x, max_t_x) - t_y = np.random.randint(-1 * max_t_y, max_t_y) - - if t_x == 0 and t_y == 0: - return img1, img2, flow - - if np.random.rand() < aug_prob: - x1, x2, x3, x4 = max(0, t_x), min(W + t_x, W), max(0, -t_x), min(W - t_x, W) - y1, y2, y3, y4 = max(0, t_y), min(H + t_y, H), max(0, -t_y), min(H - t_y, H) - - img1 = img1[y1:y2, x1:x2] - img2 = img2[y3:y4, x3:x4] - flow = flow[y1:y2, x1:x2] - flow[:, :, 0] += t_x - flow[:, :, 1] += t_y + def __init__(self, use=False, mean=[0, 0, 0], std=[255.0, 255.0, 255.0]): + self.use = use + self.mean = mean + self.std = std + self.normalize = transforms.Compose( + [ + transforms.Normalize(mean=self.mean, std=self.std), + ] + ) - return img1, img2, flow + def __call__(self, img1, img2): + if self.use: + return self.normalize(img1), self.normalize(img2) + return img1, img2 -def rotate_transform( - img1, - img2, - flow, - aug_prob=0.8, - degrees=10, - delta=0, -): +def noise_transform(img1, img2, enabled=False, aug_prob=0.5, noise_std_range=0.06): """ - Rotation augmentation. 
- (Referenced from Clement Picard) + Applies random noise augmentation from a gaussian distribution borrowed from VCN: + https://github.com/gengshan-y/VCN/blob/master/dataloader/flow_transforms.py Parameters ----------- @@ -479,14 +501,12 @@ def rotate_transform( First of the pair of images img2 : PIL Image or numpy.ndarray Second of the pair of images - flow : numpy.ndarray - Flow field + enabled : bool, default: False + If True, applies noise transform aug_prob : float Probability of applying the augmentation - degrees : int - Angle by which image is to rotated - delta: int - Assigns angle range of degrees-delta to degrees+delta + noise_std_range : float + Standard deviation of the noise Returns ------- @@ -494,79 +514,363 @@ def rotate_transform( Augmented image 1 img2 : PIL Image or numpy.ndarray Augmented image 2 - flow : numpy.ndarray - Augmented flow field """ - angle = np.random.uniform(-degrees, degrees) - diff = np.random.uniform(-delta, delta) - angle1 = angle - diff / 2 - angle2 = angle + diff / 2 - angle1_rad = angle1 * np.pi / 180 - diff_rad = diff * np.pi / 180 - - H, W = img1.shape[:2] - - warped_coords = np.mgrid[:W, :H].T + flow - warped_coords -= np.array([W / 2, H / 2]) - - warped_coords_rot = np.zeros_like(flow) - - warped_coords_rot[..., 0] = (np.cos(diff_rad) - 1) * warped_coords[..., 0] + np.sin( - diff_rad - ) * warped_coords[..., 1] - - warped_coords_rot[..., 1] = ( - -np.sin(diff_rad) * warped_coords[..., 0] - + (np.cos(diff_rad) - 1) * warped_coords[..., 1] - ) + if not enabled: + return img1, img2 if np.random.rand() < aug_prob: + noise = np.random.uniform(0, noise_std_range * 255.0) - flow += warped_coords_rot + img1 = img1.astype(np.float64) + img2 = img2.astype(np.float64) - img1 = ndimage.interpolation.rotate(img1, angle1, reshape=False, order=2) - img2 = ndimage.interpolation.rotate(img2, angle2, reshape=False, order=2) - flow = ndimage.interpolation.rotate(flow, angle1, reshape=False, order=2) + img1 += np.random.normal(0, noise, img1.shape) + img2 += np.random.normal(0, noise, img2.shape) - target_ = np.copy(flow) - flow[:, :, 0] = ( - np.cos(angle1_rad) * target_[:, :, 0] - + np.sin(angle1_rad) * target_[:, :, 1] - ) - flow[:, :, 1] = ( - -np.sin(angle1_rad) * target_[:, :, 0] - + np.cos(angle1_rad) * target_[:, :, 1] - ) + img1 = np.clip(img1, 0.0, 255.0) + img2 = np.clip(img2, 0.0, 255.0) - return img1, img2, flow + return img1, img2 -class Normalize: +class AdvancedSpatialTransform(object): """ - A class to return Normalized Image. + Advanced set of spatial transformations borrowed from: + + 1. VCN: https://github.com/gengshan-y/VCN/blob/master/dataloader/flow_transforms.py + 2. Autoflow: https://github.com/google-research/opticalflow-autoflow/blob/main/src/dataset_lib/augmentations/spatial_aug.py + + This set of augmentations include random scaling, stretch, rotation, translation and out-of-boundary cropping. 
Parameters ----------- - use : boolean - Whether to normalize image or not - mean : list - The list of mean values to be substracted from each image channel - std : list - The list of std values with which to divide each image channel by + crop_size : :obj:`list` of :obj:`int` + Size of the crop + enabled : bool, default: False + If True, applies flip transform + scale1 : float, default : 0.3 + Scale factor 1 + scale1 : float, default : 0.1 + Scale factor 2 + rotate : float, default : 0.4 + Rotate factor + translate : float, default : 0.4 + Translate factor + stretch : float, default : 0.3 + Stretch factor + h_flip_prob : float, default=0.5 + Probability of applying the horizontal flip transform + + Returns + ------- + img1 : PIL Image or numpy.ndarray + Flipped image 1 + img2 : PIL Image or numpy.ndarray + Flipped image 2 + flow : numpy.ndarray + Flipped flow field """ - def __init__(self, use=False, mean=[0, 0, 0], std=[255.0, 255.0, 255.0]): - self.use = use - self.mean = mean - self.std = std - self.normalize = transforms.Compose( + def __init__( + self, + crop, + enabled=False, + scale1=0.3, + scale2=0.1, + rotate=0.4, + translate=0.4, + stretch=0.3, + h_flip_prob=0.5, + schedule_coeff=1, + order=1, + enable_out_of_boundary_crop=False, + ): + self.enabled = enabled + self.crop = crop + self.scale = [scale1, 0.03, scale2] + self.rot = [rotate, 0.03] if rotate != 0 else None + self.trans = [translate, 0.03] if translate != 0 else None + self.squeeze = [stretch, 0.0] if stretch != 0 else None + self.h_flip_prob = h_flip_prob + self.t = np.zeros(6) + self.schedule_coeff = schedule_coeff + self.order = order + self.black = enable_out_of_boundary_crop + + def to_identity(self): + self.t[0] = 1 + self.t[2] = 0 + self.t[4] = 0 + self.t[1] = 0 + self.t[3] = 1 + self.t[5] = 0 + + def left_multiply(self, u0, u1, u2, u3, u4, u5): + result = np.zeros(6) + result[0] = self.t[0] * u0 + self.t[1] * u2 + result[1] = self.t[0] * u1 + self.t[1] * u3 + + result[2] = self.t[2] * u0 + self.t[3] * u2 + result[3] = self.t[2] * u1 + self.t[3] * u3 + + result[4] = self.t[4] * u0 + self.t[5] * u2 + u4 + result[5] = self.t[4] * u1 + self.t[5] * u3 + u5 + self.t = result + + def inverse(self): + result = np.zeros(6) + a = self.t[0] + c = self.t[2] + e = self.t[4] + b = self.t[1] + d = self.t[3] + f = self.t[5] + + denom = a * d - b * c + + result[0] = d / denom + result[1] = -b / denom + result[2] = -c / denom + result[3] = a / denom + result[4] = (c * f - d * e) / denom + result[5] = (b * e - a * f) / denom + + return result + + def grid_transform(self, meshgrid, t, normalize=True, gridsize=None): + if gridsize is None: + h, w = meshgrid[0].shape + else: + h, w = gridsize + vgrid = torch.cat( [ - transforms.Normalize(mean=self.mean, std=self.std), - ] + (meshgrid[0] * t[0] + meshgrid[1] * t[2] + t[4])[:, :, np.newaxis], + (meshgrid[0] * t[1] + meshgrid[1] * t[3] + t[5])[:, :, np.newaxis], + ], + -1, ) + if normalize: + vgrid[:, :, 0] = 2.0 * vgrid[:, :, 0] / max(w - 1, 1) - 1.0 + vgrid[:, :, 1] = 2.0 * vgrid[:, :, 1] / max(h - 1, 1) - 1.0 + return vgrid + + def __call__(self, img1, img2, target): + """ + Parameters + ----------- + img1 : PIL Image or numpy.ndarray + First of the pair of images + img2 : PIL Image or numpy.ndarray + Second of the pair of images + target : numpy.ndarray + Flow field - def __call__(self, img1, img2): - if self.use: - return self.normalize(img1), self.normalize(img2) - return img1, img2 + Returns + ------- + img1 : PIL Image or numpy.ndarray + Flipped image 1 + img2 : PIL Image or 
numpy.ndarray + Flipped image 2 + flow : numpy.ndarray + Flipped flow field + """ + if not self.enabled: + return img1, img2, target + + inputs = [img1, img2] + h, w, _ = inputs[0].shape + th, tw = self.crop + meshgrid = torch.meshgrid([torch.Tensor(range(th)), torch.Tensor(range(tw))])[ + ::-1 + ] + cornergrid = torch.meshgrid( + [torch.Tensor([0, th - 1]), torch.Tensor([0, tw - 1])] + )[::-1] + + for i in range(50): + # im0 + self.to_identity() + + if np.random.binomial(1, self.h_flip_prob): + mirror = True + else: + mirror = False + + if mirror: + self.left_multiply(-1, 0, 0, 1, 0.5 * tw, -0.5 * th) + else: + self.left_multiply(1, 0, 0, 1, -0.5 * tw, -0.5 * th) + scale0 = 1 + scale1 = 1 + squeeze0 = 1 + squeeze1 = 1 + if not self.rot is None: + rot0 = np.random.uniform(-self.rot[0], +self.rot[0]) + rot1 = ( + np.random.uniform( + -self.rot[1] * self.schedule_coeff, + self.rot[1] * self.schedule_coeff, + ) + + rot0 + ) + self.left_multiply( + np.cos(rot0), np.sin(rot0), -np.sin(rot0), np.cos(rot0), 0, 0 + ) + if not self.trans is None: + trans0 = np.random.uniform(-self.trans[0], +self.trans[0], 2) + trans1 = ( + np.random.uniform( + -self.trans[1] * self.schedule_coeff, + +self.trans[1] * self.schedule_coeff, + 2, + ) + + trans0 + ) + self.left_multiply(1, 0, 0, 1, trans0[0] * tw, trans0[1] * th) + if not self.squeeze is None: + squeeze0 = np.exp(np.random.uniform(-self.squeeze[0], self.squeeze[0])) + squeeze1 = ( + np.exp( + np.random.uniform( + -self.squeeze[1] * self.schedule_coeff, + self.squeeze[1] * self.schedule_coeff, + ) + ) + * squeeze0 + ) + if not self.scale is None: + scale0 = np.exp( + np.random.uniform( + self.scale[2] - self.scale[0], self.scale[2] + self.scale[0] + ) + ) + scale1 = ( + np.exp( + np.random.uniform( + -self.scale[1] * self.schedule_coeff, + self.scale[1] * self.schedule_coeff, + ) + ) + * scale0 + ) + self.left_multiply( + 1.0 / (scale0 * squeeze0), 0, 0, 1.0 / (scale0 / squeeze0), 0, 0 + ) + + self.left_multiply(1, 0, 0, 1, 0.5 * w, 0.5 * h) + transmat0 = self.t.copy() + + # im1 + self.to_identity() + if mirror: + self.left_multiply(-1, 0, 0, 1, 0.5 * tw, -0.5 * th) + else: + self.left_multiply(1, 0, 0, 1, -0.5 * tw, -0.5 * th) + if not self.rot is None: + self.left_multiply( + np.cos(rot1), np.sin(rot1), -np.sin(rot1), np.cos(rot1), 0, 0 + ) + if not self.trans is None: + self.left_multiply(1, 0, 0, 1, trans1[0] * tw, trans1[1] * th) + self.left_multiply( + 1.0 / (scale1 * squeeze1), 0, 0, 1.0 / (scale1 / squeeze1), 0, 0 + ) + self.left_multiply(1, 0, 0, 1, 0.5 * w, 0.5 * h) + transmat1 = self.t.copy() + transmat1_inv = self.inverse() + + if self.black: + # black augmentation, allowing 0 values in the input images + # https://github.com/lmb-freiburg/flownet2/blob/master/src/caffe/layers/black_augmentation_layer.cu + break + else: + if ( + ( + self.grid_transform( + cornergrid, transmat0, gridsize=[float(h), float(w)] + ).abs() + > 1 + ).sum() + + ( + self.grid_transform( + cornergrid, transmat1, gridsize=[float(h), float(w)] + ).abs() + > 1 + ).sum() + ) == 0: + break + if i == 49: + # print("max_iter in augmentation") + self.to_identity() + self.left_multiply(1, 0, 0, 1, -0.5 * tw, -0.5 * th) + self.left_multiply(1, 0, 0, 1, 0.5 * w, 0.5 * h) + transmat0 = self.t.copy() + transmat1 = self.t.copy() + + # do the real work + vgrid = self.grid_transform(meshgrid, transmat0, gridsize=[float(h), float(w)]) + inputs_0 = F.grid_sample( + torch.Tensor(inputs[0]).permute(2, 0, 1)[np.newaxis], vgrid[np.newaxis] + )[0].permute(1, 2, 0) + if self.order == 0: + 
target_0 = F.grid_sample( + torch.Tensor(target).permute(2, 0, 1)[np.newaxis], + vgrid[np.newaxis], + mode="nearest", + )[0].permute(1, 2, 0) + else: + target_0 = F.grid_sample( + torch.Tensor(target).permute(2, 0, 1)[np.newaxis], vgrid[np.newaxis] + )[0].permute(1, 2, 0) + + mask_0 = target[:, :, 2:3].copy() + mask_0[mask_0 == 0] = np.nan + if self.order == 0: + mask_0 = F.grid_sample( + torch.Tensor(mask_0).permute(2, 0, 1)[np.newaxis], + vgrid[np.newaxis], + mode="nearest", + )[0].permute(1, 2, 0) + else: + mask_0 = F.grid_sample( + torch.Tensor(mask_0).permute(2, 0, 1)[np.newaxis], vgrid[np.newaxis] + )[0].permute(1, 2, 0) + mask_0[torch.isnan(mask_0)] = 0 + + vgrid = self.grid_transform(meshgrid, transmat1, gridsize=[float(h), float(w)]) + inputs_1 = F.grid_sample( + torch.Tensor(inputs[1]).permute(2, 0, 1)[np.newaxis], vgrid[np.newaxis] + )[0].permute(1, 2, 0) + + # flow + pos = target_0[:, :, :2] + self.grid_transform( + meshgrid, transmat0, normalize=False + ) + pos = self.grid_transform(pos.permute(2, 0, 1), transmat1_inv, normalize=False) + if target_0.shape[2] >= 4: + # scale + exp = target_0[:, :, 3:] * scale1 / scale0 + target = torch.cat( + [ + (pos[:, :, 0] - meshgrid[0]).unsqueeze(-1), + (pos[:, :, 1] - meshgrid[1]).unsqueeze(-1), + mask_0, + exp, + ], + -1, + ) + else: + target = torch.cat( + [ + (pos[:, :, 0] - meshgrid[0]).unsqueeze(-1), + (pos[:, :, 1] - meshgrid[1]).unsqueeze(-1), + mask_0, + ], + -1, + ) + # target_0[:,:,2].unsqueeze(-1) ], -1) + inputs = [np.asarray(inputs_0), np.asarray(inputs_1)] + target = np.asarray(target) + + return inputs[0], inputs[1], target diff --git a/ezflow/models/dicl.py b/ezflow/models/dicl.py index 5a48c96e..418f6840 100644 --- a/ezflow/models/dicl.py +++ b/ezflow/models/dicl.py @@ -250,8 +250,9 @@ def forward(self, img1, img2): Returns ------- - torch.Tensor - Flow from img1 to img2 + :class:`dict` + torch.Tensor : intermediate flow predications from img1 to img2 + torch.Tensor : if model is in eval state, return upsampled flow """ _, x2, x3, x4, x5, x6 = self.feature_net(img1) @@ -313,10 +314,10 @@ def forward(self, img1, img2): self.scale_contexts[0], ) + output = {"flow_preds": [flow2, flow3, flow4, flow5, flow6]} if self.training: - if self.cfg.SUP_RAW_FLOW: - return ( + output["flow_preds"] = [ flow2, raw_flow2, flow3, @@ -327,12 +328,14 @@ def forward(self, img1, img2): raw_flow5, flow6, raw_flow6, - ) + ] - return (flow2, flow3, flow4, flow5, flow6) + return output - else: - _, _, height, width = img1.size() - return F.interpolate( - flow2, (height, width), mode="bilinear", align_corners=True - ) + _, _, height, width = img1.size() + flow_up = F.interpolate( + flow2, (height, width), mode="bilinear", align_corners=True + ) + + output["flow_upsampled"] = flow_up + return output diff --git a/ezflow/models/flownet_c.py b/ezflow/models/flownet_c.py index dcfa9c5c..d8252ee6 100644 --- a/ezflow/models/flownet_c.py +++ b/ezflow/models/flownet_c.py @@ -6,7 +6,7 @@ from ..decoder import build_decoder from ..encoder import BasicConvEncoder, build_encoder from ..modules import BaseModule, conv -from ..similarity import CorrelationLayer +from ..similarity import IterSpatialCorrelationSampler as SpatialCorrelationSampler from .build import MODEL_REGISTRY @@ -32,10 +32,13 @@ def __init__(self, cfg): self.feature_encoder = build_encoder(cfg.ENCODER) - self.correlation_layer = CorrelationLayer( - pad_size=cfg.SIMILARITY.PAD_SIZE, - max_displacement=cfg.SIMILARITY.MAX_DISPLACEMENT, + self.correlation_layer = SpatialCorrelationSampler( + 
kernel_size=1, + patch_size=2 * cfg.SIMILARITY.MAX_DISPLACEMENT + 1, + padding=cfg.SIMILARITY.PAD_SIZE, + dilation_patch=2, ) + self.corr_activation = nn.LeakyReLU(negative_slope=0.1, inplace=True) self.conv_redirect = conv( @@ -70,8 +73,9 @@ def forward(self, img1, img2): Returns ------- - torch.Tensor - Flow from img1 to img2 + :class:`dict` + torch.Tensor : intermediate flow predications from img1 to img2 + torch.Tensor : if model is in eval state, return upsampled flow """ H, W = img1.shape[-2:] @@ -80,6 +84,9 @@ def forward(self, img1, img2): conv_outputs2 = self.feature_encoder(img2) corr_output = self.correlation_layer(conv_outputs1[-1], conv_outputs2[-1]) + corr_output = corr_output.view( + corr_output.shape[0], -1, corr_output.shape[3], corr_output.shape[4] + ) corr_output = self.corr_activation(corr_output) # Redirect final feature output of img1 @@ -93,22 +100,18 @@ def forward(self, img1, img2): conv_outputs = [conv_outputs1[0], conv_outputs1[1]] + conv_outputs flow_preds = self.decoder(conv_outputs) - flow_preds.reverse() + + output = {"flow_preds": flow_preds} if self.training: - return flow_preds + return output - else: + flow_up = flow_preds[-1] - flow = flow_preds[0] + flow_up = F.interpolate( + flow_up, size=(H, W), mode="bilinear", align_corners=False + ) - if self.cfg.INTERPOLATE_FLOW: - H_, W_ = flow.shape[-2:] - flow = F.interpolate( - flow, img1.shape[-2:], mode="bilinear", align_corners=True - ) - flow_u = flow[:, 0, :, :] * (W / W_) - flow_v = flow[:, 1, :, :] * (H / H_) - flow = torch.stack([flow_u, flow_v], dim=1) + output["flow_upsampled"] = flow_up - return flow + return output diff --git a/ezflow/models/flownet_s.py b/ezflow/models/flownet_s.py index b0208be5..50418b3a 100644 --- a/ezflow/models/flownet_s.py +++ b/ezflow/models/flownet_s.py @@ -52,8 +52,9 @@ def forward(self, img1, img2): Returns ------- - torch.Tensor - Flow from img1 to img2 + :class:`dict` + torch.Tensor : intermediate flow predications from img1 to img2 + torch.Tensor : if model is in eval state, return upsampled flow """ H, W = img1.shape[-2:] @@ -65,20 +66,20 @@ def forward(self, img1, img2): flow_preds = self.decoder(conv_outputs) flow_preds.reverse() - if self.training: - return flow_preds + output = {"flow_preds": flow_preds} - else: + if self.training: + return output - flow = flow_preds[0] + flow = flow_preds[0] - if self.cfg.INTERPOLATE_FLOW: - H_, W_ = flow.shape[-2:] - flow = F.interpolate( - flow, img1.shape[-2:], mode="bilinear", align_corners=True - ) - flow_u = flow[:, 0, :, :] * (W / W_) - flow_v = flow[:, 1, :, :] * (H / H_) - flow = torch.stack([flow_u, flow_v], dim=1) + H_, W_ = flow.shape[-2:] + flow = F.interpolate( + flow, img1.shape[-2:], mode="bilinear", align_corners=False + ) + flow_u = flow[:, 0, :, :] * (W / W_) + flow_v = flow[:, 1, :, :] * (H / H_) + flow = torch.stack([flow_u, flow_v], dim=1) - return flow + output["flow_upsampled"] = flow + return output diff --git a/ezflow/models/predictor.py b/ezflow/models/predictor.py index f073aba1..aed5738a 100644 --- a/ezflow/models/predictor.py +++ b/ezflow/models/predictor.py @@ -105,8 +105,7 @@ def __call__(self, img1, img2): padder = InputPadder(img1.shape, divisor=self.pad_divisor) img1, img2 = padder.pad(img1, img2) - flow_pred = self.model(img1, img2) - flow_pred = padder.unpad(flow_pred) + output = self.model(img1, img2) + flow_pred = padder.unpad(output["flow_upsampled"]) flow_pred = flow_pred * self.flow_scale - return flow_pred diff --git a/ezflow/models/pwcnet.py b/ezflow/models/pwcnet.py index 
27d6b8b5..975289ea 100644 --- a/ezflow/models/pwcnet.py +++ b/ezflow/models/pwcnet.py @@ -2,11 +2,9 @@ import torch.nn as nn import torch.nn.functional as F -from ..decoder import ConvDecoder +from ..decoder import ContextNetwork, build_decoder from ..encoder import build_encoder -from ..modules import BaseModule, conv, deconv -from ..similarity import CorrelationLayer -from ..utils import warp +from ..modules import BaseModule from .build import MODEL_REGISTRY @@ -27,105 +25,17 @@ def __init__(self, cfg): self.cfg = cfg self.encoder = build_encoder(cfg.ENCODER) - self.correlation_layer = CorrelationLayer( - pad_size=cfg.SIMILARITY.PAD_SIZE, - max_displacement=cfg.SIMILARITY.MAX_DISPLACEMENT, - ) - search_range = (2 * cfg.SIMILARITY.MAX_DISPLACEMENT + 1) ** 2 - - self.decoder_layers = nn.ModuleList() - decoder_cfg = cfg.DECODER.CONFIG - - self.up_feature_layers = nn.ModuleList() - - for i in range(len(decoder_cfg)): - - if i == 0: - concat_channels = search_range - else: - concat_channels = ( - search_range + decoder_cfg[i] + cfg.SIMILARITY.MAX_DISPLACEMENT - ) - - self.decoder_layers.append( - ConvDecoder( - config=decoder_cfg, - to_flow=True, - concat_channels=concat_channels, - ) - ) - - self.up_feature_layers.append( - deconv( - concat_channels + sum(decoder_cfg), - 2, - kernel_size=4, - stride=2, - padding=1, - ) - ) - - self.deconv_layers = nn.ModuleList() - for i in range(len(decoder_cfg)): - self.deconv_layers.append(deconv(2, 2, kernel_size=4, stride=2, padding=1)) - - self.dc_conv = nn.ModuleList( - [ - conv( - search_range - + cfg.SIMILARITY.MAX_DISPLACEMENT - + decoder_cfg[-1] - + sum(decoder_cfg), - 128, - kernel_size=3, - stride=1, - padding=1, - dilation=1, - ), - ] - ) - self.dc_conv.append( - conv( - decoder_cfg[0], - decoder_cfg[0], - kernel_size=3, - stride=1, - padding=2, - dilation=2, - ) - ) + self.decoder = build_decoder(cfg.DECODER) - padding = 4 - dilation = 4 - for i in range(len(decoder_cfg) - 2): - self.dc_conv.append( - conv( - decoder_cfg[i], - decoder_cfg[i + 1], - kernel_size=3, - stride=1, - padding=padding, - dilation=dilation, - ) - ) - padding *= 2 - dilation *= 2 - - self.dc_conv.append( - conv( - decoder_cfg[3], - decoder_cfg[4], - kernel_size=3, - stride=1, - padding=1, - dilation=1, - ) + search_range = (2 * cfg.DECODER.SIMILARITY.MAX_DISPLACEMENT + 1) ** 2 + self.context_net = ContextNetwork( + in_channels=search_range + + cfg.DECODER.SIMILARITY.MAX_DISPLACEMENT + + cfg.DECODER.CONFIG[-1] + + sum(cfg.DECODER.CONFIG), + config=cfg.DECODER.CONFIG, ) - self.dc_conv.append( - nn.Conv2d(32, 2, kernel_size=3, stride=1, padding=1, bias=True) - ) - self.dc_conv = nn.Sequential(*self.dc_conv) self._init_weights() @@ -137,12 +47,6 @@ def _init_weights(self): if m.bias is not None: m.bias.data.zero_() - def _corr_relu(self, features1, features2): - - corr = self.correlation_layer(features1, features2) - - return F.leaky_relu(corr, negative_slope=0.1) - def forward(self, img1, img2): """ Performs forward pass of the network @@ -156,8 +60,9 @@ def forward(self, img1, img2): Returns ------- - torch.Tensor - Flow from img1 to img2 + :class:`dict` + torch.Tensor : intermediate flow predications from img1 to img2 + torch.Tensor : if model is in eval state, return upsampled flow """ H, W = img1.shape[-2:] @@ -165,55 +70,24 @@ def forward(self, img1, img2): feature_pyramid1 = self.encoder(img1) feature_pyramid2 = self.encoder(img2) - up_flow, up_features = None, None - up_flow_scale = 0.625 - - flow_preds = [] - - for i in range(len(self.decoder_layers)): + 
feature_pyramid1.reverse() + feature_pyramid2.reverse() - if i == 0: - corr = self._corr_relu(feature_pyramid1[i], feature_pyramid2[i]) - concatenated_features = corr + flow_preds, features = self.decoder(feature_pyramid1, feature_pyramid2) - else: + flow_preds[-1] += self.context_net(features) - warped_features = warp(feature_pyramid2[i], up_flow * up_flow_scale) - up_flow_scale *= 2 - - corr = self._corr_relu(feature_pyramid1[i], warped_features) - - concatenated_features = torch.cat( - [corr, feature_pyramid1[i], up_flow, up_features], dim=1 - ) - - flow, features = self.decoder_layers[i](concatenated_features) - flow_preds.append(flow) - - up_flow = self.deconv_layers[i](flow) - up_features = self.up_feature_layers[i](features) - - flow_preds.reverse() - flow_preds[0] += self.dc_conv(features) + output = {"flow_preds": flow_preds} if self.training: - return flow_preds - - else: + return output - flow = flow_preds[0] + flow_up = flow_preds[-1] - if self.cfg.INTERPOLATE_FLOW: - - H_, W_ = flow.shape[-2:] - flow = F.interpolate( - flow, img1.shape[-2:], mode="bilinear", align_corners=True - ) - flow_u = flow[:, 0, :, :] * (W / W_) - flow_v = flow[:, 1, :, :] * (H / H_) - flow = torch.stack([flow_u, flow_v], dim=1) + flow_up = F.interpolate( + flow_up, size=(H, W), mode="bilinear", align_corners=False + ) - if self.cfg.FLOW_SCALE_FACTOR is not None: - flow *= self.cfg.FLOW_SCALE_FACTOR + output["flow_upsampled"] = flow_up - return flow + return output diff --git a/ezflow/models/raft.py b/ezflow/models/raft.py index 5efd19e1..240cfa75 100644 --- a/ezflow/models/raft.py +++ b/ezflow/models/raft.py @@ -80,13 +80,7 @@ def _upsample_flow(self, flow, mask): return up_flow.reshape(N, 2, 8 * H, 8 * W) - def forward( - self, - img1, - img2, - flow_init=None, - only_flow=True, - ): + def forward(self, img1, img2, flow_init=None): """ Performs forward pass of the network @@ -99,11 +93,10 @@ def forward( Returns ------- - torch.Tensor - Flow from img1 to img2 + :class:`dict` + torch.Tensor : intermediate flow predications from img1 to img2 + torch.Tensor : if model is in eval state, return upsampled flow """ - img1 = 2 * (img1 / 255.0) - 1.0 - img2 = 2 * (img2 / 255.0) - 1.0 img1 = img1.contiguous() img2 = img2.contiguous() @@ -149,11 +142,10 @@ def forward( flow_predictions.append(flow_up) - if not self.training: + output = {"flow_preds": flow_predictions} - if only_flow: - return flow_up + if self.training: + return output - return coords1 - coords0, flow_up - - return flow_predictions + output["flow_upsampled"] = flow_up + return output diff --git a/ezflow/models/vcn.py b/ezflow/models/vcn.py index 70aa2b5c..c3e4086f 100644 --- a/ezflow/models/vcn.py +++ b/ezflow/models/vcn.py @@ -315,12 +315,14 @@ def forward(self, img1, img2): flow_preds[i], [img1.shape[2], img1.shape[3]], mode="bilinear", - align_corners=True, + align_corners=False, ) flow_preds[i] = flow_preds[i] * scale scale *= 2 + output = {"flow_preds": flow_preds} if self.training: - return flow_preds + return output - return flow_preds[0] + output["flow_upsampled"] = flow_preds[0] + return output diff --git a/ezflow/modules/blocks.py b/ezflow/modules/blocks.py index c5b79a57..bbfcf569 100644 --- a/ezflow/modules/blocks.py +++ b/ezflow/modules/blocks.py @@ -68,6 +68,7 @@ def __init__( if stride != 1: norm3 = nn.Identity() + # Change required if activation.lower() == "leakyrelu": self.activation = nn.LeakyReLU(negative_slope=0.1, inplace=True) else: diff --git a/requirements.txt b/requirements.txt index 0b0b91b9..88be8516 100644 --- 
a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ absl-py==0.13.0 alabaster==0.7.12 -antlr4-python3-runtime==4.9 +antlr4-python3-runtime==4.8 appdirs==1.4.4 argh==0.26.2 arrow==0.15.1 diff --git a/tests/test_engine.py b/tests/test_engine.py index 4dd8ad16..47721434 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -228,9 +228,11 @@ def test_setup_training_with_custom_loss_fn( @mock.patch("ezflow.engine.trainer.SummaryWriter") @mock.patch("ezflow.engine.trainer.os") def test_epoch_trainer(self, mock_os, mock_writer, mock_save_model): - trainer = Trainer( - self.training_cfg, self.mock_model, self.train_loader, self.val_loader + cfg = get_training_cfg( + cfg_path="./tests/configs/custom_loss_trainer.yaml", custom=True ) + + trainer = Trainer(cfg, self.mock_model, self.train_loader, self.val_loader) trainer._trainer = Trainer._epoch_trainer trainer.train() @@ -241,7 +243,6 @@ def test_epoch_trainer(self, mock_os, mock_writer, mock_save_model): del trainer - cfg = self.training_cfg cfg.VALIDATE_ON = "loss" trainer = Trainer(cfg, self.mock_model, self.train_loader, self.val_loader) trainer._trainer = Trainer._epoch_trainer @@ -258,7 +259,9 @@ def test_epoch_trainer(self, mock_os, mock_writer, mock_save_model): @mock.patch("ezflow.engine.trainer.SummaryWriter") @mock.patch("ezflow.engine.trainer.os") def test_step_trainer(self, mock_os, mock_writer, mock_save_model): - cfg = self.training_cfg + cfg = get_training_cfg( + cfg_path="./tests/configs/custom_loss_trainer.yaml", custom=True + ) cfg.NUM_STEPS = 1 trainer = Trainer(cfg, self.mock_model, self.train_loader, self.val_loader) @@ -309,9 +312,11 @@ def test_validate_ddp_config(self, mock_os, mock_utils, mock_cuda_device_count): del trainer + @mock.patch.object(torch.distributed, "barrier") @mock.patch.object(torch.distributed, "init_process_group") @mock.patch.object(torch, "device", return_value=torch.device) @mock.patch.object(torch.cuda, "is_available", return_value=True) + @mock.patch.object(torch.cuda, "set_device") @mock.patch.object(torch.cuda, "empty_cache") @mock.patch.object(torch.cuda, "device_count", return_value=2) @mock.patch("ezflow.engine.trainer.os") @@ -320,9 +325,11 @@ def test_setup_ddp( mock_os, mock_device_count, mock_empty_cache, + mock_torch_cuda_set_device, mock_cuda_available, mock_torch_device, mock_init_process_group, + mock_dist_barrier, ): trainer = DistributedTrainer( self.training_cfg, @@ -332,7 +339,7 @@ def test_setup_ddp( ) trainer._setup_device(rank=0) - + mock_torch_cuda_set_device.assert_called_with(0) mock_torch_device.assert_called_with(0) assert trainer.local_rank == 0 @@ -347,6 +354,7 @@ def test_setup_ddp( del trainer @mock.patch.object(torch.distributed, "destroy_process_group") + @mock.patch.object(torch.distributed, "barrier") @mock.patch.object(DistributedTrainer, "_setup_model") @mock.patch.object(DistributedTrainer, "_setup_training") @mock.patch.object(DistributedTrainer, "_epoch_trainer") @@ -363,6 +371,7 @@ def test_main_worker( mock_trainer, mock_setup_training, mock_setup_model, + mock_dist_barrier, mock_dist_cleanup, ): trainer = DistributedTrainer( @@ -383,6 +392,7 @@ def test_main_worker( rank=0, loss_fn=None, optimizer=None, scheduler=None ) mock_trainer.assert_called_with(None, None) + mock_dist_barrier.assert_called() mock_dist_cleanup.assert_called() del trainer diff --git a/tests/test_functional.py b/tests/test_functional.py index 67be0773..b8c98fc2 100644 --- a/tests/test_functional.py +++ b/tests/test_functional.py @@ -42,30 +42,74 @@ def 
test_FlowAugmentor(): augmentor = FlowAugmentor( crop_size=(224, 224), - color_aug_params={"aug_prob": 1.0}, - eraser_aug_params={"aug_prob": 1.0}, - spatial_aug_params={ - "aug_prob": 1.0, - "h_flip_prob": 1.0, - "v_flip_prob": 1.0, - "stretch_prob": 1.0, + noise_aug_params={"enabled": True, "aug_prob": 1.0}, + eraser_aug_params={"enabled": True, "aug_prob": 1.0}, + color_aug_params={"enabled": True, "asymmetric_color_aug_prob": 1.0}, + flip_aug_params={"enabled": True, "h_flip_prob": 1.0, "v_flip_prob": 1.0}, + spatial_aug_params={"enabled": True, "aug_prob": 1.0, "stretch_prob": 1.0}, + advanced_spatial_aug_params={"enabled": False}, + ) + _ = augmentor(img1, img2, flow) + + augmentor = FlowAugmentor( + crop_size=(224, 224), + noise_aug_params={"enabled": True, "aug_prob": 1.0}, + eraser_aug_params={"enabled": True, "aug_prob": 1.0}, + color_aug_params={"enabled": True, "asymmetric_color_aug_prob": 0.0}, + flip_aug_params={"enabled": True, "h_flip_prob": 1.0, "v_flip_prob": 1.0}, + spatial_aug_params={"enabled": False}, + advanced_spatial_aug_params={ + "enabled": True, + "scale1": 0.3, + "scale2": 0.1, + "rotate": 0.4, + "translate": 0.4, + "stretch": 0.3, + "enable_out_of_boundary_crop": False, }, ) _ = augmentor(img1, img2, flow) augmentor = FlowAugmentor( crop_size=(224, 224), - color_aug_params={"aug_prob": 0.0}, - eraser_aug_params={"aug_prob": 0.0}, - spatial_aug_params={ - "aug_prob": 0.0, - "h_flip_prob": 0.0, - "v_flip_prob": 0.0, - "stretch_prob": 0.0, + noise_aug_params={"enabled": True, "aug_prob": 1.0}, + eraser_aug_params={"enabled": True, "aug_prob": 1.0}, + color_aug_params={"enabled": True, "asymmetric_color_aug_prob": 0.0}, + flip_aug_params={"enabled": True, "h_flip_prob": 1.0, "v_flip_prob": 1.0}, + spatial_aug_params={"enabled": False}, + advanced_spatial_aug_params={ + "enabled": True, + "scale1": 0.3, + "scale2": 0.1, + "rotate": 0.4, + "translate": 0.4, + "stretch": 0.3, + "enable_out_of_boundary_crop": True, }, ) _ = augmentor(img1, img2, flow) + augmentor = FlowAugmentor( + crop_size=(224, 224), + color_aug_params={"enabled": False}, + eraser_aug_params={"enabled": False}, + noise_aug_params={"enabled": False}, + flip_aug_params={"enabled": False}, + spatial_aug_params={"enabled": False}, + advanced_spatial_aug_params={"enabled": False}, + ) + _ = augmentor(img1, img2, flow) + + augmentor = FlowAugmentor( + crop_size=(224, 224), + noise_aug_params={"enabled": True, "aug_prob": 0.0}, + eraser_aug_params={"enabled": True, "aug_prob": 0.0}, + color_aug_params={"enabled": True, "asymmetric_color_aug_prob": 0.0}, + flip_aug_params={"enabled": True, "h_flip_prob": 0.0, "v_flip_prob": 0.0}, + spatial_aug_params={"enabled": True, "aug_prob": 0.0, "stretch_prob": 0.0}, + ) + _ = augmentor(img1, img2, flow) + del augmentor @@ -75,17 +119,17 @@ def test_SparseFlowAugmentor(): augmentor = SparseFlowAugmentor( crop_size=(224, 224), - color_aug_params={"aug_prob": 1.0}, - eraser_aug_params={"aug_prob": 1.0}, - spatial_aug_params={"aug_prob": 1.0, "h_flip_prob": 1.0}, + color_aug_params={"enabled": True, "asymmetric_color_aug_prob": 1.0}, + eraser_aug_params={"enabled": True, "aug_prob": 1.0}, + spatial_aug_params={"enabled": True, "aug_prob": 1.0, "h_flip_prob": 1.0}, ) _ = augmentor(img1, img2, flow, valid) augmentor = SparseFlowAugmentor( crop_size=(224, 224), - color_aug_params={"aug_prob": 0.0}, - eraser_aug_params={"aug_prob": 0.0}, - spatial_aug_params={"aug_prob": 0.0, "h_flip_prob": 0.0}, + color_aug_params={"enabled": True, "asymmetric_color_aug_prob": 0.0}, + 
eraser_aug_params={"enabled": True, "aug_prob": 0.0}, + spatial_aug_params={"enabled": True, "aug_prob": 0.0, "h_flip_prob": 0.0}, ) _ = augmentor(img1, img2, flow, valid) @@ -104,61 +148,40 @@ def test_SequenceLoss(): def test_MultiScaleLoss(): - loss_fn = MultiScaleLoss() + loss_fn = MultiScaleLoss(norm="l1") _ = loss_fn(flow_pred, flow_gt) del loss_fn valid_mask = torch.randn(4, 1, 256, 256) flow_target = torch.cat([flow_gt, valid_mask], dim=1) - loss_fn = MultiScaleLoss() + loss_fn = MultiScaleLoss(norm="l1") _ = loss_fn(flow_pred, flow_target) del loss_fn + loss_fn = MultiScaleLoss(norm="l2") + _ = loss_fn(flow_pred, flow_gt) + del loss_fn -def test_Augmentor(): - augmentor = FlowAugmentor( - crop_size=(224, 224), - color_aug_params={"aug_prob": 1.0}, - eraser_aug_params={"aug_prob": 1.0}, - spatial_aug_params={ - "aug_prob": 1.0, - "h_flip_prob": 1.0, - "v_flip_prob": 1.0, - "stretch_prob": 1.0, - }, - translate_params={ - "aug_prob": 1.0, - "translate": 20, - }, - rotate_params={ - "aug_prob": 1.0, - "degrees": 20, - "delta": 5, - }, - ) - _ = augmentor(img1, img2, flow) + loss_fn = MultiScaleLoss(norm="robust") + _ = loss_fn(flow_pred, flow_gt) + del loss_fn - augmentor = FlowAugmentor( - crop_size=(224, 224), - color_aug_params={"aug_prob": 0.0}, - eraser_aug_params={"aug_prob": 0.0}, - spatial_aug_params={ - "aug_prob": 0.0, - "h_flip_prob": 0.0, - "v_flip_prob": 0.0, - "stretch_prob": 0.0, - }, - translate_params={ - "aug_prob": 0.0, - }, - rotate_params={ - "aug_prob": 0.0, - }, - ) - _ = augmentor(img1, img2, flow) + loss_fn = MultiScaleLoss(resize_flow="upsample") + _ = loss_fn(flow_pred, flow_gt) + del loss_fn - del augmentor + loss_fn = MultiScaleLoss(resize_flow="downsample") + _ = loss_fn(flow_pred, flow_gt) + del loss_fn + + loss_fn = MultiScaleLoss(average="mean") + _ = loss_fn(flow_pred, flow_gt) + del loss_fn + + loss_fn = MultiScaleLoss(average="sum") + _ = loss_fn(flow_pred, flow_gt) + del loss_fn def test_normalize(): diff --git a/tests/test_models.py b/tests/test_models.py index cce6c876..7d3e5590 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -25,15 +25,17 @@ def test_Predictor(): def test_RAFT(): model = build_model("RAFT", "raft.yaml") - flow_preds = model(img1, img2) - assert isinstance(flow_preds, tuple) or isinstance(flow_preds, list) + output = model(img1, img2) + assert isinstance(output, dict) + assert isinstance(output["flow_preds"], tuple) or isinstance( + output["flow_preds"], list + ) model.eval() - _ = model(img1, img2, only_flow=False) - flow = model(img1, img2) - assert flow.shape == (2, 2, 256, 256) + output = model(img1, img2) + assert output["flow_upsampled"].shape == (2, 2, 256, 256) - del model, flow, flow_preds + del model, output _ = build_model("RAFT", default=True) @@ -41,14 +43,17 @@ def test_RAFT(): def test_DICL(): model = build_model("DICL", "dicl.yaml") - flow_preds = model(img1, img2) - assert isinstance(flow_preds, tuple) or isinstance(flow_preds, list) + output = model(img1, img2) + assert isinstance(output, dict) + assert isinstance(output["flow_preds"], tuple) or isinstance( + output["flow_preds"], list + ) model.eval() - flow = model(img1, img2) - assert flow.shape == (2, 2, 256, 256) + output = model(img1, img2) + assert output["flow_upsampled"].shape == (2, 2, 256, 256) - del model, flow, flow_preds + del model, output _ = build_model("DICL", default=True) @@ -56,14 +61,17 @@ def test_DICL(): def test_PWCNet(): model = build_model("PWCNet", "pwcnet.yaml") - flow_preds = model(img1, img2) - assert 
isinstance(flow_preds, tuple) or isinstance(flow_preds, list) + output = model(img1, img2) + assert isinstance(output, dict) + assert isinstance(output["flow_preds"], tuple) or isinstance( + output["flow_preds"], list + ) model.eval() - flow = model(img1, img2) - assert flow.shape == (2, 2, 256, 256) + output = model(img1, img2) + assert output["flow_upsampled"].shape == (2, 2, 256, 256) - del model, flow, flow_preds + del model, output _ = build_model("PWCNet", default=True) @@ -71,14 +79,17 @@ def test_PWCNet(): def test_FlowNetS(): model = build_model("FlowNetS", "flownet_s.yaml") - flow_preds = model(img1, img2) - assert isinstance(flow_preds, tuple) or isinstance(flow_preds, list) + output = model(img1, img2) + assert isinstance(output, dict) + assert isinstance(output["flow_preds"], tuple) or isinstance( + output["flow_preds"], list + ) model.eval() - flow = model(img1, img2) - assert flow.shape == (2, 2, 256, 256) + output = model(img1, img2) + assert output["flow_upsampled"].shape == (2, 2, 256, 256) - del model, flow, flow_preds + del model, output _ = build_model("FlowNetS", default=True) @@ -86,14 +97,17 @@ def test_FlowNetS(): def test_FlowNetC(): model = build_model("FlowNetC", "flownet_c.yaml") - flow_preds = model(img1, img2) - assert isinstance(flow_preds, tuple) or isinstance(flow_preds, list) + output = model(img1, img2) + assert isinstance(output, dict) + assert isinstance(output["flow_preds"], tuple) or isinstance( + output["flow_preds"], list + ) model.eval() - flow = model(img1, img2) - assert flow.shape == (2, 2, 256, 256) + output = model(img1, img2) + assert output["flow_upsampled"].shape == (2, 2, 256, 256) - del model, flow, flow_preds + del model, output _ = build_model("FlowNetC", default=True) @@ -104,12 +118,14 @@ def test_VCN(): img = torch.randn(16, 3, 256, 256) - flow_preds = model(img, img) - assert isinstance(flow_preds, tuple) or isinstance(flow_preds, list) - assert flow_preds[0].shape == (16, 2, 256, 256) + output = model(img, img) + assert isinstance(output, dict) + assert isinstance(output["flow_preds"], tuple) or isinstance( + output["flow_preds"], list + ) model.eval() - flow = model(img, img) - assert flow.shape == (16, 2, 256, 256) + output = model(img, img) + assert output["flow_upsampled"].shape == (16, 2, 256, 256) - del model, flow, flow_preds + del model, output diff --git a/tests/utils/mock_model.py b/tests/utils/mock_model.py index 70203126..77dd2f9a 100644 --- a/tests/utils/mock_model.py +++ b/tests/utils/mock_model.py @@ -1,4 +1,5 @@ import torch +import torch.nn.functional as F from torch import nn from ezflow.modules import BaseModule @@ -13,5 +14,10 @@ def __init__(self, img_channels): def forward(self, img1, img2): x = torch.cat([img1, img2], dim=-3) + mock_flow_prediction = self.model(x) - return self.model(x) + flow_up = F.interpolate( + mock_flow_prediction, img1.shape[-2:], mode="bilinear", align_corners=True + ) + output = {"flow_preds": [mock_flow_prediction], "flow_upsampled": flow_up} + return output diff --git a/tools/train.py b/tools/train.py index db727d1c..0af63ca7 100644 --- a/tools/train.py +++ b/tools/train.py @@ -1,5 +1,5 @@ from ezflow.data import DataloaderCreator -from ezflow.engine import DistributedTrainer, Trainer, get_training_cfg +from ezflow.engine import DistributedTrainer, Trainer, get_cfg from ezflow.models import build_model @@ -7,32 +7,70 @@ def main(args): # Load training configuration - cfg = get_training_cfg(args.train_cfg) + cfg = get_cfg(args.train_cfg) if args.device: cfg.DEVICE = args.device + 
cfg.DATA.TRAIN_DATASET.ROOT_DIR = args.train_data_dir + cfg.DATA.VAL_DATASET.ROOT_DIR = args.val_data_dir + + if args.n_steps is not None: + cfg.NUM_STEPS = args.n_steps + + if cfg.SCHEDULER.NAME == "OneCycleLR": + cfg.SCHEDULER.PARAMS.total_steps = cfg.NUM_STEPS + # Create dataloaders - aug_params = None + train_aug_params = None + val_aug_params = None if cfg.DATA.AUGMENTATION.USE and cfg.DATA.AUGMENTATION.PARAMS: - aug_params = cfg.DATA.AUGMENTATION.PARAMS.to_dict() + train_aug_params = cfg.DATA.AUGMENTATION.PARAMS.TRAINING.to_dict() + val_aug_params = cfg.DATA.AUGMENTATION.PARAMS.VALIDATION.to_dict() train_loader_creator = DataloaderCreator( - cfg.DATA.BATCH_SIZE, num_workers=cfg.NUM_WORKERS - ) - train_loader_creator.add_FlyingChairs( - root_dir=args.data_dir, augment=cfg.DATA.AUGMENTATION.USE, aug_params=aug_params + batch_size=cfg.DATA.BATCH_SIZE, + num_workers=cfg.DATA.NUM_WORKERS, + pin_memory=cfg.DATA.PIN_MEMORY, + distributed=cfg.DISTRIBUTED.USE, + world_size=cfg.DISTRIBUTED.WORLD_SIZE, + append_valid_mask=cfg.DATA.APPEND_VALID_MASK, + shuffle=cfg.DATA.SHUFFLE, ) val_loader_creator = DataloaderCreator( - cfg.DATA.BATCH_SIZE, num_workers=cfg.NUM_WORKERS + batch_size=cfg.DATA.BATCH_SIZE, + num_workers=cfg.DATA.NUM_WORKERS, + pin_memory=cfg.DATA.PIN_MEMORY, + distributed=cfg.DISTRIBUTED.USE, + world_size=cfg.DISTRIBUTED.WORLD_SIZE, + append_valid_mask=cfg.DATA.APPEND_VALID_MASK, + shuffle=cfg.DATA.SHUFFLE, ) - val_loader_creator.add_FlyingChairs( - root_dir=args.data_dir, - split="validation", + + # TODO: Create a Dataloader Registry + train_loader_creator.add_FlyingChairs( + root_dir=cfg.DATA.TRAIN_DATASET.ROOT_DIR, + crop=True, + crop_type="random", + crop_size=cfg.DATA.TRAIN_CROP_SIZE, augment=cfg.DATA.AUGMENTATION.USE, - aug_params=aug_params, + aug_params=train_aug_params, + norm_params=cfg.DATA.NORM_PARAMS, + ) + + val_loader_creator.add_FlyingChairs( + root_dir=cfg.DATA.VAL_DATASET.ROOT_DIR, + split="validation", + crop=True, + crop_type="center", + crop_size=cfg.DATA.VAL_CROP_SIZE, + augment=cfg.DATA.AUGMENTATION.USE, + aug_params=val_aug_params, + norm_params=cfg.DATA.NORM_PARAMS, ) # Build model @@ -40,8 +78,7 @@ def main(args): model = build_model(args.model, default=True) # Create trainer - - if training_cfg.DISTRIBUTED.USE is True: + if cfg.DISTRIBUTED.USE is True: trainer = DistributedTrainer( cfg, model, @@ -74,32 +111,29 @@ help="Path to the training configuration file", ) parser.add_argument( - "--data_dir", type=str, required=True, help="Path to the root data directory" + "--train_data_dir", + type=str, + required=True, + help="Path to the root training data directory", + ) + parser.add_argument( + "--val_data_dir", + type=str, + required=True, + help="Path to the root validation data directory", ) parser.add_argument( "--model", type=str, required=True, help="Name of the model to train" ) parser.add_argument( - "--n_epochs", type=int, default=None, help="Number of epochs to train" + "--n_steps", type=int, default=None, help="Number of iterations to train" ) parser.add_argument( "--device", type=str, - default=None, + default="0", help="Device(s) to train on separated by commas. -1 for CPU", ) - parser.add_argument( - "--distributed", - type=bool, - default=False, - help="Whether to do distributed training", - ) - parser.add_argument( - "--distributed_backend", - type=str, - default="nccl", - help="Backend to use for distributed computing", - ) args = parser.parse_args()
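With this change every model's forward pass returns a dictionary rather than a bare flow tensor: `"flow_preds"` always holds the intermediate (per-scale or per-iteration) predictions, and `"flow_upsampled"` is added only in eval mode. A minimal usage sketch mirroring the updated tests in `tests/test_models.py` (the input shapes and the `default=True` config are illustrative assumptions):

```python
import torch

from ezflow.models import build_model

img1 = torch.randn(2, 3, 256, 256)
img2 = torch.randn(2, 3, 256, 256)

model = build_model("RAFT", default=True)

# Training mode: only the intermediate predictions are returned.
output = model(img1, img2)
flow_preds = output["flow_preds"]  # list/tuple of per-iteration flow estimates

# Eval mode: the full-resolution flow is returned as well.
model.eval()
with torch.no_grad():
    output = model(img1, img2)
flow = output["flow_upsampled"]  # (2, 2, 256, 256) in the tests
```

Code that previously consumed the bare tensor in eval mode now has to read `output["flow_upsampled"]`, which is exactly the adjustment made to `ezflow/models/predictor.py` above.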
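The reworked `MultiScaleLoss` exposes the robust-norm parameters (`q`, `eps`), an `average` mode, and a `resize_flow` mode. A sketch of how the new options combine, assuming the loss is importable from `ezflow.functional` as the tests use it and that the predictions form a multi-scale pyramid (shapes are illustrative):

```python
import torch

from ezflow.functional import MultiScaleLoss  # import path assumed from the tests

flow_gt = torch.randn(4, 2, 256, 256)
flow_preds = [torch.randn(4, 2, 256 // 2 ** i, 256 // 2 ** i) for i in range(3)]

# "robust" applies (|error|_1 + eps) ** q per pixel, "downsample" pools the ground
# truth to each prediction's resolution (instead of upsampling the prediction),
# and average="sum" divides the summed EPE map by the batch size.
loss_fn = MultiScaleLoss(
    norm="robust",
    q=0.4,
    eps=1e-2,
    weights=(1, 0.5, 0.25),
    average="sum",
    resize_flow="downsample",
)
loss = loss_fn(flow_preds, flow_gt)
```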
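On the augmentation side, every parameter dictionary now carries an explicit `"enabled"` flag, and enabling the advanced spatial augmentation internally disables the simple spatial transform and the horizontal flip. A rough constructor sketch following the parameter sets exercised in `tests/test_functional.py` (the import path and image shapes are assumptions):

```python
import numpy as np

from ezflow.functional import FlowAugmentor  # import path assumed

img1 = np.random.randint(0, 255, (384, 512, 3), dtype=np.uint8)
img2 = np.random.randint(0, 255, (384, 512, 3), dtype=np.uint8)
flow = np.random.randn(384, 512, 2).astype(np.float32)

augmentor = FlowAugmentor(
    crop_size=(224, 224),
    color_aug_params={"enabled": True, "asymmetric_color_aug_prob": 0.2},
    eraser_aug_params={"enabled": True, "aug_prob": 0.5, "bounds": [50, 100]},
    noise_aug_params={"enabled": False},
    flip_aug_params={"enabled": True, "h_flip_prob": 0.5, "v_flip_prob": 0.1},
    spatial_aug_params={"enabled": True, "aug_prob": 0.8, "stretch_prob": 0.8},
    # Set "enabled": True here to use the VCN/Autoflow-style transforms instead.
    advanced_spatial_aug_params={"enabled": False},
)

img1_aug, img2_aug, flow_aug = augmentor(img1, img2, flow)
```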
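Finally, a note on the FlowNet-C correlation change: the spatial correlation sampler produces a 5-D cost volume whose patch dimensions have to be folded into channels before the LeakyReLU, which is what the added `view` in `flownet_c.py` does. A small sketch under that assumption (the output layout, `padding=0`, and the feature shapes are assumptions; the constructor arguments mirror the diff):

```python
import torch

from ezflow.similarity import IterSpatialCorrelationSampler  # import path assumed

feat1 = torch.randn(1, 256, 32, 32)
feat2 = torch.randn(1, 256, 32, 32)

# MAX_DISPLACEMENT = 10 gives a 21 x 21 search window, i.e. 441 correlation
# channels, matching what the previous CorrelationLayer produced.
corr_layer = IterSpatialCorrelationSampler(
    kernel_size=1, patch_size=2 * 10 + 1, padding=0, dilation_patch=2
)

corr = corr_layer(feat1, feat2)  # assumed shape: (1, 21, 21, 32, 32)
corr = corr.view(corr.shape[0], -1, corr.shape[3], corr.shape[4])  # (1, 441, 32, 32)
```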