diff --git a/README.md b/README.md index 2461f75d..cbb8594c 100644 --- a/README.md +++ b/README.md @@ -56,6 +56,7 @@ pip install ezflow - [x] [FlyingChairs](https://lmb.informatik.uni-freiburg.de/resources/datasets/FlyingChairs.en.html#flyingchairs) - [x] [HD1K](http://hci-benchmark.iwr.uni-heidelberg.de/) - [x] [KITTI](http://www.cvlibs.net/datasets/kitti/eval_scene_flow.php?benchmark=flow) +- [x] [Kubric](https://github.com/google-research/kubric) - [x] [MPI Sintel](http://sintel.is.tue.mpg.de/) - [x] [SceneFlow Monkaa](https://lmb.informatik.uni-freiburg.de/resources/datasets/SceneFlowDatasets.en.html) - [x] [SceneFlow Driving](https://lmb.informatik.uni-freiburg.de/resources/datasets/SceneFlowDatasets.en.html) diff --git a/configs/models/flownet_c.yaml b/configs/models/flownet_c.yaml index 3849a4f6..895fcec8 100644 --- a/configs/models/flownet_c.yaml +++ b/configs/models/flownet_c.yaml @@ -5,11 +5,10 @@ ENCODER: CONFIG: [64, 128, 256, 256, 512, 512, 512, 512, 1024, 1024] NORM: batch SIMILARITY: - NAME: CorrelationLayer - PAD_SIZE: 10 + NAME: IterSpatialCorrelationSampler + PAD_SIZE: 0 MAX_DISPLACEMENT: 10 DECODER: NAME: FlowNetConvDecoder IN_CHANNELS: 1024 - CONFIG: [512, 256, 128, 64] -INTERPOLATE_FLOW: True + CONFIG: [512, 256, 128, 64] \ No newline at end of file diff --git a/configs/models/pwcnet.yaml b/configs/models/pwcnet.yaml index 5f16940b..4a90eb54 100644 --- a/configs/models/pwcnet.yaml +++ b/configs/models/pwcnet.yaml @@ -3,13 +3,11 @@ ENCODER: NAME: PyramidEncoder IN_CHANNELS: 3 CONFIG: [16, 32, 64, 96, 128, 196] -SIMILARITY: - NAME: CorrelationLayer - PAD_SIZE: 4 - MAX_DISPLACEMENT: 4 DECODER: - NAME: ConvDecoder + NAME: PyramidDecoder CONFIG: [128, 128, 96, 64, 32] TO_FLOW: True -INTERPOLATE_FLOW: True -FLOW_SCALE_FACTOR: 20.0 \ No newline at end of file + SIMILARITY: + PAD_SIZE: 0 + MAX_DISPLACEMENT: 4 + FLOW_SCALE_FACTOR: 20.0 \ No newline at end of file diff --git a/configs/models/raft.yaml b/configs/models/raft.yaml index c2602334..232d089c 100644 --- a/configs/models/raft.yaml +++ b/configs/models/raft.yaml @@ -6,7 +6,7 @@ ENCODER: OUT_CHANNELS: 256 NORM: instance P_DROPOUT: 0.0 - LAYER_CONFIG: [32, 64, 96] + LAYER_CONFIG: [64, 96, 128] INTERMEDIATE_FEATURES: False CONTEXT: NAME: BasicEncoder @@ -14,7 +14,7 @@ ENCODER: OUT_CHANNELS: 256 NORM: batch P_DROPOUT: 0.0 - LAYER_CONFIG: [32, 64, 96] + LAYER_CONFIG: [64, 96, 128] INTERMEDIATE_FEATURES: False HIDDEN_DIM: 128 CONTEXT_DIM: 128 diff --git a/configs/models/raft_small.yaml b/configs/models/raft_small.yaml index 0dcb6781..74784331 100644 --- a/configs/models/raft_small.yaml +++ b/configs/models/raft_small.yaml @@ -1,10 +1,9 @@ -_BASE_: "../base.yaml" -NAME: RAFT_SMALL +NAME: RAFT ENCODER: FEATURE: - NAME: BottleneckEncoder + NAME: BasicEncoder IN_CHANNELS: 3 - OUT_CHANNELS: 256 + OUT_CHANNELS: 128 NORM: instance P_DROPOUT: 0.0 LAYER_CONFIG: [32, 64, 96] @@ -12,11 +11,20 @@ ENCODER: CONTEXT: NAME: BasicEncoder IN_CHANNELS: 3 - OUT_CHANNELS: 256 + OUT_CHANNELS: 160 NORM: batch P_DROPOUT: 0.0 LAYER_CONFIG: [32, 64, 96] INTERMEDIATE_FEATURES: False HIDDEN_DIM: 96 CONTEXT_DIM: 64 -CORR_RADIUS: 3 +SIMILARITY: + NAME: MutliScalePairwise4DCorr + NUM_LEVELS: 4 +DECODER: + NAME: RecurrentLookupUpdateBlock + INPUT_DIM: 96 +CORR_RADIUS: 4 +CORR_LEVELS: 3 +MIXED_PRECISION: False +UPDATE_ITERS: 12 \ No newline at end of file diff --git a/configs/trainers/_base_/chairs_baseline.yaml b/configs/trainers/_base_/chairs_baseline.yaml new file mode 100644 index 00000000..1e2e88c5 --- /dev/null +++ 
b/configs/trainers/_base_/chairs_baseline.yaml @@ -0,0 +1,89 @@ +DATA: + TRAIN_DATASET: + NAME: "flyingchairs" + ROOT_DIR: "./Datasets/FlyingChairs_release/data" + VAL_DATASET: + NAME: "flyingchairs" + ROOT_DIR: "./Datasets/FlyingChairs_release/data" + NUM_WORKERS: 4 + PIN_MEMORY: True + APPEND_VALID_MASK: False + SHUFFLE: True + AUGMENTATION: + # Augmentation Settings borrowed from RAFT + USE: True + PARAMS: + TRAINING: + COLOR_AUG_PARAMS: { + "enabled": True, + "asymmetric_color_aug_prob": 0.2, + "brightness": 0.4, + "contrast": 0.4, + "saturation": 0.4, + "hue": 0.15915494309189535 + } + ERASER_AUG_PARAMS: { + "enabled": True, + "aug_prob": 0.5, + "bounds": [50, 100] + } + NOISE_AUG_PARAMS: { + "enabled": False, + "aug_prob": 0.5, + "noise_std_range": 0.06 + } + FLIP_AUG_PARAMS: { + "enabled": True, + "h_flip_prob": 0.5, + "v_flip_prob": 0.1 + } + SPATIAL_AUG_PARAMS: { + "enabled": True, + "aug_prob": 0.8, + "stretch_prob": 0.8, + "min_scale": -0.1, + "max_scale": 1.0, + "max_stretch": 0.2, + } + ADVANCED_SPATIAL_AUG_PARAMS: { + "enabled": False, + "scale1": 0.0, + "scale2": 0.0, + "stretch": 0.0, + "rotate": 0.0, + "translate": 0.0, + "enable_out_of_boundary_crop": False + } + VALIDATION: + SPATIAL_AUG_PARAMS: {"enabled": False} + COLOR_AUG_PARAMS: {"enabled": False} + ERASER_AUG_PARAMS: {"enabled": False} + FLIP_AUG_PARAMS: {"enabled": False} + ADVANCED_SPATIAL_AUG_PARAMS : {"enabled": False} +OPTIMIZER: + NAME: AdamW + LR: 0.0004 + PARAMS: + weight_decay: 0.0001 + betas: [0.9, 0.999] + eps: 1.e-08 + amsgrad: False +GRAD_CLIP: + USE: True + VALUE: 1.0 +FREEZE_BATCH_NORM: False +TARGET_SCALE_FACTOR: 1.0 +MIXED_PRECISION: False +DEVICE: "0" +DISTRIBUTED: + USE: False + WORLD_SIZE: 2 + BACKEND: nccl + MASTER_ADDR: localhost + MASTER_PORT: "12355" +EPOCHS: null +NUM_STEPS: null +RESUME_TRAINING: + CONSOLIDATED_CKPT: null + EPOCHS: null + START_EPOCH: null \ No newline at end of file diff --git a/configs/trainers/_base_/kubric_baseline.yaml b/configs/trainers/_base_/kubric_baseline.yaml new file mode 100644 index 00000000..0f849cca --- /dev/null +++ b/configs/trainers/_base_/kubric_baseline.yaml @@ -0,0 +1,87 @@ +DATA: + TRAIN_DATASET: + NAME: "kubric" + ROOT_DIR: "./Datasets/KubricFlow" + VAL_DATASET: + NAME: "kubric" + ROOT_DIR: "./Datasets/KubricFlow" + NUM_WORKERS: 4 + PIN_MEMORY: True + APPEND_VALID_MASK: False + SHUFFLE: True + AUGMENTATION: + USE: True + PARAMS: + TRAINING: + COLOR_AUG_PARAMS: { + "enabled": True, + "asymmetric_color_aug_prob": 0.2, + "brightness": 0.4, + "contrast": 0.4, + "saturation": 0.4, + "hue": 0.15915494309189535 + } + ERASER_AUG_PARAMS: { + "enabled": True, + "aug_prob": 0.5, + "bounds": [50, 100] + } + NOISE_AUG_PARAMS: { + "enabled": False, + "aug_prob": 0.5, + "noise_std_range": 0.06 + } + FLIP_AUG_PARAMS: { + "enabled": True, + "h_flip_prob": 0.5, + "v_flip_prob": 0.1 + } + SPATIAL_AUG_PARAMS: { + "enabled": True, + "aug_prob": 0.8, + "stretch_prob": 0.8, + "min_scale": -0.1, + "max_scale": 1.0, + "max_stretch": 0.2, + } + ADVANCED_SPATIAL_AUG_PARAMS: { + "enabled": False, + "scale1": 0.0, + "scale2": 0.0, + "stretch": 0.0, + "rotate": 0.0, + "translate": 0.0, + "enable_out_of_boundary_crop": False + } + VALIDATION: + SPATIAL_AUG_PARAMS: {"enabled": False} + COLOR_AUG_PARAMS: {"enabled": False} + ERASER_AUG_PARAMS: {"enabled": False} + FLIP_AUG_PARAMS: {"enabled": False} + ADVANCED_SPATIAL_AUG_PARAMS : {"enabled": False} +OPTIMIZER: + NAME: AdamW + LR: 0.0004 + PARAMS: + weight_decay: 0.0001 + betas: [0.9, 0.999] + eps: 1.e-08 + amsgrad: False 
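(Editor's note, not part of the patch: the OPTIMIZER block above, together with the GRAD_CLIP block that follows, corresponds roughly to the standard PyTorch calls sketched below; `model` is a stand-in placeholder.)

```python
import torch

# Placeholder model standing in for whatever ezflow model the trainer builds.
model = torch.nn.Conv2d(3, 2, kernel_size=3, padding=1)

# OPTIMIZER: AdamW with LR 0.0004 and the PARAMS listed above.
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=4e-4,
    weight_decay=1e-4,
    betas=(0.9, 0.999),
    eps=1e-8,
    amsgrad=False,
)

# GRAD_CLIP (USE: True, VALUE: 1.0): clip the gradient norm before each step.
loss = model(torch.randn(1, 3, 8, 8)).mean()
loss.backward()
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
optimizer.step()
```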
+GRAD_CLIP: + USE: True + VALUE: 1.0 +TARGET_SCALE_FACTOR: 1.0 +MIXED_PRECISION: False +DEVICE: "0" +DISTRIBUTED: + USE: False + WORLD_SIZE: 2 + BACKEND: nccl + MASTER_ADDR: localhost + MASTER_PORT: "12355" +EPOCHS: null +NUM_STEPS: null +RESUME_TRAINING: + CONSOLIDATED_CKPT: null + EPOCHS: null + START_EPOCH: null \ No newline at end of file diff --git a/configs/trainers/_base_/kubric_improved_aug.yaml b/configs/trainers/_base_/kubric_improved_aug.yaml new file mode 100644 index 00000000..ca03ffc5 --- /dev/null +++ b/configs/trainers/_base_/kubric_improved_aug.yaml @@ -0,0 +1,89 @@ +DATA: + TRAIN_DATASET: + NAME: "kubric" + ROOT_DIR: "./Datasets/KubricFlow" + VAL_DATASET: + NAME: "kubric" + ROOT_DIR: "./Datasets/KubricFlow" + NUM_WORKERS: 4 + PIN_MEMORY: True + APPEND_VALID_MASK: False + SHUFFLE: True + AUGMENTATION: + # Spatial Augmentation Settings borrowed from AutoFlow: https://github.com/google-research/opticalflow-autoflow/blob/main/src/dataset_lib/augmentations/aug_params.py + USE: True + PARAMS: + TRAINING: + COLOR_AUG_PARAMS: { + "enabled": True, + "asymmetric_color_aug_prob": 0.2, + "brightness": 0.4, + "contrast": 0.4, + "saturation": 0.4, + "hue": 0.15915494309189535 + } + ERASER_AUG_PARAMS: { + "enabled": True, + "aug_prob": 0.5, + "bounds": [50, 100] + } + NOISE_AUG_PARAMS: { + "enabled": False, + "aug_prob": 0.5, + "noise_std_range": 0.06 + } + FLIP_AUG_PARAMS: { + "enabled": True, + "h_flip_prob": 0.5, + "v_flip_prob": 0.1 + } + SPATIAL_AUG_PARAMS: { + "enabled": False, + "aug_prob": 0.0, + "stretch_prob": 0.0, + "min_scale": 0, + "max_scale": 0, + "max_stretch": 0 + } + ADVANCED_SPATIAL_AUG_PARAMS: { + "enabled": True, + "scale1": 0.3, + "scale2": 0.1, + "rotate": 0.4, + "translate": 0.4, + "stretch": 0.3, + "enable_out_of_boundary_crop": False + } + VALIDATION: + SPATIAL_AUG_PARAMS: {"enabled": False} + COLOR_AUG_PARAMS: {"enabled": False} + ERASER_AUG_PARAMS: {"enabled": False} + FLIP_AUG_PARAMS: {"enabled": False} + ADVANCED_SPATIAL_AUG_PARAMS : {"enabled": False} +OPTIMIZER: + NAME: AdamW + LR: 0.0004 + PARAMS: + weight_decay: 0.0001 + betas: [0.9, 0.999] + eps: 1.e-08 + amsgrad: False +GRAD_CLIP: + USE: True + VALUE: 1.0 +FREEZE_BATCH_NORM: False +TARGET_SCALE_FACTOR: 1.0 +MIXED_PRECISION: False +DEVICE: "0" +DISTRIBUTED: + USE: False + WORLD_SIZE: 2 + BACKEND: nccl + MASTER_ADDR: localhost + MASTER_PORT: "12355" +EPOCHS: null +NUM_STEPS: null +RESUME_TRAINING: + CONSOLIDATED_CKPT: null + EPOCHS: null + START_EPOCH: null \ No newline at end of file diff --git a/configs/trainers/_base_/things_baseline.yaml b/configs/trainers/_base_/things_baseline.yaml new file mode 100644 index 00000000..690726bd --- /dev/null +++ b/configs/trainers/_base_/things_baseline.yaml @@ -0,0 +1,89 @@ +DATA: + TRAIN_DATASET: + NAME: "flyingthings3d" + ROOT_DIR: "./Datasets/SceneFlow/FlyingThings3D" + VAL_DATASET: + NAME: "flyingthings3d" + ROOT_DIR: "./Datasets/SceneFlow/FlyingThings3D" + NUM_WORKERS: 4 + PIN_MEMORY: True + APPEND_VALID_MASK: False + SHUFFLE: True + AUGMENTATION: + # Augmentation Settings borrowed from RAFT + USE: True + PARAMS: + TRAINING: + COLOR_AUG_PARAMS: { + "enabled": True, + "asymmetric_color_aug_prob": 0.2, + "brightness": 0.4, + "contrast": 0.4, + "saturation": 0.4, + "hue": 0.15915494309189535 + } + ERASER_AUG_PARAMS: { + "enabled": True, + "aug_prob": 0.5, + "bounds": [50, 100] + } + NOISE_AUG_PARAMS: { + "enabled": False, + "aug_prob": 0.5, + "noise_std_range": 0.06 + } + FLIP_AUG_PARAMS: { + "enabled": True, + "h_flip_prob": 0.5, + "v_flip_prob": 0.1 + } + 
SPATIAL_AUG_PARAMS: { + "enabled": True, + "aug_prob": 0.8, + "stretch_prob": 0.8, + "min_scale": -0.4, + "max_scale": 0.8, + "max_stretch": 0.2, + } + ADVANCED_SPATIAL_AUG_PARAMS: { + "enabled": False, + "scale1": 0.0, + "scale2": 0.0, + "stretch": 0.0, + "rotate": 0.0, + "translate": 0.0, + "enable_out_of_boundary_crop": False + } + VALIDATION: + SPATIAL_AUG_PARAMS: {"enabled": False} + COLOR_AUG_PARAMS: {"enabled": False} + ERASER_AUG_PARAMS: {"enabled": False} + FLIP_AUG_PARAMS: {"enabled": False} + ADVANCED_SPATIAL_AUG_PARAMS : {"enabled": False} +OPTIMIZER: + NAME: AdamW + LR: 0.000125 + PARAMS: + weight_decay: 0.0001 + betas: [0.9, 0.999] + eps: 1.e-08 + amsgrad: False +GRAD_CLIP: + USE: True + VALUE: 1.0 +FREEZE_BATCH_NORM: False +TARGET_SCALE_FACTOR: 1.0 +MIXED_PRECISION: False +DEVICE: "0" +DISTRIBUTED: + USE: False + WORLD_SIZE: 2 + BACKEND: nccl + MASTER_ADDR: localhost + MASTER_PORT: "12355" +EPOCHS: null +NUM_STEPS: null +RESUME_TRAINING: + CONSOLIDATED_CKPT: null + EPOCHS: null + START_EPOCH: null \ No newline at end of file diff --git a/configs/trainers/base.yaml b/configs/trainers/base.yaml deleted file mode 100644 index f99e7ca4..00000000 --- a/configs/trainers/base.yaml +++ /dev/null @@ -1,68 +0,0 @@ -DATA: - TRAIN_DATASET: - NAME: "flyingchairs" - ROOT_DIR: "../../../Datasets/FlyingChairs_release/data" - VAL_DATASET: - NAME: "flyingchairs" - ROOT_DIR: "../../../Datasets/FlyingChairs_release/data" - BATCH_SIZE: 16 - NUM_WORKERS: 1 - PIN_MEMORY: True - TRAIN_CROP_SIZE: [256, 256] - VAL_CROP_SIZE: [256, 256] - AUGMENTATION: - USE: True - PARAMS: - CROP_SIZE: [256, 256] - TRAINING: - SPATIAL_AUG_PARAMS: {"min_scale": -0.1, "max_scale": 1.0, "flip": True} - VALIDATION: - SPATIAL_AUG_PARAMS: {"aug_prob": 0.0} - COLOR_AUG_PARAMS: {"aug_prob": 0.0} - ERASER_AUG_PARAMS: {"aug_prob": 0.0} -OPTIMIZER: - NAME: AdamW - LR: 0.0004 - PARAMS: - weight_decay: 0.0001 - eps: 1.e-08 -SCHEDULER: - USE: True - NAME: OneCycleLR - PARAMS: - max_lr: 0.0004 - epochs: 100 - steps_per_epoch: 2300 - pct_start: 0.05 - cycle_momentum: False - anneal_strategy: linear -CRITERION: - CUSTOM: True - NAME: MultiScaleLoss - PARAMS: - weights: [1, 0.5, 0.25, 0.125, 0.0625] -GRAD_CLIP: - USE: True - VALUE: 1.0 -TARGET_SCALE_FACTOR: 1 -APPEND_VALID_MASK: False -MIXED_PRECISION: False -DEVICE: "0" -DISTRIBUTED: - USE: False - WORLD_SIZE: 2 - BACKEND: nccl - MASTER_ADDR: localhost - MASTER_PORT: "12355" -LOG_DIR: "./logs" -LOG_ITERATIONS_INTERVAL: 100 -CKPT_DIR: "./ckpts" -CKPT_INTERVAL: 1 -VALIDATE_INTERVAL: 1 -VALIDATE_ON: metric -EPOCHS: 100 -NUM_STEPS: null -RESUME_TRAINING: - CONSOLIDATED_CKPT: null - EPOCHS: 100 - START_EPOCH: null \ No newline at end of file diff --git a/configs/trainers/dicl_default.yaml b/configs/trainers/dicl_default.yaml deleted file mode 100644 index df1b5e76..00000000 --- a/configs/trainers/dicl_default.yaml +++ /dev/null @@ -1,5 +0,0 @@ -_BASE_: "./base.yaml" -CRITERION: - NAME: "MultiScaleLoss" - PARAMS: null - \ No newline at end of file diff --git a/configs/trainers/flownetc/flownetc_chairs_baseline.yaml b/configs/trainers/flownetc/flownetc_chairs_baseline.yaml new file mode 100644 index 00000000..efa87fa3 --- /dev/null +++ b/configs/trainers/flownetc/flownetc_chairs_baseline.yaml @@ -0,0 +1,41 @@ +_BASE_: "../_base_/chairs_baseline.yaml" +TARGET_SCALE_FACTOR: 20.0 +DATA: + BATCH_SIZE: 2 # Effective Batch Size = 2 x 4 GPUs = 8 + TRAIN_CROP_SIZE: [384, 448] + VAL_CROP_SIZE: [384, 448] + APPEND_VALID_MASK: False + NORM_PARAMS: {"use": True, "mean":[0.0, 0.0, 0.0], "std":[255.0, 
255.0, 255.0]} +SCHEDULER: + USE: True + NAME: OneCycleLR + PARAMS: + max_lr: 0.0004 + total_steps: 1200100 + pct_start: 0.05 + cycle_momentum: False + anneal_strategy: linear +CRITERION: + CUSTOM: True + NAME: MultiScaleLoss + PARAMS: + norm: "l2" + weights: [0.32, 0.08, 0.02, 0.01, 0.005] + average: "sum" + resize_flow: "downsample" +DEVICE: "all" +DISTRIBUTED: + USE: True + WORLD_SIZE: 4 + BACKEND: nccl + MASTER_ADDR: localhost + MASTER_PORT: "12355" + SYNC_BATCH_NORM: True +EPOCHS: null +NUM_STEPS: 1200100 +LOG_DIR: "./logs" +CKPT_DIR: "./ckpts" +LOG_ITERATIONS_INTERVAL: 100 +CKPT_INTERVAL: 100000 +VALIDATE_INTERVAL: 10000 +VALIDATE_ON: metric \ No newline at end of file diff --git a/configs/trainers/flownetc/flownetc_kubric_improved_aug.yaml b/configs/trainers/flownetc/flownetc_kubric_improved_aug.yaml new file mode 100644 index 00000000..32817e1f --- /dev/null +++ b/configs/trainers/flownetc/flownetc_kubric_improved_aug.yaml @@ -0,0 +1,41 @@ +_BASE_: "../_base_/kubric_improved_aug.yaml" +TARGET_SCALE_FACTOR: 20.0 +DATA: + BATCH_SIZE: 2 # Effective Batch Size = 2 x 4 GPUs = 8 + TRAIN_CROP_SIZE: [384, 448] + VAL_CROP_SIZE: [384, 448] + APPEND_VALID_MASK: False + NORM_PARAMS: {"use": True, "mean":[0.0, 0.0, 0.0], "std":[255.0, 255.0, 255.0]} +SCHEDULER: + USE: True + NAME: OneCycleLR + PARAMS: + max_lr: 0.0004 + total_steps: 1200100 + pct_start: 0.05 + cycle_momentum: False + anneal_strategy: linear +CRITERION: + CUSTOM: True + NAME: MultiScaleLoss + PARAMS: + norm: "l2" + weights: [0.32, 0.08, 0.02, 0.01, 0.005] + average: "sum" + resize_flow: "downsample" +DEVICE: "all" +DISTRIBUTED: + USE: True + WORLD_SIZE: 4 + BACKEND: nccl + MASTER_ADDR: localhost + MASTER_PORT: "12355" + SYNC_BATCH_NORM: True +EPOCHS: null +NUM_STEPS: 1200100 +LOG_DIR: "./logs" +CKPT_DIR: "./ckpts" +LOG_ITERATIONS_INTERVAL: 100 +CKPT_INTERVAL: 100000 +VALIDATE_INTERVAL: 10000 +VALIDATE_ON: metric \ No newline at end of file diff --git a/configs/trainers/flownetc/flownetc_things_baseline.yaml b/configs/trainers/flownetc/flownetc_things_baseline.yaml new file mode 100644 index 00000000..f4cbe016 --- /dev/null +++ b/configs/trainers/flownetc/flownetc_things_baseline.yaml @@ -0,0 +1,43 @@ +_BASE_: "../_base_/things_baseline.yaml" +TARGET_SCALE_FACTOR: 20.0 +DATA: + BATCH_SIZE: 2 # Effective Batch Size = 2 x 2 GPUs = 4 + TRAIN_CROP_SIZE: [384, 768] + VAL_CROP_SIZE: [384, 768] + APPEND_VALID_MASK: False + NORM_PARAMS: {"use": True, "mean":[0.0, 0.0, 0.0], "std":[255.0, 255.0, 255.0]} +SCHEDULER: + USE: True + NAME: OneCycleLR + PARAMS: + max_lr: 0.000125 + total_steps: 1200100 + pct_start: 0.05 + cycle_momentum: False + anneal_strategy: linear +CRITERION: + CUSTOM: True + NAME: MultiScaleLoss + PARAMS: + norm: "l2" + weights: [0.32, 0.08, 0.02, 0.01, 0.005] + average: "sum" + resize_flow: "downsample" + use_valid_range: True + valid_range: [[1000,1000],[1000,1000],[1000,1000],[1000,1000],[1000,1000]] +DEVICE: "all" +DISTRIBUTED: + USE: True + WORLD_SIZE: 2 + BACKEND: nccl + MASTER_ADDR: localhost + MASTER_PORT: "12355" + SYNC_BATCH_NORM: True +EPOCHS: null +NUM_STEPS: 1200100 +LOG_DIR: "./logs" +CKPT_DIR: "./ckpts" +LOG_ITERATIONS_INTERVAL: 100 +CKPT_INTERVAL: 100000 +VALIDATE_INTERVAL: 10000 +VALIDATE_ON: metric \ No newline at end of file diff --git a/configs/trainers/pwcnet/pwcnet_chairs_baseline.yaml b/configs/trainers/pwcnet/pwcnet_chairs_baseline.yaml new file mode 100644 index 00000000..1333a6d2 --- /dev/null +++ b/configs/trainers/pwcnet/pwcnet_chairs_baseline.yaml @@ -0,0 +1,33 @@ +_BASE_: 
"../_base_/chairs_baseline.yaml" +TARGET_SCALE_FACTOR: 20.0 +DATA: + BATCH_SIZE: 8 + TRAIN_CROP_SIZE: [384, 448] + VAL_CROP_SIZE: [384, 448] + APPEND_VALID_MASK: False + NORM_PARAMS: {"use": True, "mean":[0.0, 0.0, 0.0], "std":[255.0, 255.0, 255.0]} +SCHEDULER: + USE: True + NAME: OneCycleLR + PARAMS: + max_lr: 0.0004 + total_steps: 1200100 + pct_start: 0.05 + cycle_momentum: False + anneal_strategy: linear +CRITERION: + CUSTOM: True + NAME: MultiScaleLoss + PARAMS: + norm: "l2" + weights: [0.32, 0.08, 0.02, 0.01, 0.005] + average: "sum" + resize_flow: "downsample" +EPOCHS: null +NUM_STEPS: 1200100 +LOG_DIR: "./logs" +CKPT_DIR: "./ckpts" +LOG_ITERATIONS_INTERVAL: 100 +CKPT_INTERVAL: 100000 +VALIDATE_INTERVAL: 10000 +VALIDATE_ON: metric \ No newline at end of file diff --git a/configs/trainers/pwcnet/pwcnet_kubric_improved_aug.yaml b/configs/trainers/pwcnet/pwcnet_kubric_improved_aug.yaml new file mode 100644 index 00000000..f713504c --- /dev/null +++ b/configs/trainers/pwcnet/pwcnet_kubric_improved_aug.yaml @@ -0,0 +1,33 @@ +_BASE_: "../_base_/kubric_improved_aug.yaml" +TARGET_SCALE_FACTOR: 20.0 +DATA: + BATCH_SIZE: 8 + TRAIN_CROP_SIZE: [384, 448] + VAL_CROP_SIZE: [384, 448] + APPEND_VALID_MASK: False + NORM_PARAMS: {"use": True, "mean":[0.0, 0.0, 0.0], "std":[255.0, 255.0, 255.0]} +SCHEDULER: + USE: True + NAME: OneCycleLR + PARAMS: + max_lr: 0.0004 + total_steps: 1200100 + pct_start: 0.05 + cycle_momentum: False + anneal_strategy: linear +CRITERION: + CUSTOM: True + NAME: MultiScaleLoss + PARAMS: + norm: "l2" + weights: [0.32, 0.08, 0.02, 0.01, 0.005] + average: "sum" + resize_flow: "downsample" +EPOCHS: null +NUM_STEPS: 1200100 +LOG_DIR: "./logs" +CKPT_DIR: "./ckpts" +LOG_ITERATIONS_INTERVAL: 100 +CKPT_INTERVAL: 100000 +VALIDATE_INTERVAL: 10000 +VALIDATE_ON: metric \ No newline at end of file diff --git a/configs/trainers/pwcnet/pwcnet_things_baseline.yaml b/configs/trainers/pwcnet/pwcnet_things_baseline.yaml new file mode 100644 index 00000000..6a480b9e --- /dev/null +++ b/configs/trainers/pwcnet/pwcnet_things_baseline.yaml @@ -0,0 +1,35 @@ +_BASE_: "../_base_/things_baseline.yaml" +TARGET_SCALE_FACTOR: 20.0 +DATA: + BATCH_SIZE: 4 + TRAIN_CROP_SIZE: [384, 768] + VAL_CROP_SIZE: [384, 768] + APPEND_VALID_MASK: False + NORM_PARAMS: {"use": True, "mean":[0.0, 0.0, 0.0], "std":[255.0, 255.0, 255.0]} +SCHEDULER: + USE: True + NAME: OneCycleLR + PARAMS: + max_lr: 0.000125 + total_steps: 1200100 + pct_start: 0.05 + cycle_momentum: False + anneal_strategy: linear +CRITERION: + CUSTOM: True + NAME: MultiScaleLoss + PARAMS: + norm: "l2" + weights: [0.32, 0.08, 0.02, 0.01, 0.005] + average: "sum" + resize_flow: "downsample" + use_valid_range: True + valid_range: [[1000,1000],[1000,1000],[1000,1000],[1000,1000],[1000,1000]] +EPOCHS: null +NUM_STEPS: 1200100 +LOG_DIR: "./logs" +CKPT_DIR: "./ckpts" +LOG_ITERATIONS_INTERVAL: 100 +CKPT_INTERVAL: 100000 +VALIDATE_INTERVAL: 10000 +VALIDATE_ON: metric \ No newline at end of file diff --git a/configs/trainers/raft/raft_chairs_baseline.yaml b/configs/trainers/raft/raft_chairs_baseline.yaml new file mode 100644 index 00000000..7f1a192a --- /dev/null +++ b/configs/trainers/raft/raft_chairs_baseline.yaml @@ -0,0 +1,30 @@ +_BASE_: "../_base_/chairs_baseline.yaml" +DATA: + BATCH_SIZE: 10 + TRAIN_CROP_SIZE: [368, 496] + VAL_CROP_SIZE: [368, 496] + APPEND_VALID_MASK: True + NORM_PARAMS: {"use": True, "mean":[127.5, 127.5, 127.5], "std":[127.5, 127.5, 127.5]} +SCHEDULER: + USE: True + NAME: OneCycleLR + PARAMS: + max_lr: 0.0004 + total_steps: 100100 + pct_start: 
0.05 + cycle_momentum: False + anneal_strategy: linear +CRITERION: + CUSTOM: True + NAME: SequenceLoss + PARAMS: + gamma: 0.8 + max_flow: 400.0 +EPOCHS: null +NUM_STEPS: 100100 +LOG_DIR: "./logs" +CKPT_DIR: "./ckpts" +LOG_ITERATIONS_INTERVAL: 100 +CKPT_INTERVAL: 20000 +VALIDATE_INTERVAL: 1000 +VALIDATE_ON: metric \ No newline at end of file diff --git a/configs/trainers/raft/raft_kubric_improved_aug.yaml b/configs/trainers/raft/raft_kubric_improved_aug.yaml new file mode 100644 index 00000000..7fed4e24 --- /dev/null +++ b/configs/trainers/raft/raft_kubric_improved_aug.yaml @@ -0,0 +1,30 @@ +_BASE_: "../_base_/kubric_improved_aug.yaml" +DATA: + BATCH_SIZE: 10 + TRAIN_CROP_SIZE: [368, 496] + VAL_CROP_SIZE: [368, 496] + APPEND_VALID_MASK: True + NORM_PARAMS: {"use": True, "mean":[127.5, 127.5, 127.5], "std":[127.5, 127.5, 127.5]} +SCHEDULER: + USE: True + NAME: OneCycleLR + PARAMS: + max_lr: 0.0004 + total_steps: 100100 + pct_start: 0.05 + cycle_momentum: False + anneal_strategy: linear +CRITERION: + CUSTOM: True + NAME: SequenceLoss + PARAMS: + gamma: 0.8 + max_flow: 400.0 +EPOCHS: null +NUM_STEPS: 100100 +LOG_DIR: "./logs" +CKPT_DIR: "./ckpts" +LOG_ITERATIONS_INTERVAL: 100 +CKPT_INTERVAL: 20000 +VALIDATE_INTERVAL: 1000 +VALIDATE_ON: metric \ No newline at end of file diff --git a/configs/trainers/raft/raft_things_baseline.yaml b/configs/trainers/raft/raft_things_baseline.yaml new file mode 100644 index 00000000..b0e77f9a --- /dev/null +++ b/configs/trainers/raft/raft_things_baseline.yaml @@ -0,0 +1,31 @@ +_BASE_: "../_base_/things_baseline.yaml" +FREEZE_BATCH_NORM: True +DATA: + BATCH_SIZE: 6 + TRAIN_CROP_SIZE: [400, 720] + VAL_CROP_SIZE: [400, 720] + APPEND_VALID_MASK: True + NORM_PARAMS: {"use": True, "mean":[127.5, 127.5, 127.5], "std":[127.5, 127.5, 127.5]} +SCHEDULER: + USE: True + NAME: OneCycleLR + PARAMS: + max_lr: 0.000125 + total_steps: 100100 + pct_start: 0.05 + cycle_momentum: False + anneal_strategy: linear +CRITERION: + CUSTOM: True + NAME: SequenceLoss + PARAMS: + gamma: 0.8 + max_flow: 400.0 +EPOCHS: null +NUM_STEPS: 100100 +LOG_DIR: "./logs" +CKPT_DIR: "./ckpts" +LOG_ITERATIONS_INTERVAL: 100 +CKPT_INTERVAL: 20000 +VALIDATE_INTERVAL: 1000 +VALIDATE_ON: metric \ No newline at end of file diff --git a/configs/trainers/raft_default.yaml b/configs/trainers/raft_default.yaml deleted file mode 100644 index e41951f4..00000000 --- a/configs/trainers/raft_default.yaml +++ /dev/null @@ -1,10 +0,0 @@ -_BASE_: "./base.yaml" -CRITERION: - NAME: SequenceLoss - PARAMS: null -OPTIMIZER: - NAME: AdamW - LR: 0.0004 - PARAMS: - weight_decay: 0.0001 - eps: 1.e-08 \ No newline at end of file diff --git a/ezflow/data/dataloader/dataloader_creator.py b/ezflow/data/dataloader/dataloader_creator.py index 14162567..3234b317 100644 --- a/ezflow/data/dataloader/dataloader_creator.py +++ b/ezflow/data/dataloader/dataloader_creator.py @@ -359,6 +359,33 @@ def add_AutoFlow(self, root_dir, augment=False, **kwargs): ) ) + def add_Kubric(self, root_dir, split="training", augment=False, **kwargs): + """ + Adds the Kubric dataset to the DataloaderCreator object. 
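(Editor's note, not part of the patch: a minimal usage sketch of this method. The `add_Kubric` arguments match the signature in this diff; the import path and the `DataloaderCreator` constructor arguments are assumptions based on the module layout and the config keys above.)

```python
from ezflow.data import DataloaderCreator

# Build a training dataloader that includes the Kubric dataset.
loader_creator = DataloaderCreator(batch_size=8, num_workers=4, pin_memory=True)
loader_creator.add_Kubric(
    root_dir="./Datasets/KubricFlow",  # path used by the Kubric baseline configs
    split="training",
    augment=False,
)
train_loader = loader_creator.get_dataloader()
```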
+ + Parameters + ---------- + root_dir : str + path of the root directory for the Monkaa dataset in SceneFlow + augment : bool, default : True + If True, applies data augmentation + **kwargs + Arbitrary keyword arguments for augmentation + specifying crop_size and the probability of + color, eraser and spatial transformation + """ + self.dataset_list.append( + Kubric( + root_dir, + split=split, + init_seed=self.init_seed, + is_prediction=self.is_prediction, + append_valid_mask=self.append_valid_mask, + augment=augment, + **kwargs, + ) + ) + def get_dataloader(self, rank=0): """ Gets the Dataloader for the added datasets. diff --git a/ezflow/data/dataset/__init__.py b/ezflow/data/dataset/__init__.py index 98b0311d..95c809d8 100644 --- a/ezflow/data/dataset/__init__.py +++ b/ezflow/data/dataset/__init__.py @@ -5,5 +5,6 @@ from .flying_things3d import FlyingThings3D, FlyingThings3DSubset from .hd1k import HD1K from .kitti import Kitti +from .kubric import Kubric from .monkaa import Monkaa from .mpi_sintel import MPISintel diff --git a/ezflow/data/dataset/autoflow.py b/ezflow/data/dataset/autoflow.py index 306d4b85..003c912f 100644 --- a/ezflow/data/dataset/autoflow.py +++ b/ezflow/data/dataset/autoflow.py @@ -29,6 +29,8 @@ class AutoFlow(BaseDataset): If True, applies data augmentation aug_params : :obj:`dict`, optional The parameters for data augmentation + norm_params : :obj:`dict`, optional + The parameters for normalization """ def __init__( @@ -42,11 +44,12 @@ def __init__( crop_type="center", augment=True, aug_params={ - "color_aug_params": {"aug_prob": 0.2}, - "eraser_aug_params": {"aug_prob": 0.5}, - "spatial_aug_params": {"aug_prob": 0.8}, - "translate_params": {"aug_prob": 0.8}, - "rotate_params": {"aug_prob": 0.8}, + "eraser_aug_params": {"enabled": False}, + "noise_aug_params": {"enabled": False}, + "flip_aug_params": {"enabled": False}, + "color_aug_params": {"enabled": False}, + "spatial_aug_params": {"enabled": False}, + "advanced_spatial_aug_params": {"enabled": False}, }, norm_params={"use": False}, ): diff --git a/ezflow/data/dataset/base_dataset.py b/ezflow/data/dataset/base_dataset.py index 2b19997a..4782634b 100644 --- a/ezflow/data/dataset/base_dataset.py +++ b/ezflow/data/dataset/base_dataset.py @@ -31,7 +31,8 @@ class BaseDataset(data.Dataset): If True, applies data augmentation aug_params : :obj:`dict` The parameters for data augmentation - + norm_params : :obj:`dict`, optional + The parameters for normalization """ def __init__( @@ -44,11 +45,12 @@ def __init__( crop_type="center", augment=True, aug_params={ - "color_aug_params": {"aug_prob": 0.2}, - "eraser_aug_params": {"aug_prob": 0.5}, - "spatial_aug_params": {"aug_prob": 0.8}, - "translate_params": {"aug_prob": 0.8}, - "rotate_params": {"aug_prob": 0.8}, + "eraser_aug_params": {"enabled": False}, + "noise_aug_params": {"enabled": False}, + "flip_aug_params": {"enabled": False}, + "color_aug_params": {"enabled": False}, + "spatial_aug_params": {"enabled": False}, + "advanced_spatial_aug_params": {"enabled": False}, }, sparse_transform=False, norm_params={"use": False}, @@ -100,9 +102,7 @@ def __getitem__(self, index): img1 = read_image(self.image_list[index][0]) img2 = read_image(self.image_list[index][1]) - flow, valid = read_flow(self.flow_list[index]) - flow = np.array(flow).astype(np.float32) img1 = np.array(img1).astype(np.uint8) img2 = np.array(img2).astype(np.uint8) @@ -122,6 +122,9 @@ def __getitem__(self, index): return img1, img2 + flow, valid = read_flow(self.flow_list[index]) + flow = 
np.array(flow).astype(np.float32) + if self.augment is True and self.augmentor is not None: img1, img2, flow, valid = self.augmentor(img1, img2, flow, valid) diff --git a/ezflow/data/dataset/driving.py b/ezflow/data/dataset/driving.py index 4899e345..3f47ae0f 100644 --- a/ezflow/data/dataset/driving.py +++ b/ezflow/data/dataset/driving.py @@ -29,6 +29,8 @@ class Driving(BaseDataset): If True, applies data augmentation aug_params : :obj:`dict`, optional The parameters for data augmentation + norm_params : :obj:`dict`, optional + The parameters for normalization """ def __init__( diff --git a/ezflow/data/dataset/flying_chairs.py b/ezflow/data/dataset/flying_chairs.py index ae350a0b..955442ad 100644 --- a/ezflow/data/dataset/flying_chairs.py +++ b/ezflow/data/dataset/flying_chairs.py @@ -33,7 +33,8 @@ class FlyingChairs(BaseDataset): If True, applies data augmentation aug_params : :obj:`dict`, optional The parameters for data augmentation - + norm_params : :obj:`dict`, optional + The parameters for normalization """ def __init__( @@ -48,11 +49,12 @@ def __init__( crop_type="center", augment=True, aug_params={ - "color_aug_params": {"aug_prob": 0.2}, - "eraser_aug_params": {"aug_prob": 0.5}, - "spatial_aug_params": {"aug_prob": 0.8}, - "translate_params": {"aug_prob": 0.8}, - "rotate_params": {"aug_prob": 0.8}, + "eraser_aug_params": {"enabled": False}, + "noise_aug_params": {"enabled": False}, + "flip_aug_params": {"enabled": False}, + "color_aug_params": {"enabled": False}, + "spatial_aug_params": {"enabled": False}, + "advanced_spatial_aug_params": {"enabled": False}, }, norm_params={"use": False}, ): diff --git a/ezflow/data/dataset/flying_things3d.py b/ezflow/data/dataset/flying_things3d.py index d77dd0b1..06eaafdd 100644 --- a/ezflow/data/dataset/flying_things3d.py +++ b/ezflow/data/dataset/flying_things3d.py @@ -33,6 +33,8 @@ class FlyingThings3D(BaseDataset): If True, applies data augmentation aug_params : :obj:`dict`, optional The parameters for data augmentation + norm_params : :obj:`dict`, optional + The parameters for normalization """ def __init__( @@ -48,11 +50,12 @@ def __init__( crop_type="center", augment=True, aug_params={ - "color_aug_params": {"aug_prob": 0.2}, - "eraser_aug_params": {"aug_prob": 0.5}, - "spatial_aug_params": {"aug_prob": 0.8}, - "translate_params": {"aug_prob": 0.8}, - "rotate_params": {"aug_prob": 0.8}, + "eraser_aug_params": {"enabled": False}, + "noise_aug_params": {"enabled": False}, + "flip_aug_params": {"enabled": False}, + "color_aug_params": {"enabled": False}, + "spatial_aug_params": {"enabled": False}, + "advanced_spatial_aug_params": {"enabled": False}, }, norm_params={"use": False}, ): diff --git a/ezflow/data/dataset/hd1k.py b/ezflow/data/dataset/hd1k.py index 4c48462e..5638bb1c 100644 --- a/ezflow/data/dataset/hd1k.py +++ b/ezflow/data/dataset/hd1k.py @@ -29,6 +29,8 @@ class HD1K(BaseDataset): If True, applies data augmentation aug_params : :obj:`dict`, optional The parameters for data augmentation + norm_params : :obj:`dict`, optional + The parameters for normalization """ def __init__( @@ -42,11 +44,12 @@ def __init__( crop_type="center", augment=True, aug_params={ - "color_aug_params": {"aug_prob": 0.2}, - "eraser_aug_params": {"aug_prob": 0.5}, - "spatial_aug_params": {"aug_prob": 0.8}, - "translate_params": {"aug_prob": 0.8}, - "rotate_params": {"aug_prob": 0.8}, + "eraser_aug_params": {"enabled": False}, + "noise_aug_params": {"enabled": False}, + "flip_aug_params": {"enabled": False}, + "color_aug_params": {"enabled": False}, + 
"spatial_aug_params": {"enabled": False}, + "advanced_spatial_aug_params": {"enabled": False}, }, norm_params={"use": False}, ): diff --git a/ezflow/data/dataset/kitti.py b/ezflow/data/dataset/kitti.py index 060f95e3..42afdbba 100644 --- a/ezflow/data/dataset/kitti.py +++ b/ezflow/data/dataset/kitti.py @@ -31,7 +31,8 @@ class Kitti(BaseDataset): If True, applies data augmentation aug_params : :obj:`dict`, optional The parameters for data augmentation - + norm_params : :obj:`dict`, optional + The parameters for normalization """ def __init__( @@ -46,11 +47,12 @@ def __init__( crop_type="center", augment=True, aug_params={ - "color_aug_params": {"aug_prob": 0.2}, - "eraser_aug_params": {"aug_prob": 0.5}, - "spatial_aug_params": {"aug_prob": 0.8}, - "translate_params": {"aug_prob": 0.8}, - "rotate_params": {"aug_prob": 0.8}, + "eraser_aug_params": {"enabled": False}, + "noise_aug_params": {"enabled": False}, + "flip_aug_params": {"enabled": False}, + "color_aug_params": {"enabled": False}, + "spatial_aug_params": {"enabled": False}, + "advanced_spatial_aug_params": {"enabled": False}, }, norm_params={"use": False}, ): diff --git a/ezflow/data/dataset/kubric.py b/ezflow/data/dataset/kubric.py new file mode 100644 index 00000000..f77ecfde --- /dev/null +++ b/ezflow/data/dataset/kubric.py @@ -0,0 +1,224 @@ +import os +import os.path as osp +import random +from glob import glob + +import numpy as np +import torch +import torch.utils.data as data + +from ...functional import FlowAugmentor, Normalize, crop +from ...utils import read_flow, read_image +from .base_dataset import BaseDataset + + +class Kubric(BaseDataset): + """ + Dataset Class for preparing the Kubric 'movi-f' split of + optical flow synthetic dataset for training and validation. + https://arxiv.org/abs/2203.03570 + https://github.com/google-research/kubric/tree/main/challenges/optical_flow + + + Note that in order to use this dataset class the Kubric Dataset + must be in the Sintel directory structure. Please follow the script + provided in the repository mentioned below to convert .tfrecords to + images and flow fields arranged in the Sintel Directory structure. + https://github.com/prajnan93/kubric-flow + + The tfrecords conversion is not provided with the ezflow package + as it requires tensorflow installation. + + + Parameters + ---------- + root_dir : str + path of the root directory for the MPI Sintel datasets + split : str, default : "training" + specify the training or validation split + swap_column_to_row : bool, default : True + If True, swaps column major to row major of the flow map. + The optical flow fields were rendered in column major in the earlier versions. + Set this parameter to False if newer versions are available in row major. 
+ More info in GitHub issue:https://github.com/google-research/kubric/issues/152 + use_backward_flow : bool, default : False + returns backward optical flow field + is_prediction : bool, default : False + If True, only image data are loaded for prediction otherwise both images and flow data are loaded + init_seed : bool, default : False + If True, sets random seed to worker + append_valid_mask : bool, default : False + If True, appends the valid flow mask to the original flow mask at dim=0 + crop: bool, default : True + Whether to perform cropping + crop_size : :obj:`tuple` of :obj:`int` + The size of the image crop + crop_type : :obj:`str`, default : 'center' + The type of croppping to be performed, one of "center", "random" + augment : bool, default : True + If True, applies data augmentation + aug_params : :obj:`dict`, optional + The parameters for data augmentation + norm_params : :obj:`dict`, optional + The parameters for normalization + """ + + def __init__( + self, + root_dir, + split="training", + swap_column_to_row=True, + use_backward_flow=False, + is_prediction=False, + init_seed=False, + append_valid_mask=False, + crop=False, + crop_size=(256, 256), + crop_type="center", + augment=True, + aug_params={ + "eraser_aug_params": {"enabled": False}, + "noise_aug_params": {"enabled": False}, + "flip_aug_params": {"enabled": False}, + "color_aug_params": {"enabled": False}, + "spatial_aug_params": {"enabled": False}, + "advanced_spatial_aug_params": {"enabled": False}, + }, + norm_params={"use": False}, + ): + super(Kubric, self).__init__( + init_seed=init_seed, + is_prediction=is_prediction, + append_valid_mask=append_valid_mask, + crop=crop, + crop_size=crop_size, + crop_type=crop_type, + augment=augment, + aug_params=aug_params, + sparse_transform=False, + norm_params=norm_params, + ) + + assert ( + split.lower() == "training" or split.lower() == "validation" + ), "Incorrect split values. Accepted split values: training, validation" + + self.is_prediction = is_prediction + self.append_valid_mask = append_valid_mask + self.swap = swap_column_to_row + + if augment: + self.augmentor = FlowAugmentor(crop_size=crop_size, **aug_params) + + split = split.lower() + + image_root = osp.join(root_dir, split, "images") + + if use_backward_flow: + flow_root = osp.join(root_dir, split, "backward_flow") + else: + flow_root = osp.join(root_dir, split, "forward_flow") + + for scene in os.listdir(image_root): + image_list = sorted(glob(osp.join(image_root, scene, "*.png"))) + for i in range(len(image_list) - 1): + self.image_list += [[image_list[i], image_list[i + 1]]] + + if not self.is_prediction: + self.flow_list += sorted(glob(osp.join(flow_root, scene, "*.flo"))) + + def __getitem__(self, index): + """ + Returns the corresponding images and the flow between them. + + Parameters + ---------- + index : int + specify the index location for access to Dataset item + + Returns + ------- + tuple + A tuple consisting of ((img1, img2), flow) + + img1 and img2 of shape 3 x H x W. + flow of shape 2 x H x W if append_valid_mask is False. + flow of shape 3 x H x W if append_valid_mask is True. 
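(Editor's note, not part of the patch: a minimal sketch that makes the shapes above concrete. The import is grounded in the `__init__.py` change in this diff; the root directory is the one used by the Kubric trainer configs, and the data is assumed to already be in the Sintel-style layout described above.)

```python
from ezflow.data.dataset import Kubric

# Center-cropped, non-augmented samples with the valid mask appended to the flow.
dataset = Kubric(
    root_dir="./Datasets/KubricFlow",
    split="training",
    crop=True,
    crop_size=(256, 256),
    augment=False,
    append_valid_mask=True,
)

(img1, img2), flow = dataset[0]
print(img1.shape, img2.shape)  # torch.Size([3, 256, 256]) each
print(flow.shape)              # torch.Size([3, 256, 256]); third channel is the valid mask
```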
+ """ + + if not self.init_seed: + worker_info = torch.utils.data.get_worker_info() + if worker_info is not None: + torch.manual_seed(worker_info.id) + np.random.seed(worker_info.id) + random.seed(worker_info.id) + self.init_seed = True + + index = index % len(self.image_list) + + img1 = read_image(self.image_list[index][0]) + img2 = read_image(self.image_list[index][1]) + flow, valid = read_flow(self.flow_list[index]) + + flow = np.array(flow).astype(np.float32) + img1 = np.array(img1).astype(np.uint8) + img2 = np.array(img2).astype(np.uint8) + + if self.swap: + flow_temp = np.zeros_like(flow) + + # Swap column major to row_major + + flow_temp[..., 0] = flow[..., 1] + flow_temp[..., 1] = flow[..., 0] + + del flow + flow = flow_temp + + if len(img1.shape) == 2: # grayscale images + img1 = np.tile(img1[..., None], (1, 1, 3)) + img2 = np.tile(img2[..., None], (1, 1, 3)) + else: + img1 = img1[..., :3] + img2 = img2[..., :3] + + if self.is_prediction: + + img1 = torch.from_numpy(img1).permute(2, 0, 1).float() + img2 = torch.from_numpy(img2).permute(2, 0, 1).float() + + img1, img2 = self.normalize(img1, img2) + + return img1, img2 + + if self.augment is True and self.augmentor is not None: + img1, img2, flow, valid = self.augmentor(img1, img2, flow, valid) + + if self.crop is True: + img1, img2, flow, valid = crop( + img1, + img2, + flow, + valid=valid, + crop_size=self.crop_size, + crop_type=self.crop_type, + sparse_transform=self.sparse_transform, + ) + + img1 = torch.from_numpy(img1).permute(2, 0, 1).float() + img2 = torch.from_numpy(img2).permute(2, 0, 1).float() + flow = torch.from_numpy(flow).permute(2, 0, 1).float() + + img1, img2 = self.normalize(img1, img2) + + if self.append_valid_mask: + if valid is not None: + valid = torch.from_numpy(valid) + else: + valid = (flow[0].abs() < 1000) & (flow[1].abs() < 1000) + + valid = valid.float() + valid = torch.unsqueeze(valid, dim=0) + flow = torch.cat([flow, valid], dim=0) + + return (img1, img2), flow diff --git a/ezflow/data/dataset/monkaa.py b/ezflow/data/dataset/monkaa.py index 5b93e2c9..c3cda9c5 100644 --- a/ezflow/data/dataset/monkaa.py +++ b/ezflow/data/dataset/monkaa.py @@ -29,6 +29,8 @@ class Monkaa(BaseDataset): If True, applies data augmentation aug_params : :obj:`dict`, optional The parameters for data augmentation + norm_params : :obj:`dict`, optional + The parameters for normalization """ def __init__( @@ -42,11 +44,12 @@ def __init__( crop_type="center", augment=True, aug_params={ - "color_aug_params": {"aug_prob": 0.2}, - "eraser_aug_params": {"aug_prob": 0.5}, - "spatial_aug_params": {"aug_prob": 0.8}, - "translate_params": {"aug_prob": 0.8}, - "rotate_params": {"aug_prob": 0.8}, + "eraser_aug_params": {"enabled": False}, + "noise_aug_params": {"enabled": False}, + "flip_aug_params": {"enabled": False}, + "color_aug_params": {"enabled": False}, + "spatial_aug_params": {"enabled": False}, + "advanced_spatial_aug_params": {"enabled": False}, }, norm_params={"use": False}, ): diff --git a/ezflow/data/dataset/mpi_sintel.py b/ezflow/data/dataset/mpi_sintel.py index dce09c8b..020857d4 100644 --- a/ezflow/data/dataset/mpi_sintel.py +++ b/ezflow/data/dataset/mpi_sintel.py @@ -34,6 +34,8 @@ class MPISintel(BaseDataset): If True, applies data augmentation aug_params : :obj:`dict`, optional The parameters for data augmentation + norm_params : :obj:`dict`, optional + The parameters for normalization """ def __init__( @@ -49,11 +51,12 @@ def __init__( crop_type="center", augment=True, aug_params={ - "color_aug_params": {"aug_prob": 0.2}, - 
"eraser_aug_params": {"aug_prob": 0.5}, - "spatial_aug_params": {"aug_prob": 0.8}, - "translate_params": {"aug_prob": 0.8}, - "rotate_params": {"aug_prob": 0.8}, + "eraser_aug_params": {"enabled": False}, + "noise_aug_params": {"enabled": False}, + "flip_aug_params": {"enabled": False}, + "color_aug_params": {"enabled": False}, + "spatial_aug_params": {"enabled": False}, + "advanced_spatial_aug_params": {"enabled": False}, }, norm_params={"use": False}, ): @@ -83,6 +86,7 @@ def __init__( split = split.lower() if split == "validation": split = "test" + self.is_prediction = True image_root = osp.join(root_dir, split, dstype) flow_root = osp.join(root_dir, split, "flow") diff --git a/ezflow/decoder/__init__.py b/ezflow/decoder/__init__.py index 8a018058..a6b006cf 100644 --- a/ezflow/decoder/__init__.py +++ b/ezflow/decoder/__init__.py @@ -1,5 +1,7 @@ from .build import DECODER_REGISTRY, build_decoder +from .context import ContextNetwork from .conv_decoder import ConvDecoder, FlowNetConvDecoder from .iterative import * from .noniterative import * +from .pyramid import PyramidDecoder from .separable_conv import Butterfly4D, SeparableConv4D diff --git a/ezflow/decoder/context.py b/ezflow/decoder/context.py new file mode 100644 index 00000000..67fc6ad5 --- /dev/null +++ b/ezflow/decoder/context.py @@ -0,0 +1,63 @@ +import torch +import torch.nn as nn + +from ..config import configurable +from ..modules import conv +from .build import DECODER_REGISTRY + + +@DECODER_REGISTRY.register() +class ContextNetwork(nn.Module): + """ + PWCNet Context Network decoder + + Parameters + ---------- + in_channels: int, default: 565 + Number of input channels + config : List[int], default : [128, 128, 96, 64, 32] + List containing all output channels of the decoder. + """ + + @configurable + def __init__(self, in_channels=565, config=[128, 128, 96, 64, 32]): + super(ContextNetwork, self).__init__() + + self.context_net = nn.ModuleList( + [ + conv( + in_channels, + config[0], + kernel_size=3, + stride=1, + padding=1, + dilation=1, + ), + ] + ) + self.context_net.append( + conv(config[0], config[0], kernel_size=3, stride=1, padding=2, dilation=2) + ) + self.context_net.append( + conv(config[0], config[1], kernel_size=3, stride=1, padding=4, dilation=4) + ) + self.context_net.append( + conv(config[1], config[2], kernel_size=3, stride=1, padding=8, dilation=8) + ) + self.context_net.append( + conv(config[2], config[3], kernel_size=3, stride=1, padding=16, dilation=16) + ) + self.context_net.append( + conv(config[3], config[4], kernel_size=3, stride=1, padding=1, dilation=1) + ) + self.context_net.append( + nn.Conv2d(config[4], 2, kernel_size=3, stride=1, padding=1, bias=True) + ) + self.context_net = nn.Sequential(*self.context_net) + + @classmethod + def from_config(self, cfg): + return {"in_channels": cfg.IN_CHANNELS, "config": cfg.CONFIG} + + def forward(self, x): + return self.context_net(x) diff --git a/ezflow/decoder/pyramid.py b/ezflow/decoder/pyramid.py new file mode 100644 index 00000000..679e1c96 --- /dev/null +++ b/ezflow/decoder/pyramid.py @@ -0,0 +1,154 @@ +import torch +import torch.nn as nn + +from ..config import configurable +from ..modules import deconv +from ..similarity import IterSpatialCorrelationSampler as SpatialCorrelationSampler +from ..utils import warp +from .build import DECODER_REGISTRY +from .conv_decoder import ConvDecoder + + +@DECODER_REGISTRY.register() +class PyramidDecoder(nn.Module): + """ + Applies a 2D Convolutional decoder to regress the optical flow + from the intermediate 
outputs convolutions of the encoder. + Used in **PWCNet** (https://arxiv.org/abs/1709.02371) + + Parameters + ---------- + config : List[int], default : [128, 128, 96, 64, 32] + List containing all output channels of the decoder. + to_flow : bool, default : True + If True, regresses the flow of shape N x 2 x H x W. + max_displacement: int, default: 4 + Maximum displacement for cost volume computation. + pad_size: int, default: 0 + Pad size for cost volume computation. + flow_scale_factor: float, default: 20.0 + Scale factor for upscaling flow predictions. + """ + + @configurable + def __init__( + self, + config=[128, 128, 96, 64, 32], + to_flow=True, + max_displacement=4, + pad_size=0, + flow_scale_factor=20.0, + ): + super(PyramidDecoder, self).__init__() + self.config = config + self.flow_scale_factor = flow_scale_factor + + self.correlation_layer = SpatialCorrelationSampler( + kernel_size=1, patch_size=2 * max_displacement + 1, padding=pad_size + ) + self.leaky_relu = nn.LeakyReLU(negative_slope=0.1, inplace=False) + + search_range = (2 * max_displacement + 1) ** 2 + + self.decoder_layers = nn.ModuleList() + + self.up_feature_layers = nn.ModuleList() + self.deconv_layers = nn.ModuleList() + + for i in range(len(config)): + + if i == 0: + concat_channels = search_range + else: + concat_channels = search_range + config[i] + max_displacement + + self.decoder_layers.append( + ConvDecoder( + config=config, + to_flow=to_flow, + concat_channels=concat_channels, + ) + ) + + if i < len(config) - 1: + self.deconv_layers.append( + deconv(2, 2, kernel_size=4, stride=2, padding=1) + ) + + self.up_feature_layers.append( + deconv( + concat_channels + sum(config), + 2, + kernel_size=4, + stride=2, + padding=1, + ) + ) + + @classmethod + def from_config(self, cfg): + return { + "config": cfg.CONFIG, + "to_flow": cfg.TO_FLOW, + "max_displacement": cfg.SIMILARITY.MAX_DISPLACEMENT, + "pad_size": cfg.SIMILARITY.PAD_SIZE, + "flow_scale_factor": cfg.FLOW_SCALE_FACTOR, + } + + def _corr_relu(self, features1, features2): + + corr = self.correlation_layer(features1, features2) + corr = corr.view(corr.shape[0], -1, corr.shape[3], corr.shape[4]) + return self.leaky_relu(corr) + + def forward(self, feature_pyramid1, feature_pyramid2): + """ + Performs forward pass. 
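(Editor's note, not part of the patch: a worked illustration of how the forward pass below scales the upsampled flow before warping. With the default five-level `config` and `flow_scale_factor` of 20.0, the warp scale starts at 20 * 2^-5 = 0.625 and doubles at every finer level.)

```python
# Mirrors: up_flow_scale = flow_scale_factor * 2 ** (-len(config)), then *= 2 per level.
flow_scale_factor = 20.0
config = [128, 128, 96, 64, 32]  # default PyramidDecoder config

scale = flow_scale_factor * 2 ** (-len(config))
for level in range(1, len(config)):  # level 0 computes correlation without warping
    print(f"level {level}: warp scale = {scale}")
    scale *= 2
# level 1: warp scale = 0.625
# level 2: warp scale = 1.25
# level 3: warp scale = 2.5
# level 4: warp scale = 5.0
```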
+ + Parameters + ---------- + feature_pyramid1 : torch.Tensor + Input feature map of image 1 + + feature_pyramid2 : torch.Tensor + Input feature map of image 2 + + Returns + ------- + List[torch.Tensor] + A List containing tensors of shape N x 2 x H x W representing the flow + + List[torch.Tensor] + A List containing tensors of shape N x output_channel x H x W + """ + + up_flow, up_features = None, None + up_flow_scale = self.flow_scale_factor * 2 ** (-(len(self.config))) + + flow_preds = [] + + for i in range(len(self.decoder_layers)): + + if i == 0: + corr = self._corr_relu(feature_pyramid1[i], feature_pyramid2[i]) + concatenated_features = corr + + else: + + warped_features = warp(feature_pyramid2[i], up_flow * up_flow_scale) + up_flow_scale *= 2 + + corr = self._corr_relu(feature_pyramid1[i], warped_features) + + concatenated_features = torch.cat( + [corr, feature_pyramid1[i], up_flow, up_features], dim=1 + ) + + flow, features = self.decoder_layers[i](concatenated_features) + flow_preds.append(flow) + + if i < len(self.decoder_layers) - 1: + up_flow = self.deconv_layers[i](flow) + up_features = self.up_feature_layers[i](features) + + return flow_preds, features diff --git a/ezflow/encoder/pyramid.py b/ezflow/encoder/pyramid.py index 19952b38..e921f0b6 100644 --- a/ezflow/encoder/pyramid.py +++ b/ezflow/encoder/pyramid.py @@ -45,6 +45,19 @@ def from_config(self, cfg): } def forward(self, img): + """ + Performs forward pass. + + Parameters + ---------- + img : torch.Tensor + Input tensor + + Returns + ------- + List[torch.Tensor], + List of all the output convolutions from each encoder layer + """ feature_pyramid = [] x = img @@ -54,6 +67,4 @@ def forward(self, img): x = self.encoder[i](x) feature_pyramid.append(x) - feature_pyramid.reverse() - return feature_pyramid diff --git a/ezflow/engine/eval.py b/ezflow/engine/eval.py index 718aadff..d0e00e70 100644 --- a/ezflow/engine/eval.py +++ b/ezflow/engine/eval.py @@ -92,7 +92,7 @@ def run_inference(model, dataloader, device, metric_fn, flow_scale=1.0, pad_divi start_time = time.time() - pred = model(img1, img2) + output = model(img1, img2) if torch.cuda.is_available(): torch.cuda.synchronize() @@ -100,10 +100,10 @@ def run_inference(model, dataloader, device, metric_fn, flow_scale=1.0, pad_divi end_time = time.time() times.append(end_time - start_time) - pred = padder.unpad(pred) - flow = pred * flow_scale + pred = padder.unpad(output["flow_upsampled"]) + pred = pred * flow_scale - metric = metric_fn(flow, target) + metric = metric_fn(pred, target) metric_meter.update(metric) avg_inference_time = sum(times) / len(times) @@ -193,7 +193,7 @@ def profile_inference( start_time = time.time() with record_function(profiler.model_name): - pred = model(img1, img2) + output = model(img1, img2) if torch.cuda.is_available(): torch.cuda.synchronize() @@ -203,10 +203,10 @@ def profile_inference( prof.step() - pred = padder.unpad(pred) - flow = pred * flow_scale + pred = padder.unpad(output["flow_upsampled"]) + pred = pred * flow_scale - metric = metric_fn(flow, target) + metric = metric_fn(pred, target) metric_meter.update(metric) print( diff --git a/ezflow/engine/trainer.py b/ezflow/engine/trainer.py index 6e55e813..3c6a9797 100644 --- a/ezflow/engine/trainer.py +++ b/ezflow/engine/trainer.py @@ -60,7 +60,7 @@ def _is_main_process(self): raise NotImplementedError def _setup_training(self, rank=0, loss_fn=None, optimizer=None, scheduler=None): - if loss_fn is None: + if loss_fn is None and self.loss_fn is None: if self.cfg.CRITERION.CUSTOM: loss = 
FUNCTIONAL_REGISTRY.get(self.cfg.CRITERION.NAME) @@ -73,7 +73,9 @@ def _setup_training(self, rank=0, loss_fn=None, optimizer=None, scheduler=None): else: loss_fn = loss() - if optimizer is None: + print(f"Loss function: {self.cfg.CRITERION.NAME} is initialized!") + + if optimizer is None and self.optimizer is None: opt = optimizers.get(self.cfg.OPTIMIZER.NAME) @@ -87,7 +89,9 @@ def _setup_training(self, rank=0, loss_fn=None, optimizer=None, scheduler=None): else: optimizer = opt(self.model.parameters(), lr=self.cfg.OPTIMIZER.LR) - if scheduler is None: + print(f"Optimizer: {self.cfg.OPTIMIZER.NAME} is initialized!") + + if scheduler is None and self.scheduler is None: if self.cfg.SCHEDULER.USE: sched = schedulers.get(self.cfg.SCHEDULER.NAME) @@ -101,9 +105,16 @@ def _setup_training(self, rank=0, loss_fn=None, optimizer=None, scheduler=None): else: scheduler = sched(optimizer) - self.loss_fn = loss_fn - self.optimizer = optimizer - self.scheduler = scheduler + print(f"Scheduler: {self.cfg.SCHEDULER.NAME} is initialized!") + + if self.loss_fn is None: + self.loss_fn = loss_fn + + if self.optimizer is None: + self.optimizer = optimizer + + if self.scheduler is None: + self.scheduler = scheduler if rank == 0: """ @@ -148,6 +159,9 @@ def _epoch_trainer(self, n_epochs=None, start_epoch=None): print(f"\nEpoch {epoch+1} of {start_epoch+n_epochs}") print("-" * 80) + if self.model_parallel: + self.train_loader.sampler.set_epoch(epoch) + loss_meter.reset() for iteration, (inp, target) in enumerate(self.train_loader): @@ -165,13 +179,21 @@ def _epoch_trainer(self, n_epochs=None, start_epoch=None): "epochs_training_loss", loss_meter.sum, epoch + 1 ) - if epoch % self.cfg.VALIDATE_INTERVAL == 0: + if epoch % self.cfg.VALIDATE_INTERVAL == 0 and self._is_main_process(): self._validate_model(iter_type="Epoch", iterations=epoch + 1) if epoch % self.cfg.CKPT_INTERVAL == 0 and self._is_main_process(): self._save_checkpoints(ckpt_type="epoch", ckpt_number=epoch + 1) - self.writer.close() + # Synchronize all processes in multi gpu after validation and checkpoint + if ( + epoch % self.cfg.VALIDATE_INTERVAL == 0 + or epoch % self.cfg.CKPT_INTERVAL == 0 + ) and self.model_parallel: + dist.barrier() + + if self._is_main_process(): + self.writer.close() def _step_trainer(self, n_steps=None, start_step=None): self.model.train() @@ -187,11 +209,15 @@ def _step_trainer(self, n_steps=None, start_step=None): if start_step is not None: print(f"Resuming training from step {start_step}\n") total_steps = start_step - n_steps += start_step + n_steps += start_step - 1 else: start_step = total_steps = 1 n_steps += start_step + if self.model_parallel: + epoch = 0 + self.train_loader.sampler.set_epoch(epoch) + train_iter = iter(self.train_loader) print(f"\nStarting step {total_steps} of {n_steps}") @@ -200,6 +226,10 @@ def _step_trainer(self, n_steps=None, start_step=None): try: inp, target = next(train_iter) except: + if self.model_parallel: + epoch += 1 + self.train_loader.sampler.set_epoch(epoch) + # Handle exception if there is no data # left in train iterator to continue training. 
train_iter = iter(self.train_loader) @@ -210,20 +240,24 @@ def _step_trainer(self, n_steps=None, start_step=None): self._log_step(step, total_steps, loss_meter) - if self._is_main_process(): - self.writer.add_scalar( - "steps_training_loss", loss_meter.sum, total_steps - ) - - if step % self.cfg.VALIDATE_INTERVAL == 0: + if step % self.cfg.VALIDATE_INTERVAL == 0 and self._is_main_process(): self._validate_model(iter_type="Iteration", iterations=total_steps) + print("-" * 80) if step % self.cfg.CKPT_INTERVAL == 0 and self._is_main_process(): self._save_checkpoints(ckpt_type="step", ckpt_number=total_steps) + # Synchronize all processes in multi gpu after validation and checkpoint + if ( + step % self.cfg.VALIDATE_INTERVAL == 0 + or step % self.cfg.CKPT_INTERVAL == 0 + ) and self.model_parallel: + dist.barrier() + total_steps += 1 - self.writer.close() + if self._is_main_process(): + self.writer.close() def _run_step(self, inp, target): img1, img2 = inp @@ -232,14 +266,17 @@ def _run_step(self, inp, target): img2.to(self.device), target.to(self.device), ) - target = target / self.cfg.TARGET_SCALE_FACTOR if self._is_main_process(): start_time = time.time() with autocast(enabled=self.cfg.MIXED_PRECISION): - pred = self.model(img1, img2) - loss = self.loss_fn(pred, target) + output = self.model(img1, img2) + loss = self.loss_fn( + output["flow_preds"], target / self.cfg.TARGET_SCALE_FACTOR + ) + + del output self.optimizer.zero_grad() self.scaler.scale(loss).backward() @@ -285,22 +322,33 @@ def _validate_model(self, iter_type, iterations): img2.to(self.device), target.to(self.device), ) - target = target / self.cfg.TARGET_SCALE_FACTOR - pred = self.model(img1, img2) - loss = self.loss_fn(pred, target) + if self.model_parallel: + output = self.model.module(img1, img2) + else: + output = self.model(img1, img2) + + loss = self.loss_fn( + output["flow_preds"], target / self.cfg.TARGET_SCALE_FACTOR + ) + loss_meter.update(loss.item()) - metric = self._calculate_metric(pred, target) + + """ + Predicted upsampled flow should be scaled for EPE calculation. 
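(Editor's note, not part of the patch: the validation metric is an end-point error, and because the loss target is divided by TARGET_SCALE_FACTOR, the upsampled prediction is multiplied back before comparison with the unscaled ground truth. A minimal sketch with a hypothetical `endpoint_error` helper, assuming a dense ground-truth flow whose first two channels are the flow components:)

```python
import torch

def endpoint_error(pred_flow: torch.Tensor, target_flow: torch.Tensor) -> float:
    """Mean end-point error between N x 2 x H x W flow fields (hypothetical helper)."""
    epe = torch.norm(pred_flow - target_flow[:, :2], p=2, dim=1)
    return epe.mean().item()

# With the FlowNetC/PWCNet configs above (TARGET_SCALE_FACTOR: 20.0) this amounts to
# endpoint_error(output["flow_upsampled"] * 20.0, target); for RAFT the factor is 1.0.
```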
+ """ + metric = self._calculate_metric( + output["flow_upsampled"] * self.cfg.TARGET_SCALE_FACTOR, target + ) metric_meter.update(metric) + del output + new_avg_val_loss, new_avg_val_metric = loss_meter.avg, metric_meter.avg print("\n", "-" * 80) - if self._is_main_process(): - self.writer.add_scalar("avg_validation_loss", new_avg_val_loss, iterations) - self.writer.add_scalar( - "avg_validation_metric", new_avg_val_metric, iterations - ) + self.writer.add_scalar("avg_validation_loss", new_avg_val_loss, iterations) + self.writer.add_scalar("avg_validation_metric", new_avg_val_metric, iterations) print( f"\n{iter_type} {iterations}: Average validation loss = {new_avg_val_loss}" @@ -384,6 +432,9 @@ def _reload_trainer_states( scheduler_ckpt=None, use_cfg=False, ): + + self._setup_device() + consolidated_ckpt = ( self.cfg.RESUME_TRAINING.CONSOLIDATED_CKPT if use_cfg is True @@ -392,7 +443,7 @@ def _reload_trainer_states( if consolidated_ckpt is not None: - ckpt = torch.load(consolidated_ckpt, map_location=torch.device("cpu")) + ckpt = torch.load(consolidated_ckpt, map_location=self.device) model_state_dict = ckpt["model_state_dict"] optimizer_state_dict = ckpt["optimizer_state_dict"] @@ -401,7 +452,10 @@ def _reload_trainer_states( scheduler_state_dict = ckpt["scheduler_state_dict"] if "epochs" in ckpt.keys(): - start_epoch = ckpt["epochs"] + 1 + start_iteration = ckpt["epochs"] + 1 + + if "step" in ckpt.keys(): + start_iteration = ckpt["step"] + 1 else: @@ -409,24 +463,26 @@ def _reload_trainer_states( model_ckpt is not None and optimizer_ckpt is not None ), "Must provide a consolidated ckpt or model and optimizer ckpts separately" - model_state_dict = torch.load(model_ckpt, map_location=torch.device("cpu")) - optimizer_state_dict = torch.load( - optimizer_ckpt, map_location=torch.device("cpu") - ) + model_state_dict = torch.load(model_ckpt, map_location=self.device) + optimizer_state_dict = torch.load(optimizer_ckpt, map_location=self.device) if scheduler_ckpt is not None: scheduler_state_dict = torch.load( - scheduler_ckpt, map_location=torch.device("cpu") + scheduler_ckpt, map_location=self.device ) + self._setup_model() self.model.load_state_dict(model_state_dict) + print("Model state loaded!!") self._setup_training() self.optimizer.load_state_dict(optimizer_state_dict) + print("Optimizer state loaded!!") if self.scheduler is not None: self.scheduler.load_state_dict(scheduler_state_dict) + print("Scheduler state loaded!!") if total_iterations is None and use_cfg: total_iterations = ( @@ -485,7 +541,17 @@ def resume_training( use_cfg=use_cfg, ) - self.train(total_iterations=total_iterations, start_iteration=start_iteration) + os.makedirs(self.cfg.CKPT_DIR, exist_ok=True) + os.makedirs(self.cfg.LOG_DIR, exist_ok=True) + + print("Training config:\n") + print(self.cfg) + print("-" * 80) + + self._trainer(total_iterations, start_iteration) + + print("Training complete!") + print(f"Total training time: {str(timedelta(seconds=sum(self.times)))}") class Trainer(BaseTrainer): @@ -615,7 +681,10 @@ def __init__(self, cfg, model, train_loader_creator, val_loader_creator): self.val_loader = None self.train_loader_creator = train_loader_creator - self.val_loader_creator = val_loader_creator + + # Validate model only on the main process. 
+ val_loader_creator.distributed = False + self.val_loader = val_loader_creator.get_dataloader() self._validate_ddp_config() @@ -664,6 +733,7 @@ def _setup_device(self, rank): self.device = torch.device(rank) self.local_rank = rank torch.cuda.empty_cache() + torch.cuda.set_device(rank) def _setup_ddp(self, rank): os.environ["MASTER_ADDR"] = self.cfg.DISTRIBUTED.MASTER_ADDR @@ -679,19 +749,22 @@ def _setup_ddp(self, rank): ) print(f"{rank + 1}/{self.cfg.DISTRIBUTED.WORLD_SIZE} process initialized.") + # synchronizes all the threads to reach this point before moving on + dist.barrier() + def _is_main_process(self): return self.local_rank == 0 def _setup_model(self, rank): + if self.cfg.DISTRIBUTED.SYNC_BATCH_NORM: + self.model = nn.SyncBatchNorm.convert_sync_batchnorm(self.model) + self.model = DDP( self.model.cuda(rank), device_ids=[rank], ) - if self.cfg.DISTRIBUTED.SYNC_BATCH_NORM: - self.model = nn.SyncBatchNorm.convert_sync_batchnorm(self.model) - self.model = self.model.to(self.device) def _cleanup(self): @@ -710,7 +783,7 @@ def _main_worker( self._setup_ddp(rank) self._setup_model(rank) self.train_loader = self.train_loader_creator.get_dataloader(rank=rank) - self.val_loader = self.val_loader_creator.get_dataloader(rank=rank) + self._setup_training( rank=rank, loss_fn=loss_fn, optimizer=optimizer, scheduler=scheduler ) @@ -718,6 +791,8 @@ def _main_worker( os.makedirs(self.cfg.CKPT_DIR, exist_ok=True) os.makedirs(self.cfg.LOG_DIR, exist_ok=True) + # synchronizes all the threads to reach this point before moving on + dist.barrier() self._trainer(total_iterations, start_iteration) if self._is_main_process(): diff --git a/ezflow/functional/criterion/multiscale.py b/ezflow/functional/criterion/multiscale.py index c13d44f5..c19bb1b9 100644 --- a/ezflow/functional/criterion/multiscale.py +++ b/ezflow/functional/criterion/multiscale.py @@ -14,10 +14,22 @@ class MultiScaleLoss(nn.Module): Parameters ---------- - norm : str + norm : str, default: "l1" The norm to use for the loss. Can be either "l2", "l1" or "robust" + q : float, default: 0.4 + This parameter is used in robust loss for fine tuning. q < 1 gives less penalty to outliers + eps : float, default: 0.01 + This parameter is a small constant used in robust loss to stabilize fine tuning. weights : list The weights to use for each scale + average : str, default: "mean" + The mode to set the average of the EPE map. + If "mean", the mean of the EPE map is returned. + If "sum", the EPE map is summed and divided by the batch size. + resize_flow : str, default: "upsample" + The mode to resize flow. + If "upsample", predicted flow will be upsampled to the size of the ground truth. + If "downsample", ground truth flow will be downsampled to the size of the predicted flow. extra_mask : torch.Tensor A mask to apply to the loss. 
Useful for removing the loss on the background use_valid_range : bool @@ -30,26 +42,45 @@ class MultiScaleLoss(nn.Module): def __init__( self, norm="l1", + q=0.4, + eps=1e-2, weights=(1, 0.5, 0.25), + average="mean", + resize_flow="upsample", extra_mask=None, use_valid_range=True, valid_range=None, ): super(MultiScaleLoss, self).__init__() - self.norm = norm.lower() - assert self.norm in ("l1", "l2", "robust"), "Norm must be one of L1, L2, Robust" + assert norm.lower() in ( + "l1", + "l2", + "robust", + ), "Norm must be one of L1, L2, Robust" + assert resize_flow.lower() in ( + "upsample", + "downsample", + ), "Resize flow must be one of upsample or downsample" + assert average.lower() in ("mean", "sum"), "Average must be one of mean or sum" + self.norm = norm.lower() + self.q = q + self.eps = eps self.weights = weights self.extra_mask = extra_mask self.use_valid_range = use_valid_range self.valid_range = valid_range + self.average = average.lower() + self.resize_flow = resize_flow.lower() @classmethod def from_config(cls, cfg): return { "norm": cfg.NORM, "weights": cfg.WEIGHTS, + "average": cfg.AVERAGE, + "resize_flow": cfg.RESIZE_FLOW, "extra_mask": cfg.EXTRA_MASK, "use_valid_range": cfg.USE_VALID_RANGE, "valid_range": cfg.VALID_RANGE, @@ -59,10 +90,11 @@ def forward(self, pred, label): if label.shape[1] == 3: """Ignore valid mask for Multiscale Loss.""" + mask = label[:, 2:, :, :] label = label[:, :2, :, :] loss = 0 - h, w = label.size()[-2:] + b, c, h, w = label.size() if ( (type(pred) is not tuple) @@ -73,42 +105,58 @@ def forward(self, pred, label): for i, level_pred in enumerate(pred): - real_flow = F.interpolate( - level_pred, (h, w), mode="bilinear", align_corners=True - ) - real_flow[:, 0, :, :] = real_flow[:, 0, :, :] * (w / level_pred.shape[3]) - real_flow[:, 1, :, :] = real_flow[:, 1, :, :] * (h / level_pred.shape[2]) + if self.resize_flow.lower() == "upsample": + real_flow = F.interpolate( + level_pred, (h, w), mode="bilinear", align_corners=True + ) + real_flow[:, 0, :, :] = real_flow[:, 0, :, :] * ( + w / level_pred.shape[3] + ) + real_flow[:, 1, :, :] = real_flow[:, 1, :, :] * ( + h / level_pred.shape[2] + ) + target = label + + elif self.resize_flow.lower() == "downsample": + # down sample ground truth following irr solution + # https://github.com/visinf/irr/blob/master/losses.py#L16 + b, c, h, w = level_pred.shape + + target = F.adaptive_avg_pool2d(label, [h, w]) + real_flow = level_pred if self.norm == "l2": - loss_value = torch.norm(real_flow - label, p=2, dim=1) + loss_value = torch.norm(real_flow - target, p=2, dim=1) elif self.norm == "robust": - loss_value = (real_flow - label).abs().sum(dim=1) + 1e-8 - loss_value = loss_value**0.4 + loss_value = torch.norm(real_flow - target, p=1, dim=1) + loss_value = (loss_value + self.eps) ** self.q elif self.norm == "l1": - loss_value = (real_flow - label).abs().sum(dim=1) + loss_value = torch.norm(real_flow - target, p=1, dim=1) if self.use_valid_range and self.valid_range is not None: with torch.no_grad(): - mask = (label[:, 0, :, :].abs() <= self.valid_range[i][1]) & ( - label[:, 1, :, :].abs() <= self.valid_range[i][0] + mask = (target[:, 0, :, :].abs() <= self.valid_range[i][1]) & ( + target[:, 1, :, :].abs() <= self.valid_range[i][0] ) else: with torch.no_grad(): - mask = torch.ones(label[:, 0, :, :].shape).type_as(label) + mask = torch.ones(target[:, 0, :, :].shape).type_as(target) loss_value = loss_value * mask.float() if self.extra_mask is not None: val = self.extra_mask > 0 loss_value = loss_value[val] - level_loss = 
loss_value.mean() * self.weights[i] - else: + if self.average.lower() == "mean": level_loss = loss_value.mean() * self.weights[i] + elif self.average.lower() == "sum": + level_loss = loss_value.sum() / b * self.weights[i] + loss += level_loss loss = loss / len(pred) diff --git a/ezflow/functional/data_augmentation/augmentor.py b/ezflow/functional/data_augmentation/augmentor.py index ab3abea3..260d71fa 100644 --- a/ezflow/functional/data_augmentation/augmentor.py +++ b/ezflow/functional/data_augmentation/augmentor.py @@ -11,30 +11,73 @@ class FlowAugmentor: ---------- crop_size : int Size of the crop to be applied to the images. - color_aug_params : dict - Parameters for the color augmentation. eraser_aug_params : dict Parameters for the eraser augmentation. + noise_aug_params : dict + Parameters for the noise augmentation. + flip_aug_params : dict + Parameters for the flip augmentation. + color_aug_params : dict + Parameters for the color augmentation. spatial_aug_params : dict Parameters for the spatial augmentation. + advanced_spatial_aug_params : dict + Parameters for the spatial augmentation. + If both spatial and advanced_spatial augmentations are enabled, the advanced spatial augmentations are used. """ def __init__( self, crop_size, - color_aug_params={"aug_prob": 0.2}, - eraser_aug_params={"aug_prob": 0.5}, - spatial_aug_params={"aug_prob": 0.8}, - translate_params={"aug_prob": 0.8}, - rotate_params={"aug_prob": 0.8}, + eraser_aug_params={"enabled": False, "aug_prob": 0.5, "bounds": [50, 100]}, + noise_aug_params={"enabled": False, "aug_prob": 0.5, "noise_std_range": 0.06}, + flip_aug_params={"enabled": False, "h_flip_prob": 0.5, "v_flip_prob": 0.1}, + color_aug_params={ + "enabled": False, + "asymmetric_color_aug_prob": 0.2, + "brightness": 0.4, + "contrast": 0.4, + "saturation": 0.4, + "hue": 0.15915494309189535, + }, + spatial_aug_params={ + "enabled": False, + "aug_prob": 0.8, + "stretch_prob": 0.8, + "min_scale": -0.1, + "max_scale": 1.0, + "max_stretch": 0.2, + }, + advanced_spatial_aug_params={ + "enabled": False, + "scale1": 0.3, + "scale2": 0.1, + "rotate": 0.4, + "translate": 0.4, + "stretch": 0.3, + "enable_out_of_boundary_crop": False, + }, ): self.crop_size = crop_size self.color_aug_params = color_aug_params self.eraser_aug_params = eraser_aug_params self.spatial_aug_params = spatial_aug_params - self.translate_params = translate_params - self.rotate_params = rotate_params + self.noise_aug_params = noise_aug_params + self.flip_aug_params = flip_aug_params + + self.advanced_spatial_aug_params = advanced_spatial_aug_params + self.advanced_spatial_aug_params["h_flip_prob"] = ( + flip_aug_params["h_flip_prob"] if "h_flip_prob" in flip_aug_params else 0.0 + ) + self.advanced_spatial_transform = AdvancedSpatialTransform( + crop=self.crop_size, **self.advanced_spatial_aug_params + ) + + if self.advanced_spatial_aug_params["enabled"]: + # Disable spatial transform and horizontal flip if advanced spatial transforms are used + self.spatial_aug_params["enabled"] = False + self.flip_aug_params["h_flip_prob"] = 0.0 def __call__(self, img1, img2, flow, valid=None): """ @@ -64,14 +107,17 @@ def __call__(self, img1, img2, flow, valid=None): """ img1, img2 = color_transform(img1, img2, **self.color_aug_params) - img1, img2 = eraser_transform(img1, img2, **self.eraser_aug_params) + + img1, img2, flow = self.advanced_spatial_transform(img1, img2, flow) + img1, img2, flow = spatial_transform( img1, img2, flow, self.crop_size, **self.spatial_aug_params ) - img1, img2, flow = 
translate_transform( - img1, img2, flow, **self.translate_params - ) - img1, img2, flow = rotate_transform(img1, img2, flow, **self.rotate_params) + + img1, img2, flow = flip_transform(img1, img2, flow, **self.flip_aug_params) + + img1, img2 = noise_transform(img1, img2, **self.noise_aug_params) + img1, img2 = eraser_transform(img1, img2, **self.eraser_aug_params) img1 = np.ascontiguousarray(img1) img2 = np.ascontiguousarray(img2) diff --git a/ezflow/functional/data_augmentation/operations.py b/ezflow/functional/data_augmentation/operations.py index 1ed5325c..e81416f7 100644 --- a/ezflow/functional/data_augmentation/operations.py +++ b/ezflow/functional/data_augmentation/operations.py @@ -1,8 +1,17 @@ +from __future__ import division + +import numbers +import pdb +import random + import cv2 import numpy as np import scipy.ndimage as ndimage +import torch +import torchvision import torchvision.transforms as transforms from PIL import Image +from torch.nn import functional as F from torchvision.transforms import ColorJitter @@ -87,14 +96,15 @@ def crop( def color_transform( img1, img2, - aug_prob=0.2, + enabled=False, + asymmetric_color_aug_prob=0.2, brightness=0.4, contrast=0.4, saturation=0.4, hue=0.5 / 3.14, ): """ - Photometric augmentation + Photometric augmentation borrowed from RAFT https://github.com/princeton-vl/RAFT/blob/master/core/utils/augmentor.py Parameters ----------- @@ -102,8 +112,10 @@ def color_transform( First of the pair of images img2 : PIL Image or numpy.ndarray Second of the pair of images - aug_prob : float - Probability of applying the augmentation + enabled : bool, default: False + If True, applies color transform + asymmetric_color_aug_prob : float + Probability of applying asymetric color jitter augmentation brightness : float Brightness augmentation factor contrast : float @@ -120,12 +132,14 @@ def color_transform( img2 : PIL Image or numpy.ndarray Augmented image 2 """ + if not enabled: + return img1, img2 aug = ColorJitter( brightness=brightness, contrast=contrast, saturation=saturation, hue=hue ) - if np.random.rand() < aug_prob: + if np.random.rand() < asymmetric_color_aug_prob: img1 = np.array(aug(Image.fromarray(img1)), dtype=np.uint8) img2 = np.array(aug(Image.fromarray(img2)), dtype=np.uint8) @@ -137,15 +151,18 @@ def color_transform( return img1, img2 -def eraser_transform(img1, img2, bounds=[50, 100], aug_prob=0.5): +def eraser_transform(img1, img2, enabled=False, bounds=[50, 100], aug_prob=0.5): """ - Occlusion augmentation + Occlusion augmentation borrowed from RAFT https://github.com/princeton-vl/RAFT/blob/master/core/utils/augmentor.py + Parameters ----------- img1 : PIL Image or numpy.ndarray First of the pair of images img2 : PIL Image or numpy.ndarray Second of the pair of images + enabled : bool, default: False + If True, applies eraser transform bounds : :obj:`list` of :obj:`int` Bounds of the eraser aug_prob : float @@ -158,6 +175,8 @@ def eraser_transform(img1, img2, bounds=[50, 100], aug_prob=0.5): img2 : PIL Image or numpy.ndarray Augmented image 2 """ + if not enabled: + return img1, img2 H, W = img1.shape[:2] @@ -180,17 +199,17 @@ def spatial_transform( img2, flow, crop_size, + enabled=False, aug_prob=0.8, stretch_prob=0.8, max_stretch=0.2, min_scale=-0.2, max_scale=0.5, - flip=True, - h_flip_prob=0.5, - v_flip_prob=0.1, ): """ - Spatial augmentation + Simple set of spatial augmentation borrowed from RAFT https://github.com/princeton-vl/RAFT/blob/master/core/utils/augmentor.py + + Includes random scaling and stretch. 
Parameters ----------- @@ -202,6 +221,8 @@ def spatial_transform( Flow field crop_size : :obj:`list` of :obj:`int` Size of the crop + enabled : bool, default: False + If True, applies spatial transform aug_prob : float Probability of applying the augmentation stretch_prob : float @@ -212,12 +233,6 @@ def spatial_transform( Minimum scale factor max_scale : float Maximum scale factor - flip : bool - Whether to apply the flip transform - h_flip_prob : float - Probability of applying the horizontal flip transform - v_flip_prob : float - Probability of applying the vertical flip transform Returns ------- @@ -228,9 +243,10 @@ def spatial_transform( flow : numpy.ndarray Augmented flow field """ + if not enabled: + return img1, img2, flow H, W = img1.shape[:2] - min_scale = np.maximum((crop_size[0] + 8) / float(H), (crop_size[1] + 8) / float(W)) scale = 2 ** np.random.uniform(min_scale, max_scale) scale_x = scale @@ -240,6 +256,8 @@ def spatial_transform( scale_x *= 2 ** np.random.uniform(-max_stretch, max_stretch) scale_y *= 2 ** np.random.uniform(-max_stretch, max_stretch) + min_scale = np.maximum((crop_size[0] + 8) / float(H), (crop_size[1] + 8) / float(W)) + scale_x = np.clip(scale_x, min_scale, None) scale_y = np.clip(scale_y, min_scale, None) @@ -256,16 +274,50 @@ def spatial_transform( ) flow = flow * [scale_x, scale_y] - if flip: - if np.random.rand() < h_flip_prob: - img1 = img1[:, ::-1] - img2 = img2[:, ::-1] - flow = flow[:, ::-1] * [-1.0, 1.0] + return img1, img2, flow + - if np.random.rand() < v_flip_prob: - img1 = img1[::-1, :] - img2 = img2[::-1, :] - flow = flow[::-1, :] * [1.0, -1.0] +def flip_transform(img1, img2, flow, enabled=False, h_flip_prob=0.5, v_flip_prob=0.1): + """ + Flip augmentation borrowed from RAFT https://github.com/princeton-vl/RAFT/blob/master/core/utils/augmentor.py + + Parameters + ----------- + img1 : PIL Image or numpy.ndarray + First of the pair of images + img2 : PIL Image or numpy.ndarray + Second of the pair of images + flow : numpy.ndarray + Flow field + enabled : bool, default: False + If True, applies flip transform + h_flip_prob : float, default=0.5 + Probability of applying the horizontal flip transform + v_flip_prob : float, default=0.1 + Probability of applying the vertical flip transform + + Returns + ------- + img1 : PIL Image or numpy.ndarray + Flipped image 1 + img2 : PIL Image or numpy.ndarray + Flipped image 2 + flow : numpy.ndarray + Flipped flow field + """ + + if not enabled: + return img1, img2, flow + + if np.random.rand() < h_flip_prob: + img1 = img1[:, ::-1] + img2 = img2[:, ::-1] + flow = flow[:, ::-1] * [-1.0, 1.0] + + if np.random.rand() < v_flip_prob: + img1 = img1[::-1, :] + img2 = img2[::-1, :] + flow = flow[::-1, :] * [1.0, -1.0] return img1, img2, flow @@ -332,6 +384,7 @@ def sparse_spatial_transform( flow, valid, crop_size, + enabled=False, aug_prob=0.8, min_scale=-0.2, max_scale=0.5, @@ -377,6 +430,9 @@ def sparse_spatial_transform( valid : numpy.ndarray Valid flow field """ + if not enabled: + return img1, img2, flow, valid + H, W = img1.shape[:2] min_scale = np.maximum((crop_size[0] + 1) / float(H), (crop_size[1] + 1) / float(W)) @@ -404,74 +460,40 @@ def sparse_spatial_transform( return img1, img2, flow, valid -def translate_transform( - img1, - img2, - flow, - aug_prob=0.8, - translate=10, -): +class Normalize: """ - Translation augmentation. + A class to return Normalized Image. 
Parameters ----------- - img1 : PIL Image or numpy.ndarray - First of the pair of images - img2 : PIL Image or numpy.ndarray - Second of the pair of images - flow : numpy.ndarray - Flow field - aug_prob : float - Probability of applying the augmentation - translate : int - Pixels by which image will be translated - - Returns - ------- - img1 : PIL Image or numpy.ndarray - Augmented image 1 - img2 : PIL Image or numpy.ndarray - Augmented image 2 - flow : numpy.ndarray - Augmented flow field + use : boolean + Whether to normalize image or not + mean : list + The list of mean values to be substracted from each image channel + std : list + The list of std values with which to divide each image channel by """ - H, W = img1.shape[:2] - - max_t_x = translate - max_t_y = translate - - t_x = np.random.randint(-1 * max_t_x, max_t_x) - t_y = np.random.randint(-1 * max_t_y, max_t_y) - - if t_x == 0 and t_y == 0: - return img1, img2, flow - - if np.random.rand() < aug_prob: - x1, x2, x3, x4 = max(0, t_x), min(W + t_x, W), max(0, -t_x), min(W - t_x, W) - y1, y2, y3, y4 = max(0, t_y), min(H + t_y, H), max(0, -t_y), min(H - t_y, H) - - img1 = img1[y1:y2, x1:x2] - img2 = img2[y3:y4, x3:x4] - flow = flow[y1:y2, x1:x2] - flow[:, :, 0] += t_x - flow[:, :, 1] += t_y + def __init__(self, use=False, mean=[0, 0, 0], std=[255.0, 255.0, 255.0]): + self.use = use + self.mean = mean + self.std = std + self.normalize = transforms.Compose( + [ + transforms.Normalize(mean=self.mean, std=self.std), + ] + ) - return img1, img2, flow + def __call__(self, img1, img2): + if self.use: + return self.normalize(img1), self.normalize(img2) + return img1, img2 -def rotate_transform( - img1, - img2, - flow, - aug_prob=0.8, - degrees=10, - delta=0, -): +def noise_transform(img1, img2, enabled=False, aug_prob=0.5, noise_std_range=0.06): """ - Rotation augmentation. 
- (Referenced from Clement Picard) + Applies random noise augmentation from a gaussian distribution borrowed from VCN: + https://github.com/gengshan-y/VCN/blob/master/dataloader/flow_transforms.py Parameters ----------- @@ -479,14 +501,12 @@ def rotate_transform( First of the pair of images img2 : PIL Image or numpy.ndarray Second of the pair of images - flow : numpy.ndarray - Flow field + enabled : bool, default: False + If True, applies noise transform aug_prob : float Probability of applying the augmentation - degrees : int - Angle by which image is to rotated - delta: int - Assigns angle range of degrees-delta to degrees+delta + noise_std_range : float + Standard deviation of the noise Returns ------- @@ -494,79 +514,363 @@ def rotate_transform( Augmented image 1 img2 : PIL Image or numpy.ndarray Augmented image 2 - flow : numpy.ndarray - Augmented flow field """ - angle = np.random.uniform(-degrees, degrees) - diff = np.random.uniform(-delta, delta) - angle1 = angle - diff / 2 - angle2 = angle + diff / 2 - angle1_rad = angle1 * np.pi / 180 - diff_rad = diff * np.pi / 180 - - H, W = img1.shape[:2] - - warped_coords = np.mgrid[:W, :H].T + flow - warped_coords -= np.array([W / 2, H / 2]) - - warped_coords_rot = np.zeros_like(flow) - - warped_coords_rot[..., 0] = (np.cos(diff_rad) - 1) * warped_coords[..., 0] + np.sin( - diff_rad - ) * warped_coords[..., 1] - - warped_coords_rot[..., 1] = ( - -np.sin(diff_rad) * warped_coords[..., 0] - + (np.cos(diff_rad) - 1) * warped_coords[..., 1] - ) + if not enabled: + return img1, img2 if np.random.rand() < aug_prob: + noise = np.random.uniform(0, noise_std_range * 255.0) - flow += warped_coords_rot + img1 = img1.astype(np.float64) + img2 = img2.astype(np.float64) - img1 = ndimage.interpolation.rotate(img1, angle1, reshape=False, order=2) - img2 = ndimage.interpolation.rotate(img2, angle2, reshape=False, order=2) - flow = ndimage.interpolation.rotate(flow, angle1, reshape=False, order=2) + img1 += np.random.normal(0, noise, img1.shape) + img2 += np.random.normal(0, noise, img2.shape) - target_ = np.copy(flow) - flow[:, :, 0] = ( - np.cos(angle1_rad) * target_[:, :, 0] - + np.sin(angle1_rad) * target_[:, :, 1] - ) - flow[:, :, 1] = ( - -np.sin(angle1_rad) * target_[:, :, 0] - + np.cos(angle1_rad) * target_[:, :, 1] - ) + img1 = np.clip(img1, 0.0, 255.0) + img2 = np.clip(img2, 0.0, 255.0) - return img1, img2, flow + return img1, img2 -class Normalize: +class AdvancedSpatialTransform(object): """ - A class to return Normalized Image. + Advanced set of spatial transformations borrowed from: + + 1. VCN: https://github.com/gengshan-y/VCN/blob/master/dataloader/flow_transforms.py + 2. Autoflow: https://github.com/google-research/opticalflow-autoflow/blob/main/src/dataset_lib/augmentations/spatial_aug.py + + This set of augmentations include random scaling, stretch, rotation, translation and out-of-boundary cropping. 
Parameters ----------- - use : boolean - Whether to normalize image or not - mean : list - The list of mean values to be substracted from each image channel - std : list - The list of std values with which to divide each image channel by + crop_size : :obj:`list` of :obj:`int` + Size of the crop + enabled : bool, default: False + If True, applies flip transform + scale1 : float, default : 0.3 + Scale factor 1 + scale1 : float, default : 0.1 + Scale factor 2 + rotate : float, default : 0.4 + Rotate factor + translate : float, default : 0.4 + Translate factor + stretch : float, default : 0.3 + Stretch factor + h_flip_prob : float, default=0.5 + Probability of applying the horizontal flip transform + + Returns + ------- + img1 : PIL Image or numpy.ndarray + Flipped image 1 + img2 : PIL Image or numpy.ndarray + Flipped image 2 + flow : numpy.ndarray + Flipped flow field """ - def __init__(self, use=False, mean=[0, 0, 0], std=[255.0, 255.0, 255.0]): - self.use = use - self.mean = mean - self.std = std - self.normalize = transforms.Compose( + def __init__( + self, + crop, + enabled=False, + scale1=0.3, + scale2=0.1, + rotate=0.4, + translate=0.4, + stretch=0.3, + h_flip_prob=0.5, + schedule_coeff=1, + order=1, + enable_out_of_boundary_crop=False, + ): + self.enabled = enabled + self.crop = crop + self.scale = [scale1, 0.03, scale2] + self.rot = [rotate, 0.03] if rotate != 0 else None + self.trans = [translate, 0.03] if translate != 0 else None + self.squeeze = [stretch, 0.0] if stretch != 0 else None + self.h_flip_prob = h_flip_prob + self.t = np.zeros(6) + self.schedule_coeff = schedule_coeff + self.order = order + self.black = enable_out_of_boundary_crop + + def to_identity(self): + self.t[0] = 1 + self.t[2] = 0 + self.t[4] = 0 + self.t[1] = 0 + self.t[3] = 1 + self.t[5] = 0 + + def left_multiply(self, u0, u1, u2, u3, u4, u5): + result = np.zeros(6) + result[0] = self.t[0] * u0 + self.t[1] * u2 + result[1] = self.t[0] * u1 + self.t[1] * u3 + + result[2] = self.t[2] * u0 + self.t[3] * u2 + result[3] = self.t[2] * u1 + self.t[3] * u3 + + result[4] = self.t[4] * u0 + self.t[5] * u2 + u4 + result[5] = self.t[4] * u1 + self.t[5] * u3 + u5 + self.t = result + + def inverse(self): + result = np.zeros(6) + a = self.t[0] + c = self.t[2] + e = self.t[4] + b = self.t[1] + d = self.t[3] + f = self.t[5] + + denom = a * d - b * c + + result[0] = d / denom + result[1] = -b / denom + result[2] = -c / denom + result[3] = a / denom + result[4] = (c * f - d * e) / denom + result[5] = (b * e - a * f) / denom + + return result + + def grid_transform(self, meshgrid, t, normalize=True, gridsize=None): + if gridsize is None: + h, w = meshgrid[0].shape + else: + h, w = gridsize + vgrid = torch.cat( [ - transforms.Normalize(mean=self.mean, std=self.std), - ] + (meshgrid[0] * t[0] + meshgrid[1] * t[2] + t[4])[:, :, np.newaxis], + (meshgrid[0] * t[1] + meshgrid[1] * t[3] + t[5])[:, :, np.newaxis], + ], + -1, ) + if normalize: + vgrid[:, :, 0] = 2.0 * vgrid[:, :, 0] / max(w - 1, 1) - 1.0 + vgrid[:, :, 1] = 2.0 * vgrid[:, :, 1] / max(h - 1, 1) - 1.0 + return vgrid + + def __call__(self, img1, img2, target): + """ + Parameters + ----------- + img1 : PIL Image or numpy.ndarray + First of the pair of images + img2 : PIL Image or numpy.ndarray + Second of the pair of images + target : numpy.ndarray + Flow field - def __call__(self, img1, img2): - if self.use: - return self.normalize(img1), self.normalize(img2) - return img1, img2 + Returns + ------- + img1 : PIL Image or numpy.ndarray + Flipped image 1 + img2 : PIL Image or 
numpy.ndarray + Flipped image 2 + flow : numpy.ndarray + Flipped flow field + """ + if not self.enabled: + return img1, img2, target + + inputs = [img1, img2] + h, w, _ = inputs[0].shape + th, tw = self.crop + meshgrid = torch.meshgrid([torch.Tensor(range(th)), torch.Tensor(range(tw))])[ + ::-1 + ] + cornergrid = torch.meshgrid( + [torch.Tensor([0, th - 1]), torch.Tensor([0, tw - 1])] + )[::-1] + + for i in range(50): + # im0 + self.to_identity() + + if np.random.binomial(1, self.h_flip_prob): + mirror = True + else: + mirror = False + + if mirror: + self.left_multiply(-1, 0, 0, 1, 0.5 * tw, -0.5 * th) + else: + self.left_multiply(1, 0, 0, 1, -0.5 * tw, -0.5 * th) + scale0 = 1 + scale1 = 1 + squeeze0 = 1 + squeeze1 = 1 + if not self.rot is None: + rot0 = np.random.uniform(-self.rot[0], +self.rot[0]) + rot1 = ( + np.random.uniform( + -self.rot[1] * self.schedule_coeff, + self.rot[1] * self.schedule_coeff, + ) + + rot0 + ) + self.left_multiply( + np.cos(rot0), np.sin(rot0), -np.sin(rot0), np.cos(rot0), 0, 0 + ) + if not self.trans is None: + trans0 = np.random.uniform(-self.trans[0], +self.trans[0], 2) + trans1 = ( + np.random.uniform( + -self.trans[1] * self.schedule_coeff, + +self.trans[1] * self.schedule_coeff, + 2, + ) + + trans0 + ) + self.left_multiply(1, 0, 0, 1, trans0[0] * tw, trans0[1] * th) + if not self.squeeze is None: + squeeze0 = np.exp(np.random.uniform(-self.squeeze[0], self.squeeze[0])) + squeeze1 = ( + np.exp( + np.random.uniform( + -self.squeeze[1] * self.schedule_coeff, + self.squeeze[1] * self.schedule_coeff, + ) + ) + * squeeze0 + ) + if not self.scale is None: + scale0 = np.exp( + np.random.uniform( + self.scale[2] - self.scale[0], self.scale[2] + self.scale[0] + ) + ) + scale1 = ( + np.exp( + np.random.uniform( + -self.scale[1] * self.schedule_coeff, + self.scale[1] * self.schedule_coeff, + ) + ) + * scale0 + ) + self.left_multiply( + 1.0 / (scale0 * squeeze0), 0, 0, 1.0 / (scale0 / squeeze0), 0, 0 + ) + + self.left_multiply(1, 0, 0, 1, 0.5 * w, 0.5 * h) + transmat0 = self.t.copy() + + # im1 + self.to_identity() + if mirror: + self.left_multiply(-1, 0, 0, 1, 0.5 * tw, -0.5 * th) + else: + self.left_multiply(1, 0, 0, 1, -0.5 * tw, -0.5 * th) + if not self.rot is None: + self.left_multiply( + np.cos(rot1), np.sin(rot1), -np.sin(rot1), np.cos(rot1), 0, 0 + ) + if not self.trans is None: + self.left_multiply(1, 0, 0, 1, trans1[0] * tw, trans1[1] * th) + self.left_multiply( + 1.0 / (scale1 * squeeze1), 0, 0, 1.0 / (scale1 / squeeze1), 0, 0 + ) + self.left_multiply(1, 0, 0, 1, 0.5 * w, 0.5 * h) + transmat1 = self.t.copy() + transmat1_inv = self.inverse() + + if self.black: + # black augmentation, allowing 0 values in the input images + # https://github.com/lmb-freiburg/flownet2/blob/master/src/caffe/layers/black_augmentation_layer.cu + break + else: + if ( + ( + self.grid_transform( + cornergrid, transmat0, gridsize=[float(h), float(w)] + ).abs() + > 1 + ).sum() + + ( + self.grid_transform( + cornergrid, transmat1, gridsize=[float(h), float(w)] + ).abs() + > 1 + ).sum() + ) == 0: + break + if i == 49: + # print("max_iter in augmentation") + self.to_identity() + self.left_multiply(1, 0, 0, 1, -0.5 * tw, -0.5 * th) + self.left_multiply(1, 0, 0, 1, 0.5 * w, 0.5 * h) + transmat0 = self.t.copy() + transmat1 = self.t.copy() + + # do the real work + vgrid = self.grid_transform(meshgrid, transmat0, gridsize=[float(h), float(w)]) + inputs_0 = F.grid_sample( + torch.Tensor(inputs[0]).permute(2, 0, 1)[np.newaxis], vgrid[np.newaxis] + )[0].permute(1, 2, 0) + if self.order == 0: + 
target_0 = F.grid_sample( + torch.Tensor(target).permute(2, 0, 1)[np.newaxis], + vgrid[np.newaxis], + mode="nearest", + )[0].permute(1, 2, 0) + else: + target_0 = F.grid_sample( + torch.Tensor(target).permute(2, 0, 1)[np.newaxis], vgrid[np.newaxis] + )[0].permute(1, 2, 0) + + mask_0 = target[:, :, 2:3].copy() + mask_0[mask_0 == 0] = np.nan + if self.order == 0: + mask_0 = F.grid_sample( + torch.Tensor(mask_0).permute(2, 0, 1)[np.newaxis], + vgrid[np.newaxis], + mode="nearest", + )[0].permute(1, 2, 0) + else: + mask_0 = F.grid_sample( + torch.Tensor(mask_0).permute(2, 0, 1)[np.newaxis], vgrid[np.newaxis] + )[0].permute(1, 2, 0) + mask_0[torch.isnan(mask_0)] = 0 + + vgrid = self.grid_transform(meshgrid, transmat1, gridsize=[float(h), float(w)]) + inputs_1 = F.grid_sample( + torch.Tensor(inputs[1]).permute(2, 0, 1)[np.newaxis], vgrid[np.newaxis] + )[0].permute(1, 2, 0) + + # flow + pos = target_0[:, :, :2] + self.grid_transform( + meshgrid, transmat0, normalize=False + ) + pos = self.grid_transform(pos.permute(2, 0, 1), transmat1_inv, normalize=False) + if target_0.shape[2] >= 4: + # scale + exp = target_0[:, :, 3:] * scale1 / scale0 + target = torch.cat( + [ + (pos[:, :, 0] - meshgrid[0]).unsqueeze(-1), + (pos[:, :, 1] - meshgrid[1]).unsqueeze(-1), + mask_0, + exp, + ], + -1, + ) + else: + target = torch.cat( + [ + (pos[:, :, 0] - meshgrid[0]).unsqueeze(-1), + (pos[:, :, 1] - meshgrid[1]).unsqueeze(-1), + mask_0, + ], + -1, + ) + # target_0[:,:,2].unsqueeze(-1) ], -1) + inputs = [np.asarray(inputs_0), np.asarray(inputs_1)] + target = np.asarray(target) + + return inputs[0], inputs[1], target diff --git a/ezflow/models/dicl.py b/ezflow/models/dicl.py index 5a48c96e..418f6840 100644 --- a/ezflow/models/dicl.py +++ b/ezflow/models/dicl.py @@ -250,8 +250,9 @@ def forward(self, img1, img2): Returns ------- - torch.Tensor - Flow from img1 to img2 + :class:`dict` + torch.Tensor : intermediate flow predications from img1 to img2 + torch.Tensor : if model is in eval state, return upsampled flow """ _, x2, x3, x4, x5, x6 = self.feature_net(img1) @@ -313,10 +314,10 @@ def forward(self, img1, img2): self.scale_contexts[0], ) + output = {"flow_preds": [flow2, flow3, flow4, flow5, flow6]} if self.training: - if self.cfg.SUP_RAW_FLOW: - return ( + output["flow_preds"] = [ flow2, raw_flow2, flow3, @@ -327,12 +328,14 @@ def forward(self, img1, img2): raw_flow5, flow6, raw_flow6, - ) + ] - return (flow2, flow3, flow4, flow5, flow6) + return output - else: - _, _, height, width = img1.size() - return F.interpolate( - flow2, (height, width), mode="bilinear", align_corners=True - ) + _, _, height, width = img1.size() + flow_up = F.interpolate( + flow2, (height, width), mode="bilinear", align_corners=True + ) + + output["flow_upsampled"] = flow_up + return output diff --git a/ezflow/models/flownet_c.py b/ezflow/models/flownet_c.py index dcfa9c5c..d8252ee6 100644 --- a/ezflow/models/flownet_c.py +++ b/ezflow/models/flownet_c.py @@ -6,7 +6,7 @@ from ..decoder import build_decoder from ..encoder import BasicConvEncoder, build_encoder from ..modules import BaseModule, conv -from ..similarity import CorrelationLayer +from ..similarity import IterSpatialCorrelationSampler as SpatialCorrelationSampler from .build import MODEL_REGISTRY @@ -32,10 +32,13 @@ def __init__(self, cfg): self.feature_encoder = build_encoder(cfg.ENCODER) - self.correlation_layer = CorrelationLayer( - pad_size=cfg.SIMILARITY.PAD_SIZE, - max_displacement=cfg.SIMILARITY.MAX_DISPLACEMENT, + self.correlation_layer = SpatialCorrelationSampler( + 
kernel_size=1, + patch_size=2 * cfg.SIMILARITY.MAX_DISPLACEMENT + 1, + padding=cfg.SIMILARITY.PAD_SIZE, + dilation_patch=2, ) + self.corr_activation = nn.LeakyReLU(negative_slope=0.1, inplace=True) self.conv_redirect = conv( @@ -70,8 +73,9 @@ def forward(self, img1, img2): Returns ------- - torch.Tensor - Flow from img1 to img2 + :class:`dict` + torch.Tensor : intermediate flow predications from img1 to img2 + torch.Tensor : if model is in eval state, return upsampled flow """ H, W = img1.shape[-2:] @@ -80,6 +84,9 @@ def forward(self, img1, img2): conv_outputs2 = self.feature_encoder(img2) corr_output = self.correlation_layer(conv_outputs1[-1], conv_outputs2[-1]) + corr_output = corr_output.view( + corr_output.shape[0], -1, corr_output.shape[3], corr_output.shape[4] + ) corr_output = self.corr_activation(corr_output) # Redirect final feature output of img1 @@ -93,22 +100,18 @@ def forward(self, img1, img2): conv_outputs = [conv_outputs1[0], conv_outputs1[1]] + conv_outputs flow_preds = self.decoder(conv_outputs) - flow_preds.reverse() + + output = {"flow_preds": flow_preds} if self.training: - return flow_preds + return output - else: + flow_up = flow_preds[-1] - flow = flow_preds[0] + flow_up = F.interpolate( + flow_up, size=(H, W), mode="bilinear", align_corners=False + ) - if self.cfg.INTERPOLATE_FLOW: - H_, W_ = flow.shape[-2:] - flow = F.interpolate( - flow, img1.shape[-2:], mode="bilinear", align_corners=True - ) - flow_u = flow[:, 0, :, :] * (W / W_) - flow_v = flow[:, 1, :, :] * (H / H_) - flow = torch.stack([flow_u, flow_v], dim=1) + output["flow_upsampled"] = flow_up - return flow + return output diff --git a/ezflow/models/flownet_s.py b/ezflow/models/flownet_s.py index b0208be5..50418b3a 100644 --- a/ezflow/models/flownet_s.py +++ b/ezflow/models/flownet_s.py @@ -52,8 +52,9 @@ def forward(self, img1, img2): Returns ------- - torch.Tensor - Flow from img1 to img2 + :class:`dict` + torch.Tensor : intermediate flow predications from img1 to img2 + torch.Tensor : if model is in eval state, return upsampled flow """ H, W = img1.shape[-2:] @@ -65,20 +66,20 @@ def forward(self, img1, img2): flow_preds = self.decoder(conv_outputs) flow_preds.reverse() - if self.training: - return flow_preds + output = {"flow_preds": flow_preds} - else: + if self.training: + return output - flow = flow_preds[0] + flow = flow_preds[0] - if self.cfg.INTERPOLATE_FLOW: - H_, W_ = flow.shape[-2:] - flow = F.interpolate( - flow, img1.shape[-2:], mode="bilinear", align_corners=True - ) - flow_u = flow[:, 0, :, :] * (W / W_) - flow_v = flow[:, 1, :, :] * (H / H_) - flow = torch.stack([flow_u, flow_v], dim=1) + H_, W_ = flow.shape[-2:] + flow = F.interpolate( + flow, img1.shape[-2:], mode="bilinear", align_corners=False + ) + flow_u = flow[:, 0, :, :] * (W / W_) + flow_v = flow[:, 1, :, :] * (H / H_) + flow = torch.stack([flow_u, flow_v], dim=1) - return flow + output["flow_upsampled"] = flow + return output diff --git a/ezflow/models/predictor.py b/ezflow/models/predictor.py index f073aba1..aed5738a 100644 --- a/ezflow/models/predictor.py +++ b/ezflow/models/predictor.py @@ -105,8 +105,7 @@ def __call__(self, img1, img2): padder = InputPadder(img1.shape, divisor=self.pad_divisor) img1, img2 = padder.pad(img1, img2) - flow_pred = self.model(img1, img2) - flow_pred = padder.unpad(flow_pred) + output = self.model(img1, img2) + flow_pred = padder.unpad(output["flow_upsampled"]) flow_pred = flow_pred * self.flow_scale - return flow_pred diff --git a/ezflow/models/pwcnet.py b/ezflow/models/pwcnet.py index 
27d6b8b5..975289ea 100644 --- a/ezflow/models/pwcnet.py +++ b/ezflow/models/pwcnet.py @@ -2,11 +2,9 @@ import torch.nn as nn import torch.nn.functional as F -from ..decoder import ConvDecoder +from ..decoder import ContextNetwork, build_decoder from ..encoder import build_encoder -from ..modules import BaseModule, conv, deconv -from ..similarity import CorrelationLayer -from ..utils import warp +from ..modules import BaseModule from .build import MODEL_REGISTRY @@ -27,105 +25,17 @@ def __init__(self, cfg): self.cfg = cfg self.encoder = build_encoder(cfg.ENCODER) - self.correlation_layer = CorrelationLayer( - pad_size=cfg.SIMILARITY.PAD_SIZE, - max_displacement=cfg.SIMILARITY.MAX_DISPLACEMENT, - ) - search_range = (2 * cfg.SIMILARITY.MAX_DISPLACEMENT + 1) ** 2 - - self.decoder_layers = nn.ModuleList() - decoder_cfg = cfg.DECODER.CONFIG - - self.up_feature_layers = nn.ModuleList() - - for i in range(len(decoder_cfg)): - - if i == 0: - concat_channels = search_range - else: - concat_channels = ( - search_range + decoder_cfg[i] + cfg.SIMILARITY.MAX_DISPLACEMENT - ) - - self.decoder_layers.append( - ConvDecoder( - config=decoder_cfg, - to_flow=True, - concat_channels=concat_channels, - ) - ) - - self.up_feature_layers.append( - deconv( - concat_channels + sum(decoder_cfg), - 2, - kernel_size=4, - stride=2, - padding=1, - ) - ) - - self.deconv_layers = nn.ModuleList() - for i in range(len(decoder_cfg)): - self.deconv_layers.append(deconv(2, 2, kernel_size=4, stride=2, padding=1)) - - self.dc_conv = nn.ModuleList( - [ - conv( - search_range - + cfg.SIMILARITY.MAX_DISPLACEMENT - + decoder_cfg[-1] - + sum(decoder_cfg), - 128, - kernel_size=3, - stride=1, - padding=1, - dilation=1, - ), - ] - ) - self.dc_conv.append( - conv( - decoder_cfg[0], - decoder_cfg[0], - kernel_size=3, - stride=1, - padding=2, - dilation=2, - ) - ) + self.decoder = build_decoder(cfg.DECODER) - padding = 4 - dilation = 4 - for i in range(len(decoder_cfg) - 2): - self.dc_conv.append( - conv( - decoder_cfg[i], - decoder_cfg[i + 1], - kernel_size=3, - stride=1, - padding=padding, - dilation=dilation, - ) - ) - padding *= 2 - dilation *= 2 - - self.dc_conv.append( - conv( - decoder_cfg[3], - decoder_cfg[4], - kernel_size=3, - stride=1, - padding=1, - dilation=1, - ) + search_range = (2 * cfg.DECODER.SIMILARITY.MAX_DISPLACEMENT + 1) ** 2 + self.context_net = ContextNetwork( + in_channels=search_range + + cfg.DECODER.SIMILARITY.MAX_DISPLACEMENT + + cfg.DECODER.CONFIG[-1] + + sum(cfg.DECODER.CONFIG), + config=cfg.DECODER.CONFIG, ) - self.dc_conv.append( - nn.Conv2d(32, 2, kernel_size=3, stride=1, padding=1, bias=True) - ) - self.dc_conv = nn.Sequential(*self.dc_conv) self._init_weights() @@ -137,12 +47,6 @@ def _init_weights(self): if m.bias is not None: m.bias.data.zero_() - def _corr_relu(self, features1, features2): - - corr = self.correlation_layer(features1, features2) - - return F.leaky_relu(corr, negative_slope=0.1) - def forward(self, img1, img2): """ Performs forward pass of the network @@ -156,8 +60,9 @@ def forward(self, img1, img2): Returns ------- - torch.Tensor - Flow from img1 to img2 + :class:`dict` + torch.Tensor : intermediate flow predications from img1 to img2 + torch.Tensor : if model is in eval state, return upsampled flow """ H, W = img1.shape[-2:] @@ -165,55 +70,24 @@ def forward(self, img1, img2): feature_pyramid1 = self.encoder(img1) feature_pyramid2 = self.encoder(img2) - up_flow, up_features = None, None - up_flow_scale = 0.625 - - flow_preds = [] - - for i in range(len(self.decoder_layers)): + 
feature_pyramid1.reverse() + feature_pyramid2.reverse() - if i == 0: - corr = self._corr_relu(feature_pyramid1[i], feature_pyramid2[i]) - concatenated_features = corr + flow_preds, features = self.decoder(feature_pyramid1, feature_pyramid2) - else: + flow_preds[-1] += self.context_net(features) - warped_features = warp(feature_pyramid2[i], up_flow * up_flow_scale) - up_flow_scale *= 2 - - corr = self._corr_relu(feature_pyramid1[i], warped_features) - - concatenated_features = torch.cat( - [corr, feature_pyramid1[i], up_flow, up_features], dim=1 - ) - - flow, features = self.decoder_layers[i](concatenated_features) - flow_preds.append(flow) - - up_flow = self.deconv_layers[i](flow) - up_features = self.up_feature_layers[i](features) - - flow_preds.reverse() - flow_preds[0] += self.dc_conv(features) + output = {"flow_preds": flow_preds} if self.training: - return flow_preds - - else: + return output - flow = flow_preds[0] + flow_up = flow_preds[-1] - if self.cfg.INTERPOLATE_FLOW: - - H_, W_ = flow.shape[-2:] - flow = F.interpolate( - flow, img1.shape[-2:], mode="bilinear", align_corners=True - ) - flow_u = flow[:, 0, :, :] * (W / W_) - flow_v = flow[:, 1, :, :] * (H / H_) - flow = torch.stack([flow_u, flow_v], dim=1) + flow_up = F.interpolate( + flow_up, size=(H, W), mode="bilinear", align_corners=False + ) - if self.cfg.FLOW_SCALE_FACTOR is not None: - flow *= self.cfg.FLOW_SCALE_FACTOR + output["flow_upsampled"] = flow_up - return flow + return output diff --git a/ezflow/models/raft.py b/ezflow/models/raft.py index 5efd19e1..240cfa75 100644 --- a/ezflow/models/raft.py +++ b/ezflow/models/raft.py @@ -80,13 +80,7 @@ def _upsample_flow(self, flow, mask): return up_flow.reshape(N, 2, 8 * H, 8 * W) - def forward( - self, - img1, - img2, - flow_init=None, - only_flow=True, - ): + def forward(self, img1, img2, flow_init=None): """ Performs forward pass of the network @@ -99,11 +93,10 @@ def forward( Returns ------- - torch.Tensor - Flow from img1 to img2 + :class:`dict` + torch.Tensor : intermediate flow predications from img1 to img2 + torch.Tensor : if model is in eval state, return upsampled flow """ - img1 = 2 * (img1 / 255.0) - 1.0 - img2 = 2 * (img2 / 255.0) - 1.0 img1 = img1.contiguous() img2 = img2.contiguous() @@ -149,11 +142,10 @@ def forward( flow_predictions.append(flow_up) - if not self.training: + output = {"flow_preds": flow_predictions} - if only_flow: - return flow_up + if self.training: + return output - return coords1 - coords0, flow_up - - return flow_predictions + output["flow_upsampled"] = flow_up + return output diff --git a/ezflow/models/vcn.py b/ezflow/models/vcn.py index 70aa2b5c..c3e4086f 100644 --- a/ezflow/models/vcn.py +++ b/ezflow/models/vcn.py @@ -315,12 +315,14 @@ def forward(self, img1, img2): flow_preds[i], [img1.shape[2], img1.shape[3]], mode="bilinear", - align_corners=True, + align_corners=False, ) flow_preds[i] = flow_preds[i] * scale scale *= 2 + output = {"flow_preds": flow_preds} if self.training: - return flow_preds + return output - return flow_preds[0] + output["flow_upsampled"] = flow_preds[0] + return output diff --git a/ezflow/modules/blocks.py b/ezflow/modules/blocks.py index c5b79a57..bbfcf569 100644 --- a/ezflow/modules/blocks.py +++ b/ezflow/modules/blocks.py @@ -68,6 +68,7 @@ def __init__( if stride != 1: norm3 = nn.Identity() + # Change required if activation.lower() == "leakyrelu": self.activation = nn.LeakyReLU(negative_slope=0.1, inplace=True) else: diff --git a/requirements.txt b/requirements.txt index 0b0b91b9..88be8516 100644 --- 
a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ absl-py==0.13.0 alabaster==0.7.12 -antlr4-python3-runtime==4.9 +antlr4-python3-runtime==4.8 appdirs==1.4.4 argh==0.26.2 arrow==0.15.1 diff --git a/tests/test_engine.py b/tests/test_engine.py index 4dd8ad16..47721434 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -228,9 +228,11 @@ def test_setup_training_with_custom_loss_fn( @mock.patch("ezflow.engine.trainer.SummaryWriter") @mock.patch("ezflow.engine.trainer.os") def test_epoch_trainer(self, mock_os, mock_writer, mock_save_model): - trainer = Trainer( - self.training_cfg, self.mock_model, self.train_loader, self.val_loader + cfg = get_training_cfg( + cfg_path="./tests/configs/custom_loss_trainer.yaml", custom=True ) + + trainer = Trainer(cfg, self.mock_model, self.train_loader, self.val_loader) trainer._trainer = Trainer._epoch_trainer trainer.train() @@ -241,7 +243,6 @@ def test_epoch_trainer(self, mock_os, mock_writer, mock_save_model): del trainer - cfg = self.training_cfg cfg.VALIDATE_ON = "loss" trainer = Trainer(cfg, self.mock_model, self.train_loader, self.val_loader) trainer._trainer = Trainer._epoch_trainer @@ -258,7 +259,9 @@ def test_epoch_trainer(self, mock_os, mock_writer, mock_save_model): @mock.patch("ezflow.engine.trainer.SummaryWriter") @mock.patch("ezflow.engine.trainer.os") def test_step_trainer(self, mock_os, mock_writer, mock_save_model): - cfg = self.training_cfg + cfg = get_training_cfg( + cfg_path="./tests/configs/custom_loss_trainer.yaml", custom=True + ) cfg.NUM_STEPS = 1 trainer = Trainer(cfg, self.mock_model, self.train_loader, self.val_loader) @@ -309,9 +312,11 @@ def test_validate_ddp_config(self, mock_os, mock_utils, mock_cuda_device_count): del trainer + @mock.patch.object(torch.distributed, "barrier") @mock.patch.object(torch.distributed, "init_process_group") @mock.patch.object(torch, "device", return_value=torch.device) @mock.patch.object(torch.cuda, "is_available", return_value=True) + @mock.patch.object(torch.cuda, "set_device") @mock.patch.object(torch.cuda, "empty_cache") @mock.patch.object(torch.cuda, "device_count", return_value=2) @mock.patch("ezflow.engine.trainer.os") @@ -320,9 +325,11 @@ def test_setup_ddp( mock_os, mock_device_count, mock_empty_cache, + mock_torch_cuda_set_device, mock_cuda_available, mock_torch_device, mock_init_process_group, + mock_dist_barrier, ): trainer = DistributedTrainer( self.training_cfg, @@ -332,7 +339,7 @@ def test_setup_ddp( ) trainer._setup_device(rank=0) - + mock_torch_cuda_set_device.assert_called_with(0) mock_torch_device.assert_called_with(0) assert trainer.local_rank == 0 @@ -347,6 +354,7 @@ def test_setup_ddp( del trainer @mock.patch.object(torch.distributed, "destroy_process_group") + @mock.patch.object(torch.distributed, "barrier") @mock.patch.object(DistributedTrainer, "_setup_model") @mock.patch.object(DistributedTrainer, "_setup_training") @mock.patch.object(DistributedTrainer, "_epoch_trainer") @@ -363,6 +371,7 @@ def test_main_worker( mock_trainer, mock_setup_training, mock_setup_model, + mock_dist_barrier, mock_dist_cleanup, ): trainer = DistributedTrainer( @@ -383,6 +392,7 @@ def test_main_worker( rank=0, loss_fn=None, optimizer=None, scheduler=None ) mock_trainer.assert_called_with(None, None) + mock_dist_barrier.assert_called() mock_dist_cleanup.assert_called() del trainer diff --git a/tests/test_functional.py b/tests/test_functional.py index 67be0773..b8c98fc2 100644 --- a/tests/test_functional.py +++ b/tests/test_functional.py @@ -42,30 +42,74 @@ def 
test_FlowAugmentor(): augmentor = FlowAugmentor( crop_size=(224, 224), - color_aug_params={"aug_prob": 1.0}, - eraser_aug_params={"aug_prob": 1.0}, - spatial_aug_params={ - "aug_prob": 1.0, - "h_flip_prob": 1.0, - "v_flip_prob": 1.0, - "stretch_prob": 1.0, + noise_aug_params={"enabled": True, "aug_prob": 1.0}, + eraser_aug_params={"enabled": True, "aug_prob": 1.0}, + color_aug_params={"enabled": True, "asymmetric_color_aug_prob": 1.0}, + flip_aug_params={"enabled": True, "h_flip_prob": 1.0, "v_flip_prob": 1.0}, + spatial_aug_params={"enabled": True, "aug_prob": 1.0, "stretch_prob": 1.0}, + advanced_spatial_aug_params={"enabled": False}, + ) + _ = augmentor(img1, img2, flow) + + augmentor = FlowAugmentor( + crop_size=(224, 224), + noise_aug_params={"enabled": True, "aug_prob": 1.0}, + eraser_aug_params={"enabled": True, "aug_prob": 1.0}, + color_aug_params={"enabled": True, "asymmetric_color_aug_prob": 0.0}, + flip_aug_params={"enabled": True, "h_flip_prob": 1.0, "v_flip_prob": 1.0}, + spatial_aug_params={"enabled": False}, + advanced_spatial_aug_params={ + "enabled": True, + "scale1": 0.3, + "scale2": 0.1, + "rotate": 0.4, + "translate": 0.4, + "stretch": 0.3, + "enable_out_of_boundary_crop": False, }, ) _ = augmentor(img1, img2, flow) augmentor = FlowAugmentor( crop_size=(224, 224), - color_aug_params={"aug_prob": 0.0}, - eraser_aug_params={"aug_prob": 0.0}, - spatial_aug_params={ - "aug_prob": 0.0, - "h_flip_prob": 0.0, - "v_flip_prob": 0.0, - "stretch_prob": 0.0, + noise_aug_params={"enabled": True, "aug_prob": 1.0}, + eraser_aug_params={"enabled": True, "aug_prob": 1.0}, + color_aug_params={"enabled": True, "asymmetric_color_aug_prob": 0.0}, + flip_aug_params={"enabled": True, "h_flip_prob": 1.0, "v_flip_prob": 1.0}, + spatial_aug_params={"enabled": False}, + advanced_spatial_aug_params={ + "enabled": True, + "scale1": 0.3, + "scale2": 0.1, + "rotate": 0.4, + "translate": 0.4, + "stretch": 0.3, + "enable_out_of_boundary_crop": True, }, ) _ = augmentor(img1, img2, flow) + augmentor = FlowAugmentor( + crop_size=(224, 224), + color_aug_params={"enabled": False}, + eraser_aug_params={"enabled": False}, + noise_aug_params={"enabled": False}, + flip_aug_params={"enabled": False}, + spatial_aug_params={"enabled": False}, + advanced_spatial_aug_params={"enabled": False}, + ) + _ = augmentor(img1, img2, flow) + + augmentor = FlowAugmentor( + crop_size=(224, 224), + noise_aug_params={"enabled": True, "aug_prob": 0.0}, + eraser_aug_params={"enabled": True, "aug_prob": 0.0}, + color_aug_params={"enabled": True, "asymmetric_color_aug_prob": 0.0}, + flip_aug_params={"enabled": True, "h_flip_prob": 0.0, "v_flip_prob": 0.0}, + spatial_aug_params={"enabled": True, "aug_prob": 0.0, "stretch_prob": 0.0}, + ) + _ = augmentor(img1, img2, flow) + del augmentor @@ -75,17 +119,17 @@ def test_SparseFlowAugmentor(): augmentor = SparseFlowAugmentor( crop_size=(224, 224), - color_aug_params={"aug_prob": 1.0}, - eraser_aug_params={"aug_prob": 1.0}, - spatial_aug_params={"aug_prob": 1.0, "h_flip_prob": 1.0}, + color_aug_params={"enabled": True, "asymmetric_color_aug_prob": 1.0}, + eraser_aug_params={"enabled": True, "aug_prob": 1.0}, + spatial_aug_params={"enabled": True, "aug_prob": 1.0, "h_flip_prob": 1.0}, ) _ = augmentor(img1, img2, flow, valid) augmentor = SparseFlowAugmentor( crop_size=(224, 224), - color_aug_params={"aug_prob": 0.0}, - eraser_aug_params={"aug_prob": 0.0}, - spatial_aug_params={"aug_prob": 0.0, "h_flip_prob": 0.0}, + color_aug_params={"enabled": True, "asymmetric_color_aug_prob": 0.0}, + 
eraser_aug_params={"enabled": True, "aug_prob": 0.0}, + spatial_aug_params={"enabled": True, "aug_prob": 0.0, "h_flip_prob": 0.0}, ) _ = augmentor(img1, img2, flow, valid) @@ -104,61 +148,40 @@ def test_SequenceLoss(): def test_MultiScaleLoss(): - loss_fn = MultiScaleLoss() + loss_fn = MultiScaleLoss(norm="l1") _ = loss_fn(flow_pred, flow_gt) del loss_fn valid_mask = torch.randn(4, 1, 256, 256) flow_target = torch.cat([flow_gt, valid_mask], dim=1) - loss_fn = MultiScaleLoss() + loss_fn = MultiScaleLoss(norm="l1") _ = loss_fn(flow_pred, flow_target) del loss_fn + loss_fn = MultiScaleLoss(norm="l2") + _ = loss_fn(flow_pred, flow_gt) + del loss_fn -def test_Augmentor(): - augmentor = FlowAugmentor( - crop_size=(224, 224), - color_aug_params={"aug_prob": 1.0}, - eraser_aug_params={"aug_prob": 1.0}, - spatial_aug_params={ - "aug_prob": 1.0, - "h_flip_prob": 1.0, - "v_flip_prob": 1.0, - "stretch_prob": 1.0, - }, - translate_params={ - "aug_prob": 1.0, - "translate": 20, - }, - rotate_params={ - "aug_prob": 1.0, - "degrees": 20, - "delta": 5, - }, - ) - _ = augmentor(img1, img2, flow) + loss_fn = MultiScaleLoss(norm="robust") + _ = loss_fn(flow_pred, flow_gt) + del loss_fn - augmentor = FlowAugmentor( - crop_size=(224, 224), - color_aug_params={"aug_prob": 0.0}, - eraser_aug_params={"aug_prob": 0.0}, - spatial_aug_params={ - "aug_prob": 0.0, - "h_flip_prob": 0.0, - "v_flip_prob": 0.0, - "stretch_prob": 0.0, - }, - translate_params={ - "aug_prob": 0.0, - }, - rotate_params={ - "aug_prob": 0.0, - }, - ) - _ = augmentor(img1, img2, flow) + loss_fn = MultiScaleLoss(resize_flow="upsample") + _ = loss_fn(flow_pred, flow_gt) + del loss_fn - del augmentor + loss_fn = MultiScaleLoss(resize_flow="downsample") + _ = loss_fn(flow_pred, flow_gt) + del loss_fn + + loss_fn = MultiScaleLoss(average="mean") + _ = loss_fn(flow_pred, flow_gt) + del loss_fn + + loss_fn = MultiScaleLoss(average="sum") + _ = loss_fn(flow_pred, flow_gt) + del loss_fn def test_normalize(): diff --git a/tests/test_models.py b/tests/test_models.py index cce6c876..7d3e5590 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -25,15 +25,17 @@ def test_Predictor(): def test_RAFT(): model = build_model("RAFT", "raft.yaml") - flow_preds = model(img1, img2) - assert isinstance(flow_preds, tuple) or isinstance(flow_preds, list) + output = model(img1, img2) + assert isinstance(output, dict) + assert isinstance(output["flow_preds"], tuple) or isinstance( + output["flow_preds"], list + ) model.eval() - _ = model(img1, img2, only_flow=False) - flow = model(img1, img2) - assert flow.shape == (2, 2, 256, 256) + output = model(img1, img2) + assert output["flow_upsampled"].shape == (2, 2, 256, 256) - del model, flow, flow_preds + del model, output _ = build_model("RAFT", default=True) @@ -41,14 +43,17 @@ def test_RAFT(): def test_DICL(): model = build_model("DICL", "dicl.yaml") - flow_preds = model(img1, img2) - assert isinstance(flow_preds, tuple) or isinstance(flow_preds, list) + output = model(img1, img2) + assert isinstance(output, dict) + assert isinstance(output["flow_preds"], tuple) or isinstance( + output["flow_preds"], list + ) model.eval() - flow = model(img1, img2) - assert flow.shape == (2, 2, 256, 256) + output = model(img1, img2) + assert output["flow_upsampled"].shape == (2, 2, 256, 256) - del model, flow, flow_preds + del model, output _ = build_model("DICL", default=True) @@ -56,14 +61,17 @@ def test_DICL(): def test_PWCNet(): model = build_model("PWCNet", "pwcnet.yaml") - flow_preds = model(img1, img2) - assert 
isinstance(flow_preds, tuple) or isinstance(flow_preds, list) + output = model(img1, img2) + assert isinstance(output, dict) + assert isinstance(output["flow_preds"], tuple) or isinstance( + output["flow_preds"], list + ) model.eval() - flow = model(img1, img2) - assert flow.shape == (2, 2, 256, 256) + output = model(img1, img2) + assert output["flow_upsampled"].shape == (2, 2, 256, 256) - del model, flow, flow_preds + del model, output _ = build_model("PWCNet", default=True) @@ -71,14 +79,17 @@ def test_PWCNet(): def test_FlowNetS(): model = build_model("FlowNetS", "flownet_s.yaml") - flow_preds = model(img1, img2) - assert isinstance(flow_preds, tuple) or isinstance(flow_preds, list) + output = model(img1, img2) + assert isinstance(output, dict) + assert isinstance(output["flow_preds"], tuple) or isinstance( + output["flow_preds"], list + ) model.eval() - flow = model(img1, img2) - assert flow.shape == (2, 2, 256, 256) + output = model(img1, img2) + assert output["flow_upsampled"].shape == (2, 2, 256, 256) - del model, flow, flow_preds + del model, output _ = build_model("FlowNetS", default=True) @@ -86,14 +97,17 @@ def test_FlowNetS(): def test_FlowNetC(): model = build_model("FlowNetC", "flownet_c.yaml") - flow_preds = model(img1, img2) - assert isinstance(flow_preds, tuple) or isinstance(flow_preds, list) + output = model(img1, img2) + assert isinstance(output, dict) + assert isinstance(output["flow_preds"], tuple) or isinstance( + output["flow_preds"], list + ) model.eval() - flow = model(img1, img2) - assert flow.shape == (2, 2, 256, 256) + output = model(img1, img2) + assert output["flow_upsampled"].shape == (2, 2, 256, 256) - del model, flow, flow_preds + del model, output _ = build_model("FlowNetC", default=True) @@ -104,12 +118,14 @@ def test_VCN(): img = torch.randn(16, 3, 256, 256) - flow_preds = model(img, img) - assert isinstance(flow_preds, tuple) or isinstance(flow_preds, list) - assert flow_preds[0].shape == (16, 2, 256, 256) + output = model(img, img) + assert isinstance(output, dict) + assert isinstance(output["flow_preds"], tuple) or isinstance( + output["flow_preds"], list + ) model.eval() - flow = model(img, img) - assert flow.shape == (16, 2, 256, 256) + output = model(img, img) + assert output["flow_upsampled"].shape == (16, 2, 256, 256) - del model, flow, flow_preds + del model, output diff --git a/tests/utils/mock_model.py b/tests/utils/mock_model.py index 70203126..77dd2f9a 100644 --- a/tests/utils/mock_model.py +++ b/tests/utils/mock_model.py @@ -1,4 +1,5 @@ import torch +import torch.nn.functional as F from torch import nn from ezflow.modules import BaseModule @@ -13,5 +14,10 @@ def __init__(self, img_channels): def forward(self, img1, img2): x = torch.cat([img1, img2], dim=-3) + mock_flow_prediction = self.model(x) - return self.model(x) + flow_up = F.interpolate( + mock_flow_prediction, img1.shape[-2:], mode="bilinear", align_corners=True + ) + output = {"flow_preds": [mock_flow_prediction], "flow_upsampled": flow_up} + return output diff --git a/tools/train.py b/tools/train.py index db727d1c..0af63ca7 100644 --- a/tools/train.py +++ b/tools/train.py @@ -1,5 +1,5 @@ from ezflow.data import DataloaderCreator -from ezflow.engine import DistributedTrainer, Trainer, get_training_cfg +from ezflow.engine import DistributedTrainer, Trainer, get_cfg from ezflow.models import build_model @@ -7,32 +7,70 @@ def main(args): # Load training configuration - cfg = get_training_cfg(args.train_cfg) + cfg = get_cfg(args.train_cfg) if args.device: cfg.DEVICE = args.device + 
cfg.DATA.TRAIN_DATASET.ROOT_DIR = args.train_data_dir + cfg.DATA.VAL_DATASET.ROOT_DIR = args.val_data_dir + + if args.n_steps is not None: + cfg.NUM_STEPS = args.n_steps + + if cfg.SCHEDULER.NAME == "OneCycleLR": + cfg.SCHEDULER.PARAMS.total_steps = cfg.NUM_STEPS + # Create dataloaders - aug_params = None + train_aug_params = None + val_aug_params = None if cfg.DATA.AUGMENTATION.USE and cfg.DATA.AUGMENTATION.PARAMS: - aug_params = cfg.DATA.AUGMENTATION.PARAMS.to_dict() + train_aug_params = cfg.DATA.AUGMENTATION.PARAMS.TRAINING.to_dict() + val_aug_params = cfg.DATA.AUGMENTATION.PARAMS.VALIDATION.to_dict() train_loader_creator = DataloaderCreator( - cfg.DATA.BATCH_SIZE, num_workers=cfg.NUM_WORKERS - ) - train_loader_creator.add_FlyingChairs( - root_dir=args.data_dir, augment=cfg.DATA.AUGMENTATION.USE, aug_params=aug_params + batch_size=cfg.DATA.BATCH_SIZE, + num_workers=cfg.DATA.NUM_WORKERS, + pin_memory=cfg.DATA.PIN_MEMORY, + distributed=cfg.DISTRIBUTED.USE, + world_size=cfg.DISTRIBUTED.WORLD_SIZE, + append_valid_mask=cfg.DATA.APPEND_VALID_MASK, + shuffle=cfg.DATA.SHUFFLE, ) val_loader_creator = DataloaderCreator( - cfg.DATA.BATCH_SIZE, num_workers=cfg.NUM_WORKERS + batch_size=cfg.DATA.BATCH_SIZE, + num_workers=cfg.DATA.NUM_WORKERS, + pin_memory=cfg.DATA.PIN_MEMORY, + distributed=cfg.DISTRIBUTED.USE, + world_size=cfg.DISTRIBUTED.WORLD_SIZE, + append_valid_mask=cfg.DATA.APPEND_VALID_MASK, + shuffle=cfg.DATA.SHUFFLE, ) - val_loader_creator.add_FlyingChairs( - root_dir=args.data_dir, - split="validation", + + # TODO: Create a Dataloader Registry + train_loader_creator.add_FlyingChairs( + root_dir=cfg.DATA.TRAIN_DATASET.ROOT_DIR, + crop=True, + crop_type="random", + crop_size=cfg.DATA.TRAIN_CROP_SIZE, augment=cfg.DATA.AUGMENTATION.USE, - aug_params=aug_params, + aug_params=train_aug_params, + norm_params=cfg.DATA.NORM_PARAMS, + ) + + val_loader_creator.add_FlyingChairs( + root_dir=cfg.DATA.VAL_DATASET.ROOT_DIR, + split="validation", + crop=True, + crop_type="center", + crop_size=cfg.DATA.VAL_CROP_SIZE, + augment=cfg.DATA.AUGMENTATION.USE, + aug_params=val_aug_params, + norm_params=cfg.DATA.NORM_PARAMS, ) # Build model @@ -40,8 +78,7 @@ def main(args): model = build_model(args.model, default=True) # Create trainer - - if training_cfg.DISTRIBUTED.USE is True: + if cfg.DISTRIBUTED.USE is True: trainer = DistributedTrainer( cfg, model, @@ -74,32 +111,29 @@ help="Path to the training configuration file", ) parser.add_argument( - "--data_dir", type=str, required=True, help="Path to the root data directory" + "--train_data_dir", + type=str, + required=True, + help="Path to the root training data directory", + ) + parser.add_argument( + "--val_data_dir", + type=str, + required=True, + help="Path to the root validation data directory", ) parser.add_argument( "--model", type=str, required=True, help="Name of the model to train" ) parser.add_argument( - "--n_epochs", type=int, default=None, help="Number of epochs to train" + "--n_steps", type=int, default=None, help="Number of iterations to train" ) parser.add_argument( "--device", type=str, - default=None, + default="0", help="Device(s) to train on separated by commas. -1 for CPU", ) - parser.add_argument( - "--distributed", - type=bool, - default=False, - help="Whether to do distributed training", - ) - parser.add_argument( - "--distributed_backend", - type=str, - default="nccl", - help="Backend to use for distributed computing", - ) args = parser.parse_args()
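With this change every model's forward pass returns a dictionary rather than a bare flow tensor: `"flow_preds"` always holds the intermediate (per-scale or per-iteration) predictions, and `"flow_upsampled"` is added only in eval mode. A minimal usage sketch mirroring the updated tests in `tests/test_models.py` (the input shapes and the `default=True` config are illustrative assumptions):

```python
import torch

from ezflow.models import build_model

img1 = torch.randn(2, 3, 256, 256)
img2 = torch.randn(2, 3, 256, 256)

model = build_model("RAFT", default=True)

# Training mode: only the intermediate predictions are returned.
output = model(img1, img2)
flow_preds = output["flow_preds"]  # list/tuple of per-iteration flow estimates

# Eval mode: the full-resolution flow is returned as well.
model.eval()
with torch.no_grad():
    output = model(img1, img2)
flow = output["flow_upsampled"]  # (2, 2, 256, 256) in the tests
```

Code that previously consumed the bare tensor in eval mode now has to read `output["flow_upsampled"]`, which is exactly the adjustment made to `ezflow/models/predictor.py` above.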
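The reworked `MultiScaleLoss` exposes the robust-norm parameters (`q`, `eps`), an `average` mode, and a `resize_flow` mode. A sketch of how the new options combine, assuming the loss is importable from `ezflow.functional` as the tests use it and that the predictions form a multi-scale pyramid (shapes are illustrative):

```python
import torch

from ezflow.functional import MultiScaleLoss  # import path assumed from the tests

flow_gt = torch.randn(4, 2, 256, 256)
flow_preds = [torch.randn(4, 2, 256 // 2 ** i, 256 // 2 ** i) for i in range(3)]

# "robust" applies (|error|_1 + eps) ** q per pixel, "downsample" pools the ground
# truth to each prediction's resolution (instead of upsampling the prediction),
# and average="sum" divides the summed EPE map by the batch size.
loss_fn = MultiScaleLoss(
    norm="robust",
    q=0.4,
    eps=1e-2,
    weights=(1, 0.5, 0.25),
    average="sum",
    resize_flow="downsample",
)
loss = loss_fn(flow_preds, flow_gt)
```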
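On the augmentation side, every parameter dictionary now carries an explicit `"enabled"` flag, and enabling the advanced spatial augmentation internally disables the simple spatial transform and the horizontal flip. A rough constructor sketch following the parameter sets exercised in `tests/test_functional.py` (the import path and image shapes are assumptions):

```python
import numpy as np

from ezflow.functional import FlowAugmentor  # import path assumed

img1 = np.random.randint(0, 255, (384, 512, 3), dtype=np.uint8)
img2 = np.random.randint(0, 255, (384, 512, 3), dtype=np.uint8)
flow = np.random.randn(384, 512, 2).astype(np.float32)

augmentor = FlowAugmentor(
    crop_size=(224, 224),
    color_aug_params={"enabled": True, "asymmetric_color_aug_prob": 0.2},
    eraser_aug_params={"enabled": True, "aug_prob": 0.5, "bounds": [50, 100]},
    noise_aug_params={"enabled": False},
    flip_aug_params={"enabled": True, "h_flip_prob": 0.5, "v_flip_prob": 0.1},
    spatial_aug_params={"enabled": True, "aug_prob": 0.8, "stretch_prob": 0.8},
    # Set "enabled": True here to use the VCN/Autoflow-style transforms instead.
    advanced_spatial_aug_params={"enabled": False},
)

img1_aug, img2_aug, flow_aug = augmentor(img1, img2, flow)
```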
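Finally, a note on the FlowNet-C correlation change: the spatial correlation sampler produces a 5-D cost volume whose patch dimensions have to be folded into channels before the LeakyReLU, which is what the added `view` in `flownet_c.py` does. A small sketch under that assumption (the output layout, `padding=0`, and the feature shapes are assumptions; the constructor arguments mirror the diff):

```python
import torch

from ezflow.similarity import IterSpatialCorrelationSampler  # import path assumed

feat1 = torch.randn(1, 256, 32, 32)
feat2 = torch.randn(1, 256, 32, 32)

# MAX_DISPLACEMENT = 10 gives a 21 x 21 search window, i.e. 441 correlation
# channels, matching what the previous CorrelationLayer produced.
corr_layer = IterSpatialCorrelationSampler(
    kernel_size=1, patch_size=2 * 10 + 1, padding=0, dilation_patch=2
)

corr = corr_layer(feat1, feat2)  # assumed shape: (1, 21, 21, 32, 32)
corr = corr.view(corr.shape[0], -1, corr.shape[3], corr.shape[4])  # (1, 441, 32, 32)
```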