diff --git a/demo/demo_bodymocap.py b/demo/demo_bodymocap.py
index 704b77f..eabeacb 100644
--- a/demo/demo_bodymocap.py
+++ b/demo/demo_bodymocap.py
@@ -1,92 +1,23 @@
 # Copyright (c) Facebook, Inc. and its affiliates.
 
-import os
-import sys
-import os.path as osp
 import torch
-from torchvision.transforms import Normalize
 import numpy as np
-import cv2
-import argparse
-import json
-import pickle
-from datetime import datetime
-from demo.demo_options import DemoOptions
 from bodymocap.body_mocap_api import BodyMocap
 from bodymocap.body_bbox_detector import BodyPoseEstimator
 import mocap_utils.demo_utils as demo_utils
-import mocap_utils.general_utils as gnu
-from mocap_utils.timer import Timer
-import renderer.image_utils as imu
-from renderer.viewer2D import ImShow
+import demo.demo_common
+from demo.demo_options import DemoOptions
 
 
 def run_body_mocap(args, body_bbox_detector, body_mocap, visualizer):
-    #Setup input data to handle different types of inputs
-    input_type, input_data = demo_utils.setup_input(args)
-
-    cur_frame = args.start_frame
-    video_frame = 0
-    timer = Timer()
-    while True:
-        timer.tic()
-        # load data
-        load_bbox = False
-
-        if input_type =='image_dir':
-            if cur_frame < len(input_data):
-                image_path = input_data[cur_frame]
-                img_original_bgr = cv2.imread(image_path)
-            else:
-                img_original_bgr = None
-
-        elif input_type == 'bbox_dir':
-            if cur_frame < len(input_data):
-                print("Use pre-computed bounding boxes")
-                image_path = input_data[cur_frame]['image_path']
-                hand_bbox_list = input_data[cur_frame]['hand_bbox_list']
-                body_bbox_list = input_data[cur_frame]['body_bbox_list']
-                img_original_bgr = cv2.imread(image_path)
-                load_bbox = True
-            else:
-                img_original_bgr = None
-
-        elif input_type == 'video':
-            _, img_original_bgr = input_data.read()
-            if video_frame < cur_frame:
-                video_frame += 1
-                continue
-            # save the obtained video frames
-            image_path = osp.join(args.out_dir, "frames", f"{cur_frame:05d}.jpg")
-            if img_original_bgr is not None:
-                video_frame += 1
-                if args.save_frame:
-                    gnu.make_subdir(image_path)
-                    cv2.imwrite(image_path, img_original_bgr)
-
-        elif input_type == 'webcam':
-            _, img_original_bgr = input_data.read()
-
-            if video_frame < cur_frame:
-                video_frame += 1
-                continue
-            # save the obtained video frames
-            image_path = osp.join(args.out_dir, "frames", f"scene_{cur_frame:05d}.jpg")
-            if img_original_bgr is not None:
-                video_frame += 1
-                if args.save_frame:
-                    gnu.make_subdir(image_path)
-                    cv2.imwrite(image_path, img_original_bgr)
-        else:
-            assert False, "Unknown input_type"
-
-        cur_frame +=1
-        if img_original_bgr is None or cur_frame > args.end_frame:
-            break
-        print("--------------------------------------")
+    for input_frame_and_metadata in demo.demo_common.input_frame_and_metadata_iterator(args):
+        image_path = input_frame_and_metadata.image_path
+        img_original_bgr = input_frame_and_metadata.img_original_bgr
+        load_bbox = input_frame_and_metadata.load_bbox
 
         if load_bbox:
+            body_bbox_list = input_frame_and_metadata.body_bbox_list
             body_pose_list = None
         else:
             body_pose_list, body_bbox_list = body_bbox_detector.detect_body_pose(
@@ -113,40 +44,10 @@ def run_body_mocap(args, body_bbox_detector, body_mocap, visualizer):
         pred_output_list = body_mocap.regress(img_original_bgr, body_bbox_list)
         assert len(body_bbox_list) == len(pred_output_list)
 
-        # extract mesh for rendering (vertices in image space and faces) from pred_output_list
-        pred_mesh_list = demo_utils.extract_mesh_from_output(pred_output_list)
-
-        # visualization
-        res_img = visualizer.visualize(
-            img_original_bgr,
-            pred_mesh_list = pred_mesh_list,
-            body_bbox_list = body_bbox_list)
-
-        # show result in the screen
-        if not args.no_display:
-            res_img = res_img.astype(np.uint8)
-            ImShow(res_img)
-
-        # save result image
-        if args.out_dir is not None:
-            demo_utils.save_res_img(args.out_dir, image_path, res_img)
-
-        # save predictions to pkl
-        if args.save_pred_pkl:
-            demo_type = 'body'
-            demo_utils.save_pred_to_pkl(
-                args, demo_type, image_path, body_bbox_list, hand_bbox_list, pred_output_list)
-
-        timer.toc(bPrint=True,title="Time")
-        print(f"Processed : {image_path}")
-
-    #save images as a video
-    if not args.no_video_out and input_type in ['video', 'webcam']:
-        demo_utils.gen_video_out(args.out_dir, args.seq_name)
-
-    if input_type =='webcam' and input_data is not None:
-        input_data.release()
-    cv2.destroyAllWindows()
+        input_frame_and_metadata.body_bbox_list = body_bbox_list
+        demo.demo_common.show_and_save_result(
+            args, 'body', input_frame_and_metadata, visualizer, pred_output_list
+        )
 
 
 def main():
diff --git a/demo/demo_common.py b/demo/demo_common.py
new file mode 100644
index 0000000..bf690db
--- /dev/null
+++ b/demo/demo_common.py
@@ -0,0 +1,206 @@
+import argparse
+import os.path
+
+import cv2
+import numpy as np
+
+import mocap_utils.demo_utils as demo_utils
+import mocap_utils.general_utils as gnu
+from mocap_utils.timer import Timer
+
+from renderer.viewer2D import ImShow
+
+
+def input_frame_and_metadata_iterator(args):
+    # Setup input data to handle different types of inputs
+    input_type, input_data = demo_utils.setup_input(args)
+
+    assert args.out_dir is not None, "Please specify output dir to store the results"
+    cur_frame = args.start_frame
+    video_frame = 0
+    timer = Timer()
+
+    while True:
+        timer.tic()
+
+        # load data
+        load_bbox = False
+        hand_bbox_list = None
+        body_bbox_list = None
+        image_path = None
+
+        if input_type == 'image_dir':
+            if cur_frame < len(input_data):
+                image_path = input_data[cur_frame]
+                img_original_bgr = cv2.imread(image_path)
+            else:
+                img_original_bgr = None
+
+        elif input_type == 'bbox_dir':
+            if cur_frame < len(input_data):
+                print("Use pre-computed bounding boxes")
+                image_path = input_data[cur_frame]['image_path']
+                hand_bbox_list = input_data[cur_frame]['hand_bbox_list']
+                body_bbox_list = input_data[cur_frame]['body_bbox_list']
+                img_original_bgr = cv2.imread(image_path)
+                load_bbox = True
+            else:
+                img_original_bgr = None
+
+        elif input_type == 'video':
+            _, img_original_bgr = input_data.read()
+            if video_frame < cur_frame:
+                video_frame += 1
+                continue
+            # save the obtained video frames
+            image_path = os.path.join(args.out_dir, "frames", f"{cur_frame:05d}.jpg")
+            if img_original_bgr is not None:
+                video_frame += 1
+                if args.save_frame:
+                    gnu.make_subdir(image_path)
+                    cv2.imwrite(image_path, img_original_bgr)
+
+        elif input_type == 'webcam':
+            _, img_original_bgr = input_data.read()
+
+            if video_frame < cur_frame:
+                video_frame += 1
+                continue
+            # save the obtained video frames
+            image_path = os.path.join(args.out_dir, "frames", f"scene_{cur_frame:05d}.jpg")
+            if img_original_bgr is not None:
+                video_frame += 1
+                if args.save_frame:
+                    gnu.make_subdir(image_path)
+                    cv2.imwrite(image_path, img_original_bgr)
+        else:
+            assert False, "Unknown input_type"
+
+        cur_frame += 1
+        if img_original_bgr is None or cur_frame > args.end_frame:
+            break
+
+        input_frame_and_metadata = argparse.Namespace(
+            image_path=image_path,
+            img_original_bgr=img_original_bgr,
+            load_bbox=load_bbox,
+        )
+
+        print("--------------------------------------")
+
+        if load_bbox:
+            # attach pre-computed bboxes to the namespace the demo scripts consume
+            input_frame_and_metadata.body_bbox_list = body_bbox_list
+            input_frame_and_metadata.hand_bbox_list = hand_bbox_list
+        yield input_frame_and_metadata
+
+        timer.toc(bPrint=True, title="Time")
+        print(f"Processed : {image_path}")
+
+    # save images as a video
+    if not args.no_video_out and input_type in ['video', 'webcam']:
+        demo_utils.gen_video_out(args.out_dir, args.seq_name)
+
+    # When everything done, release the capture
+    if input_type == 'webcam' and input_data is not None:
+        input_data.release()
+    cv2.destroyAllWindows()
+
+
+def detect_hand_bbox_and_save_it_into_frame_and_metadata(args, input_frame_and_metadata, bbox_detector_method):
+    image_path = input_frame_and_metadata.image_path
+    img_original_bgr = input_frame_and_metadata.img_original_bgr
+    load_bbox = input_frame_and_metadata.load_bbox
+
+    # bbox detection
+    body_bbox_list = None
+    if load_bbox:
+        body_bbox_list = input_frame_and_metadata.body_bbox_list
+        hand_bbox_list = input_frame_and_metadata.hand_bbox_list
+        body_pose_list = None
+        raw_hand_bboxes = None
+    elif args.crop_type == 'hand_crop':
+        # hand already cropped, therefore, no need for detection
+        img_h, img_w = img_original_bgr.shape[:2]
+        body_pose_list = None
+        raw_hand_bboxes = None
+        hand_bbox_list = [dict(right_hand=np.array([0, 0, img_w, img_h]))]
+    else:
+        # Input image has other body parts, or the hand is not cropped.
+        # Use hand detection model & body detector for hand detection
+        assert args.crop_type == 'no_crop'
+        detect_output = bbox_detector_method(img_original_bgr.copy())
+        body_pose_list, body_bbox_list, hand_bbox_list, raw_hand_bboxes = detect_output
+
+    # save the obtained body & hand bbox to json file
+    if args.save_bbox_output:
+        demo_utils.save_info_to_json(args, image_path, body_bbox_list, hand_bbox_list)
+
+    if len(hand_bbox_list) < 1:
+        print(f"No hand detected: {image_path}")
+        return False
+
+    input_frame_and_metadata.body_bbox_list = body_bbox_list
+    input_frame_and_metadata.hand_bbox_list = hand_bbox_list
+    input_frame_and_metadata.body_pose_list = body_pose_list
+    input_frame_and_metadata.raw_hand_bboxes = raw_hand_bboxes
+    return True
+
+
+def show_and_save_result(
+    args, demo_type, input_frame_and_metadata, visualizer=None,
+    pred_output_list=None, transformed_image=None, image_category="rendered"
+):
+    image_path = input_frame_and_metadata.image_path
+    img_original_bgr = input_frame_and_metadata.img_original_bgr
+    body_bbox_list = getattr(input_frame_and_metadata, 'body_bbox_list', None)  # may be unset for some demo types
+    hand_bbox_list = getattr(input_frame_and_metadata, 'hand_bbox_list', None)  # may be unset for some demo types
+
+    # extract mesh for rendering (vertices in image space and faces) from pred_output_list
+    pred_mesh_list = None
+    if pred_output_list is not None:
+        pred_mesh_list = demo_utils.extract_mesh_from_output(pred_output_list)
+
+    res_img = None
+    if transformed_image is not None:
+        res_img = transformed_image
+    elif visualizer is not None:
+        # visualization
+        if demo_type == 'frank':
+            res_img = visualizer.visualize(
+                img_original_bgr,
+                pred_mesh_list=pred_mesh_list,
+                body_bbox_list=body_bbox_list,
+                hand_bbox_list=hand_bbox_list
+            )
+        elif demo_type == 'body':
+            res_img = visualizer.visualize(
+                img_original_bgr,
+                pred_mesh_list=pred_mesh_list,
+                body_bbox_list=body_bbox_list
+            )
+        elif demo_type == 'hand':
+            res_img = visualizer.visualize(
+                img_original_bgr,
+                pred_mesh_list=pred_mesh_list,
+                hand_bbox_list=hand_bbox_list
+            )
+        else:
+            raise ValueError("Unknown demo_type")
+
+    if res_img is not None:
+        # show result in the screen
+        if not args.no_display:
+            res_img = res_img.astype(np.uint8)
+            ImShow(res_img)
+
+        # save result image (we can make an option here)
+        if args.out_dir is not None:
+            demo_utils.save_res_img(
+                args.out_dir, image_path, res_img, image_category=image_category
+            )
+
+    # save predictions to pkl
+    if args.save_pred_pkl:
+        demo_utils.save_pred_to_pkl(
+            args, demo_type, image_path, body_bbox_list, hand_bbox_list, pred_output_list
+        )
diff --git a/demo/demo_frankmocap.py b/demo/demo_frankmocap.py
index e920261..0534786 100644
--- a/demo/demo_frankmocap.py
+++ b/demo/demo_frankmocap.py
@@ -1,31 +1,17 @@
 # Copyright (c) Facebook, Inc. and its affiliates.
 
-import os
-import sys
-import os.path as osp
 import torch
-from torchvision.transforms import Normalize
 import numpy as np
-import cv2
-import argparse
-import json
-import pickle
-############# input parameters #############
-from demo.demo_options import DemoOptions
 from bodymocap.body_mocap_api import BodyMocap
 from handmocap.hand_mocap_api import HandMocap
 import mocap_utils.demo_utils as demo_utils
-import mocap_utils.general_utils as gnu
-from mocap_utils.timer import Timer
-from datetime import datetime
-from bodymocap.body_bbox_detector import BodyPoseEstimator
 from handmocap.hand_bbox_detector import HandBboxDetector
 from integration.copy_and_paste import integration_copy_paste
-import renderer.image_utils as imu
-from renderer.viewer2D import ImShow
+import demo.demo_common
+from demo.demo_options import DemoOptions
 
 
 def __filter_bbox_list(body_bbox_list, hand_bbox_list, single_person):
@@ -108,66 +94,11 @@ def run_regress(
 
 
 def run_frank_mocap(args, bbox_detector, body_mocap, hand_mocap, visualizer):
-    #Setup input data to handle different types of inputs
-    input_type, input_data = demo_utils.setup_input(args)
-
-    cur_frame = args.start_frame
-    video_frame = 0
-    while True:
-        # load data
-        load_bbox = False
-
-        if input_type =='image_dir':
-            if cur_frame < len(input_data):
-                image_path = input_data[cur_frame]
-                img_original_bgr = cv2.imread(image_path)
-            else:
-                img_original_bgr = None
-
-        elif input_type == 'bbox_dir':
-            if cur_frame < len(input_data):
-                image_path = input_data[cur_frame]['image_path']
-                hand_bbox_list = input_data[cur_frame]['hand_bbox_list']
-                body_bbox_list = input_data[cur_frame]['body_bbox_list']
-                img_original_bgr = cv2.imread(image_path)
-                load_bbox = True
-            else:
-                img_original_bgr = None
-
-        elif input_type == 'video':
-            _, img_original_bgr = input_data.read()
-            if video_frame < cur_frame:
-                video_frame += 1
-                continue
-            # save the obtained video frames
-            image_path = osp.join(args.out_dir, "frames", f"{cur_frame:05d}.jpg")
-            if img_original_bgr is not None:
-                video_frame += 1
-                if args.save_frame:
-                    gnu.make_subdir(image_path)
-                    cv2.imwrite(image_path, img_original_bgr)
-
-        elif input_type == 'webcam':
-            _, img_original_bgr = input_data.read()
-
-            if video_frame < cur_frame:
-                video_frame += 1
-                continue
-            # save the obtained video frames
-            image_path = osp.join(args.out_dir, "frames", f"scene_{cur_frame:05d}.jpg")
-            if img_original_bgr is not None:
-                video_frame += 1
-                if args.save_frame:
-                    gnu.make_subdir(image_path)
-                    cv2.imwrite(image_path, img_original_bgr)
-        else:
-            assert False, "Unknown input_type"
+    for input_frame_and_metadata in demo.demo_common.input_frame_and_metadata_iterator(args):
+        image_path = input_frame_and_metadata.image_path
+        img_original_bgr = input_frame_and_metadata.img_original_bgr
+        load_bbox = input_frame_and_metadata.load_bbox
 
-        cur_frame +=1
-        if img_original_bgr is None or cur_frame > args.end_frame:
-            break
-        print("--------------------------------------")
-
         # bbox detection
         if not load_bbox:
             body_bbox_list, hand_bbox_list = list(), list()
@@ -186,39 +117,11 @@ def run_frank_mocap(args, bbox_detector, body_mocap, hand_mocap, visualizer):
             print(f"No body deteced: {image_path}")
             continue
 
-        pred_mesh_list = demo_utils.extract_mesh_from_output(pred_output_list)
-
-        # visualization
-        res_img = visualizer.visualize(
-            img_original_bgr,
-            pred_mesh_list = pred_mesh_list,
-            body_bbox_list = body_bbox_list,
-            hand_bbox_list = hand_bbox_list)
-
-        # show result in the screen
-        if not args.no_display:
-            res_img = res_img.astype(np.uint8)
-            ImShow(res_img)
-
-        # save result image
-        if args.out_dir is not None:
-            demo_utils.save_res_img(args.out_dir, image_path, res_img)
-
-        # save predictions to pkl
-        if args.save_pred_pkl:
-            demo_type = 'frank'
-            demo_utils.save_pred_to_pkl(
-                args, demo_type, image_path, body_bbox_list, hand_bbox_list, pred_output_list)
-
-        print(f"Processed : {image_path}")
-
-    # save images as a video
-    if not args.no_video_out and input_type in ['video', 'webcam']:
-        demo_utils.gen_video_out(args.out_dir, args.seq_name)
-
-    if input_type =='webcam' and input_data is not None:
-        input_data.release()
-    cv2.destroyAllWindows()
+        input_frame_and_metadata.body_bbox_list = body_bbox_list
+        input_frame_and_metadata.hand_bbox_list = hand_bbox_list
+        demo.demo_common.show_and_save_result(
+            args, 'frank', input_frame_and_metadata, visualizer, pred_output_list
+        )
 
 
 def main():
     args = DemoOptions().parse()
diff --git a/demo/demo_handmocap.py b/demo/demo_handmocap.py
index 2262c33..573b72c 100644
--- a/demo/demo_handmocap.py
+++ b/demo/demo_handmocap.py
@@ -1,156 +1,37 @@
 # Copyright (c) Facebook, Inc. and its affiliates.
 
-import os, sys, shutil
-import os.path as osp
-import numpy as np
-import cv2
-import json
 import torch
-from torchvision.transforms import Normalize
-
-from demo.demo_options import DemoOptions
-import mocap_utils.general_utils as gnu
-import mocap_utils.demo_utils as demo_utils
 
 from handmocap.hand_mocap_api import HandMocap
 from handmocap.hand_bbox_detector import HandBboxDetector
 
-import renderer.image_utils as imu
-from renderer.viewer2D import ImShow
-import time
+import demo.demo_common
+from demo.demo_options import DemoOptions
 
 
 def run_hand_mocap(args, bbox_detector, hand_mocap, visualizer):
-    #Set up input data (images or webcam)
-    input_type, input_data = demo_utils.setup_input(args)
-
-    assert args.out_dir is not None, "Please specify output dir to store the results"
-    cur_frame = args.start_frame
-    video_frame = 0
-
-    while True:
-        # load data
-        load_bbox = False
-
-        if input_type =='image_dir':
-            if cur_frame < len(input_data):
-                image_path = input_data[cur_frame]
-                img_original_bgr = cv2.imread(image_path)
-            else:
-                img_original_bgr = None
-
-        elif input_type == 'bbox_dir':
-            if cur_frame < len(input_data):
-                print("Use pre-computed bounding boxes")
-                image_path = input_data[cur_frame]['image_path']
-                hand_bbox_list = input_data[cur_frame]['hand_bbox_list']
-                body_bbox_list = input_data[cur_frame]['body_bbox_list']
-                img_original_bgr = cv2.imread(image_path)
-                load_bbox = True
-            else:
-                img_original_bgr = None
-
-        elif input_type == 'video':
-            _, img_original_bgr = input_data.read()
-            if video_frame < cur_frame:
-                video_frame += 1
-                continue
-            # save the obtained video frames
-            image_path = osp.join(args.out_dir, "frames", f"{cur_frame:05d}.jpg")
-            if img_original_bgr is not None:
-                video_frame += 1
-                if args.save_frame:
-                    gnu.make_subdir(image_path)
-                    cv2.imwrite(image_path, img_original_bgr)
-
-        elif input_type == 'webcam':
-            _, img_original_bgr = input_data.read()
-
-            if video_frame < cur_frame:
-                video_frame += 1
-                continue
-            # save the obtained video frames
-            image_path = osp.join(args.out_dir, "frames", f"scene_{cur_frame:05d}.jpg")
-            if img_original_bgr is not None:
-                video_frame += 1
-                if args.save_frame:
-                    gnu.make_subdir(image_path)
-                    cv2.imwrite(image_path, img_original_bgr)
-        else:
-            assert False, "Unknown input_type"
-
-        cur_frame +=1
-        if img_original_bgr is None or cur_frame > args.end_frame:
-            break
-        print("--------------------------------------")
-
-        # bbox detection
-        if load_bbox:
-            body_pose_list = None
-            raw_hand_bboxes = None
-        elif args.crop_type == 'hand_crop':
-            # hand already cropped, thererore, no need for detection
-            img_h, img_w = img_original_bgr.shape[:2]
-            body_pose_list = None
-            raw_hand_bboxes = None
-            hand_bbox_list = [ dict(right_hand = np.array([0, 0, img_w, img_h])) ]
-        else:
-            # Input images has other body part or hand not cropped.
-            # Use hand detection model & body detector for hand detection
-            assert args.crop_type == 'no_crop'
-            detect_output = bbox_detector.detect_hand_bbox(img_original_bgr.copy())
-            body_pose_list, body_bbox_list, hand_bbox_list, raw_hand_bboxes = detect_output
-
-        # save the obtained body & hand bbox to json file
-        if args.save_bbox_output:
-            demo_utils.save_info_to_json(args, image_path, body_bbox_list, hand_bbox_list)
-
-        if len(hand_bbox_list) < 1:
-            print(f"No hand deteced: {image_path}")
+    for input_frame_and_metadata in demo.demo_common.input_frame_and_metadata_iterator(args):
+        img_original_bgr = input_frame_and_metadata.img_original_bgr
+
+        if not demo.demo_common.detect_hand_bbox_and_save_it_into_frame_and_metadata(
+            args, input_frame_and_metadata, bbox_detector.detect_hand_bbox
+        ):
             continue
-
+
+        body_bbox_list = input_frame_and_metadata.body_bbox_list
+        hand_bbox_list = input_frame_and_metadata.hand_bbox_list
+
         # Hand Pose Regression
         pred_output_list = hand_mocap.regress(
                 img_original_bgr, hand_bbox_list, add_margin=True)
         assert len(hand_bbox_list) == len(body_bbox_list)
         assert len(body_bbox_list) == len(pred_output_list)
 
-        # extract mesh for rendering (vertices in image space and faces) from pred_output_list
-        pred_mesh_list = demo_utils.extract_mesh_from_output(pred_output_list)
-
-        # visualize
-        res_img = visualizer.visualize(
-            img_original_bgr,
-            pred_mesh_list = pred_mesh_list,
-            hand_bbox_list = hand_bbox_list)
-
-        # show result in the screen
-        if not args.no_display:
-            res_img = res_img.astype(np.uint8)
-            ImShow(res_img)
-
-        # save the image (we can make an option here)
-        if args.out_dir is not None:
-            demo_utils.save_res_img(args.out_dir, image_path, res_img)
-
-        # save predictions to pkl
-        if args.save_pred_pkl:
-            demo_type = 'hand'
-            demo_utils.save_pred_to_pkl(
-                args, demo_type, image_path, body_bbox_list, hand_bbox_list, pred_output_list)
-
-        print(f"Processed : {image_path}")
-
-    #save images as a video
-    if not args.no_video_out and input_type in ['video', 'webcam']:
-        demo_utils.gen_video_out(args.out_dir, args.seq_name)
-
-    # When everything done, release the capture
-    if input_type =='webcam' and input_data is not None:
-        input_data.release()
-    cv2.destroyAllWindows()
-
-
+        demo.demo_common.show_and_save_result(
+            args, 'hand', input_frame_and_metadata, visualizer, pred_output_list
+        )
+
+
 def main():
     args = DemoOptions().parse()
     args.use_smplx = True