Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Performance]: FILM CPU inference time lower than iGPU in MTL #28589

Closed
3 tasks done
eugeooi opened this issue Jan 21, 2025 · 0 comments
Closed
3 tasks done

[Performance]: FILM CPU inference time lower than iGPU in MTL #28589

eugeooi opened this issue Jan 21, 2025 · 0 comments
Labels
performance Performance related topics support_request

Comments

@eugeooi
Copy link

eugeooi commented Jan 21, 2025

OpenVINO Version

2024.2

Operating System

Ubuntu 22.04 (LTS)

Device used for inference

iGPU

OpenVINO installation

PyPi

Programming Language

Python

Hardware Architecture

x86 (64 bits)

Model used

https://docs.openvino.ai/2024/notebooks/film-slowmo-with-output.html

Model quantization

No

Target Platform

OS: Ubuntu 22.04.5 LTS
CPU: Intel(R) Core(TM) Ultra 7 165H
RAM: 96GB

Performance issue description

I followed the FILM frame-interpolation tutorial linked above. The measured CPU inference time is lower than the iGPU inference time, i.e. the CPU is faster than the iGPU:

CPU: ~7s
iGPU: ~30s

Step-by-step reproduction

from pathlib import Path
import requests
from typing import Optional, Generator
from datetime import datetime
import gc
import os

os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
os.environ["TF_USE_LEGACY_KERAS"] = "1"
os.environ["TFHUB_CACHE_DIR"] = str(Path("./tfhub_modules").resolve())

import tensorflow_hub as hub
import tensorflow as tf
import openvino as ov
import numpy as np
import cv2
import matplotlib.pyplot as plt
from tqdm.auto import tqdm
#import gradio as gr
import PIL
import time
import openvino.properties.hint as hints

class Interpolator:
    """Recursive midpoint frame interpolator built around a callable model.

    The wrapped model is called with a dict holding the keys "x0", "x1" and
    "time", and its result is indexed with "image" to obtain the in-between
    frame.
    """

    def __init__(self, model):
        """Store the callable used to synthesize in-between frames.

        Args:
          model: Callable taking ``{"x0": ..., "x1": ..., "time": ...}`` and
            returning a mapping that contains the key "image".
        """
        self._model = model

    def _recursive_generator(
        self,
        frame1: np.ndarray,
        frame2: np.ndarray,
        num_recursions: int,
        bar: "Optional[tqdm]" = None,
    ) -> Generator[np.ndarray, None, None]:
        """Splits halfway to repeatedly generate more frames.

        Args:
          frame1: Input image 1.
          frame2: Input image 2.
          num_recursions: How many times to interpolate the consecutive image pairs.
          bar: Optional progress bar; advanced by one after every model call.

        Yields:
          The interpolated frames, including the first frame (frame1), but excluding
          the final frame2.
        """
        if num_recursions == 0:
            yield frame1
            return

        # Named `mid_time` so the local does not shadow the `time` module;
        # the dict key the model expects is still "time".
        mid_time = np.array([[0.5]], dtype=np.float32)
        mid_frame = self._model({"x0": frame1, "x1": frame2, "time": mid_time})["image"]
        if bar is not None:
            bar.update(1)
        # Left half first, then right half, so frames come out in temporal order.
        yield from self._recursive_generator(frame1, mid_frame, num_recursions - 1, bar)
        yield from self._recursive_generator(mid_frame, frame2, num_recursions - 1, bar)

    def interpolate_recursively(
        self,
        frame1: np.ndarray,
        frame2: np.ndarray,
        times_to_interpolate: int,
    ) -> Generator[np.ndarray, None, None]:
        """Generates interpolated frames by repeatedly interpolating the midpoint.

        This is a lazy generator: the model is only invoked while the caller
        iterates over the result.

        Args:
          frame1: Input image 1.
          frame2: Input image 2.
          times_to_interpolate: Number of times to do recursive midpoint
            interpolation.

        Yields:
          The interpolated frames (including the inputs).
        """
        # One model call per synthesized frame: 2**n - 1 in total.
        num_frames = 2 ** times_to_interpolate - 1
        # The context manager closes the progress bar once all frames are
        # produced, or when the consumer abandons the generator early.
        with tqdm(total=num_frames) as bar:
            yield from self._recursive_generator(frame1, frame2, times_to_interpolate, bar)
        # Separately yield the final frame.
        yield frame2

# Device name handed to compile_model; the issue above compares timings for
# the iGPU against the CPU.
device = "GPU"
MODEL_PATH = Path("models/model.xml")
OV_OUTPUT_VIDEO_PATH = "data/ov_output.mp4"

core = ov.Core()
# Compile the model once, requesting the LATENCY performance hint.
compiled_model = core.compile_model(
    MODEL_PATH,
    device,
    {hints.performance_mode(): hints.PerformanceMode.LATENCY},
)

# Source URL -> local file the image is cached in (see prepare_input).
IMAGES = {
    "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/c3ddf65f-95ec-44ca-9ed4-3ef2d8f4b47e": Path("data/one.jpg"),
    "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/6d21f1ce-69eb-41b5-aedd-6e3c29013b30": Path("data/two.jpg"),
}

# Passed as `times_to_interpolate`; with 1 the model runs 2**1 - 1 = 1 time
# per timed iteration.
TIMES_TO_INTERPOLATE = 1

def preprocess_np_frame(frame):
    """Convert an image array into a float32 model input with a batch axis.

    Args:
      frame: Array whose values are divided by 255 (intended for 8-bit
        pixel data, which then lands in [0, 1]).

    Returns:
      A float32 array with one extra leading axis of length 1.
    """
    result = frame.astype(np.float32) / 255  # normalize to [0, 1]
    return result[np.newaxis, ...]  # add batch dim

def prepare_input(img_url: str):
    """Download (if not cached) and load one input image for the model.

    Args:
      img_url: A key of ``IMAGES``; the mapped path is where the downloaded
        file is stored and read from.

    Returns:
      The image as RGB, passed through ``preprocess_np_frame``.

    Raises:
      KeyError: If ``img_url`` is not a key of ``IMAGES``.
      requests.HTTPError: If the download answers with an error status.
      RuntimeError: If OpenCV cannot decode the local file.
    """
    local_path = IMAGES[img_url]
    if not local_path.exists():
        # The target directory (e.g. "data/") may not exist on a fresh checkout.
        local_path.parent.mkdir(parents=True, exist_ok=True)
        r = requests.get(img_url, timeout=30)
        # Do not cache an HTML error page under a .jpg name.
        r.raise_for_status()
        local_path.write_bytes(r.content)

    img = cv2.imread(str(local_path))
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise RuntimeError(f"Could not read image file: {local_path}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    return preprocess_np_frame(img)

def save_as_video(frames: Generator[np.ndarray, None, None], width: int, height: int, filename: Path):
    """Encode a stream of frames into a video file.

    Args:
      frames: Iterable of arrays; index 0 of each is taken as the RGB image,
        and its values are clipped to [0, 1] before scaling to 8-bit.
      width: Frame width passed to the video writer.
      height: Frame height passed to the video writer.
      filename: Output path (converted with ``str``).
    """
    out = cv2.VideoWriter(str(filename), cv2.VideoWriter_fourcc(*"VP90"), 30, (width, height))
    try:
        for frame in frames:
            img = np.clip(frame[0], 0, 1)  # drop the leading axis, clamp the range
            rgb_img = (img * 255).astype(np.uint8)
            out.write(cv2.cvtColor(rgb_img, cv2.COLOR_RGB2BGR))
    finally:
        # Release the writer even if producing or encoding a frame fails.
        out.release()

'''
input_layer = compiled_model.input(0)
output_layer = compiled_model.output(0)

Get the input size

num, height, width, channels = input_layer.shape
print("Model input shape:", num, height, width, channels)
'''

input_images = [prepare_input(url) for url in IMAGES]

# input_images[0][0] strips the batch axis added by preprocess_np_frame;
# the first two remaining dimensions are taken as height and width.
height, width = input_images[0][0].shape[:2]
ov_interpolator = Interpolator(compiled_model)

# interpolate_recursively returns a lazy generator, so the model only runs
# while save_as_video consumes it. Each printed value therefore covers
# inference plus frame conversion, video encoding and writing the file.
for _ in range(5):
    # perf_counter is a monotonic, high-resolution clock meant for intervals.
    start = time.perf_counter()
    frames = ov_interpolator.interpolate_recursively(input_images[0], input_images[1], TIMES_TO_INTERPOLATE)
    save_as_video(frames, width, height, OV_OUTPUT_VIDEO_PATH)
    end = time.perf_counter()
    print(end - start)

Issue submission checklist

  • I'm reporting a performance issue. It's not a question.
  • I checked the problem with the documentation, FAQ, open issues, Stack Overflow, etc., and have not found a solution.
  • There is reproducer code and related data files such as images, videos, models, etc.
@eugeooi eugeooi added performance Performance related topics support_request labels Jan 21, 2025
@eugeooi eugeooi closed this as completed Jan 21, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
performance Performance related topics support_request
Projects
None yet
Development

No branches or pull requests

1 participant