Update RVC2 and RVC4 benchmark scripts to work with the dai Benchmark Nodes #64

Open: wants to merge 27 commits into base: main

Commits (the file diff shown further below covers 4 of the 27 commits)
c424740
Update benchmark script for RVC2 using daiv3
ptoupas Jan 8, 2025
6cb5a2c
Add dai based benchmark execution for RVC4 device
ptoupas Jan 9, 2025
c4b1a5d
Ignore latency measurements on dai based benchmark reports
ptoupas Jan 9, 2025
e97a453
Update is_hubai_available to work with hubAI API calls
ptoupas Jan 10, 2025
7295d96
Update is_hubai_available to work with various teams from HubAI
ptoupas Jan 10, 2025
82a7044
Remove removeprefix to work with python version 3.8 [skip ci]
ptoupas Jan 10, 2025
a34b9ed
Fix test_modifier test error with EfficientVIT model and change the A…
ptoupas Jan 10, 2025
44a097b
Update .pre-commit-config.yaml
ptoupas Jan 10, 2025
7d4d223
Fix model path and HubAI model slug parsing [ci skip]
ptoupas Jan 13, 2025
57b8982
Add HUBAI_API_KEY to getModelFromZoo calls [ci skip]
ptoupas Jan 13, 2025
d6e5da1
Update Benchmarking Section of README file [ci skip]
ptoupas Jan 13, 2025
4d3bc5b
Update .pre-commit-config.yaml [ci skip]
ptoupas Jan 13, 2025
e8bc974
Fix dlc parsing on Benchmark __init__
ptoupas Jan 14, 2025
e2a7ed7
Update the way modify_onnx optimisation runs are conducted in the ONN…
ptoupas Jan 14, 2025
cd2b088
Fix SNPE benchmark on RVC4 and added support for benchmark over model…
ptoupas Jan 14, 2025
addc5f1
Updated ONNX version (#56)
kozlov721 Jan 15, 2025
f0149cd
Update the RVC4 benchmark to take into account the data type for each…
ptoupas Jan 16, 2025
2753987
Merge remote-tracking branch 'origin' into fix/update-benchmarks-scri…
ptoupas Jan 16, 2025
8dfdb84
Update .pre-commit-config.yaml [ci skip]
ptoupas Jan 16, 2025
b58782c
Fix issue when extracting the model from NNArchive in snpe benchmark …
ptoupas Jan 27, 2025
9b2a602
Add bool tensor type during evaluation of onnx models on ONNXModifier…
ptoupas Jan 27, 2025
e081181
Add a try except block on onnx optimisation and validation.
ptoupas Jan 27, 2025
9cd7158
Merge remote-tracking branch 'origin' into fix/update-benchmarks-scri…
ptoupas Jan 28, 2025
565ae6e
add disable_onnx_optimisation flag on the example defaults.yaml file
ptoupas Jan 28, 2025
d37ec5e
Update dai requirement to version 3.0.0a12 [ci skip]
ptoupas Jan 29, 2025
0548541
Add botocore requirement
ptoupas Jan 29, 2025
c2f91f2
Remove the extra-index-url from the requirements-bench.txt file
ptoupas Jan 29, 2025
2 changes: 2 additions & 0 deletions .pre-commit-config.yaml
@@ -1,3 +1,5 @@
default_language_version:
  python: python3
repos:
  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.1.2
21 changes: 20 additions & 1 deletion modelconverter/__main__.py
@@ -175,10 +175,12 @@ def benchmark(

**RVC2**

- `--repetitions`: The number of repetitions to perform. Default: `1`
- `--repetitions`: The number of repetitions to perform. Default: `10`

- `--num-threads`: The number of threads to use for inference. Default: `2`

- `--num-messages`: The number of messages to measure for each report. Default: `50`

---

**RVC3**
@@ -191,8 +193,18 @@ def benchmark(

- `--profile`: The SNPE profile to use for inference. Default: `"default"`

- `--runtime`: The SNPE runtime to use for inference (dsp or cpu). Default: `"dsp"`

- `--num-images`: The number of images to use for inference. Default: `1000`

- `--dai-benchmark`: Whether to run the benchmark through the DepthAI v3 benchmark nodes. If `False`, the SNPE tools are used instead. Default: `True`

- `--repetitions`: The number of repetitions to perform (dai-benchmark only). Default: `10`

- `--num-threads`: The number of threads to use for inference (dai-benchmark only). Default: `1`

- `--num-messages`: The number of messages to measure for each report (dai-benchmark only). Default: `50`

---
"""

@@ -203,6 +215,13 @@ def benchmark(
key = key[2:].replace("-", "_")
else:
raise typer.BadParameter(f"Unknown argument: {key}")
if key == "dai_benchmark":
value = value.capitalize()
if value not in ["True", "False"]:
raise typer.BadParameter(
"dai_benchmark must be either True or False"
)
value = value == "True"
kwargs[key] = value
Benchmark = get_benchmark(target)
benchmark = Benchmark(str(model_path))
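The coercion added in the hunk above accepts any capitalisation of "true"/"false" and rejects everything else. A self-contained sketch follows; the helper name `parse_dai_benchmark` is introduced here only for illustration, and `ValueError` stands in for `typer.BadParameter`.

```python
def parse_dai_benchmark(value: str) -> bool:
    """Sketch of the --dai-benchmark coercion; illustrative helper only."""
    value = value.capitalize()  # "true" -> "True", "FALSE" -> "False"
    if value not in ["True", "False"]:
        raise ValueError("dai_benchmark must be either True or False")
    return value == "True"

assert parse_dai_benchmark("true") is True
assert parse_dai_benchmark("FALSE") is False
```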
40 changes: 28 additions & 12 deletions modelconverter/packages/base_benchmark.py
Expand Up @@ -7,7 +7,7 @@
import pandas as pd
from typing_extensions import TypeAlias

from modelconverter.utils import resolve_path
from modelconverter.utils import is_hubai_available, resolve_path

logger = getLogger(__name__)

@@ -28,9 +28,14 @@ def __init__(
model_path: str,
dataset_path: Optional[Path] = None,
):
self.model_path = resolve_path(model_path, Path.cwd())
if not is_hubai_available(model_path):
self.model_path = resolve_path(model_path, Path.cwd())
self.model_name = self.model_path.stem
else:
self.model_path = model_path
self.model_name = self.model_path.split("/", 1)[-1]
self.dataset_path = dataset_path
self.model_name = self.model_path.stem

self.header = [
*self.default_configuration.keys(),
"fps",
@@ -64,7 +69,13 @@ def print_results(
title=f"Benchmark Results for [yellow]{self.model_name}",
box=box.ROUNDED,
)
for field in self.header:

updated_header = [
*results[0][0].keys(),
"fps",
"latency (ms)",
]
for field in updated_header:
table.add_column(f"[cyan]{field}")
for configuration, result in results:
fps_color = (
@@ -74,17 +85,22 @@
if result.fps < 5
else "green"
)
latency_color = (
"yellow"
if 50 < result.latency < 100
else "red"
if result.latency > 100
else "green"
)
if isinstance(result.latency, str):
latency_color = "orange3"
else:
latency_color = (
"yellow"
if 50 < result.latency < 100
else "red"
if result.latency > 100
else "green"
)
table.add_row(
*map(lambda x: f"[magenta]{x}", configuration.values()),
f"[{fps_color}]{result.fps:.2f}",
f"[{latency_color}]{result.latency:.5f}",
f"[{latency_color}]{result.latency}"
if isinstance(result.latency, str)
else f"[{latency_color}]{result.latency:.5f}",
)
console = Console()
console.print(table)
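To make the new branching in `Benchmark.__init__` concrete, here is a small sketch of how `model_name` is derived in the two cases; the example slug and file name are made up, and `hubai_available` stands in for the `is_hubai_available` check.

```python
from pathlib import Path

def derive_model_name(model_path: str, hubai_available: bool) -> str:
    """Sketch of the naming logic in Benchmark.__init__; values are illustrative."""
    if hubai_available:
        # HubAI slug: drop the team prefix and keep the rest,
        # e.g. "luxonis/yolov6-nano:r2-coco-512x288" -> "yolov6-nano:r2-coco-512x288"
        return model_path.split("/", 1)[-1]
    # Local file: fall back to the path's stem,
    # e.g. "models/yolov6n.blob" -> "yolov6n"
    return Path(model_path).stem
```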
222 changes: 93 additions & 129 deletions modelconverter/packages/rvc2/benchmark.py
@@ -1,11 +1,9 @@
import logging
import time
from pathlib import Path
from typing import Dict, List, cast
from typing import List

import depthai as dai
import numpy as np
from depthai import NNData
from rich.progress import Progress

from ..base_benchmark import Benchmark, BenchmarkResult, Configuration
@@ -20,150 +18,116 @@ def default_configuration(self) -> Configuration:
repetitions: The number of repetitions to perform.
num_threads: The number of threads to use for inference.
"""
return {"repetitions": 1, "num_threads": 2}
return {"repetitions": 10, "num_messages": 50, "num_threads": 2}

@property
def all_configurations(self) -> List[Configuration]:
return [
{"repetitions": 5, "num_threads": 1},
{"repetitions": 5, "num_threads": 2},
{"repetitions": 5, "num_threads": 3},
{"repetitions": 10, "num_messages": 50, "num_threads": 1},
{"repetitions": 10, "num_messages": 50, "num_threads": 2},
{"repetitions": 10, "num_messages": 50, "num_threads": 3},
]

def benchmark(self, configuration: Configuration) -> BenchmarkResult:
return self._benchmark(self.model_path, **configuration)

@staticmethod
def _benchmark(
model_path: Path, repetitions: int, num_threads: int
model_path: Path | str,
repetitions: int,
num_messages: int,
num_threads: int,
) -> BenchmarkResult:
model = dai.OpenVINO.Blob(model_path)
input_name_shape: Dict[str, List[int]] = {}
input_name_type = {}
for i in list(model.networkInputs):
input_name_shape[i] = model.networkInputs[i].dims
input_name_type[i] = model.networkInputs[i].dataType.name

output_name_shape = {}
output_name_type = {}
for i in list(model.networkOutputs):
output_name_shape[i] = model.networkOutputs[i].dims
output_name_type[i] = model.networkOutputs[i].dataType.name

pipeline = dai.Pipeline()

detection_nn = pipeline.createNeuralNetwork()
detection_nn.setBlobPath(model_path)
detection_nn.setNumInferenceThreads(num_threads)
detection_nn.input.setBlocking(True)
detection_nn.input.setQueueSize(1)

nn_in = pipeline.createXLinkIn()
nn_in.setMaxDataSize(6291456)
nn_in.setStreamName("in_nn")
nn_in.out.link(detection_nn.input)

xout_nn = pipeline.createXLinkOut()
xout_nn.setStreamName("nn")
xout_nn.input.setQueueSize(1)
xout_nn.input.setBlocking(True)
detection_nn.out.link(xout_nn.input)

xlink_buffer_max_size = 5 * 1024 * 1024
product_sum = sum(
map(lambda x: np.product(np.array(x)), output_name_shape.values())
)

xlink_buffer_count = int(xlink_buffer_max_size / product_sum)

logger.info(f"XLink buffer count: {xlink_buffer_count}")
if xlink_buffer_count > 1000:
logger.warning(
"XLink buffer count is too high! "
"The benchmarking will take more time and "
"the results may be overestimated."
device = dai.Device()
if device.getPlatform() != dai.Platform.RVC2:
raise ValueError(
f"Found {device.getPlatformAsString()}, expected RVC2 platform."
)

with dai.Device(pipeline) as device, Progress() as progress:
device = cast(dai.Device, device)
detection_in_count = 100 + xlink_buffer_count
detection_in = device.getInputQueue(
"in_nn", maxSize=detection_in_count, blocking=True
if isinstance(model_path, str):
modelPath = dai.getModelFromZoo(
dai.NNModelDescription(
model_path,
platform=device.getPlatformAsString(),
)
)
q_nn = device.getOutputQueue(name="nn", maxSize=1, blocking=True)
elif str(model_path).endswith(".tar.xz"):
modelPath = str(model_path)
elif str(model_path).endswith(".blob"):
modelPath = model_path
else:
raise ValueError(
"Unsupported model format. Supported formats: .tar.xz, .blob, or HubAI model slug."
)

inputSizes = []
inputNames = []
if isinstance(model_path, str) or str(model_path).endswith(".tar.xz"):
modelArhive = dai.NNArchive(modelPath)
for input in modelArhive.getConfig().model.inputs:
inputSizes.append(input.shape[::-1])
inputNames.append(input.name)
elif str(model_path).endswith(".blob"):
blob_model = dai.OpenVINO.Blob(modelPath)
for input in blob_model.networkInputs:
inputSizes.append(blob_model.networkInputs[input].dims)
inputNames.append(input)

inputData = dai.NNData()
for name, inputSize in zip(inputNames, inputSizes):
img = np.random.randint(
0, 255, (inputSize[1], inputSize[0], 3), np.uint8
)
inputData.addTensor(name, img)

fps_storage = []
diffs = []
time.sleep(1)
with dai.Pipeline(device) as pipeline, Progress() as progress:
repet_task = progress.add_task(
"[magenta]Repetition", total=repetitions
)
infer_task = progress.add_task(
"[magenta]Inference", total=300 + 2 * xlink_buffer_count
)
for _ in range(repetitions):
progress.reset(infer_task, total=300 + 2 * xlink_buffer_count)
for _ in range(100 + xlink_buffer_count):
nn_data = dai.NNData()
for inp_name in input_name_shape:
if input_name_type[inp_name] in ["FLOAT16", "FLOAT32"]:
frame = cast(
np.ndarray,
np.random.rand(*input_name_shape[inp_name]),
)
frame = frame.astype(
"float16"
if input_name_type[inp_name] == "FLOAT16"
else "float32"
)
elif input_name_type[inp_name] in ["INT", "I8", "U8F"]:
frame = np.random.randint(
256,
size=input_name_shape[inp_name],
dtype=(
np.int32
if input_name_type[inp_name] == "INT"
else (
np.uint8
if input_name_type[inp_name] == "U8F"
else np.int8
)
),
)
else:
raise RuntimeError(
f"Unknown input type detected: {input_name_type[inp_name]}!"
)

nn_data.setLayer(inp_name, frame)

if len(input_name_shape) == 0:
raise RuntimeError(
"Failed to create input data: missing required information for one or more input layers."
)
detection_in.send(nn_data)
progress.update(infer_task, advance=1)

for _ in range(100):
progress.update(infer_task, advance=1)
time.sleep(3 / 100)

for _ in range(40 + xlink_buffer_count):
cast(NNData, q_nn.get()).getFirstLayerFp16()
progress.update(infer_task, advance=1)

start = time.time()
for _ in range(50):
cast(NNData, q_nn.get()).getFirstLayerFp16()
progress.update(infer_task, advance=1)
diff = time.time() - start
diffs.append(diff / 50)
fps_storage.append(50 / diff)

for _ in range(10):
cast(NNData, q_nn.get()).getFirstLayerFp16()
progress.update(infer_task, advance=1)

benchmarkOut = pipeline.create(dai.node.BenchmarkOut)
benchmarkOut.setRunOnHost(False)
benchmarkOut.setFps(-1)

neuralNetwork = pipeline.create(dai.node.NeuralNetwork)
if isinstance(model_path, str) or str(model_path).endswith(
".tar.xz"
):
neuralNetwork.setNNArchive(modelArhive)
elif str(model_path).endswith(".blob"):
neuralNetwork.setBlobPath(modelPath)
neuralNetwork.setNumInferenceThreads(num_threads)

benchmarkIn = pipeline.create(dai.node.BenchmarkIn)
benchmarkIn.setRunOnHost(False)
benchmarkIn.sendReportEveryNMessages(num_messages)
benchmarkIn.logReportsAsWarnings(False)

benchmarkOut.out.link(neuralNetwork.input)
neuralNetwork.out.link(benchmarkIn.input)

outputQueue = benchmarkIn.report.createOutputQueue()
inputQueue = benchmarkOut.input.createInputQueue()

pipeline.start()
inputQueue.send(inputData)

rep = 0
fps_list = []
avg_latency_list = []
while pipeline.isRunning() and rep < repetitions:
benchmarkReport = outputQueue.get()
if not isinstance(benchmarkReport, dai.BenchmarkReport):
raise ValueError(
f"Expected BenchmarkReport, got {type(benchmarkReport)}"
)
fps = benchmarkReport.fps
avg_latency = benchmarkReport.averageLatency * 1000

fps_list.append(fps)
avg_latency_list.append(avg_latency)
progress.update(repet_task, advance=1)
rep += 1

diffs = np.array(diffs) * 1000
return BenchmarkResult(np.mean(fps_storage), np.mean(diffs))
# Currently, the latency measurement is not supported on RVC2 by the depthai library.
return BenchmarkResult(np.mean(fps_list), "N/A")
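For completeness, a hedged sketch of driving the rewritten RVC2 benchmark end to end. The class name `RVC2Benchmark` and the model identifier are assumptions; the configuration keys and the `"N/A"` latency value come from the code above.

```python
# Hedged sketch, not part of this PR: class name and model identifier are
# assumed; the configuration matches all_configurations() above.
from modelconverter.packages.rvc2.benchmark import RVC2Benchmark  # hypothetical

bench = RVC2Benchmark("luxonis/yolov6-nano:r2-coco-512x288")  # slug, .tar.xz or .blob path
result = bench.benchmark({"repetitions": 10, "num_messages": 50, "num_threads": 2})
print(f"fps={result.fps:.2f}")
print(f"latency={result.latency}")  # "N/A" for now: latency is not reported on RVC2
```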