diff --git a/.github/workflows/modelconverter_test.yaml b/.github/workflows/modelconverter_test.yaml
index 50f9c73..e5bb82a 100644
--- a/.github/workflows/modelconverter_test.yaml
+++ b/.github/workflows/modelconverter_test.yaml
@@ -51,7 +51,7 @@ jobs:
           cache: pip
 
       - name: Install dependencies
-        run: pip install -e .[dev]
+        run: pip install -e .[dev] --extra-index-url https://artifacts.luxonis.com/artifactory/luxonis-python-release-local/
 
       - name: Authenticate to Google Cloud
         id: google-auth
diff --git a/.github/workflows/unittests.yaml b/.github/workflows/unittests.yaml
index 39fbfcb..a92ef85 100644
--- a/.github/workflows/unittests.yaml
+++ b/.github/workflows/unittests.yaml
@@ -25,7 +25,7 @@ jobs:
           cache: pip
 
      - name: Install package
-        run: python -m pip install -e .[dev]
+        run: python -m pip install -e .[dev] --extra-index-url https://artifacts.luxonis.com/artifactory/luxonis-python-release-local/
 
      - name: Run Unit Tests
        env:
diff --git a/README.md b/README.md
index b24ee18..5f7bd71 100644
--- a/README.md
+++ b/README.md
@@ -52,6 +52,13 @@ pip install modelconv
 
 Run `modelconverter --help` to see the available commands and options.
 
+> \[!NOTE\]
+> To use the [benchmarking feature](#benchmarking), the `depthai v3` package must be installed. While `depthai v3` is not yet released on PyPI, you can install it with the following command:
+>
+> ```bash
+> pip install -r requirements-bench.txt --extra-index-url https://artifacts.luxonis.com/artifactory/luxonis-python-release-local/
+> ```
+
 ## Configuration
 
 There are two main ways to execute configure the conversion process:
@@ -437,3 +444,6 @@ modelconverter benchmark rvc3 --model-path
 
 The command prints a table with the benchmark results to the console and
 optionally saves the results to a `.csv` file.
+
+> \[!NOTE\]
+> For **RVC2** and **RVC4**: The `--model-path` can be a path to a local `.blob` file, an NN Archive file (`.tar.xz`), or a model slug from [Luxonis HubAI](https://hub.luxonis.com/ai). To access models from different teams in Luxonis HubAI, remember to update the `HUBAI_API_KEY` environment variable accordingly.
diff --git a/modelconverter/__main__.py b/modelconverter/__main__.py
index 5d73669..6c2a7ab 100644
--- a/modelconverter/__main__.py
+++ b/modelconverter/__main__.py
@@ -175,10 +175,12 @@ def benchmark(
 
     **RVC2**
 
-    - `--repetitions`: The number of repetitions to perform. Default: `1`
+    - `--repetitions`: The number of repetitions to perform. Default: `10`
 
     - `--num-threads`: The number of threads to use for inference. Default: `2`
 
+    - `--num-messages`: The number of messages to measure for each report. Default: `50`
+
     ---
 
     **RVC3**
@@ -191,8 +193,18 @@
     - `--profile`: The SNPE profile to use for inference. Default: `"default"`
 
+    - `--runtime`: The SNPE runtime to use for inference (dsp or cpu). Default: `"dsp"`
+
     - `--num-images`: The number of images to use for inference. Default: `1000`
 
+    - `--dai-benchmark`: Whether to run the benchmark using DepthAI v3. If `False`, the SNPE tools are used. Default: `True`
+
+    - `--repetitions`: The number of repetitions to perform (dai-benchmark only). Default: `10`
+
+    - `--num-threads`: The number of threads to use for inference (dai-benchmark only). Default: `1`
+
+    - `--num-messages`: The number of messages to measure for each report (dai-benchmark only). Default: `50`
+
     ---
     """
@@ -203,6 +215,13 @@
             key = key[2:].replace("-", "_")
         else:
             raise typer.BadParameter(f"Unknown argument: {key}")
+        if key == "dai_benchmark":
+            value = value.capitalize()
+            if value not in ["True", "False"]:
+                raise typer.BadParameter(
+                    "dai_benchmark must be either True or False"
+                )
+            value = value == "True"
         kwargs[key] = value
     Benchmark = get_benchmark(target)
     benchmark = Benchmark(str(model_path))
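
The `--dai-benchmark` value arrives through the generic key/value option parser as a plain string, which is why the hunk above capitalizes it and compares it against `"True"`/`"False"`. A minimal sketch of that coercion in isolation (the helper name is hypothetical; the surrounding CLI wiring is assumed):

```python
import typer


def coerce_dai_benchmark(value: str) -> bool:
    # Mirrors the parsing added above: accept "true"/"false" in any casing,
    # reject anything else with a CLI error.
    value = value.capitalize()
    if value not in ["True", "False"]:
        raise typer.BadParameter("dai_benchmark must be either True or False")
    return value == "True"


assert coerce_dai_benchmark("false") is False
assert coerce_dai_benchmark("TRUE") is True
```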
diff --git a/modelconverter/packages/base_benchmark.py b/modelconverter/packages/base_benchmark.py
index e3eba9a..0fb3d33 100644
--- a/modelconverter/packages/base_benchmark.py
+++ b/modelconverter/packages/base_benchmark.py
@@ -1,3 +1,4 @@
+import re
 from abc import ABC, abstractmethod
 from collections import namedtuple
 from logging import getLogger
@@ -7,7 +8,7 @@
 import pandas as pd
 from typing_extensions import TypeAlias
 
-from modelconverter.utils import resolve_path
+from modelconverter.utils import is_hubai_available, resolve_path
 
 logger = getLogger(__name__)
 
@@ -23,14 +24,36 @@ class Benchmark(ABC):
+    VALID_EXTENSIONS = (".tar.xz", ".blob", ".dlc")
+    HUB_MODEL_PATTERN = re.compile(r"^(?:([^/]+)/)?([^:]+):(.+)$")
+
     def __init__(
         self,
         model_path: str,
         dataset_path: Optional[Path] = None,
     ):
-        self.model_path = resolve_path(model_path, Path.cwd())
+        if any(model_path.endswith(ext) for ext in self.VALID_EXTENSIONS):
+            self.model_path = resolve_path(model_path, Path.cwd())
+            self.model_name = self.model_path.stem
+        else:
+            hub_match = self.HUB_MODEL_PATTERN.match(model_path)
+            if not hub_match:
+                raise ValueError(
+                    "Invalid 'model-path' format. Expected either:\n"
+                    "- Model file path: path/to/model.blob, path/to/model.dlc or path/to/model.tar.xz\n"
+                    "- HubAI model slug: [team_name/]model_name:variant"
+                )
+            team_name, model_name, model_variant = hub_match.groups()
+            if is_hubai_available(model_name, model_variant):
+                self.model_path = model_path
+                self.model_name = model_name
+            else:
+                raise ValueError(
+                    f"Model {team_name+'/' if team_name else ''}{model_name}:{model_variant} not found in HubAI."
+                )
+
         self.dataset_path = dataset_path
-        self.model_name = self.model_path.stem
+
         self.header = [
             *self.default_configuration.keys(),
             "fps",
@@ -64,7 +87,13 @@ def print_results(
             title=f"Benchmark Results for [yellow]{self.model_name}",
             box=box.ROUNDED,
         )
-        for field in self.header:
+
+        updated_header = [
+            *results[0][0].keys(),
+            "fps",
+            "latency (ms)",
+        ]
+        for field in updated_header:
             table.add_column(f"[cyan]{field}")
         for configuration, result in results:
             fps_color = (
@@ -74,17 +103,22 @@
                 if result.fps < 5
                 else "green"
             )
-            latency_color = (
-                "yellow"
-                if 50 < result.latency < 100
-                else "red"
-                if result.latency > 100
-                else "green"
-            )
+            if isinstance(result.latency, str):
+                latency_color = "orange3"
+            else:
+                latency_color = (
+                    "yellow"
+                    if 50 < result.latency < 100
+                    else "red"
+                    if result.latency > 100
+                    else "green"
+                )
             table.add_row(
                 *map(lambda x: f"[magenta]{x}", configuration.values()),
                 f"[{fps_color}]{result.fps:.2f}",
-                f"[{latency_color}]{result.latency:.5f}",
+                f"[{latency_color}]{result.latency}"
+                if isinstance(result.latency, str)
+                else f"[{latency_color}]{result.latency:.5f}",
             )
         console = Console()
         console.print(table)
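
For reference, the `HUB_MODEL_PATTERN` introduced above accepts an optional team prefix followed by `model_name:variant`. A small sketch of how it splits a slug (the slugs themselves are made-up examples):

```python
import re

HUB_MODEL_PATTERN = re.compile(r"^(?:([^/]+)/)?([^:]+):(.+)$")

# Hypothetical slugs, used only for illustration.
print(HUB_MODEL_PATTERN.match("yolov6-nano:coco-512x288").groups())
# (None, 'yolov6-nano', 'coco-512x288')
print(HUB_MODEL_PATTERN.match("my-team/yolov6-nano:coco-512x288").groups())
# ('my-team', 'yolov6-nano', 'coco-512x288')
print(HUB_MODEL_PATTERN.match("model.blob"))
# None -> handled by the file-extension branch instead
```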
""" - return {"repetitions": 1, "num_threads": 2} + return {"repetitions": 10, "num_messages": 50, "num_threads": 2} @property def all_configurations(self) -> List[Configuration]: return [ - {"repetitions": 5, "num_threads": 1}, - {"repetitions": 5, "num_threads": 2}, - {"repetitions": 5, "num_threads": 3}, + {"repetitions": 10, "num_messages": 50, "num_threads": 1}, + {"repetitions": 10, "num_messages": 50, "num_threads": 2}, + {"repetitions": 10, "num_messages": 50, "num_threads": 3}, ] def benchmark(self, configuration: Configuration) -> BenchmarkResult: @@ -35,135 +35,102 @@ def benchmark(self, configuration: Configuration) -> BenchmarkResult: @staticmethod def _benchmark( - model_path: Path, repetitions: int, num_threads: int + model_path: Path | str, + repetitions: int, + num_messages: int, + num_threads: int, ) -> BenchmarkResult: - model = dai.OpenVINO.Blob(model_path) - input_name_shape: Dict[str, List[int]] = {} - input_name_type = {} - for i in list(model.networkInputs): - input_name_shape[i] = model.networkInputs[i].dims - input_name_type[i] = model.networkInputs[i].dataType.name - - output_name_shape = {} - output_name_type = {} - for i in list(model.networkOutputs): - output_name_shape[i] = model.networkOutputs[i].dims - output_name_type[i] = model.networkOutputs[i].dataType.name - - pipeline = dai.Pipeline() - - detection_nn = pipeline.createNeuralNetwork() - detection_nn.setBlobPath(model_path) - detection_nn.setNumInferenceThreads(num_threads) - detection_nn.input.setBlocking(True) - detection_nn.input.setQueueSize(1) - - nn_in = pipeline.createXLinkIn() - nn_in.setMaxDataSize(6291456) - nn_in.setStreamName("in_nn") - nn_in.out.link(detection_nn.input) - - xout_nn = pipeline.createXLinkOut() - xout_nn.setStreamName("nn") - xout_nn.input.setQueueSize(1) - xout_nn.input.setBlocking(True) - detection_nn.out.link(xout_nn.input) - - xlink_buffer_max_size = 5 * 1024 * 1024 - product_sum = sum( - map(lambda x: np.product(np.array(x)), output_name_shape.values()) - ) - - xlink_buffer_count = int(xlink_buffer_max_size / product_sum) - - logger.info(f"XLink buffer count: {xlink_buffer_count}") - if xlink_buffer_count > 1000: - logger.warning( - "XLink buffer count is too high! " - "The benchmarking will take more time and " - "the results may be overestimated." + device = dai.Device() + if device.getPlatform() != dai.Platform.RVC2: + raise ValueError( + f"Found {device.getPlatformAsString()}, expected RVC2 platform." + ) + + if isinstance(model_path, str): + modelPath = dai.getModelFromZoo( + dai.NNModelDescription( + model_path, + platform=device.getPlatformAsString(), + ), + apiKey=environ.HUBAI_API_KEY if environ.HUBAI_API_KEY else "", + ) + elif str(model_path).endswith(".tar.xz"): + modelPath = str(model_path) + elif str(model_path).endswith(".blob"): + modelPath = model_path + else: + raise ValueError( + "Unsupported model format. Supported formats: .tar.xz, .blob, or HubAI model slug." 
) - with dai.Device(pipeline) as device, Progress() as progress: - device = cast(dai.Device, device) - detection_in_count = 100 + xlink_buffer_count - detection_in = device.getInputQueue( - "in_nn", maxSize=detection_in_count, blocking=True + inputSizes = [] + inputNames = [] + if isinstance(model_path, str) or str(model_path).endswith(".tar.xz"): + modelArhive = dai.NNArchive(modelPath) + for input in modelArhive.getConfig().model.inputs: + inputSizes.append(input.shape[::-1]) + inputNames.append(input.name) + elif str(model_path).endswith(".blob"): + blob_model = dai.OpenVINO.Blob(modelPath) + for input in blob_model.networkInputs: + inputSizes.append(blob_model.networkInputs[input].dims) + inputNames.append(input) + + inputData = dai.NNData() + for name, inputSize in zip(inputNames, inputSizes): + img = np.random.randint( + 0, 255, (inputSize[1], inputSize[0], 3), np.uint8 ) - q_nn = device.getOutputQueue(name="nn", maxSize=1, blocking=True) + inputData.addTensor(name, img) - fps_storage = [] - diffs = [] - time.sleep(1) + with dai.Pipeline(device) as pipeline, Progress() as progress: repet_task = progress.add_task( "[magenta]Repetition", total=repetitions ) - infer_task = progress.add_task( - "[magenta]Inference", total=300 + 2 * xlink_buffer_count - ) - for _ in range(repetitions): - progress.reset(infer_task, total=300 + 2 * xlink_buffer_count) - for _ in range(100 + xlink_buffer_count): - nn_data = dai.NNData() - for inp_name in input_name_shape: - if input_name_type[inp_name] in ["FLOAT16", "FLOAT32"]: - frame = cast( - np.ndarray, - np.random.rand(*input_name_shape[inp_name]), - ) - frame = frame.astype( - "float16" - if input_name_type[inp_name] == "FLOAT16" - else "float32" - ) - elif input_name_type[inp_name] in ["INT", "I8", "U8F"]: - frame = np.random.randint( - 256, - size=input_name_shape[inp_name], - dtype=( - np.int32 - if input_name_type[inp_name] == "INT" - else ( - np.uint8 - if input_name_type[inp_name] == "U8F" - else np.int8 - ) - ), - ) - else: - raise RuntimeError( - f"Unknown input type detected: {input_name_type[inp_name]}!" - ) - - nn_data.setLayer(inp_name, frame) - - if len(input_name_shape) == 0: - raise RuntimeError( - "Failed to create input data: missing required information for one or more input layers." 
- ) - detection_in.send(nn_data) - progress.update(infer_task, advance=1) - - for _ in range(100): - progress.update(infer_task, advance=1) - time.sleep(3 / 100) - - for _ in range(40 + xlink_buffer_count): - cast(NNData, q_nn.get()).getFirstLayerFp16() - progress.update(infer_task, advance=1) - - start = time.time() - for _ in range(50): - cast(NNData, q_nn.get()).getFirstLayerFp16() - progress.update(infer_task, advance=1) - diff = time.time() - start - diffs.append(diff / 50) - fps_storage.append(50 / diff) - - for _ in range(10): - cast(NNData, q_nn.get()).getFirstLayerFp16() - progress.update(infer_task, advance=1) + + benchmarkOut = pipeline.create(dai.node.BenchmarkOut) + benchmarkOut.setRunOnHost(False) + benchmarkOut.setFps(-1) + + neuralNetwork = pipeline.create(dai.node.NeuralNetwork) + if isinstance(model_path, str) or str(model_path).endswith( + ".tar.xz" + ): + neuralNetwork.setNNArchive(modelArhive) + elif str(model_path).endswith(".blob"): + neuralNetwork.setBlobPath(modelPath) + neuralNetwork.setNumInferenceThreads(num_threads) + + benchmarkIn = pipeline.create(dai.node.BenchmarkIn) + benchmarkIn.setRunOnHost(False) + benchmarkIn.sendReportEveryNMessages(num_messages) + benchmarkIn.logReportsAsWarnings(False) + + benchmarkOut.out.link(neuralNetwork.input) + neuralNetwork.out.link(benchmarkIn.input) + + outputQueue = benchmarkIn.report.createOutputQueue() + inputQueue = benchmarkOut.input.createInputQueue() + + pipeline.start() + inputQueue.send(inputData) + + rep = 0 + fps_list = [] + avg_latency_list = [] + while pipeline.isRunning() and rep < repetitions: + benchmarkReport = outputQueue.get() + if not isinstance(benchmarkReport, dai.BenchmarkReport): + raise ValueError( + f"Expected BenchmarkReport, got {type(benchmarkReport)}" + ) + fps = benchmarkReport.fps + avg_latency = benchmarkReport.averageLatency * 1000 + + fps_list.append(fps) + avg_latency_list.append(avg_latency) progress.update(repet_task, advance=1) + rep += 1 - diffs = np.array(diffs) * 1000 - return BenchmarkResult(np.mean(fps_storage), np.mean(diffs)) + # Currently, the latency measurement is not supported on RVC2 by the depthai library. + return BenchmarkResult(np.mean(fps_list), "N/A") diff --git a/modelconverter/packages/rvc4/benchmark.py b/modelconverter/packages/rvc4/benchmark.py index 4e71bd0..b449766 100644 --- a/modelconverter/packages/rvc4/benchmark.py +++ b/modelconverter/packages/rvc4/benchmark.py @@ -1,15 +1,19 @@ import io +import json import logging import re +import shutil import subprocess import tempfile from pathlib import Path from typing import Dict, Final, List, Optional, Tuple, cast +import depthai as dai import numpy as np import pandas as pd +from rich.progress import Progress -from modelconverter.utils import subprocess_run +from modelconverter.utils import environ, subprocess_run from ..base_benchmark import Benchmark, BenchmarkResult, Configuration @@ -29,6 +33,11 @@ "system_settings", ] +RUNTIMES: Dict[str, str] = { + "dsp": "use_dsp", + "cpu": "use_cpu", +} + class AdbHandler: def __init__(self, device_id: Optional[str] = None) -> None: @@ -71,29 +80,49 @@ def push(self, src: str, dst: str) -> Tuple[int, str, str]: class RVC4Benchmark(Benchmark): adb = AdbHandler() + force_cpu: bool = False @property def default_configuration(self) -> Configuration: """ profile: The SNPE profile to use for inference. + runtime: The SNPE runtime to use for inference. num_images: The number of images to use for inference. + dai_benchmark: Whether to use the DepthAI for benchmarking. 
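
In the new flow above (and in the RVC4 counterpart below), `BenchmarkIn` emits one `BenchmarkReport` per `num_messages` processed messages, and the loop collects `repetitions` such reports before averaging, so a run covers roughly `num_messages * repetitions` inferences. A rough sketch of that aggregation under those assumptions (the sample values are made up):

```python
import numpy as np

num_messages, repetitions = 50, 10
reports_fps = [110.4, 112.1, 109.8]  # one value per BenchmarkReport, for illustration

total_inferences = num_messages * repetitions  # ~500 inferences per run
mean_fps = float(np.mean(reports_fps))          # what ends up in BenchmarkResult.fps
# averageLatency is reported in seconds and converted to ms above, but on RVC2
# it is not populated, hence the table shows latency as "N/A".
```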
diff --git a/modelconverter/packages/rvc4/benchmark.py b/modelconverter/packages/rvc4/benchmark.py
index 4e71bd0..b449766 100644
--- a/modelconverter/packages/rvc4/benchmark.py
+++ b/modelconverter/packages/rvc4/benchmark.py
@@ -1,15 +1,19 @@
 import io
+import json
 import logging
 import re
+import shutil
 import subprocess
 import tempfile
 from pathlib import Path
 from typing import Dict, Final, List, Optional, Tuple, cast
 
+import depthai as dai
 import numpy as np
 import pandas as pd
+from rich.progress import Progress
 
-from modelconverter.utils import subprocess_run
+from modelconverter.utils import environ, subprocess_run
 
 from ..base_benchmark import Benchmark, BenchmarkResult, Configuration
@@ -29,6 +33,11 @@
     "system_settings",
 ]
 
+RUNTIMES: Dict[str, str] = {
+    "dsp": "use_dsp",
+    "cpu": "use_cpu",
+}
+
 
 class AdbHandler:
     def __init__(self, device_id: Optional[str] = None) -> None:
@@ -71,29 +80,49 @@ def push(self, src: str, dst: str) -> Tuple[int, str, str]:
 
 class RVC4Benchmark(Benchmark):
     adb = AdbHandler()
+    force_cpu: bool = False
 
     @property
     def default_configuration(self) -> Configuration:
         """
         profile: The SNPE profile to use for inference.
+        runtime: The SNPE runtime to use for inference.
         num_images: The number of images to use for inference.
+        dai_benchmark: Whether to use DepthAI for benchmarking.
+        repetitions: The number of repetitions to perform (dai-benchmark only).
+        num_threads: The number of threads to use for inference (dai-benchmark only).
+        num_messages: The number of messages to measure for each report (dai-benchmark only).
         """
-        return {"profile": "default", "num_images": 1000}
+        return {
+            "profile": "default",
+            "runtime": "dsp",
+            "num_images": 1000,
+            "dai_benchmark": True,
+            "repetitions": 10,
+            "num_threads": 1,
+            "num_messages": 50,
+        }
 
     @property
     def all_configurations(self) -> List[Configuration]:
         return [{"profile": profile} for profile in PROFILES]
 
-    def _get_input_sizes(self) -> Dict[str, List[int]]:
+    def _get_input_sizes(self) -> Tuple[Dict[str, List[int]], Dict[str, str]]:
         csv_path = Path("info.csv")
         subprocess_run(
-            ["snpe-dlc-info", "-i", self.model_path, "-s", csv_path]
+            [
+                "snpe-dlc-info",
+                "-i",
+                self.model_path,
+                "-s",
+                csv_path,
+            ]
         )
         content = csv_path.read_text()
         csv_path.unlink()
 
         start_marker = "Input Name,Dimensions,Type,Encoding Info"
-        end_marker = "Total parameters:"
+        end_marker = "Output Name,Dimensions,Type,Encoding Info"
         start_index = content.find(start_marker)
         end_index = content.find(end_marker, start_index)
@@ -106,18 +135,34 @@ def _get_input_sizes(self) -> Dict[str, List[int]]:
             )
             for _, row in df.iterrows()
         }
-        return sizes
+        data_types = {
+            str(row["Input Name"]): str(row["Type"])
+            for _, row in df.iterrows()
+        }
+
+        return sizes, data_types
 
     def _prepare_raw_inputs(self, num_images: int) -> None:
-        input_sizes = self._get_input_sizes()
+        input_sizes, data_types = self._get_input_sizes()
         input_list = ""
 
         self.adb.shell(f"mkdir /data/local/tmp/{self.model_name}/inputs")
         for i in range(num_images):
             for name, size in input_sizes.items():
+                if data_types[name] == "Float_32":
+                    self.force_cpu = True
+                    numpy_type = np.float32
+                elif data_types[name] == "Float_16":
+                    numpy_type = np.float16
+                elif data_types[name] == "uFxp_8":
+                    numpy_type = np.uint8
+                else:
+                    raise ValueError(
+                        f"Unsupported data type {data_types[name]} for input {name}."
+                    )
                 img = cast(np.ndarray, np.random.rand(*size)).astype(
-                    np.float32
+                    numpy_type
                 )
-                with tempfile.TemporaryFile() as f:
+                with tempfile.NamedTemporaryFile() as f:
                     img.tofile(f)
                     self.adb.push(
                         f.name,
@@ -125,39 +170,162 @@ def _prepare_raw_inputs(self, num_images: int) -> None:
                     )
                 input_list += f"{name}:=/data/local/tmp/{self.model_name}/inputs/{name}_{i}.raw "
-        with tempfile.NamedTemporaryFile(mode="w", delete=False) as f:
+            input_list += "\n"
+
+        temp_path = tempfile.mktemp()
+        with open(temp_path, "w") as f:
             f.write(input_list)
+            f.flush()
+
+        try:
             self.adb.push(
-                f.name, f"/data/local/tmp/{self.model_name}/input_list.txt"
+                temp_path, f"/data/local/tmp/{self.model_name}/input_list.txt"
             )
+        finally:
+            Path(temp_path).unlink()
+
+    def _get_data_type(self) -> dai.TensorInfo.DataType:
+        """Retrieve the data type of the model inputs. If the model is not a HubAI
+        model, it defaults to dai.TensorInfo.DataType.U8F (INT8).
+
+        @return: The data type of the model inputs.
+        @rtype: dai.TensorInfo.DataType
+        """
+        from modelconverter.cli import Request, slug_to_id
+
+        if not isinstance(
+            self.model_path, str
+        ) or not self.HUB_MODEL_PATTERN.match(self.model_path):
+            return dai.TensorInfo.DataType.U8F
+
+        model_id = slug_to_id(self.model_name, "models")
+        model_variant = self.model_path.split(":")[1]
+
+        model_variants = []
+        for is_public in [True, False]:
+            try:
+                model_variants += Request.get(
+                    "modelVersions/",
+                    params={"model_id": model_id, "is_public": is_public},
+                )
+            except Exception:
+                continue
+
+        model_version_id = None
+        for version in model_variants:
+            if version["variant_slug"] == model_variant:
+                model_version_id = version["id"]
+                break
+
+        if not model_version_id:
+            return dai.TensorInfo.DataType.U8F
+
+        model_instances = []
+        for is_public in [True, False]:
+            try:
+                model_instances += Request.get(
+                    "modelInstances/",
+                    params={
+                        "model_id": model_id,
+                        "model_version_id": model_version_id,
+                        "is_public": is_public,
+                    },
+                )
+            except Exception:
+                continue
+
+        model_precision_type = "INT8"
+        for instance in model_instances:
+            if instance["platforms"] == ["RVC4"]:
+                model_precision_type = instance.get(
+                    "model_precision_type", "INT8"
+                )
+                break
+
+        if model_precision_type == "FP16":
+            return dai.TensorInfo.DataType.FP16
+        elif model_precision_type == "FP32":
+            self.force_cpu = True
+            return dai.TensorInfo.DataType.FP32
+
+        return dai.TensorInfo.DataType.U8F
 
     def benchmark(self, configuration: Configuration) -> BenchmarkResult:
+        dai_benchmark = configuration.get("dai_benchmark")
         try:
-            return self._benchmark(self.model_path, **configuration)
+            if dai_benchmark:
+                for key in ["dai_benchmark", "num_images"]:
+                    configuration.pop(key)
+                return self._benchmark_dai(self.model_path, **configuration)
+            else:
+                for key in [
+                    "dai_benchmark",
+                    "repetitions",
+                    "num_threads",
+                    "num_messages",
+                ]:
+                    configuration.pop(key)
+                return self._benchmark_snpe(self.model_path, **configuration)
         finally:
-            # so we don't delete the wrong directory
-            assert self.model_name
+            if not dai_benchmark:
+                # so we don't delete the wrong directory
+                assert self.model_name
 
-            self.adb.shell(f"rm -rf /data/local/tmp/{self.model_name}")
+                self.adb.shell(f"rm -rf /data/local/tmp/{self.model_name}")
 
-    def _benchmark(
-        self, model_path: Path, num_images: int, profile: str
+    def _benchmark_snpe(
+        self,
+        model_path: Path | str,
+        num_images: int,
+        profile: str,
+        runtime: str,
     ) -> BenchmarkResult:
+        runtime = RUNTIMES[runtime] if runtime in RUNTIMES else "use_dsp"
+
+        if isinstance(model_path, str):
+            model_archive = dai.getModelFromZoo(
+                dai.NNModelDescription(
+                    model_path,
+                    platform=dai.Platform.RVC4.name,
+                ),
+                apiKey=environ.HUBAI_API_KEY if environ.HUBAI_API_KEY else "",
+            )
+            tmp_dir = Path(model_archive).parent / "tmp"
+            shutil.unpack_archive(model_archive, tmp_dir)
+
+            dlc_model_name = json.loads((tmp_dir / "config.json").read_text())[
+                "model"
+            ]["metadata"]["path"]
+            dlc_path = next(tmp_dir.rglob(dlc_model_name), None)
+            if not dlc_path:
+                raise ValueError("Could not find model.dlc in the archive.")
+            self.model_path = dlc_path
+        elif str(model_path).endswith(".dlc"):
+            dlc_path = model_path
+        else:
+            raise ValueError(
+                "Unsupported model format. Supported formats: .dlc, or HubAI model slug."
+            )
+
         self.adb.shell(f"mkdir /data/local/tmp/{self.model_name}")
         self.adb.push(
-            str(model_path), f"/data/local/tmp/{self.model_name}/model.dlc"
+            str(dlc_path), f"/data/local/tmp/{self.model_name}/model.dlc"
         )
         self._prepare_raw_inputs(num_images)
 
+        if self.force_cpu:
+            logger.warning(
+                "Forcing CPU runtime due to Float_32 input data type."
+            )
+            runtime = "use_cpu"
+
         _, stdout, _ = self.adb.shell(
-            "source /data/local/tmp/source_me.sh && "
+            # "source /data/local/tmp/source_me.sh && "
             "snpe-parallel-run "
             f"--container /data/local/tmp/{self.model_name}/model.dlc "
             f"--input_list /data/local/tmp/{self.model_name}/input_list.txt "
             f"--output_dir /data/local/tmp/{self.model_name}/outputs "
             f"--perf_profile {profile} "
-            "--cpu_fallback false "
-            "--use_dsp"
+            "--cpu_fallback true "
+            f"--{runtime}"
         )
         pattern = re.compile(r"(\d+\.\d+) infs/sec")
         match = pattern.search(stdout)
@@ -167,4 +335,120 @@
                 f"stdout:\n{stdout}"
             )
         fps = float(match.group(1))
-        return BenchmarkResult(fps=fps, latency=0)
+        return BenchmarkResult(fps=fps, latency="N/A")
+
+    def _benchmark_dai(
+        self,
+        model_path: Path | str,
+        profile: str,
+        runtime: str,
+        repetitions: int,
+        num_threads: int,
+        num_messages: int,
+    ) -> BenchmarkResult:
+        device = dai.Device()
+
+        if device.getPlatform() != dai.Platform.RVC4:
+            raise ValueError(
+                f"Found {device.getPlatformAsString()}, expected RVC4 platform."
+            )
+
+        if isinstance(model_path, str):
+            modelPath = dai.getModelFromZoo(
+                dai.NNModelDescription(
+                    model_path,
+                    platform=device.getPlatformAsString(),
+                ),
+                apiKey=environ.HUBAI_API_KEY if environ.HUBAI_API_KEY else "",
+            )
+        elif str(model_path).endswith(".tar.xz"):
+            modelPath = str(model_path)
+        elif str(model_path).endswith(".dlc"):
+            raise ValueError(
+                "DLC model format is not currently supported for dai-benchmark. Please use SNPE for DLC models."
+            )
+        else:
+            raise ValueError(
+                "Unsupported model format. Supported formats: .tar.xz, or HubAI model slug."
+            )
+
+        inputSizes = []
+        inputNames = []
+        if isinstance(model_path, str) or str(model_path).endswith(".tar.xz"):
+            modelArchive = dai.NNArchive(modelPath)
+            for input in modelArchive.getConfig().model.inputs:
+                inputSizes.append(input.shape)
+                inputNames.append(input.name)
+
+        data_type = self._get_data_type()
+        inputData = dai.NNData()
+        for name, inputSize in zip(inputNames, inputSizes):
+            img = np.random.randint(0, 255, inputSize, np.uint8)
+            inputData.addTensor(name, img, dataType=data_type)
+
+        with dai.Pipeline(device) as pipeline, Progress() as progress:
+            repet_task = progress.add_task(
+                "[magenta]Repetition", total=repetitions
+            )
+
+            benchmarkOut = pipeline.create(dai.node.BenchmarkOut)
+            benchmarkOut.setRunOnHost(False)
+            benchmarkOut.setFps(-1)
+
+            neuralNetwork = pipeline.create(dai.node.NeuralNetwork)
+            if isinstance(model_path, str) or str(model_path).endswith(
+                ".tar.xz"
+            ):
+                neuralNetwork.setNNArchive(modelArchive)
+
+            if self.force_cpu:
+                logger.warning(
+                    "Forcing CPU runtime due to Float_32 input data type."
+                )
+                runtime = "cpu"
+            neuralNetwork.setBackendProperties(
+                {
+                    "runtime": runtime,
+                    "performance_profile": profile,
+                }
+            )
+            if num_threads > 1:
+                logger.warning(
+                    "num_threads > 1 is not supported for RVC4. Setting num_threads to 1."
+                )
+                num_threads = 1
+            neuralNetwork.setNumInferenceThreads(num_threads)
+
+            benchmarkIn = pipeline.create(dai.node.BenchmarkIn)
+            benchmarkIn.setRunOnHost(False)
+            benchmarkIn.sendReportEveryNMessages(num_messages)
+            benchmarkIn.logReportsAsWarnings(False)
+
+            benchmarkOut.out.link(neuralNetwork.input)
+            neuralNetwork.out.link(benchmarkIn.input)
+
+            outputQueue = benchmarkIn.report.createOutputQueue()
+            inputQueue = benchmarkOut.input.createInputQueue()
+
+            pipeline.start()
+            inputQueue.send(inputData)
+
+            rep = 0
+            fps_list = []
+            avg_latency_list = []
+            while pipeline.isRunning() and rep < repetitions:
+                benchmarkReport = outputQueue.get()
+                if not isinstance(benchmarkReport, dai.BenchmarkReport):
+                    raise ValueError(
+                        f"Expected BenchmarkReport, got {type(benchmarkReport)}"
+                    )
+                fps = benchmarkReport.fps
+                avg_latency = benchmarkReport.averageLatency * 1000
+
+                fps_list.append(fps)
+                avg_latency_list.append(avg_latency)
+                progress.update(repet_task, advance=1)
+                rep += 1
+
+        # Currently, latency measurement is only supported on RVC4 when using ImgFrame as the input to BenchmarkOut, which we don't do here.
+        return BenchmarkResult(np.mean(fps_list), "N/A")
diff --git a/modelconverter/utils/__init__.py b/modelconverter/utils/__init__.py
index 9566125..7d7bf33 100644
--- a/modelconverter/utils/__init__.py
+++ b/modelconverter/utils/__init__.py
@@ -19,6 +19,7 @@
     resolve_path,
     upload_file_to_remote,
 )
+from .hubai_utils import is_hubai_available
 from .image import read_calib_dir, read_image
 from .layout import guess_new_layout, make_default_layout
 from .metadata import Metadata, get_metadata
@@ -45,6 +46,7 @@
     "subprocess_run",
     "download_from_remote",
     "upload_file_to_remote",
+    "is_hubai_available",
     "get_protocol",
     "process_nn_archive",
     "modelconverter_config_to_nn",
diff --git a/modelconverter/utils/hubai_utils.py b/modelconverter/utils/hubai_utils.py
new file mode 100644
index 0000000..1229179
--- /dev/null
+++ b/modelconverter/utils/hubai_utils.py
@@ -0,0 +1,25 @@
+def is_hubai_available(model_name: str, model_variant: str) -> bool:
+    from modelconverter.cli import Request, slug_to_id
+
+    model_slug = f"{model_name}:{model_variant}"
+
+    model_id = slug_to_id(
+        model_name,
+        "models",
+    )
+
+    model_variants = []
+    for is_public in [True, False]:
+        try:
+            model_variants += Request.get(
+                "modelVersions/",
+                params={"model_id": model_id, "is_public": is_public},
+            )
+        except Exception:
+            pass
+
+    for version in model_variants:
+        if f"{model_name}:{version['variant_slug']}" == model_slug:
+            return True
+
+    return False
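
A quick usage sketch of the new helper (the model name and variant are hypothetical examples; the call issues HubAI API requests, so `HUBAI_API_KEY` may need to be set for non-public teams):

```python
from modelconverter.utils import is_hubai_available

# Hypothetical slug components, used only for illustration.
if is_hubai_available("yolov6-nano", "coco-512x288"):
    print("Slug resolvable on HubAI; safe to pass as --model-path.")
else:
    print("Not found on HubAI; fall back to a local .blob/.dlc/.tar.xz path.")
```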
diff --git a/requirements-bench.txt b/requirements-bench.txt
index a2d1086..9f3e337 100644
--- a/requirements-bench.txt
+++ b/requirements-bench.txt
@@ -1,2 +1,2 @@
-depthai
+depthai>=3.0.0a12
 pandas
diff --git a/requirements.txt b/requirements.txt
index cb9cb9c..35ca1da 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -13,4 +13,4 @@ keyring
 onnx_graphsurgeon
 onnxoptimizer
 wget
-aiobotocore<2.18 # to be removed after luxonis-ml>=0.6.0
\ No newline at end of file
+aiobotocore<2.18 # to be removed after luxonis-ml>=0.6.0
diff --git a/shared_with_container/configs/defaults.yaml b/shared_with_container/configs/defaults.yaml
index 095bfbe..395b175 100644
--- a/shared_with_container/configs/defaults.yaml
+++ b/shared_with_container/configs/defaults.yaml
@@ -85,6 +85,9 @@ stages:
     # Do not run ONNX simplifier on the provided model.
     disable_onnx_simplification: false
 
+    # Do not run ONNX graph optimisations on the provided model.
+    disable_onnx_optimisation: false
+
     # List of input names with shapes,
     # data types, values for freezing and input modifiers.
     # Overrides the top-level input modifiers.