diff --git a/.github/workflows/mac_arm64.yml b/.github/workflows/mac_arm64.yml index b60daefa442c83..7c148a98787903 100644 --- a/.github/workflows/mac_arm64.yml +++ b/.github/workflows/mac_arm64.yml @@ -4,7 +4,7 @@ on: schedule: # at 00:00 on workdays - cron: '0 0 * * 1,2,3,4,5' - #pull_request: + # pull_request: # paths-ignore: # - '**/docs/**' # - 'docs/**' @@ -12,7 +12,7 @@ on: # - '**.md' # - '**/layer_tests_summary/**' # - '**/conformance/**' - #push: + # push: # paths-ignore: # - '**/docs/**' # - 'docs/**' diff --git a/.github/workflows/ubuntu_20.yml b/.github/workflows/ubuntu_20.yml index 19760ff2551773..f78daa951ce815 100644 --- a/.github/workflows/ubuntu_20.yml +++ b/.github/workflows/ubuntu_20.yml @@ -168,7 +168,7 @@ jobs: Overall_Status: name: ci/gha_overall_status_ubuntu_20 - needs: [Smart_CI, Build, Debian_Packages, Samples] + needs: [Smart_CI, Build, Debian_Packages, Samples, CXX_Unit_Tests] if: ${{ always() }} runs-on: ubuntu-latest steps: diff --git a/docs/articles_en/about-openvino/key-features.rst b/docs/articles_en/about-openvino/key-features.rst index c751a5bc65d3cf..7e4ffab3cbb2ec 100644 --- a/docs/articles_en/about-openvino/key-features.rst +++ b/docs/articles_en/about-openvino/key-features.rst @@ -14,7 +14,7 @@ Easy Integration OpenVINO optimizations to your PyTorch models directly with a single line of code. | :doc:`GenAI Out Of The Box <../openvino-workflow-generative/inference-with-genai>` -| With the genAI flavor of OpenVINO, you can run generative AI with just a couple lines of code. +| With the OpenVINO GenAI, you can run generative models with just a few lines of code. Check out the GenAI guide for instructions on how to do it. | `Python / C++ / C / NodeJS APIs `__ diff --git a/docs/articles_en/about-openvino/performance-benchmarks.rst b/docs/articles_en/about-openvino/performance-benchmarks.rst index 4262ec6b2b3732..723bc1a96f7e9d 100644 --- a/docs/articles_en/about-openvino/performance-benchmarks.rst +++ b/docs/articles_en/about-openvino/performance-benchmarks.rst @@ -132,21 +132,21 @@ For a listing of all platforms and configurations used for testing, refer to the .. grid-item:: - .. button-link:: ../_static/benchmarks_files/OV-2024.6-platform_list.pdf + .. button-link:: ../_static/downloads/benchmarking_OV_platform_list.pdf :color: primary :outline: :expand: :material-regular:`download;1.5em` Click for Hardware Platforms [PDF] - .. button-link:: ../_static/benchmarks_files/OV-2024.6-system-info-detailed.xlsx + .. button-link:: ../_static/downloads/benchmarking_OV_system_info_detailed.xlsx :color: primary :outline: :expand: :material-regular:`download;1.5em` Click for Configuration Details [XLSX] - .. button-link:: ../_static/benchmarks_files/OV-2024.6-Performance-Data.xlsx + .. button-link:: ../_static/downloads/benchmarking_OV_performance-data.xlsx :color: primary :outline: :expand: diff --git a/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst b/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst index 83581d465df92e..1f111563a4f29a 100644 --- a/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst +++ b/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst @@ -56,7 +56,7 @@ The tables below list the key performance indicators for inference on built-in G .. grid-item:: - .. button-link:: https://docs.openvino.ai/2024/_static/benchmarks_files/llm_models_platform_list_.pdf + .. 
button-link:: https://docs.openvino.ai/2024/_static/download/benchmarking_genai_platform_list.pdf :color: primary :outline: :expand: diff --git a/docs/articles_en/about-openvino/release-notes-openvino.rst b/docs/articles_en/about-openvino/release-notes-openvino.rst index 739c411dcbe7e5..f898ddaf42ba03 100644 --- a/docs/articles_en/about-openvino/release-notes-openvino.rst +++ b/docs/articles_en/about-openvino/release-notes-openvino.rst @@ -27,7 +27,7 @@ What's new +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ * . -* . + @@ -44,9 +44,9 @@ CPU Device Plugin GPU Device Plugin ----------------------------------------------------------------------------------------------- -* . * . + NPU Device Plugin ----------------------------------------------------------------------------------------------- @@ -68,10 +68,6 @@ Other Changes and Known Issues Jupyter Notebooks ----------------------------- -* `Visual-language assistant with GLM-Edge-V and OpenVINO `__ -* `Local AI and OpenVINO `__ -* `Multimodal understanding and generation with Janus and OpenVINO `__ - @@ -119,19 +115,19 @@ Discontinued in 2025 * Runtime components: - * OpenVINO property Affinity API will is no longer available. It has been replaced with CPU + * The OpenVINO property of Affinity API will is no longer available. It has been replaced with CPU binding configurations (``ov::hint::enable_cpu_pinning``). * Tools: - * Intel® Streaming SIMD Extensions (Intel® SSE) are currently not enabled in the binary - package by default. They are still supported in the source code form. * The OpenVINO™ Development Tools package (pip install openvino-dev) is no longer available for OpenVINO releases in 2025. - * Model Optimizer is no longer avilable. Consider using the + * Model Optimizer is no longer available. Consider using the :doc:`new conversion methods <../openvino-workflow/model-preparation/convert-model-to-ir>` instead. For more details, see the `model conversion transition guide `__. + * Intel® Streaming SIMD Extensions (Intel® SSE) are currently not enabled in the binary + package by default. They are still supported in the source code form. Deprecated and to be removed in the future @@ -141,7 +137,7 @@ Deprecated and to be removed in the future standard support. * The openvino-nightly PyPI module will soon be discontinued. End-users should proceed with the Simple PyPI nightly repo instead. More information in - `Release Policy `__. + `Release Policy `__. * “auto shape” and “auto batch size” (reshaping a model in runtime) will be removed in the future. OpenVINO's dynamic shape models are recommended instead. * MacOS x86 is no longer recommended for use due to the discontinuation of validation. @@ -161,17 +157,13 @@ Legal Information +++++++++++++++++++++++++++++++++++++++++++++ You may not use or facilitate the use of this document in connection with any infringement -or other legal analysis concerning Intel products described herein. - -You agree to grant Intel a non-exclusive, royalty-free license to any patent claim -thereafter drafted which includes subject matter disclosed herein. +or other legal analysis concerning Intel products described herein. All information provided +here is subject to change without notice. Contact your Intel representative to obtain the +latest Intel product specifications and roadmaps. No license (express or implied, by estoppel or otherwise) to any intellectual property rights is granted by this document. 
-All information provided here is subject to change without notice. Contact your Intel -representative to obtain the latest Intel product specifications and roadmaps. - The products described may contain design defects or errors known as errata which may cause the product to deviate from published specifications. Current characterized errata are available on request. @@ -183,10 +175,9 @@ or from the OEM or retailer. No computer system can be absolutely secure. -Intel, Atom, Core, Xeon, OpenVINO, and the Intel logo are trademarks -of Intel Corporation in the U.S. and/or other countries. - -Other names and brands may be claimed as the property of others. +Intel, Atom, Core, Xeon, OpenVINO, and the Intel logo are trademarks of Intel Corporation in +the U.S. and/or other countries. Other names and brands may be claimed as the property of +others. Copyright © 2025, Intel Corporation. All rights reserved. diff --git a/docs/articles_en/documentation/openvino-ecosystem.rst b/docs/articles_en/documentation/openvino-ecosystem.rst index cb62672c032412..fbd4b6e53240a3 100644 --- a/docs/articles_en/documentation/openvino-ecosystem.rst +++ b/docs/articles_en/documentation/openvino-ecosystem.rst @@ -24,7 +24,7 @@ you an overview of a whole ecosystem of tools and solutions under the OpenVINO u | **GenAI** | :bdg-link-dark:`Github ` - :bdg-link-success:`User Guide ` + :bdg-link-success:`User Guide ` OpenVINO™ GenAI Library aims to simplify running inference of generative AI models. Check the LLM-powered Chatbot Jupyter notebook to see how GenAI works. @@ -113,7 +113,7 @@ generative AI and vision models directly on your computer or edge device using O | **Tokenizers** | :bdg-link-dark:`Github ` - :bdg-link-success:`User Guide ` + :bdg-link-success:`User Guide ` OpenVINO Tokenizers add text processing operations to OpenVINO. diff --git a/docs/articles_en/get-started/configurations.rst b/docs/articles_en/get-started/configurations.rst index 3e471c33445292..c0e885dd956c78 100644 --- a/docs/articles_en/get-started/configurations.rst +++ b/docs/articles_en/get-started/configurations.rst @@ -32,8 +32,9 @@ potential of OpenVINO™. Check the following list for components used in your w for details. | **OpenVINO GenAI Dependencies** -| OpenVINO GenAI is a flavor of OpenVINO, aiming to simplify running generative - AI models. For information on the dependencies required to use OpenVINO GenAI, see the +| OpenVINO GenAI is a tool based on the OpenVNO Runtime but simplifying the process of + running generative AI models. For information on the dependencies required to use + OpenVINO GenAI, see the :doc:`guide on OpenVINO GenAI Dependencies `. | **Open Computer Vision Library** diff --git a/docs/articles_en/get-started/install-openvino.rst b/docs/articles_en/get-started/install-openvino.rst index 387a0bf2ab37e3..7616a87d6f3384 100644 --- a/docs/articles_en/get-started/install-openvino.rst +++ b/docs/articles_en/get-started/install-openvino.rst @@ -11,11 +11,11 @@ Install OpenVINO™ 2025.0 :maxdepth: 3 :hidden: + OpenVINO GenAI OpenVINO Runtime on Linux OpenVINO Runtime on Windows OpenVINO Runtime on macOS Create an OpenVINO Yocto Image - OpenVINO GenAI Flavor .. raw:: html @@ -30,13 +30,13 @@ All currently supported versions are: * 2023.3 (LTS) -.. dropdown:: Effortless GenAI integration with OpenVINO GenAI Flavor +.. 
dropdown:: Effortless GenAI integration with OpenVINO GenAI - A new OpenVINO GenAI Flavor streamlines application development by providing - LLM-specific interfaces for easy integration of language models, handling tokenization and - text generation. For installation and usage instructions, proceed to - :doc:`Install OpenVINO GenAI Flavor <../openvino-workflow-generative>` and - :doc:`Run LLMs with OpenVINO GenAI Flavor <../openvino-workflow-generative/inference-with-genai>`. + OpenVINO GenAI streamlines application development by providing LLM-specific interfaces for + easy integration of language models, handling tokenization and text generation. + For installation and usage instructions, check + :doc:`OpenVINO GenAI installation <../openvino-workflow-generative>` and + :doc:`inference with OpenVINO GenAI <../openvino-workflow-generative/inference-with-genai>`. .. dropdown:: Building OpenVINO from Source diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-genai.rst b/docs/articles_en/get-started/install-openvino/install-openvino-genai.rst index b548353b36977e..026a76f2ee86d7 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-genai.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-genai.rst @@ -1,24 +1,26 @@ Install OpenVINO™ GenAI ==================================== -OpenVINO GenAI is a new flavor of OpenVINO, aiming to simplify running inference of generative AI models. -It hides the complexity of the generation process and minimizes the amount of code required. -You can now provide a model and input context directly to OpenVINO, which performs tokenization of the -input text, executes the generation loop on the selected device, and returns the generated text. -For a quickstart guide, refer to the :doc:`GenAI API Guide <../../openvino-workflow-generative/inference-with-genai>`. - -To see GenAI in action, check the Jupyter notebooks: -`LLM-powered Chatbot `__ and +OpenVINO GenAI is a tool, simplifying generative AI model inference. It is based on the +OpenVINO Runtime, hiding the complexity of the generation process and minimizing the amount of +code required. You provide a model and the input context directly to the tool, while it +performs tokenization of the input text, executes the generation loop on the selected device, +and returns the generated content. For a quickstart guide, refer to the +:doc:`GenAI API Guide <../../openvino-workflow-generative/inference-with-genai>`. + +To see OpenVINO GenAI in action, check these Jupyter notebooks: +`LLM-powered Chatbot `__ +and `LLM Instruction-following pipeline `__. -The OpenVINO GenAI flavor is available for installation via PyPI and Archive distributions. +OpenVINO GenAI is available for installation via PyPI and Archive distributions. A `detailed guide `__ on how to build OpenVINO GenAI is available in the OpenVINO GenAI repository. PyPI Installation ############################### -To install the GenAI flavor of OpenVINO via PyPI, follow the standard :doc:`installation steps `, +To install the GenAI package via PyPI, follow the standard :doc:`installation steps `, but use the *openvino-genai* package instead of *openvino*: .. code-block:: python @@ -28,9 +30,9 @@ but use the *openvino-genai* package instead of *openvino*: Archive Installation ############################### -The OpenVINO GenAI archive package includes the OpenVINO™ Runtime and :doc:`Tokenizers <../../openvino-workflow-generative/ov-tokenizers>`. 
-To install the GenAI flavor of OpenVINO from an archive file, follow the standard installation steps for your system -but instead of using the vanilla package file, download the one with OpenVINO GenAI: +The OpenVINO GenAI archive package includes the OpenVINO™ Runtime, as well as :doc:`Tokenizers <../../openvino-workflow-generative/ov-tokenizers>`. +It installs the same way as the standard OpenVINO Runtime, so follow its installation steps, +just use the OpenVINO GenAI package instead: Linux ++++++++++++++++++++++++++ diff --git a/docs/articles_en/openvino-workflow-generative.rst b/docs/articles_en/openvino-workflow-generative.rst index 14521f118f6dfc..5ac880ace110c3 100644 --- a/docs/articles_en/openvino-workflow-generative.rst +++ b/docs/articles_en/openvino-workflow-generative.rst @@ -96,8 +96,8 @@ The advantages of using OpenVINO for generative model deployment: Proceed to guides on: -* :doc:`OpenVINO GenAI Flavor <./openvino-workflow-generative/inference-with-genai>` +* :doc:`OpenVINO GenAI <./openvino-workflow-generative/inference-with-genai>` * :doc:`Hugging Face and Optimum Intel <./openvino-workflow-generative/inference-with-optimum-intel>` -* `Generative AI with Base OpenVINO `__ +* `Generative AI with Base OpenVINO `__ diff --git a/docs/articles_en/openvino-workflow-generative/inference-with-genai.rst b/docs/articles_en/openvino-workflow-generative/inference-with-genai.rst index 1f19c3eed7da8f..7e26f0891f779a 100644 --- a/docs/articles_en/openvino-workflow-generative/inference-with-genai.rst +++ b/docs/articles_en/openvino-workflow-generative/inference-with-genai.rst @@ -2,13 +2,13 @@ Inference with OpenVINO GenAI =============================================================================================== .. meta:: - :description: Learn how to use the OpenVINO GenAI flavor to execute LLM models. + :description: Learn how to use OpenVINO GenAI to execute LLM models. .. toctree:: :maxdepth: 1 :hidden: - NPU inference of LLMs + NPU inference of LLMs OpenVINO™ GenAI is a library of pipelines and methods, extending the OpenVINO runtime to work diff --git a/docs/articles_en/openvino-workflow-generative/inference-with-genai-on-npu.rst b/docs/articles_en/openvino-workflow-generative/inference-with-genai/inference-with-genai-on-npu.rst similarity index 97% rename from docs/articles_en/openvino-workflow-generative/inference-with-genai-on-npu.rst rename to docs/articles_en/openvino-workflow-generative/inference-with-genai/inference-with-genai-on-npu.rst index 8fb6ad27c4232f..540d13894c7d02 100644 --- a/docs/articles_en/openvino-workflow-generative/inference-with-genai-on-npu.rst +++ b/docs/articles_en/openvino-workflow-generative/inference-with-genai/inference-with-genai-on-npu.rst @@ -2,9 +2,10 @@ Inference with OpenVINO GenAI ========================================== .. meta:: - :description: Learn how to use the OpenVINO GenAI flavor to execute LLM models on NPU. + :description: Learn how to use OpenVINO GenAI to execute LLM models on NPU. -This guide will give you extra details on how to utilize NPU with the GenAI flavor. + +This guide will give you extra details on how to utilize NPU with OpenVINO GenAI. :doc:`See the installation guide <../../get-started/install-openvino/install-openvino-genai>` for information on how to start. @@ -24,6 +25,10 @@ Note that for systems based on Intel® Core™ Ultra Processors Series 2, more t may be required to run prompts over 1024 tokens on models exceeding 7B parameters, such as Llama-2-7B, Mistral-0.2-7B, and Qwen-2-7B. 
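As a minimal sketch of the flow described in this article (not part of the official samples), running a language model on NPU with OpenVINO GenAI comes down to passing ``NPU`` as the device name when constructing the pipeline. The model directory below is an assumed example; use the folder produced by the export step described later on this page.

.. code-block:: cpp

   #include <iostream>

   #include "openvino/genai/llm_pipeline.hpp"

   int main() {
       // Folder produced by the optimum-cli export step (placeholder path).
       std::string models_path = "TinyLlama-1.1B-Chat-v1.0";

       // "NPU" routes model compilation and the generation loop to the NPU plugin.
       ov::genai::LLMPipeline pipe(models_path, "NPU");

       ov::genai::GenerationConfig config;
       config.max_new_tokens = 100;

       std::cout << pipe.generate("What is OpenVINO?", config) << std::endl;
       return 0;
   }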
+Make sure your model works with NPU. Some models may not be supported, for example, +**the FLUX.1 pipeline is currently not supported by the device**. + + Export an LLM model via Hugging Face Optimum-Intel ################################################## diff --git a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/npu-device.rst b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/npu-device.rst index a3bdbfc7c2b7d1..ed28633f1a9198 100644 --- a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/npu-device.rst +++ b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/npu-device.rst @@ -22,7 +22,7 @@ for more streamlined resource management. NPU Plugin is now available through all relevant OpenVINO distribution channels. | **Supported Platforms:** -| Host: Intel® Core™ Ultra (former Meteor Lake) +| Host: Intel® Core™ Ultra series | NPU device: NPU 3720 | OS: Ubuntu* 22.04 64-bit (with Linux kernel 6.6+), MS Windows* 11 64-bit (22H2, 23H2) @@ -33,10 +33,10 @@ Follow the instructions below to install the latest NPU drivers: * `Linux driver `__ -The plugin uses the graph extension API exposed by the driver to convert the OpenVINO specific representation -of the model into a proprietary format. The compiler included in the user mode driver (UMD) performs -platform specific optimizations in order to efficiently schedule the execution of network layers and -memory transactions on various NPU hardware submodules. +The plugin uses the graph extension API exposed by the driver to convert the OpenVINO specific +representation of the model into a proprietary format. The compiler included in the user mode +driver (UMD) performs platform specific optimizations in order to efficiently schedule the +execution of network layers and memory transactions on various NPU hardware submodules. To use NPU for inference, pass the device name to the ``ov::Core::compile_model()`` method: diff --git a/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimizing-latency.rst b/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimizing-latency.rst index 7d6df9166f163e..febba3134cad40 100644 --- a/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimizing-latency.rst +++ b/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimizing-latency.rst @@ -14,34 +14,62 @@ Optimizing for Latency improve throughput without degrading latency. -A significant portion of deep learning use cases involve applications loading a single model and using a single input at a time, which is the of typical "consumer" scenario. -While an application can create more than one request if needed, for example to support :ref:`asynchronous inputs population `, its **inference performance depends on how many requests are being inferenced in parallel** on a device. - -Similarly, when multiple models are served on the same device, it is important whether the models are executed simultaneously or in a chain, for example, in the inference pipeline. -As expected, the easiest way to achieve **low latency is by running only one inference at a time** on one device. Accordingly, any additional concurrency usually results in latency rising fast. - -However, some conventional "root" devices (i.e., CPU or GPU) can be in fact internally composed of several "sub-devices". 
In many cases, letting OpenVINO leverage the "sub-devices" transparently helps to improve application's throughput (e.g., serve multiple clients simultaneously) without degrading latency. For example, multi-socket CPUs can deliver as many requests at the same minimal latency as there are NUMA nodes in the system. Similarly, a multi-tile GPU, which is essentially multiple GPUs in a single package, can deliver a multi-tile scalability with the number of inference requests, while preserving the single-tile latency.
-
-Typically, human expertise is required to get more "throughput" out of the device, even in the inherently latency-oriented cases. OpenVINO can take this configuration burden via :doc:`high-level performance hints `, the `ov::hint::PerformanceMode::LATENCY `__ specified for the ``ov::hint::performance_mode`` property for the ``compile_model``.
+An application that loads a single model and uses a single input at a time is
+a widespread use case in deep learning. More requests can still be created if
+needed, for example to support :ref:`asynchronous input population `.
+However, **the number of parallel requests affects inference performance**
+of the application.
+
+Also, when multiple models run on the same device, latency depends on whether the models
+are executed simultaneously or in a chain: the more inference tasks at once, the higher the
+latency.
+
+However, devices such as CPUs and GPUs may be composed of several "sub-devices". OpenVINO can
+handle them transparently when serving multiple clients, improving the application's throughput
+without impacting latency. What is more, multi-socket CPUs can deliver as many requests at the
+same minimal latency as there are NUMA nodes in the system. Similarly, a multi-tile GPU,
+which is essentially multiple GPUs in a single package, can deliver multi-tile
+scalability with the number of inference requests, while preserving the
+single-tile latency.

 .. note::

-   :doc:`OpenVINO performance hints ` is a recommended way for performance configuration, which is both device-agnostic and future-proof.
+   Balancing throughput and latency by manual configuration requires strong expertise
+   in this area. Instead, you should specify :doc:`performance hints `
+   for ``compile_model``, which is a device-agnostic and future-proof option.

-**When multiple models are to be used simultaneously**, consider running inference on separate devices for each of them. Finally, when multiple models are executed in parallel on a device, using additional ``ov::hint::model_priority`` may help to define relative priorities of the models. Refer to the documentation on the :doc:`OpenVINO feature support for devices <../../../../about-openvino/compatibility-and-support/supported-devices>` to check if your device supports the feature.
+**For running multiple models simultaneously**, consider using separate devices for each of
+them. When multiple models are executed in parallel on a device, use ``ov::hint::model_priority``
+to define relative priorities of the models. Note that this feature may not be available for
+some devices.

 **First-Inference Latency and Model Load/Compile Time**

-In some cases, model loading and compilation contribute to the "end-to-end" latency more than usual.
-For example, when the model is used exactly once, or when it is unloaded and reloaded in a cycle, to free the memory for another inference due to on-device memory limitations.
- -Such a "first-inference latency" scenario may pose an additional limitation on the model load\compilation time, as inference accelerators (other than the CPU) usually require a certain level of model compilation upon loading. -The :doc:`model caching ` option is a way to lessen the impact over multiple application runs. If model caching is not possible, for example, it may require write permissions for the application, the CPU offers the fastest model load time almost every time. +First-inference latency is the longest time the application requires to finish inference. +This means it includes the time to load and compile the model, which happens at the first +execution only. For some scenarios it may be a significant factor, for example, if the model is +always used just once or is unloaded after each run to free up the memory. + +In such cases the device choice is especially important. The CPU offers the fastest model load +time nearly every time. Other accelerators usually take longer to compile a model but may be +better for inference. In such cases, :doc:`Model caching ` +may reduce latency, as long as there are no additional limitations in write permissions +for the application. + +To improve "first-inference latency", you may choose between mapping the model into memory +(the default option) and reading it (the older solution). While mapping is better in most cases, +sometimes it may increase latency, especially when the model is located on a removable or a +network drive. To switch between the two, specify the +`ov::enable_mmap() <../../../api/ie_python_api/_autosummary/openvino.frontend.FrontEnd.html#openvino.frontend.FrontEnd.load>` +property for the ``ov::Core`` as either ``True`` or ``False``. + +You can also use :doc:`AUTO device selection inference mode <../inference-devices-and-modes/auto-device-selection>` +to deal with first-inference latency. +It starts inference on the CPU, while waiting for the proper accelerator to load +the model. At that point, it shifts to the new device seamlessly. -To improve common "first-inference latency" scenario, model reading was replaced with model mapping (using `mmap`) into a memory. But in some use cases (first of all, if model is located on removable or network drive) mapping may lead to latency increase. To switch mapping to reading, specify ``ov::enable_mmap(false)`` property for the ``ov::Core``. - -Another way of dealing with first-inference latency is using the :doc:`AUTO device selection inference mode <../inference-devices-and-modes/auto-device-selection>`. It starts inference on the CPU, while waiting for the actual accelerator to load the model. At that point, it shifts to the new device seamlessly. - -Finally, note that any :doc:`throughput-oriented options ` may significantly increase the model uptime. +.. note:: + Keep in mind that any :doc:`throughput-oriented options ` + may significantly increase inference time. 
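To make the revised latency guidance concrete, the following sketch combines the two switches discussed above: the ``LATENCY`` performance hint instead of manual stream tuning, and the ``ov::enable_mmap`` property for choosing between mapping and reading the model file. The model path and device name are placeholders, and whether disabling ``mmap`` helps depends on where the model is stored.

.. code-block:: cpp

   #include "openvino/openvino.hpp"

   int main() {
       ov::Core core;

       // If the model sits on a removable or network drive, disabling memory mapping
       // may reduce first-inference latency; the default (mapping) is usually better.
       core.set_property(ov::enable_mmap(false));

       // Let the plugin configure itself for the lowest latency rather than
       // tuning the number of streams and threads by hand.
       auto compiled = core.compile_model(
           "model.xml", "CPU",
           ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));

       ov::InferRequest request = compiled.create_infer_request();
       return 0;
   }

When several models share one device, ``ov::hint::model_priority`` can be passed to ``compile_model`` in the same way to set their relative priorities.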
diff --git a/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimizing-latency/model-caching-overview.rst b/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimizing-latency/model-caching-overview.rst index b3253f775bdb02..b1b6da190a0192 100644 --- a/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimizing-latency/model-caching-overview.rst +++ b/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimizing-latency/model-caching-overview.rst @@ -9,16 +9,16 @@ Model Caching Overview As described in :doc:`Integrate OpenVINO™ with Your Application <../../integrate-openvino-with-your-application>`, -a common application flow consists of the following steps: +a common workflow consists of the following steps: 1. | **Create a Core object**: | First step to manage available devices and read model objects 2. | **Read the Intermediate Representation**: - | Read an Intermediate Representation file into an object of the `ov::Model `__ + | Read an Intermediate Representation file into the `ov::Model `__ object 3. | **Prepare inputs and outputs**: | If needed, manipulate precision, memory layout, size or color format 4. | **Set configuration**: - | Pass device-specific loading configurations to the device + | Add device-specific loading configurations to the device 5. | **Compile and Load Network to device**: | Use the `ov::Core::compile_model() `__ method with a specific device 6. | **Set input data**: @@ -32,14 +32,15 @@ automatically and reuses it to significantly reduce the model compilation time. .. important:: - Not all devices support the network import/export feature. They will perform normally but will not + Not all devices support import/export of models. They will perform normally but will not enable the compilation stage speed-up. -Set "cache_dir" config option to enable model caching +Set configuration options +++++++++++++++++++++++++++++++++++++++++++++++++++++ -To enable model caching, the application must specify a folder to store the cached blobs: +| Use the ``device_name`` option to specify the inference device. +| Specify ``cache_dir`` to enable model caching. .. tab-set:: @@ -58,23 +59,25 @@ To enable model caching, the application must specify a folder to store the cach :fragment: [ov:caching:part0] -With this code, if the device specified by ``device_name`` supports import/export model capability, -a cached blob (the ``.cl_cache`` and ``.blob`` file for GPU and CPU respectively) is automatically +If the specified device supports import/export of models, +a cached blob file: ``.cl_cache`` (GPU) or ``.blob`` (CPU) is automatically created inside the ``/path/to/cache/dir`` folder. -If the device does not support the import/export capability, cache is not created and no error is thrown. +If the device does not support import/export of models, the cache is not +created and no error is thrown. -Note that the first ``compile_model`` operation takes slightly longer, as the cache needs to be created - -the compiled blob is saved into a cache file: +Note that the first ``compile_model`` operation takes slightly more time, +as the cache needs to be created - the compiled blob is saved into a file: .. image:: ../../../../assets/images/caching_enabled.svg -Make it even faster: use compile_model(modelPath) +Use optimized methods +++++++++++++++++++++++++++++++++++++++++++++++++++ -In some cases, applications do not need to customize inputs and outputs every time. 
Such application always -call ``model = core.read_model(...)``, then ``core.compile_model(model, ..)``, which can be further optimized. -For these cases, there is a more convenient API to compile the model in a single call, skipping the read step: +Applications do not always require an initial customization of inputs and +outputs, as they can call ``model = core.read_model(...)``, then ``core.compile_model(model, ..)``, +which can be further optimized. Thus, the model can be compiled conveniently in a single call, +skipping the read step: .. tab-set:: @@ -93,7 +96,7 @@ For these cases, there is a more convenient API to compile the model in a single :fragment: [ov:caching:part1] -With model caching enabled, the total load time is even shorter, if ``read_model`` is optimized as well. +The total load time is even shorter, when model caching is enabled and ``read_model`` is optimized as well. .. tab-set:: @@ -117,8 +120,9 @@ With model caching enabled, the total load time is even shorter, if ``read_model Advanced Examples ++++++++++++++++++++ -Not every device supports the network import/export capability. For those that don't, enabling caching has no effect. -To check in advance if a particular device supports model caching, your application can use the following code: +Enabling model caching has no effect when the specified device does not support +import/export of models. To check in advance if a particular device supports +model caching, use the following code in your application: .. tab-set:: @@ -136,10 +140,12 @@ To check in advance if a particular device supports model caching, your applicat :language: cpp :fragment: [ov:caching:part3] -Set "cache_encryption_callbacks" config option to enable cache encryption +Enable cache encryption +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -If model caching is enabled in the CPU Plugin, the model topology can be encrypted while it is saved to the cache and decrypted when it is loaded from the cache. Currently, this property can be set only in ``compile_model``. +If model caching is enabled in the CPU Plugin, set the "cache_encryption_callbacks" +config option to encrypt the model while caching it and decrypt it when +loading it from the cache. Currently, this property can be set only in ``compile_model``. .. tab-set:: @@ -157,7 +163,7 @@ If model caching is enabled in the CPU Plugin, the model topology can be encrypt :language: cpp :fragment: [ov:caching:part4] -If model caching is enabled in the GPU Plugin, the model topology can be encrypted while it is saved to the cache and decrypted when it is loaded from the cache. Full encryption only works when the ``CacheMode`` property is set to ``OPTIMIZE_SIZE``. +Full encryption only works when the ``CacheMode`` property is set to ``OPTIMIZE_SIZE``. .. tab-set:: @@ -177,4 +183,6 @@ If model caching is enabled in the GPU Plugin, the model topology can be encrypt .. important:: - Currently, this property is supported only by the CPU and GPU plugins. For other HW plugins, setting this property will not encrypt/decrypt the model topology in cache and will not affect performance. + Currently, encryption is supported only by the CPU and GPU plugins. Enabling this + feature for other HW plugins will not encrypt/decrypt model topology in the + cache and will not affect performance. 
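To tie the caching steps above together, here is a hedged sketch (the cache folder, model path, and device name are placeholders) that enables the cache directory, checks the import/export capability in advance, and compiles directly from the model path in a single call:

.. code-block:: cpp

   #include <algorithm>
   #include <iostream>

   #include "openvino/openvino.hpp"

   int main() {
       ov::Core core;

       // The first compile_model() call populates this folder; later runs reuse the cached blobs.
       core.set_property(ov::cache_dir("/path/to/cache/dir"));

       // Devices without import/export support simply ignore the cache, so this check is optional.
       auto capabilities = core.get_property("GPU", ov::device::capabilities);
       bool supports_caching = std::find(capabilities.begin(),
                                         capabilities.end(),
                                         ov::device::capability::EXPORT_IMPORT) != capabilities.end();
       std::cout << "Model caching supported: " << std::boolalpha << supports_caching << "\n";

       // Single-call compilation skips the explicit read_model() step, as recommended above.
       auto compiled = core.compile_model("model.xml", "GPU");
       return 0;
   }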
diff --git a/docs/notebooks/llm-agent-functioncall-qwen-with-output.rst b/docs/notebooks/llm-agent-functioncall-qwen-with-output.rst index 051e83eff184bb..19b3f849a0f102 100644 --- a/docs/notebooks/llm-agent-functioncall-qwen-with-output.rst +++ b/docs/notebooks/llm-agent-functioncall-qwen-with-output.rst @@ -258,7 +258,7 @@ pipeline. You can get additional inference speed improvement with `Dynamic Quantization of activations and KV-cache quantization on -CPU `__. +CPU `__. These options can be enabled with ``ov_config`` as follows: .. code:: ipython3 diff --git a/docs/notebooks/llm-agent-react-langchain-with-output.rst b/docs/notebooks/llm-agent-react-langchain-with-output.rst index 7313d4c454c42a..34c81ef6e11e75 100644 --- a/docs/notebooks/llm-agent-react-langchain-with-output.rst +++ b/docs/notebooks/llm-agent-react-langchain-with-output.rst @@ -438,7 +438,7 @@ information `__. You can get additional inference speed improvement with `Dynamic Quantization of activations and KV-cache quantization on -CPU `__. +CPU `__. These options can be enabled with ``ov_config`` as follows: .. code:: ipython3 diff --git a/docs/notebooks/multilora-image-generation-with-output.rst b/docs/notebooks/multilora-image-generation-with-output.rst index f6445e5a2ec1f2..e2da1edafdd8f6 100644 --- a/docs/notebooks/multilora-image-generation-with-output.rst +++ b/docs/notebooks/multilora-image-generation-with-output.rst @@ -144,7 +144,7 @@ saved on disk before export. For avoiding this, we will use ``export_from_model`` function that accepts initialized model. Additionally, for using model with OpenVINO GenAI, we need to export tokenizers to OpenVINO format using `OpenVINO -Tokenizers `__ +Tokenizers `__ library. In this tutorial we will use `Stable Diffusion diff --git a/docs/notebooks/speculative-sampling-with-output.rst b/docs/notebooks/speculative-sampling-with-output.rst index 8ca9ca5bc7002c..8dd300fa4bbaff 100644 --- a/docs/notebooks/speculative-sampling-with-output.rst +++ b/docs/notebooks/speculative-sampling-with-output.rst @@ -136,7 +136,7 @@ In case, if you want run own models, you should convert them using Optimum `__ library accelerated by OpenVINO integration. More details about model preparation can be found in `OpenVINO LLM inference -guide `__ +guide `__ .. code:: ipython3 diff --git a/docs/notebooks/text-to-image-genai-with-output.rst b/docs/notebooks/text-to-image-genai-with-output.rst index a0f0af9ef41538..d43b900d9133db 100644 --- a/docs/notebooks/text-to-image-genai-with-output.rst +++ b/docs/notebooks/text-to-image-genai-with-output.rst @@ -23,7 +23,7 @@ the Hugging Face Transformers library to the OpenVINO™ IR format. For more details, refer to the `Hugging Face Optimum Intel documentation `__. 2. Run inference using the `Text-to-Image Generation -pipeline `__ +pipeline `__ from OpenVINO GenAI. 
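The text-to-image notebook referenced above has a C++ counterpart in OpenVINO GenAI. The sketch below is illustrative only: the model folder is an assumed example, and the include path and generation properties are taken from current OpenVINO GenAI releases, so treat the exact names as assumptions rather than a guaranteed interface.

.. code-block:: cpp

   #include <iostream>

   #include "openvino/genai/image_generation/text2image_pipeline.hpp"

   int main() {
       // Directory with a diffusion model converted to OpenVINO IR (placeholder path).
       std::string models_path = "dreamlike_anime_1_0_ov";

       ov::genai::Text2ImagePipeline pipe(models_path, "CPU");

       // generate() returns an ov::Tensor with the image data; writing it to disk is omitted here.
       ov::Tensor image = pipe.generate(
           "anime portrait of a cat, cinematic lighting",
           ov::genai::width(512),
           ov::genai::height(512),
           ov::genai::num_inference_steps(20));

       std::cout << "Generated image tensor shape: " << image.get_shape() << std::endl;
       return 0;
   }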
diff --git a/docs/sphinx_setup/_static/download/GenAI_Quick_Start_Guide.pdf b/docs/sphinx_setup/_static/download/GenAI_Quick_Start_Guide.pdf index c5632a7e3f9627..2046f7d9427421 100644 Binary files a/docs/sphinx_setup/_static/download/GenAI_Quick_Start_Guide.pdf and b/docs/sphinx_setup/_static/download/GenAI_Quick_Start_Guide.pdf differ diff --git a/docs/sphinx_setup/_static/benchmarks_files/OV-2024.6-Performance-Data.xlsx b/docs/sphinx_setup/_static/download/benchmarking_OV_performance-data.xlsx similarity index 100% rename from docs/sphinx_setup/_static/benchmarks_files/OV-2024.6-Performance-Data.xlsx rename to docs/sphinx_setup/_static/download/benchmarking_OV_performance-data.xlsx diff --git a/docs/sphinx_setup/_static/benchmarks_files/OV-2024.6-platform_list.pdf b/docs/sphinx_setup/_static/download/benchmarking_OV_platform_list.pdf similarity index 100% rename from docs/sphinx_setup/_static/benchmarks_files/OV-2024.6-platform_list.pdf rename to docs/sphinx_setup/_static/download/benchmarking_OV_platform_list.pdf diff --git a/docs/sphinx_setup/_static/benchmarks_files/OV-2024.6-system-info-detailed.xlsx b/docs/sphinx_setup/_static/download/benchmarking_OV_system_info_detailed.xlsx similarity index 100% rename from docs/sphinx_setup/_static/benchmarks_files/OV-2024.6-system-info-detailed.xlsx rename to docs/sphinx_setup/_static/download/benchmarking_OV_system_info_detailed.xlsx diff --git a/docs/sphinx_setup/_static/benchmarks_files/llm_models_platform_list_.pdf b/docs/sphinx_setup/_static/download/benchmarking_genai_platform_list.pdf similarity index 100% rename from docs/sphinx_setup/_static/benchmarks_files/llm_models_platform_list_.pdf rename to docs/sphinx_setup/_static/download/benchmarking_genai_platform_list.pdf diff --git a/docs/sphinx_setup/api/nodejs_api/openvino-node/interfaces/Tensor.rst b/docs/sphinx_setup/api/nodejs_api/openvino-node/interfaces/Tensor.rst index 9b0e19b559cdf8..8e51702aa1baca 100644 --- a/docs/sphinx_setup/api/nodejs_api/openvino-node/interfaces/Tensor.rst +++ b/docs/sphinx_setup/api/nodejs_api/openvino-node/interfaces/Tensor.rst @@ -9,6 +9,7 @@ Interface Tensor getData(): SupportedTypedArray; getShape(): number[]; getSize(): number; + isContinuous(): boolean; } @@ -116,3 +117,19 @@ Methods * **Defined in:** `addon.ts:421 `__ + +.. rubric:: isContinuous + +* + + .. code-block:: ts + + isContinuous(): boolean; + + Reports whether the tensor is continuous or not. 
+ + * **Returns:** boolean + + * **Defined in:** + `addon.ts:425 `__ + \ No newline at end of file diff --git a/src/bindings/python/src/pyopenvino/frontend/tensorflow/graph_iterator.cpp b/src/bindings/python/src/pyopenvino/frontend/tensorflow/graph_iterator.cpp index 1a5edab7de767a..8c97d0d49162c2 100644 --- a/src/bindings/python/src/pyopenvino/frontend/tensorflow/graph_iterator.cpp +++ b/src/bindings/python/src/pyopenvino/frontend/tensorflow/graph_iterator.cpp @@ -8,7 +8,7 @@ #include "graph_iterator.hpp" -#include "openvino/frontend/graph_iterator.hpp" +#include "openvino/frontend/tensorflow/graph_iterator.hpp" namespace py = pybind11; diff --git a/src/bindings/python/src/pyopenvino/frontend/tensorflow/graph_iterator.hpp b/src/bindings/python/src/pyopenvino/frontend/tensorflow/graph_iterator.hpp index b0e333657f91e2..46e6bdebedcd21 100644 --- a/src/bindings/python/src/pyopenvino/frontend/tensorflow/graph_iterator.hpp +++ b/src/bindings/python/src/pyopenvino/frontend/tensorflow/graph_iterator.hpp @@ -7,7 +7,7 @@ #include #include "openvino/frontend/decoder.hpp" -#include "openvino/frontend/graph_iterator.hpp" +#include "openvino/frontend/tensorflow/graph_iterator.hpp" namespace py = pybind11; diff --git a/src/bindings/python/src/pyopenvino/utils/utils.cpp b/src/bindings/python/src/pyopenvino/utils/utils.cpp index d223a400ea2a49..526be24b989b09 100644 --- a/src/bindings/python/src/pyopenvino/utils/utils.cpp +++ b/src/bindings/python/src/pyopenvino/utils/utils.cpp @@ -454,9 +454,9 @@ ov::Any py_object_to_any(const py::object& py_obj) { // FrontEnd Decoder } else if (py::isinstance(py_obj)) { return py::cast>(py_obj); - // TF FrontEnd GraphIterator - } else if (py::isinstance(py_obj)) { - return py::cast>(py_obj); + // FrontEnd GraphIterator + } else if (py::isinstance(py_obj)) { + return py::cast>(py_obj); // Custom FrontEnd Types } else if (py::isinstance(py_obj)) { return py::cast(py_obj); diff --git a/src/frontends/common/include/openvino/frontend/complex_type_mark.hpp b/src/frontends/common/include/openvino/frontend/complex_type_mark.hpp index e7a89f4f3e933a..b23004237622a2 100644 --- a/src/frontends/common/include/openvino/frontend/complex_type_mark.hpp +++ b/src/frontends/common/include/openvino/frontend/complex_type_mark.hpp @@ -5,6 +5,7 @@ #pragma once #include "openvino/core/type/element_type.hpp" +#include "openvino/frontend/visibility.hpp" #include "openvino/op/util/framework_node.hpp" namespace ov { @@ -17,7 +18,7 @@ namespace frontend { // into a floating-point tensor [N1, N2, ..., Nk, 2] // where a slice with index [..., 0] represents a real part and // a slice with index [..., 1] represents a imaginary part. 
-class ComplexTypeMark : public ov::op::util::FrameworkNode { +class FRONTEND_API ComplexTypeMark : public ov::op::util::FrameworkNode { public: OPENVINO_OP("ComplexTypeMark", "util", ov::op::util::FrameworkNode); @@ -27,6 +28,8 @@ class ComplexTypeMark : public ov::op::util::FrameworkNode { validate_and_infer_types(); } + ~ComplexTypeMark() override; + void validate_and_infer_types() override { set_output_type(0, ov::element::dynamic, PartialShape::dynamic()); } diff --git a/src/frontends/common/include/openvino/frontend/graph_iterator.hpp b/src/frontends/common/include/openvino/frontend/graph_iterator.hpp index 9b05849c78e8a1..d26895c6a05abb 100644 --- a/src/frontends/common/include/openvino/frontend/graph_iterator.hpp +++ b/src/frontends/common/include/openvino/frontend/graph_iterator.hpp @@ -10,7 +10,6 @@ namespace ov { namespace frontend { -namespace tensorflow { /// Abstract representation for an input model graph that gives nodes in topologically sorted order class FRONTEND_API GraphIterator : ::ov::RuntimeAttribute { @@ -51,6 +50,5 @@ class FRONTEND_API GraphIterator : ::ov::RuntimeAttribute { virtual std::map get_output_names_map() const; }; -} // namespace tensorflow } // namespace frontend } // namespace ov diff --git a/src/frontends/common/src/complex_type_mark.cpp b/src/frontends/common/src/complex_type_mark.cpp new file mode 100644 index 00000000000000..25ccb1fe4a2095 --- /dev/null +++ b/src/frontends/common/src/complex_type_mark.cpp @@ -0,0 +1,9 @@ +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/frontend/complex_type_mark.hpp" + +using namespace ov::frontend; + +ComplexTypeMark::~ComplexTypeMark() = default; diff --git a/src/frontends/common/src/graph_iterator.cpp b/src/frontends/common/src/graph_iterator.cpp index 1a97e35448cf06..c1293a7812a8a0 100644 --- a/src/frontends/common/src/graph_iterator.cpp +++ b/src/frontends/common/src/graph_iterator.cpp @@ -4,7 +4,7 @@ #include "openvino/frontend/graph_iterator.hpp" -using namespace ov::frontend::tensorflow; +using namespace ov::frontend; std::map GraphIterator::get_input_names_map() const { return {}; diff --git a/src/frontends/jax/include/openvino/frontend/jax/decoder.hpp b/src/frontends/jax/include/openvino/frontend/jax/decoder.hpp index 2cddb8b936d860..252feedb9f6115 100644 --- a/src/frontends/jax/include/openvino/frontend/jax/decoder.hpp +++ b/src/frontends/jax/include/openvino/frontend/jax/decoder.hpp @@ -11,12 +11,13 @@ #include "openvino/core/node.hpp" #include "openvino/core/node_vector.hpp" #include "openvino/frontend/decoder.hpp" +#include "openvino/frontend/jax/visibility.hpp" namespace ov { namespace frontend { namespace jax { -class JaxDecoder : public IDecoder { +class JAX_FRONTEND_API JaxDecoder : public IDecoder { public: virtual OutputVector as_constant() const = 0; @@ -64,6 +65,8 @@ class JaxDecoder : public IDecoder { /// If there is no query for specific sub-graph it shouldn't be converted // node_visitor is a function that will be fed by nodes in subgraph for all nodes in graph virtual void visit_subgraph(std::function)> node_visitor) const = 0; + + ~JaxDecoder() override; }; } // namespace jax diff --git a/src/frontends/jax/include/openvino/frontend/jax/extension/conversion.hpp b/src/frontends/jax/include/openvino/frontend/jax/extension/conversion.hpp index 490177dde53e5d..2fb18cd4907fe1 100644 --- a/src/frontends/jax/include/openvino/frontend/jax/extension/conversion.hpp +++ 
b/src/frontends/jax/include/openvino/frontend/jax/extension/conversion.hpp @@ -13,7 +13,7 @@ namespace ov { namespace frontend { namespace jax { -class JAX_API ConversionExtension : public ConversionExtensionBase { +class JAX_FRONTEND_API ConversionExtension : public ConversionExtensionBase { public: using Ptr = std::shared_ptr; diff --git a/src/frontends/jax/include/openvino/frontend/jax/frontend.hpp b/src/frontends/jax/include/openvino/frontend/jax/frontend.hpp index e40d6b772b95fa..94a6a859f76c04 100644 --- a/src/frontends/jax/include/openvino/frontend/jax/frontend.hpp +++ b/src/frontends/jax/include/openvino/frontend/jax/frontend.hpp @@ -14,7 +14,7 @@ namespace ov { namespace frontend { namespace jax { -class JAX_API FrontEnd : public ov::frontend::FrontEnd { +class JAX_FRONTEND_API FrontEnd : public ov::frontend::FrontEnd { public: using Ptr = std::shared_ptr; FrontEnd(); diff --git a/src/frontends/jax/include/openvino/frontend/jax/visibility.hpp b/src/frontends/jax/include/openvino/frontend/jax/visibility.hpp index 609248d4fc964e..f5c62f99c6a7c7 100644 --- a/src/frontends/jax/include/openvino/frontend/jax/visibility.hpp +++ b/src/frontends/jax/include/openvino/frontend/jax/visibility.hpp @@ -7,14 +7,14 @@ #include "openvino/frontend/visibility.hpp" #ifdef OPENVINO_STATIC_LIBRARY -# define JAX_API -# define JAX_C_API +# define JAX_FRONTEND_API +# define JAX_FRONTEND_C_API #else # ifdef openvino_jax_frontend_EXPORTS -# define JAX_API OPENVINO_CORE_EXPORTS -# define JAX_C_API OPENVINO_EXTERN_C OPENVINO_CORE_EXPORTS +# define JAX_FRONTEND_API OPENVINO_CORE_EXPORTS +# define JAX_FRONTEND_C_API OPENVINO_EXTERN_C OPENVINO_CORE_EXPORTS # else -# define JAX_API OPENVINO_CORE_IMPORTS -# define JAX_C_API OPENVINO_EXTERN_C OPENVINO_CORE_IMPORTS +# define JAX_FRONTEND_API OPENVINO_CORE_IMPORTS +# define JAX_FRONTEND_C_API OPENVINO_EXTERN_C OPENVINO_CORE_IMPORTS # endif // openvino_jax_frontend_EXPORTS #endif // OPENVINO_STATIC_LIBRARY diff --git a/src/frontends/jax/src/decoder.cpp b/src/frontends/jax/src/decoder.cpp new file mode 100644 index 00000000000000..d3eda7de215c37 --- /dev/null +++ b/src/frontends/jax/src/decoder.cpp @@ -0,0 +1,7 @@ +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/frontend/jax/decoder.hpp" + +ov::frontend::jax::JaxDecoder::~JaxDecoder() = default; diff --git a/src/frontends/jax/src/jax.cpp b/src/frontends/jax/src/jax.cpp index 62fdbb1f01797e..4fbb08a4099b01 100644 --- a/src/frontends/jax/src/jax.cpp +++ b/src/frontends/jax/src/jax.cpp @@ -6,11 +6,11 @@ #include "openvino/frontend/jax/visibility.hpp" #include "openvino/frontend/manager.hpp" -JAX_C_API ov::frontend::FrontEndVersion get_api_version() { +JAX_FRONTEND_C_API ov::frontend::FrontEndVersion get_api_version() { return OV_FRONTEND_API_VERSION; } -JAX_C_API void* get_front_end_data() { +JAX_FRONTEND_C_API void* get_front_end_data() { auto res = new ov::frontend::FrontEndPluginInfo(); res->m_name = "jax"; res->m_creator = []() { diff --git a/src/frontends/paddle/include/openvino/frontend/paddle/extension/conversion.hpp b/src/frontends/paddle/include/openvino/frontend/paddle/extension/conversion.hpp index f27ddd55169b6c..d1b32052eb08bb 100644 --- a/src/frontends/paddle/include/openvino/frontend/paddle/extension/conversion.hpp +++ b/src/frontends/paddle/include/openvino/frontend/paddle/extension/conversion.hpp @@ -13,7 +13,7 @@ namespace ov { namespace frontend { namespace paddle { -class PADDLE_API ConversionExtension : public ConversionExtensionBase { 
+class PADDLE_FRONTEND_API ConversionExtension : public ConversionExtensionBase { public: using Ptr = std::shared_ptr; diff --git a/src/frontends/paddle/include/openvino/frontend/paddle/frontend.hpp b/src/frontends/paddle/include/openvino/frontend/paddle/frontend.hpp index 54bf8961c48c8b..77b59aa4ca695d 100644 --- a/src/frontends/paddle/include/openvino/frontend/paddle/frontend.hpp +++ b/src/frontends/paddle/include/openvino/frontend/paddle/frontend.hpp @@ -21,7 +21,7 @@ namespace paddle { class OpPlace; class TensorPlace; -class PADDLE_API FrontEnd : public ov::frontend::FrontEnd { +class PADDLE_FRONTEND_API FrontEnd : public ov::frontend::FrontEnd { public: using Ptr = std::shared_ptr; FrontEnd(); diff --git a/src/frontends/paddle/include/openvino/frontend/paddle/visibility.hpp b/src/frontends/paddle/include/openvino/frontend/paddle/visibility.hpp index d2430e9489a282..e9ea45c727cea4 100644 --- a/src/frontends/paddle/include/openvino/frontend/paddle/visibility.hpp +++ b/src/frontends/paddle/include/openvino/frontend/paddle/visibility.hpp @@ -7,14 +7,14 @@ #include "openvino/frontend/exception.hpp" #ifdef OPENVINO_STATIC_LIBRARY -# define PADDLE_API -# define PADDLE_C_API +# define PADDLE_FRONTEND_API +# define PADDLE_FRONTEND_C_API #else # ifdef openvino_paddle_frontend_EXPORTS -# define PADDLE_API OPENVINO_CORE_EXPORTS -# define PADDLE_C_API OPENVINO_EXTERN_C OPENVINO_CORE_EXPORTS +# define PADDLE_FRONTEND_API OPENVINO_CORE_EXPORTS +# define PADDLE_FRONTEND_C_API OPENVINO_EXTERN_C OPENVINO_CORE_EXPORTS # else -# define PADDLE_API OPENVINO_CORE_IMPORTS -# define PADDLE_C_API OPENVINO_EXTERN_C OPENVINO_CORE_IMPORTS +# define PADDLE_FRONTEND_API OPENVINO_CORE_IMPORTS +# define PADDLE_FRONTEND_C_API OPENVINO_EXTERN_C OPENVINO_CORE_IMPORTS # endif // openvino_paddle_frontend_EXPORTS #endif // OPENVINO_STATIC_LIBRARY diff --git a/src/frontends/paddle/src/frontend.cpp b/src/frontends/paddle/src/frontend.cpp index 22d5547489e723..c20852ebff655d 100644 --- a/src/frontends/paddle/src/frontend.cpp +++ b/src/frontends/paddle/src/frontend.cpp @@ -586,11 +586,11 @@ void FrontEnd::normalize(const std::shared_ptr& model) const { } // namespace frontend } // namespace ov -PADDLE_C_API FrontEndVersion get_api_version() { +PADDLE_FRONTEND_C_API FrontEndVersion get_api_version() { return OV_FRONTEND_API_VERSION; } -PADDLE_C_API void* get_front_end_data() { +PADDLE_FRONTEND_C_API void* get_front_end_data() { FrontEndPluginInfo* res = new FrontEndPluginInfo(); res->m_name = "paddle"; res->m_creator = []() { diff --git a/src/frontends/pytorch/include/openvino/frontend/pytorch/decoder.hpp b/src/frontends/pytorch/include/openvino/frontend/pytorch/decoder.hpp index 3da741be645cc5..97546c03534c0a 100644 --- a/src/frontends/pytorch/include/openvino/frontend/pytorch/decoder.hpp +++ b/src/frontends/pytorch/include/openvino/frontend/pytorch/decoder.hpp @@ -6,6 +6,7 @@ #include "openvino/core/node.hpp" #include "openvino/frontend/decoder.hpp" +#include "openvino/frontend/pytorch/visibility.hpp" namespace ov { namespace frontend { @@ -14,8 +15,10 @@ namespace pytorch { using DecoderRTInfo = std::unordered_map; /// Plays a role of node, block and module decoder (kind of temporary fat API) -class TorchDecoder : public IDecoder { +class PYTORCH_FRONTEND_API TorchDecoder : public IDecoder { public: + ~TorchDecoder() override; + // Do not search for input in tensor map; try to access it as a constant of specified type T and return its value // Using Any here is an easy way to avoid template definition, returned object is 
supposed to be of one of the // fundamental types like int, float etc. diff --git a/src/frontends/pytorch/include/openvino/frontend/pytorch/extension/conversion.hpp b/src/frontends/pytorch/include/openvino/frontend/pytorch/extension/conversion.hpp index 6c321ccd22df05..cf1f8632045a46 100644 --- a/src/frontends/pytorch/include/openvino/frontend/pytorch/extension/conversion.hpp +++ b/src/frontends/pytorch/include/openvino/frontend/pytorch/extension/conversion.hpp @@ -13,7 +13,7 @@ namespace ov { namespace frontend { namespace pytorch { -class PYTORCH_API ConversionExtension : public ConversionExtensionBase { +class PYTORCH_FRONTEND_API ConversionExtension : public ConversionExtensionBase { public: using Ptr = std::shared_ptr; diff --git a/src/frontends/pytorch/include/openvino/frontend/pytorch/frontend.hpp b/src/frontends/pytorch/include/openvino/frontend/pytorch/frontend.hpp index 76baa62a334dcb..6575aeb4fd983e 100644 --- a/src/frontends/pytorch/include/openvino/frontend/pytorch/frontend.hpp +++ b/src/frontends/pytorch/include/openvino/frontend/pytorch/frontend.hpp @@ -14,7 +14,7 @@ namespace ov { namespace frontend { namespace pytorch { -class PYTORCH_API FrontEnd : public ov::frontend::FrontEnd { +class PYTORCH_FRONTEND_API FrontEnd : public ov::frontend::FrontEnd { public: using Ptr = std::shared_ptr; FrontEnd(); diff --git a/src/frontends/pytorch/include/openvino/frontend/pytorch/visibility.hpp b/src/frontends/pytorch/include/openvino/frontend/pytorch/visibility.hpp index 3911ad8693bcf0..da35017bb868fa 100644 --- a/src/frontends/pytorch/include/openvino/frontend/pytorch/visibility.hpp +++ b/src/frontends/pytorch/include/openvino/frontend/pytorch/visibility.hpp @@ -7,14 +7,14 @@ #include "openvino/frontend/visibility.hpp" #ifdef OPENVINO_STATIC_LIBRARY -# define PYTORCH_API -# define PYTORCH_C_API +# define PYTORCH_FRONTEND_API +# define PYTORCH_FRONTEND_C_API #else # ifdef openvino_pytorch_frontend_EXPORTS -# define PYTORCH_API OPENVINO_CORE_EXPORTS -# define PYTORCH_C_API OPENVINO_EXTERN_C OPENVINO_CORE_EXPORTS +# define PYTORCH_FRONTEND_API OPENVINO_CORE_EXPORTS +# define PYTORCH_FRONTEND_C_API OPENVINO_EXTERN_C OPENVINO_CORE_EXPORTS # else -# define PYTORCH_API OPENVINO_CORE_IMPORTS -# define PYTORCH_C_API OPENVINO_EXTERN_C OPENVINO_CORE_IMPORTS +# define PYTORCH_FRONTEND_API OPENVINO_CORE_IMPORTS +# define PYTORCH_FRONTEND_C_API OPENVINO_EXTERN_C OPENVINO_CORE_IMPORTS # endif // openvino_pytorch_frontend_EXPORTS #endif // OPENVINO_STATIC_LIBRARY diff --git a/src/frontends/pytorch/src/decoder.cpp b/src/frontends/pytorch/src/decoder.cpp new file mode 100644 index 00000000000000..b07e4cbbd8c3f5 --- /dev/null +++ b/src/frontends/pytorch/src/decoder.cpp @@ -0,0 +1,7 @@ +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/frontend/pytorch/decoder.hpp" + +ov::frontend::pytorch::TorchDecoder::~TorchDecoder() = default; diff --git a/src/frontends/pytorch/src/frontend.cpp b/src/frontends/pytorch/src/frontend.cpp index 6debdb8c33311e..29a81e65ea7647 100644 --- a/src/frontends/pytorch/src/frontend.cpp +++ b/src/frontends/pytorch/src/frontend.cpp @@ -403,8 +403,9 @@ ov::frontend::InputModel::Ptr FrontEnd::load_impl(const std::vector& va "PyTorch Frontend doesn't support provided model type. 
Please provide supported model " "object using Python API."); auto decoder = variants[0].as>(); + FRONT_END_GENERAL_CHECK(decoder, "Couldn't cast ov::Any to std::shared_ptr"); auto tdecoder = std::dynamic_pointer_cast(decoder); - FRONT_END_GENERAL_CHECK(tdecoder, "Couldn't cast ov::Any to TorchDecoder"); + FRONT_END_GENERAL_CHECK(tdecoder, "Couldn't cast IDecoder to TorchDecoder"); return std::make_shared(tdecoder); } diff --git a/src/frontends/pytorch/src/pytorch.cpp b/src/frontends/pytorch/src/pytorch.cpp index 967fb43aa5840e..82dda9f5a84a4a 100644 --- a/src/frontends/pytorch/src/pytorch.cpp +++ b/src/frontends/pytorch/src/pytorch.cpp @@ -6,11 +6,11 @@ #include "openvino/frontend/pytorch/frontend.hpp" #include "openvino/frontend/pytorch/visibility.hpp" -PYTORCH_C_API ov::frontend::FrontEndVersion get_api_version() { +PYTORCH_FRONTEND_C_API ov::frontend::FrontEndVersion get_api_version() { return OV_FRONTEND_API_VERSION; } -PYTORCH_C_API void* get_front_end_data() { +PYTORCH_FRONTEND_C_API void* get_front_end_data() { auto res = new ov::frontend::FrontEndPluginInfo(); res->m_name = "pytorch"; res->m_creator = []() { diff --git a/src/frontends/pytorch/src/transforms/aten_index_put_replacer.hpp b/src/frontends/pytorch/src/transforms/aten_index_put_replacer.hpp index ee887004fa30f4..6a795b2d1d4f48 100644 --- a/src/frontends/pytorch/src/transforms/aten_index_put_replacer.hpp +++ b/src/frontends/pytorch/src/transforms/aten_index_put_replacer.hpp @@ -4,7 +4,6 @@ #pragma once -#include "openvino/frontend/pytorch/visibility.hpp" #include "openvino/pass/graph_rewrite.hpp" #include "openvino/pass/pass.hpp" @@ -13,7 +12,7 @@ namespace frontend { namespace pytorch { namespace pass { -class PYTORCH_API AtenIndexPutReplacer : public ov::pass::MatcherPass { +class AtenIndexPutReplacer : public ov::pass::MatcherPass { public: OPENVINO_MATCHER_PASS_RTTI("ov::frontend::pytorch::pass::AtenIndexPutReplacer"); AtenIndexPutReplacer(); diff --git a/src/frontends/pytorch/src/transforms/aten_index_replacer.hpp b/src/frontends/pytorch/src/transforms/aten_index_replacer.hpp index 2aa81063f4e05f..5d1eb662dc687b 100644 --- a/src/frontends/pytorch/src/transforms/aten_index_replacer.hpp +++ b/src/frontends/pytorch/src/transforms/aten_index_replacer.hpp @@ -4,7 +4,6 @@ #pragma once -#include "openvino/frontend/pytorch/visibility.hpp" #include "openvino/pass/graph_rewrite.hpp" #include "openvino/pass/pass.hpp" @@ -14,7 +13,7 @@ namespace pytorch { namespace pass { // This transformation replaces pattern prim::ListConstruct->aten::index -class PYTORCH_API AtenIndexToSelect : public ov::pass::MatcherPass { +class AtenIndexToSelect : public ov::pass::MatcherPass { public: OPENVINO_MATCHER_PASS_RTTI("ov::frontend::pytorch::pass::AtenIndexToSelect"); AtenIndexToSelect(); diff --git a/src/frontends/tensorflow/include/openvino/frontend/tensorflow/extension/conversion.hpp b/src/frontends/tensorflow/include/openvino/frontend/tensorflow/extension/conversion.hpp index 0b6b2aa5414fc4..cec9d2b872c345 100644 --- a/src/frontends/tensorflow/include/openvino/frontend/tensorflow/extension/conversion.hpp +++ b/src/frontends/tensorflow/include/openvino/frontend/tensorflow/extension/conversion.hpp @@ -13,7 +13,7 @@ namespace ov { namespace frontend { namespace tensorflow { -class TENSORFLOW_API ConversionExtension : public ConversionExtensionBase { +class TENSORFLOW_FRONTEND_API ConversionExtension : public ConversionExtensionBase { public: using Ptr = std::shared_ptr; diff --git 
a/src/frontends/tensorflow/include/openvino/frontend/tensorflow/frontend.hpp b/src/frontends/tensorflow/include/openvino/frontend/tensorflow/frontend.hpp index 8ba6326b493236..d0bd54b07f8cc8 100644 --- a/src/frontends/tensorflow/include/openvino/frontend/tensorflow/frontend.hpp +++ b/src/frontends/tensorflow/include/openvino/frontend/tensorflow/frontend.hpp @@ -21,7 +21,7 @@ namespace ov { namespace frontend { namespace tensorflow { -class TENSORFLOW_API FrontEnd : public ov::frontend::FrontEnd { +class TENSORFLOW_FRONTEND_API FrontEnd : public ov::frontend::FrontEnd { public: using Ptr = std::shared_ptr; FrontEnd(); diff --git a/src/frontends/tensorflow/include/openvino/frontend/tensorflow/graph_iterator.hpp b/src/frontends/tensorflow/include/openvino/frontend/tensorflow/graph_iterator.hpp new file mode 100644 index 00000000000000..079b423dab3f28 --- /dev/null +++ b/src/frontends/tensorflow/include/openvino/frontend/tensorflow/graph_iterator.hpp @@ -0,0 +1,17 @@ +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/frontend/graph_iterator.hpp" + +namespace ov { +namespace frontend { +namespace tensorflow { + +using ov::frontend::GraphIterator; + +} // namespace tensorflow +} // namespace frontend +} // namespace ov diff --git a/src/frontends/tensorflow/include/openvino/frontend/tensorflow/node_context.hpp b/src/frontends/tensorflow/include/openvino/frontend/tensorflow/node_context.hpp index 84ab6dac099916..1f475aee50831f 100644 --- a/src/frontends/tensorflow/include/openvino/frontend/tensorflow/node_context.hpp +++ b/src/frontends/tensorflow/include/openvino/frontend/tensorflow/node_context.hpp @@ -19,7 +19,7 @@ class TranslateSession; /// Keep necessary data for a single node in the original FW graph to facilitate /// conversion process in the rules code. 
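Note on the new tensorflow graph_iterator.hpp added a few hunks above: it does nothing more than re-export the common ov::frontend::GraphIterator into the ov::frontend::tensorflow namespace, so client code that still spells the old qualified name keeps compiling. A minimal, self-contained sketch of that aliasing pattern, with stand-in names rather than the real OpenVINO types:

// Illustration only: a type that moved to a common namespace stays reachable
// under its old, frontend-specific name via a using-declaration.
namespace common {
struct GraphIterator {
    int node_index = 0;
    void next() { ++node_index; }  // advance to the next node
};
}  // namespace common

namespace tensorflow_like {
using common::GraphIterator;  // re-export, exactly what the new header does
}  // namespace tensorflow_like

int main() {
    tensorflow_like::GraphIterator it;  // the old qualified name keeps working
    it.next();
    return it.node_index == 1 ? 0 : 1;
}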
-class TENSORFLOW_API NodeContext : public ov::frontend::NodeContext { +class TENSORFLOW_FRONTEND_API NodeContext : public ov::frontend::NodeContext { public: using Ptr = std::shared_ptr; NodeContext(const std::shared_ptr& decoder, diff --git a/src/frontends/tensorflow/include/openvino/frontend/tensorflow/special_types.hpp b/src/frontends/tensorflow/include/openvino/frontend/tensorflow/special_types.hpp index c72ad868f2dfad..4a5f9384ed7604 100644 --- a/src/frontends/tensorflow/include/openvino/frontend/tensorflow/special_types.hpp +++ b/src/frontends/tensorflow/include/openvino/frontend/tensorflow/special_types.hpp @@ -11,7 +11,7 @@ namespace frontend { namespace tensorflow { // Type which is used to express empty list without any specific type -class TENSORFLOW_API EmptyList {}; +class TENSORFLOW_FRONTEND_API EmptyList {}; } // namespace tensorflow } // namespace frontend diff --git a/src/frontends/tensorflow/include/openvino/frontend/tensorflow/visibility.hpp b/src/frontends/tensorflow/include/openvino/frontend/tensorflow/visibility.hpp index 72359026d76d08..9901169bf3ec74 100644 --- a/src/frontends/tensorflow/include/openvino/frontend/tensorflow/visibility.hpp +++ b/src/frontends/tensorflow/include/openvino/frontend/tensorflow/visibility.hpp @@ -7,14 +7,14 @@ #include "openvino/frontend/exception.hpp" #ifdef OPENVINO_STATIC_LIBRARY -# define TENSORFLOW_API -# define TENSORFLOW_C_API +# define TENSORFLOW_FRONTEND_API +# define TENSORFLOW_FRONTEND_C_API #else # ifdef openvino_tensorflow_frontend_EXPORTS -# define TENSORFLOW_API OPENVINO_CORE_EXPORTS -# define TENSORFLOW_C_API OPENVINO_EXTERN_C OPENVINO_CORE_EXPORTS +# define TENSORFLOW_FRONTEND_API OPENVINO_CORE_EXPORTS +# define TENSORFLOW_FRONTEND_C_API OPENVINO_EXTERN_C OPENVINO_CORE_EXPORTS # else -# define TENSORFLOW_API OPENVINO_CORE_IMPORTS -# define TENSORFLOW_C_API OPENVINO_EXTERN_C OPENVINO_CORE_IMPORTS +# define TENSORFLOW_FRONTEND_API OPENVINO_CORE_IMPORTS +# define TENSORFLOW_FRONTEND_C_API OPENVINO_EXTERN_C OPENVINO_CORE_IMPORTS # endif // openvino_tensorflow_frontend_EXPORTS #endif // OPENVINO_STATIC_LIBRARY diff --git a/src/frontends/tensorflow/src/tensorflow.cpp b/src/frontends/tensorflow/src/tensorflow.cpp index 9a3ab3b242828e..7c9e2f727b6aa7 100644 --- a/src/frontends/tensorflow/src/tensorflow.cpp +++ b/src/frontends/tensorflow/src/tensorflow.cpp @@ -6,11 +6,11 @@ #include "openvino/frontend/tensorflow/frontend.hpp" #include "openvino/frontend/tensorflow/visibility.hpp" -TENSORFLOW_C_API ov::frontend::FrontEndVersion get_api_version() { +TENSORFLOW_FRONTEND_C_API ov::frontend::FrontEndVersion get_api_version() { return OV_FRONTEND_API_VERSION; } -TENSORFLOW_C_API void* get_front_end_data() { +TENSORFLOW_FRONTEND_C_API void* get_front_end_data() { auto res = new ov::frontend::FrontEndPluginInfo(); res->m_name = "tf"; res->m_creator = []() { diff --git a/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/decoder.hpp b/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/decoder.hpp index 49d6f7d576ecf8..424cec1f4d773b 100644 --- a/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/decoder.hpp +++ b/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/decoder.hpp @@ -14,7 +14,7 @@ namespace ov { namespace frontend { namespace tensorflow_lite { -struct TENSORFLOW_LITE_API TensorMetaInfo { +struct TENSORFLOW_LITE_FRONTEND_API TensorMetaInfo { std::shared_ptr m_quantization_info; std::shared_ptr m_sparsity_info; ov::PartialShape m_partial_shape; 
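The rename from TENSORFLOW_LITE_API to TENSORFLOW_LITE_FRONTEND_API here (and the matching PyTorch and TensorFlow renames earlier in this diff) touches only the export macro, not the exported classes themselves. The idea behind these visibility headers is the usual static/shared-library split; a simplified, self-contained sketch with illustrative macro and class names, not the actual OpenVINO definitions:

// When building a static library the macro expands to nothing; when building
// the shared frontend library it marks symbols for export, otherwise for import.
#if defined(MY_STATIC_LIBRARY)
#  define MY_FRONTEND_API
#elif defined(my_frontend_EXPORTS)  // set by the build system for the DLL/SO itself
#  if defined(_WIN32)
#    define MY_FRONTEND_API __declspec(dllexport)
#  else
#    define MY_FRONTEND_API __attribute__((visibility("default")))
#  endif
#else  // consumers of the shared library
#  if defined(_WIN32)
#    define MY_FRONTEND_API __declspec(dllimport)
#  else
#    define MY_FRONTEND_API
#  endif
#endif

class MY_FRONTEND_API MyFrontEnd {  // the class is exported/imported as a whole
public:
    int version() const { return 1; }
};

int main() {
    MyFrontEnd fe;
    return fe.version() == 1 ? 0 : 1;
}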
@@ -23,11 +23,14 @@ struct TENSORFLOW_LITE_API TensorMetaInfo { std::string m_tensor_name; }; -class TENSORFLOW_LITE_API DecoderBase : public ov::frontend::DecoderBase {}; +class TENSORFLOW_LITE_FRONTEND_API DecoderBase : public ov::frontend::DecoderBase { +public: + ~DecoderBase() override; +}; // DecoderBaseOperation corresponds to operation node to retrieve its attributes and information about input and output // tensors -class TENSORFLOW_LITE_API DecoderBaseOperation : public ov::frontend::tensorflow_lite::DecoderBase { +class TENSORFLOW_LITE_FRONTEND_API DecoderBaseOperation : public ov::frontend::tensorflow_lite::DecoderBase { public: /// \brief Get input tensor name by index /// Operation nodes are connected between each other by tensors. @@ -67,11 +70,13 @@ class TENSORFLOW_LITE_API DecoderBaseOperation : public ov::frontend::tensorflow /// \brief Get a number of outputs virtual size_t get_output_size() const = 0; + + ~DecoderBaseOperation() override; }; // DecoderBaseTensor corresponds to tensor node to retrieve information about type, shapem quantization and sparsity // information -class TENSORFLOW_LITE_API DecoderBaseTensor : public ov::frontend::tensorflow_lite::DecoderBase { +class TENSORFLOW_LITE_FRONTEND_API DecoderBaseTensor : public ov::frontend::tensorflow_lite::DecoderBase { public: /// \brief Get tensor info virtual TensorMetaInfo get_tensor_info() const = 0; @@ -87,6 +92,8 @@ class TENSORFLOW_LITE_API DecoderBaseTensor : public ov::frontend::tensorflow_li /// it must be from 0 to m-1, where m - number of outputs in the model /// if it is not input, returns -1 virtual int64_t get_output_idx() const = 0; + + ~DecoderBaseTensor() override; }; } // namespace tensorflow_lite diff --git a/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/extension/conversion.hpp b/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/extension/conversion.hpp index 4121f10d98968d..1f87626918277c 100644 --- a/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/extension/conversion.hpp +++ b/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/extension/conversion.hpp @@ -12,7 +12,7 @@ namespace ov { namespace frontend { namespace tensorflow_lite { -class TENSORFLOW_LITE_API ConversionExtension : public ConversionExtensionBase { +class TENSORFLOW_LITE_FRONTEND_API ConversionExtension : public ConversionExtensionBase { public: using Ptr = std::shared_ptr; diff --git a/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/frontend.hpp b/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/frontend.hpp index 4487509cd3864b..e2ec2943296a57 100644 --- a/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/frontend.hpp +++ b/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/frontend.hpp @@ -22,7 +22,7 @@ namespace tensorflow_lite { using CreatorFunction = std::function; using TranslatorDictionaryType = std::map; -class TENSORFLOW_LITE_API FrontEnd : public ov::frontend::FrontEnd { +class TENSORFLOW_LITE_FRONTEND_API FrontEnd : public ov::frontend::FrontEnd { public: FrontEnd(); /// \brief Completely convert the model diff --git a/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/graph_iterator.hpp b/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/graph_iterator.hpp index 35eb43af74c2d9..0b76336d822fe8 100644 --- 
a/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/graph_iterator.hpp +++ b/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/graph_iterator.hpp @@ -24,7 +24,7 @@ namespace tensorflow_lite { /// DecoderBaseOperation (for op 1), ..., DecoderBaseOperation (for op k), /// where n - number of inputs in the model, m - number of outputs in the model k - number of operation nodes. /// NOTE: constants are ignored and no decoder object is returned for constant. -class TENSORFLOW_LITE_API GraphIterator : ::ov::RuntimeAttribute { +class TENSORFLOW_LITE_FRONTEND_API GraphIterator : ::ov::RuntimeAttribute { public: using Ptr = std::shared_ptr; diff --git a/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/node_context.hpp b/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/node_context.hpp index faa496353c662c..77b1b1b02c0639 100644 --- a/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/node_context.hpp +++ b/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/node_context.hpp @@ -16,7 +16,7 @@ using SubGraphFuncs = std::vector()>>; /// Keep necessary data for a single node in the original FW graph to facilitate /// conversion process in the rules code. -class TENSORFLOW_LITE_API NodeContext : public ov::frontend::NodeContext { +class TENSORFLOW_LITE_FRONTEND_API NodeContext : public ov::frontend::NodeContext { public: using Ptr = std::shared_ptr; NodeContext(const std::shared_ptr& decoder, const OutputVector& inputs) diff --git a/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/quantization_info.hpp b/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/quantization_info.hpp index d9232b8d580a53..835aa242c3673d 100644 --- a/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/quantization_info.hpp +++ b/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/quantization_info.hpp @@ -15,13 +15,11 @@ namespace ov { namespace frontend { namespace tensorflow_lite { -class TENSORFLOW_LITE_API QuantizationInfo : public ov::RuntimeAttribute { +class TENSORFLOW_LITE_FRONTEND_API QuantizationInfo : public ov::RuntimeAttribute { public: OPENVINO_RTTI("QuantizationInfo", "0", RuntimeAttribute); QuantizationInfo() = default; - explicit QuantizationInfo(const std::vector& scale, - const std::vector& zero_point, - const int64_t& axis) + QuantizationInfo(const std::vector& scale, const std::vector& zero_point, const int64_t& axis) : m_scale(scale), m_zero_point(zero_point), m_axis(axis) {} diff --git a/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/sparsity_info.hpp b/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/sparsity_info.hpp index 440d6a81059c9d..bfcd1d9fc08f41 100644 --- a/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/sparsity_info.hpp +++ b/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/sparsity_info.hpp @@ -16,7 +16,7 @@ namespace ov { namespace frontend { namespace tensorflow_lite { -class TENSORFLOW_LITE_API SparsityInfo : public ov::RuntimeAttribute { +class TENSORFLOW_LITE_FRONTEND_API SparsityInfo : public ov::RuntimeAttribute { public: OPENVINO_RTTI("SparsityInfo", "0", RuntimeAttribute); @@ -28,13 +28,13 @@ class TENSORFLOW_LITE_API SparsityInfo : public ov::RuntimeAttribute { }; SparsityInfo() = default; - explicit SparsityInfo(const std::vector& shape, - const std::vector& traversal_order, - 
const std::vector& block_map, - const std::vector& dim_format, - const std::vector& data_desc, - const ov::element::Type target_type, - const uint8_t* values) + SparsityInfo(const std::vector& shape, + const std::vector& traversal_order, + const std::vector& block_map, + const std::vector& dim_format, + const std::vector& data_desc, + const ov::element::Type target_type, + const uint8_t* values) : m_shape(shape), m_traversal_order(traversal_order), m_block_map(block_map), diff --git a/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/visibility.hpp b/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/visibility.hpp index 381936419fb3aa..116c593ebfe9c9 100644 --- a/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/visibility.hpp +++ b/src/frontends/tensorflow_lite/include/openvino/frontend/tensorflow_lite/visibility.hpp @@ -7,14 +7,14 @@ #include "openvino/frontend/exception.hpp" #ifdef OPENVINO_STATIC_LIBRARY -# define TENSORFLOW_LITE_API -# define TENSORFLOW_LITE_C_API +# define TENSORFLOW_LITE_FRONTEND_API +# define TENSORFLOW_LITE_FRONTEND_C_API #else # ifdef openvino_tensorflow_lite_frontend_EXPORTS -# define TENSORFLOW_LITE_API OPENVINO_CORE_EXPORTS -# define TENSORFLOW_LITE_C_API OPENVINO_EXTERN_C OPENVINO_CORE_EXPORTS +# define TENSORFLOW_LITE_FRONTEND_API OPENVINO_CORE_EXPORTS +# define TENSORFLOW_LITE_FRONTEND_C_API OPENVINO_EXTERN_C OPENVINO_CORE_EXPORTS # else -# define TENSORFLOW_LITE_API OPENVINO_CORE_IMPORTS -# define TENSORFLOW_LITE_C_API OPENVINO_EXTERN_C OPENVINO_CORE_IMPORTS +# define TENSORFLOW_LITE_FRONTEND_API OPENVINO_CORE_IMPORTS +# define TENSORFLOW_LITE_FRONTEND_C_API OPENVINO_EXTERN_C OPENVINO_CORE_IMPORTS # endif // openvino_tensorflow_lite_frontend_EXPORTS #endif // OPENVINO_STATIC_LIBRARY diff --git a/src/frontends/tensorflow_lite/src/decoder.cpp b/src/frontends/tensorflow_lite/src/decoder.cpp new file mode 100644 index 00000000000000..e9ce02275e5961 --- /dev/null +++ b/src/frontends/tensorflow_lite/src/decoder.cpp @@ -0,0 +1,13 @@ +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/frontend/tensorflow_lite/decoder.hpp" + +using namespace ov::frontend::tensorflow_lite; + +DecoderBase::~DecoderBase() = default; + +DecoderBaseOperation::~DecoderBaseOperation() = default; + +DecoderBaseTensor::~DecoderBaseTensor() = default; diff --git a/src/frontends/tensorflow_lite/src/tensorflow_lite.cpp b/src/frontends/tensorflow_lite/src/tensorflow_lite.cpp index 2008a5bdc712c6..4d0b247f8708ef 100644 --- a/src/frontends/tensorflow_lite/src/tensorflow_lite.cpp +++ b/src/frontends/tensorflow_lite/src/tensorflow_lite.cpp @@ -6,11 +6,11 @@ #include "openvino/frontend/tensorflow_lite/frontend.hpp" #include "openvino/frontend/tensorflow_lite/visibility.hpp" -TENSORFLOW_LITE_C_API ov::frontend::FrontEndVersion get_api_version() { +TENSORFLOW_LITE_FRONTEND_C_API ov::frontend::FrontEndVersion get_api_version() { return OV_FRONTEND_API_VERSION; } -TENSORFLOW_LITE_C_API void* get_front_end_data() { +TENSORFLOW_LITE_FRONTEND_C_API void* get_front_end_data() { auto res = new ov::frontend::FrontEndPluginInfo(); res->m_name = "tflite"; res->m_creator = []() { diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp index 113fa73e979b1b..e725f3658dfda2 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp +++ 
b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp @@ -150,7 +150,6 @@ class ExecutionConfig { void apply_performance_hints(const cldnn::device_info& info); void apply_priority_hints(const cldnn::device_info& info); void apply_debug_options(const cldnn::device_info& info); - void update_specific_default_properties(const cldnn::device_info& info); template void apply_rt_info_property(const ov::Property& property, const ov::RTMap& rt_info) { @@ -169,7 +168,7 @@ class ExecutionConfig { std::map supported_properties; std::map property_validators; - bool specific_default_properties_is_set = false; + bool finalized = false; }; } // namespace intel_gpu diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/paged_attention.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/paged_attention.cpp index 1bcd4b0bb10fe2..0f285f57e9d18f 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/paged_attention.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/paged_attention.cpp @@ -346,7 +346,7 @@ struct paged_attention_impl : multi_stage_primitive { std::vector res_events; std::vector dep_events = events; - if (has_rotated_blocks) { + if (has_rotated_blocks && !_kernels_data[Stage::KV_CACHE_ROTATE].kernels[0].skip_execution) { execute_stage(dep_events, instance, res_events, Stage::KV_CACHE_ROTATE, is_mixed_mode); dep_events = res_events; } diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp index 5072740240e2a5..a8224c2e363f62 100644 --- a/src/plugins/intel_gpu/src/graph/program.cpp +++ b/src/plugins/intel_gpu/src/graph/program.cpp @@ -530,6 +530,7 @@ void program::init_graph() { node->get_output_layouts(); if (node->is_type()) { _config.set_property(ov::intel_gpu::use_onednn(true)); + _config.set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); } } // Perform initial shape_of subgraphs markup diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp index f98ffd0128bf6a..a02125d3b6cc18 100644 --- a/src/plugins/intel_gpu/src/plugin/plugin.cpp +++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp @@ -28,6 +28,7 @@ #include "openvino/core/deprecated.hpp" #include "openvino/op/gather.hpp" #include "openvino/op/concat.hpp" +#include "openvino/op/paged_attention.hpp" #include "openvino/pass/manager.hpp" #include "openvino/pass/pattern/op/wrap_type.hpp" #include "openvino/pass/pattern/op/or.hpp" @@ -84,7 +85,8 @@ const auto is_llm = [](const std::shared_ptr& model) -> bool { auto kvcache_matcher = std::make_shared(present, "KVCacheMatcher"); for (auto& op : model->get_ordered_ops()) { - if (kvcache_matcher->match(op)) { + if (kvcache_matcher->match(op) || + ov::is_type(op)) { return true; } } @@ -238,10 +240,12 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< auto context_impl = get_context_impl(context); auto device_id = ov::DeviceIDParser{context_impl->get_device_name()}.get_device_id(); - OPENVINO_ASSERT(m_configs_map.find(device_id) != m_configs_map.end(), "[GPU] LoadExeNetworkImpl: Couldn't find config for GPU with id ", device_id); + OPENVINO_ASSERT(m_configs_map.find(device_id) != m_configs_map.end(), "[GPU] compile_model: Couldn't find config for GPU with id ", device_id); ExecutionConfig config = m_configs_map.at(device_id); config.set_user_property(orig_config); + if (model->has_rt_info("runtime_options")) + config.apply_rt_info(context_impl->get_engine().get_device_info(), model->get_rt_info("runtime_options"), is_llm(model)); 
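With the plugin.cpp hunk above, compile_model() now consults hints stored under the model's "runtime_options" rt_info (and treats any model containing a PagedAttention op as an LLM) before the configuration is finalized. A hedged sketch of attaching such a hint to a model follows; only the property name comes from this diff, while the exact key/value encoding the plugin expects is an assumption:

#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    auto model = core.read_model("model.xml");  // placeholder path

    // Store a per-model hint; the GPU plugin reads it back through
    // model->get_rt_info("runtime_options") in compile_model().
    model->set_rt_info(ov::element::f16, "runtime_options",
                       ov::hint::kv_cache_precision.name());

    auto compiled = core.compile_model(model, "GPU");
    return 0;
}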
config.apply_user_properties(context_impl->get_engine().get_device_info()); set_cache_info(model, config); diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index 89edba4a69eee1..51fadb49c286e7 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -5,6 +5,7 @@ #include "intel_gpu/runtime/execution_config.hpp" #include "intel_gpu/runtime/debug_configuration.hpp" #include "openvino/runtime/internal_properties.hpp" +#include "openvino/runtime/properties.hpp" #include @@ -59,7 +60,7 @@ void ExecutionConfig::set_default() { std::make_tuple(ov::cache_mode, ov::CacheMode::OPTIMIZE_SPEED), std::make_tuple(ov::cache_encryption_callbacks, EncryptionCallbacks{}), std::make_tuple(ov::hint::dynamic_quantization_group_size, 0), - std::make_tuple(ov::hint::kv_cache_precision, ov::element::undefined), + std::make_tuple(ov::hint::kv_cache_precision, ov::element::f16), std::make_tuple(ov::intel_gpu::hint::enable_kernels_reuse, false), std::make_tuple(ov::weights_path, ""), std::make_tuple(ov::hint::activations_scale_factor, -1.f), @@ -230,26 +231,9 @@ void ExecutionConfig::apply_hints(const cldnn::device_info& info) { apply_debug_options(info); } -void ExecutionConfig::update_specific_default_properties(const cldnn::device_info& info) { - // These default properties should be set once. - if (specific_default_properties_is_set) - return; - specific_default_properties_is_set = true; - - // Enable KV-cache compression by default for non-systolic platforms MFDNN-11755 - if (get_property(ov::hint::kv_cache_precision) == ov::element::undefined && !info.supports_immad) { - set_property(ov::hint::kv_cache_precision(ov::element::i8)); - } - - // Enable dynamic quantization by default for non-systolic platforms - if (get_property(ov::hint::dynamic_quantization_group_size) == 0 && !info.supports_immad) { - set_property(ov::hint::dynamic_quantization_group_size(32)); - } -} - void ExecutionConfig::apply_user_properties(const cldnn::device_info& info) { - // Update specific default properties, call once before internal_properties updated. - update_specific_default_properties(info); + if (finalized) + return; // Copy internal properties before applying hints to ensure that // a property set by hint won't be overriden by a value in user config. 
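As the next hunk shows, these defaults are now resolved once inside apply_user_properties() and guarded by is_set_by_user(), so an explicit user setting still wins over the i8 KV-cache and group-size-32 defaults chosen for non-systolic GPUs. A hedged user-level sketch (device name and values are illustrative):

#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    auto model = core.read_model("model.xml");  // placeholder path

    // An explicit user property is seen by is_set_by_user(), so the plugin
    // defaults (i8 KV cache, dynamic-quantization group size 32 on
    // non-systolic GPUs) are not applied on top of it.
    auto compiled = core.compile_model(model, "GPU",
        ov::hint::kv_cache_precision(ov::element::f16),
        ov::hint::dynamic_quantization_group_size(0));  // explicit 0 keeps the 32 default from kicking in
    return 0;
}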
@@ -280,6 +264,23 @@ void ExecutionConfig::apply_user_properties(const cldnn::device_info& info) { } } + if (!is_set_by_user(ov::hint::kv_cache_precision) || get_property(ov::hint::kv_cache_precision) == ov::element::undefined) { + if (info.supports_immad) { // MFDNN-11755 + set_property(ov::hint::kv_cache_precision(get_property(ov::hint::inference_precision))); + } else { + // Enable KV-cache compression by default for non-systolic platforms only + set_property(ov::hint::kv_cache_precision(ov::element::i8)); + } + } + + // Enable dynamic quantization by default for non-systolic platforms + if (!is_set_by_user(ov::hint::dynamic_quantization_group_size) && + get_property(ov::hint::dynamic_quantization_group_size) == 0 && !info.supports_immad) { + set_property(ov::hint::dynamic_quantization_group_size(32)); + } + + finalized = true; + user_properties.clear(); } @@ -287,7 +288,7 @@ void ExecutionConfig::apply_rt_info(const cldnn::device_info& info, const ov::RT if (!info.supports_immad) { apply_rt_info_property(ov::hint::kv_cache_precision, rt_info); } - if (!info.supports_immad || !is_llm) + if (!is_llm) apply_rt_info_property(ov::hint::activations_scale_factor, rt_info); apply_rt_info_property(ov::hint::dynamic_quantization_group_size, rt_info); } diff --git a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/kv_cache.cpp b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/kv_cache.cpp index 4945cc8d717be3..32adbeeba273f3 100644 --- a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/kv_cache.cpp +++ b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/kv_cache.cpp @@ -539,7 +539,7 @@ class KVCacheIssueTests: public ::testing::Test { auto core = ov::test::utils::PluginCache::get().core(); ov::AnyMap properties = { - ov::hint::kv_cache_precision(ov::element::undefined) + ov::hint::kv_cache_precision(ov::element::f16) }; const size_t n_batch = 1; diff --git a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/kv_cache_sdpa.cpp b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/kv_cache_sdpa.cpp index 7bb4a7385bcdc4..71eeba9f6673a5 100644 --- a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/kv_cache_sdpa.cpp +++ b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/kv_cache_sdpa.cpp @@ -50,7 +50,7 @@ class SDPAWithKVCacheTest : public ::testing::Test, public ::testing::WithParamI if (p.compressed) { properties.emplace(ov::hint::kv_cache_precision(ov::element::i8)); } else { - properties.emplace(ov::hint::kv_cache_precision(ov::element::undefined)); + properties.emplace(ov::hint::kv_cache_precision(ov::element::f16)); } const size_t n_heads = 16; diff --git a/src/plugins/intel_npu/src/backend/include/zero_remote_tensor.hpp b/src/plugins/intel_npu/src/backend/include/zero_remote_tensor.hpp index 60578f3de64ef0..0e8ed4529a94d3 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_remote_tensor.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_remote_tensor.hpp @@ -26,6 +26,7 @@ class ZeroRemoteTensor final : public RemoteTensor { void* mem = nullptr); void* get_original_memory() const; + ze_context_handle_t get_zero_context_handle() const; ~ZeroRemoteTensor() override; diff --git a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp index 034f69f63e4158..aee73a2b73fa31 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp @@ -264,8 
+264,7 @@ void ZeroInferRequest::set_remote_tensor_data(const std::shared_ptr( - zeroUtils::extract_object(tensor->get_context()->get_property(), ov::intel_npu::l0_context)); + auto l0_context = tensor->get_zero_context_handle(); if (_initStructs->getContext() != l0_context) { OPENVINO_THROW("Using different context for creating the tensor is not supported"); } @@ -276,7 +275,7 @@ void ZeroInferRequest::set_remote_tensor_data(const std::shared_ptrget_properties(), ov::intel_npu::mem_handle); + auto data = tensor->get_original_memory(); OPENVINO_ASSERT(data, "Empty buffer"); OV_ITT_TASK_NEXT(ZERO_SET_REMOTE_TENSOR, "updateCommandList"); @@ -388,7 +387,7 @@ void ZeroInferRequest::set_tensors(const ov::Output& port, } else { _logger.debug("ZeroInferRequest::set_tensors - remote tensor is used"); - data = zeroUtils::extract_object(remoteTensor->get_properties(), ov::intel_npu::mem_handle); + data = remoteTensor->get_original_memory(); get_level_zero_input(foundPort.idx, i) = tensors.at(i)._ptr; } @@ -530,9 +529,7 @@ void ZeroInferRequest::update_states_if_memory_changed() { if (zeroState->zero_tensor_should_be_updated()) { auto remoteTensor = std::dynamic_pointer_cast(zeroState->get_state()._ptr); - void* userBuffer = !remoteTensor ? zeroState->get_state()->data() - : zeroUtils::extract_object(remoteTensor->get_properties(), - ov::intel_npu::mem_handle); + void* userBuffer = !remoteTensor ? zeroState->get_state()->data() : remoteTensor->get_original_memory(); _pipeline->updateCommandList(_graphInputDescriptors.at(zeroState->get_tensor_index()).idx, userBuffer, @@ -609,10 +606,8 @@ void ZeroInferRequest::infer_async() { auto userBatchRemoteTensor = std::dynamic_pointer_cast(userTensor.at(i)._ptr); - void* userBuffer = !userBatchRemoteTensor - ? userTensor.at(i)->data() - : zeroUtils::extract_object(userBatchRemoteTensor->get_properties(), - ov::intel_npu::mem_handle); + void* userBuffer = !userBatchRemoteTensor ? userTensor.at(i)->data() + : userBatchRemoteTensor->get_original_memory(); if (userBuffer != levelZeroBuffer) { if (userBuffer == nullptr || levelZeroBuffer == nullptr) { @@ -634,10 +629,8 @@ void ZeroInferRequest::infer_async() { for (size_t i = 0; i < userTensor.size(); i++) { auto userBatchRemoteTensor = std::dynamic_pointer_cast(userTensor.at(i)._ptr); - void* userBuffer = !userBatchRemoteTensor - ? userTensor.at(i)->data() - : zeroUtils::extract_object(userBatchRemoteTensor->get_properties(), - ov::intel_npu::mem_handle); + void* userBuffer = !userBatchRemoteTensor ? userTensor.at(i)->data() + : userBatchRemoteTensor->get_original_memory(); std::memcpy(static_cast(levelZeroBuffer) + (i * userTensor.at(i)->get_byte_size()), userBuffer, @@ -650,9 +643,8 @@ void ZeroInferRequest::infer_async() { } auto userRemoteTensor = std::dynamic_pointer_cast(userTensor.at(SINGLE_TENSOR)._ptr); - void* userBuffer = !userRemoteTensor ? userTensor.at(SINGLE_TENSOR)->data() - : zeroUtils::extract_object(userRemoteTensor->get_properties(), - ov::intel_npu::mem_handle); + void* userBuffer = + !userRemoteTensor ? userTensor.at(SINGLE_TENSOR)->data() : userRemoteTensor->get_original_memory(); const auto& levelZeroTensor = get_level_zero_input(inputIndex); if (!is_remote_tensor(levelZeroTensor)) { @@ -701,9 +693,7 @@ void ZeroInferRequest::get_result() { } auto userRemoteTensor = std::dynamic_pointer_cast(userTensor._ptr); - void* userBuffer = !userRemoteTensor ? 
userTensor->data() - : zeroUtils::extract_object(userRemoteTensor->get_properties(), - ov::intel_npu::mem_handle); + void* userBuffer = !userRemoteTensor ? userTensor->data() : userRemoteTensor->get_original_memory(); const std::shared_ptr& levelZeroTensor = _levelZeroOutputTensors.at(outputIndex); if (!is_remote_tensor(levelZeroTensor)) { diff --git a/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp b/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp index a01238a899e0dc..9f55897193aeeb 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp @@ -65,7 +65,7 @@ Pipeline::Pipeline(const Config& config, if (remote_tensor == nullptr) { data = input_tensors.at(io_index).at(i)->data(); } else { - data = zeroUtils::extract_object(remote_tensor->get_properties(), ov::intel_npu::mem_handle); + data = remote_tensor->get_original_memory(); } graph->set_argument_value(desc.idx, data); @@ -79,7 +79,7 @@ Pipeline::Pipeline(const Config& config, if (remote_tensor == nullptr) { data = input_tensors.at(io_index).at(0)->data(); } else { - data = zeroUtils::extract_object(remote_tensor->get_properties(), ov::intel_npu::mem_handle); + data = remote_tensor->get_original_memory(); } graph->set_argument_value( @@ -97,7 +97,7 @@ Pipeline::Pipeline(const Config& config, if (remote_tensor == nullptr) { data = output_tensors.at(io_index)->data(); } else { - data = zeroUtils::extract_object(remote_tensor->get_properties(), ov::intel_npu::mem_handle); + data = remote_tensor->get_original_memory(); } graph->set_argument_value( diff --git a/src/plugins/intel_npu/src/backend/src/zero_remote_tensor.cpp b/src/plugins/intel_npu/src/backend/src/zero_remote_tensor.cpp index c218aa14dd10a1..999cfe8114086d 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_remote_tensor.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_remote_tensor.cpp @@ -172,4 +172,8 @@ void* ZeroRemoteTensor::get_original_memory() const { return _data; } +ze_context_handle_t ZeroRemoteTensor::get_zero_context_handle() const { + return _init_structs->getContext(); +} + } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/backend/src/zero_variable_state.cpp b/src/plugins/intel_npu/src/backend/src/zero_variable_state.cpp index 19cabfb4246e5d..442ae3fe9b2f03 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_variable_state.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_variable_state.cpp @@ -46,9 +46,7 @@ void ZeroVariableState::set_state(const ov::SoPtr& new_state) { void ZeroVariableState::reset() { auto remoteTensor = std::dynamic_pointer_cast(m_state._ptr); - void* userBuffer = !remoteTensor - ? m_state->data() - : zeroUtils::extract_object(remoteTensor->get_properties(), ov::intel_npu::mem_handle); + void* userBuffer = !remoteTensor ? 
m_state->data() : remoteTensor->get_original_memory(); std::memset(userBuffer, 0, m_state->get_byte_size()); } diff --git a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_utils.hpp b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_utils.hpp index 0c2367b680851e..e68eb0200a09ce 100644 --- a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_utils.hpp +++ b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_utils.hpp @@ -277,17 +277,6 @@ static inline std::string getLatestBuildError(ze_graph_dditable_ext_curr_t& _gra } } -template -static inline Type extract_object(const ov::AnyMap& params, const ov::Property& p) { - auto itrHandle = params.find(p.name()); - ov::Any res = nullptr; - if (itrHandle == params.end()) { - OPENVINO_THROW("No parameter ", p.name(), " found in parameters map"); - } - res = itrHandle->second; - return res.as(); -} - static inline bool memory_was_allocated_in_the_same_l0_context(ze_context_handle_t hContext, const void* ptr) { ze_memory_allocation_properties_t desc = {}; desc.stype = ZE_STRUCTURE_TYPE_MEMORY_ALLOCATION_PROPERTIES;
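The deleted extract_object() helper above fetched typed values out of an ov::AnyMap by property name and cast them at run time; the Level Zero backend now calls typed accessors such as ZeroRemoteTensor::get_original_memory() and get_zero_context_handle() directly. A self-contained sketch of that trade-off, using stand-in types rather than the real NPU classes:

#include <any>
#include <map>
#include <stdexcept>
#include <string>

// Old style: fetch a value out of a generic property map and cast it.
template <typename Type>
Type extract_object(const std::map<std::string, std::any>& params, const std::string& name) {
    auto it = params.find(name);
    if (it == params.end())
        throw std::runtime_error("No parameter " + name + " found in parameters map");
    return std::any_cast<Type>(it->second);  // can throw on a type mismatch
}

// New style: the tensor exposes what callers actually need, typed and lookup-free.
class RemoteTensorStandIn {
public:
    explicit RemoteTensorStandIn(void* data) : m_data(data) {}
    void* get_original_memory() const { return m_data; }  // mirrors ZeroRemoteTensor
private:
    void* m_data;
};

int main() {
    int buffer = 42;
    RemoteTensorStandIn tensor(&buffer);
    void* via_accessor = tensor.get_original_memory();  // direct, no Any casting

    std::map<std::string, std::any> params{{"mem_handle", static_cast<void*>(&buffer)}};
    void* via_map = extract_object<void*>(params, "mem_handle");  // old lookup path

    return (via_accessor == via_map) ? 0 : 1;
}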