From dc1d9675cbfded2a3b3287081a225992bef6f23e Mon Sep 17 00:00:00 2001 From: Bogdan Pereanu Date: Thu, 23 Jan 2025 13:57:24 +0200 Subject: [PATCH] [NPU] Get remote tensors info from methods and not through properties to avoid CPU overhead (#28614) ### Details: - *Get remote tensors info from methods and not through properties to avoid CPU overhead* ### Tickets: - *CVS-160977* Signed-off-by: Bogdan Pereanu --- .../backend/include/zero_remote_tensor.hpp | 1 + .../src/backend/src/zero_infer_request.cpp | 32 +++++++------------ .../src/backend/src/zero_pipeline.cpp | 6 ++-- .../src/backend/src/zero_remote_tensor.cpp | 4 +++ .../src/backend/src/zero_variable_state.cpp | 4 +-- .../intel_npu/utils/zero/zero_utils.hpp | 11 ------- 6 files changed, 20 insertions(+), 38 deletions(-) diff --git a/src/plugins/intel_npu/src/backend/include/zero_remote_tensor.hpp b/src/plugins/intel_npu/src/backend/include/zero_remote_tensor.hpp index 60578f3de64ef0..0e8ed4529a94d3 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_remote_tensor.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_remote_tensor.hpp @@ -26,6 +26,7 @@ class ZeroRemoteTensor final : public RemoteTensor { void* mem = nullptr); void* get_original_memory() const; + ze_context_handle_t get_zero_context_handle() const; ~ZeroRemoteTensor() override; diff --git a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp index 034f69f63e4158..aee73a2b73fa31 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp @@ -264,8 +264,7 @@ void ZeroInferRequest::set_remote_tensor_data(const std::shared_ptr( - zeroUtils::extract_object(tensor->get_context()->get_property(), ov::intel_npu::l0_context)); + auto l0_context = tensor->get_zero_context_handle(); if (_initStructs->getContext() != l0_context) { OPENVINO_THROW("Using different context for creating the tensor is not supported"); } @@ -276,7 +275,7 @@ void ZeroInferRequest::set_remote_tensor_data(const std::shared_ptrget_properties(), ov::intel_npu::mem_handle); + auto data = tensor->get_original_memory(); OPENVINO_ASSERT(data, "Empty buffer"); OV_ITT_TASK_NEXT(ZERO_SET_REMOTE_TENSOR, "updateCommandList"); @@ -388,7 +387,7 @@ void ZeroInferRequest::set_tensors(const ov::Output& port, } else { _logger.debug("ZeroInferRequest::set_tensors - remote tensor is used"); - data = zeroUtils::extract_object(remoteTensor->get_properties(), ov::intel_npu::mem_handle); + data = remoteTensor->get_original_memory(); get_level_zero_input(foundPort.idx, i) = tensors.at(i)._ptr; } @@ -530,9 +529,7 @@ void ZeroInferRequest::update_states_if_memory_changed() { if (zeroState->zero_tensor_should_be_updated()) { auto remoteTensor = std::dynamic_pointer_cast(zeroState->get_state()._ptr); - void* userBuffer = !remoteTensor ? zeroState->get_state()->data() - : zeroUtils::extract_object(remoteTensor->get_properties(), - ov::intel_npu::mem_handle); + void* userBuffer = !remoteTensor ? zeroState->get_state()->data() : remoteTensor->get_original_memory(); _pipeline->updateCommandList(_graphInputDescriptors.at(zeroState->get_tensor_index()).idx, userBuffer, @@ -609,10 +606,8 @@ void ZeroInferRequest::infer_async() { auto userBatchRemoteTensor = std::dynamic_pointer_cast(userTensor.at(i)._ptr); - void* userBuffer = !userBatchRemoteTensor - ? userTensor.at(i)->data() - : zeroUtils::extract_object(userBatchRemoteTensor->get_properties(), - ov::intel_npu::mem_handle); + void* userBuffer = !userBatchRemoteTensor ? userTensor.at(i)->data() + : userBatchRemoteTensor->get_original_memory(); if (userBuffer != levelZeroBuffer) { if (userBuffer == nullptr || levelZeroBuffer == nullptr) { @@ -634,10 +629,8 @@ void ZeroInferRequest::infer_async() { for (size_t i = 0; i < userTensor.size(); i++) { auto userBatchRemoteTensor = std::dynamic_pointer_cast(userTensor.at(i)._ptr); - void* userBuffer = !userBatchRemoteTensor - ? userTensor.at(i)->data() - : zeroUtils::extract_object(userBatchRemoteTensor->get_properties(), - ov::intel_npu::mem_handle); + void* userBuffer = !userBatchRemoteTensor ? userTensor.at(i)->data() + : userBatchRemoteTensor->get_original_memory(); std::memcpy(static_cast(levelZeroBuffer) + (i * userTensor.at(i)->get_byte_size()), userBuffer, @@ -650,9 +643,8 @@ void ZeroInferRequest::infer_async() { } auto userRemoteTensor = std::dynamic_pointer_cast(userTensor.at(SINGLE_TENSOR)._ptr); - void* userBuffer = !userRemoteTensor ? userTensor.at(SINGLE_TENSOR)->data() - : zeroUtils::extract_object(userRemoteTensor->get_properties(), - ov::intel_npu::mem_handle); + void* userBuffer = + !userRemoteTensor ? userTensor.at(SINGLE_TENSOR)->data() : userRemoteTensor->get_original_memory(); const auto& levelZeroTensor = get_level_zero_input(inputIndex); if (!is_remote_tensor(levelZeroTensor)) { @@ -701,9 +693,7 @@ void ZeroInferRequest::get_result() { } auto userRemoteTensor = std::dynamic_pointer_cast(userTensor._ptr); - void* userBuffer = !userRemoteTensor ? userTensor->data() - : zeroUtils::extract_object(userRemoteTensor->get_properties(), - ov::intel_npu::mem_handle); + void* userBuffer = !userRemoteTensor ? userTensor->data() : userRemoteTensor->get_original_memory(); const std::shared_ptr& levelZeroTensor = _levelZeroOutputTensors.at(outputIndex); if (!is_remote_tensor(levelZeroTensor)) { diff --git a/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp b/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp index a01238a899e0dc..9f55897193aeeb 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp @@ -65,7 +65,7 @@ Pipeline::Pipeline(const Config& config, if (remote_tensor == nullptr) { data = input_tensors.at(io_index).at(i)->data(); } else { - data = zeroUtils::extract_object(remote_tensor->get_properties(), ov::intel_npu::mem_handle); + data = remote_tensor->get_original_memory(); } graph->set_argument_value(desc.idx, data); @@ -79,7 +79,7 @@ Pipeline::Pipeline(const Config& config, if (remote_tensor == nullptr) { data = input_tensors.at(io_index).at(0)->data(); } else { - data = zeroUtils::extract_object(remote_tensor->get_properties(), ov::intel_npu::mem_handle); + data = remote_tensor->get_original_memory(); } graph->set_argument_value( @@ -97,7 +97,7 @@ Pipeline::Pipeline(const Config& config, if (remote_tensor == nullptr) { data = output_tensors.at(io_index)->data(); } else { - data = zeroUtils::extract_object(remote_tensor->get_properties(), ov::intel_npu::mem_handle); + data = remote_tensor->get_original_memory(); } graph->set_argument_value( diff --git a/src/plugins/intel_npu/src/backend/src/zero_remote_tensor.cpp b/src/plugins/intel_npu/src/backend/src/zero_remote_tensor.cpp index c218aa14dd10a1..999cfe8114086d 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_remote_tensor.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_remote_tensor.cpp @@ -172,4 +172,8 @@ void* ZeroRemoteTensor::get_original_memory() const { return _data; } +ze_context_handle_t ZeroRemoteTensor::get_zero_context_handle() const { + return _init_structs->getContext(); +} + } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/backend/src/zero_variable_state.cpp b/src/plugins/intel_npu/src/backend/src/zero_variable_state.cpp index 19cabfb4246e5d..442ae3fe9b2f03 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_variable_state.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_variable_state.cpp @@ -46,9 +46,7 @@ void ZeroVariableState::set_state(const ov::SoPtr& new_state) { void ZeroVariableState::reset() { auto remoteTensor = std::dynamic_pointer_cast(m_state._ptr); - void* userBuffer = !remoteTensor - ? m_state->data() - : zeroUtils::extract_object(remoteTensor->get_properties(), ov::intel_npu::mem_handle); + void* userBuffer = !remoteTensor ? m_state->data() : remoteTensor->get_original_memory(); std::memset(userBuffer, 0, m_state->get_byte_size()); } diff --git a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_utils.hpp b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_utils.hpp index 0c2367b680851e..e68eb0200a09ce 100644 --- a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_utils.hpp +++ b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_utils.hpp @@ -277,17 +277,6 @@ static inline std::string getLatestBuildError(ze_graph_dditable_ext_curr_t& _gra } } -template -static inline Type extract_object(const ov::AnyMap& params, const ov::Property& p) { - auto itrHandle = params.find(p.name()); - ov::Any res = nullptr; - if (itrHandle == params.end()) { - OPENVINO_THROW("No parameter ", p.name(), " found in parameters map"); - } - res = itrHandle->second; - return res.as(); -} - static inline bool memory_was_allocated_in_the_same_l0_context(ze_context_handle_t hContext, const void* ptr) { ze_memory_allocation_properties_t desc = {}; desc.stype = ZE_STRUCTURE_TYPE_MEMORY_ALLOCATION_PROPERTIES;