Skip to content

Commit

Permalink
[NPU] Get remote tensors info from methods and not through properties…
Browse files Browse the repository at this point in the history
… to avoid CPU overhead (#28614)

### Details:
 - *Retrieve remote tensor info via dedicated methods rather than through
properties, to avoid CPU overhead*

### Tickets:
 - *CVS-160977*

Signed-off-by: Bogdan Pereanu <[email protected]>
  • Loading branch information
pereanub authored Jan 23, 2025
1 parent b65a324 commit dc1d967
Show file tree
Hide file tree
Showing 6 changed files with 20 additions and 38 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ class ZeroRemoteTensor final : public RemoteTensor {
void* mem = nullptr);

void* get_original_memory() const;
ze_context_handle_t get_zero_context_handle() const;

~ZeroRemoteTensor() override;

Expand Down
32 changes: 11 additions & 21 deletions src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -264,8 +264,7 @@ void ZeroInferRequest::set_remote_tensor_data(const std::shared_ptr<ZeroRemoteTe
const bool isInput) {
OV_ITT_TASK_CHAIN(ZERO_SET_REMOTE_TENSOR, itt::domains::LevelZeroBackend, "set_tensor", "set_remote_tensor_data");

auto l0_context = reinterpret_cast<ze_context_handle_t>(
zeroUtils::extract_object(tensor->get_context()->get_property(), ov::intel_npu::l0_context));
auto l0_context = tensor->get_zero_context_handle();
if (_initStructs->getContext() != l0_context) {
OPENVINO_THROW("Using different context for creating the tensor is not supported");
}
Expand All @@ -276,7 +275,7 @@ void ZeroInferRequest::set_remote_tensor_data(const std::shared_ptr<ZeroRemoteTe
if (_pipelineIsCreated) {
_logger.debug("ZeroInferRequest::infer_async - update command list");

auto data = zeroUtils::extract_object(tensor->get_properties(), ov::intel_npu::mem_handle);
auto data = tensor->get_original_memory();
OPENVINO_ASSERT(data, "Empty buffer");

OV_ITT_TASK_NEXT(ZERO_SET_REMOTE_TENSOR, "updateCommandList");
Expand Down Expand Up @@ -388,7 +387,7 @@ void ZeroInferRequest::set_tensors(const ov::Output<const ov::Node>& port,
} else {
_logger.debug("ZeroInferRequest::set_tensors - remote tensor is used");

data = zeroUtils::extract_object(remoteTensor->get_properties(), ov::intel_npu::mem_handle);
data = remoteTensor->get_original_memory();

get_level_zero_input(foundPort.idx, i) = tensors.at(i)._ptr;
}
Expand Down Expand Up @@ -530,9 +529,7 @@ void ZeroInferRequest::update_states_if_memory_changed() {
if (zeroState->zero_tensor_should_be_updated()) {
auto remoteTensor = std::dynamic_pointer_cast<ZeroRemoteTensor>(zeroState->get_state()._ptr);

void* userBuffer = !remoteTensor ? zeroState->get_state()->data()
: zeroUtils::extract_object(remoteTensor->get_properties(),
ov::intel_npu::mem_handle);
void* userBuffer = !remoteTensor ? zeroState->get_state()->data() : remoteTensor->get_original_memory();

_pipeline->updateCommandList(_graphInputDescriptors.at(zeroState->get_tensor_index()).idx,
userBuffer,
Expand Down Expand Up @@ -609,10 +606,8 @@ void ZeroInferRequest::infer_async() {

auto userBatchRemoteTensor = std::dynamic_pointer_cast<ZeroRemoteTensor>(userTensor.at(i)._ptr);

void* userBuffer = !userBatchRemoteTensor
? userTensor.at(i)->data()
: zeroUtils::extract_object(userBatchRemoteTensor->get_properties(),
ov::intel_npu::mem_handle);
void* userBuffer = !userBatchRemoteTensor ? userTensor.at(i)->data()
: userBatchRemoteTensor->get_original_memory();

if (userBuffer != levelZeroBuffer) {
if (userBuffer == nullptr || levelZeroBuffer == nullptr) {
Expand All @@ -634,10 +629,8 @@ void ZeroInferRequest::infer_async() {
for (size_t i = 0; i < userTensor.size(); i++) {
auto userBatchRemoteTensor = std::dynamic_pointer_cast<ZeroRemoteTensor>(userTensor.at(i)._ptr);

void* userBuffer = !userBatchRemoteTensor
? userTensor.at(i)->data()
: zeroUtils::extract_object(userBatchRemoteTensor->get_properties(),
ov::intel_npu::mem_handle);
void* userBuffer = !userBatchRemoteTensor ? userTensor.at(i)->data()
: userBatchRemoteTensor->get_original_memory();

std::memcpy(static_cast<unsigned char*>(levelZeroBuffer) + (i * userTensor.at(i)->get_byte_size()),
userBuffer,
Expand All @@ -650,9 +643,8 @@ void ZeroInferRequest::infer_async() {
}

auto userRemoteTensor = std::dynamic_pointer_cast<ZeroRemoteTensor>(userTensor.at(SINGLE_TENSOR)._ptr);
void* userBuffer = !userRemoteTensor ? userTensor.at(SINGLE_TENSOR)->data()
: zeroUtils::extract_object(userRemoteTensor->get_properties(),
ov::intel_npu::mem_handle);
void* userBuffer =
!userRemoteTensor ? userTensor.at(SINGLE_TENSOR)->data() : userRemoteTensor->get_original_memory();

const auto& levelZeroTensor = get_level_zero_input(inputIndex);
if (!is_remote_tensor(levelZeroTensor)) {
Expand Down Expand Up @@ -701,9 +693,7 @@ void ZeroInferRequest::get_result() {
}

auto userRemoteTensor = std::dynamic_pointer_cast<ZeroRemoteTensor>(userTensor._ptr);
void* userBuffer = !userRemoteTensor ? userTensor->data()
: zeroUtils::extract_object(userRemoteTensor->get_properties(),
ov::intel_npu::mem_handle);
void* userBuffer = !userRemoteTensor ? userTensor->data() : userRemoteTensor->get_original_memory();

const std::shared_ptr<ov::ITensor>& levelZeroTensor = _levelZeroOutputTensors.at(outputIndex);
if (!is_remote_tensor(levelZeroTensor)) {
Expand Down
6 changes: 3 additions & 3 deletions src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ Pipeline::Pipeline(const Config& config,
if (remote_tensor == nullptr) {
data = input_tensors.at(io_index).at(i)->data();
} else {
data = zeroUtils::extract_object(remote_tensor->get_properties(), ov::intel_npu::mem_handle);
data = remote_tensor->get_original_memory();
}

graph->set_argument_value(desc.idx, data);
Expand All @@ -79,7 +79,7 @@ Pipeline::Pipeline(const Config& config,
if (remote_tensor == nullptr) {
data = input_tensors.at(io_index).at(0)->data();
} else {
data = zeroUtils::extract_object(remote_tensor->get_properties(), ov::intel_npu::mem_handle);
data = remote_tensor->get_original_memory();
}

graph->set_argument_value(
Expand All @@ -97,7 +97,7 @@ Pipeline::Pipeline(const Config& config,
if (remote_tensor == nullptr) {
data = output_tensors.at(io_index)->data();
} else {
data = zeroUtils::extract_object(remote_tensor->get_properties(), ov::intel_npu::mem_handle);
data = remote_tensor->get_original_memory();
}

graph->set_argument_value(
Expand Down
4 changes: 4 additions & 0 deletions src/plugins/intel_npu/src/backend/src/zero_remote_tensor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -172,4 +172,8 @@ void* ZeroRemoteTensor::get_original_memory() const {
return _data;
}

/// Returns the Level Zero context handle this remote tensor was created in.
/// Exposed as a direct method (rather than a property-map lookup) to avoid
/// the CPU overhead of building and parsing an ov::AnyMap on every query.
ze_context_handle_t ZeroRemoteTensor::get_zero_context_handle() const {
    const auto context_handle = _init_structs->getContext();
    return context_handle;
}

} // namespace intel_npu
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,7 @@ void ZeroVariableState::set_state(const ov::SoPtr<ov::ITensor>& new_state) {
void ZeroVariableState::reset() {
auto remoteTensor = std::dynamic_pointer_cast<ZeroRemoteTensor>(m_state._ptr);

void* userBuffer = !remoteTensor
? m_state->data()
: zeroUtils::extract_object(remoteTensor->get_properties(), ov::intel_npu::mem_handle);
void* userBuffer = !remoteTensor ? m_state->data() : remoteTensor->get_original_memory();

std::memset(userBuffer, 0, m_state->get_byte_size());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -277,17 +277,6 @@ static inline std::string getLatestBuildError(ze_graph_dditable_ext_curr_t& _gra
}
}

/// @brief Looks up property @p p in the parameter map and converts its value to Type.
/// @param params Map of property name -> ov::Any value.
/// @param p      Property descriptor whose name() keys the lookup.
/// @return The stored value converted to Type.
/// @throws via OPENVINO_THROW if @p params does not contain the property.
template <typename Type>
static inline Type extract_object(const ov::AnyMap& params, const ov::Property<Type>& p) {
    const auto itrHandle = params.find(p.name());
    if (itrHandle == params.end()) {
        OPENVINO_THROW("No parameter ", p.name(), " found in parameters map");
    }
    // Convert only after the existence check; the original built a throwaway
    // null ov::Any and reassigned it, which is both confusing and wasteful.
    return itrHandle->second.as<Type>();
}

static inline bool memory_was_allocated_in_the_same_l0_context(ze_context_handle_t hContext, const void* ptr) {
ze_memory_allocation_properties_t desc = {};
desc.stype = ZE_STRUCTURE_TYPE_MEMORY_ALLOCATION_PROPERTIES;
Expand Down

0 comments on commit dc1d967

Please sign in to comment.