From de87bcf8c22ae2ec6260dd3d52f72a41fbdfafbd Mon Sep 17 00:00:00 2001 From: Nikita Savelyev Date: Thu, 23 Jan 2025 16:40:22 +0100 Subject: [PATCH 1/2] Do not use infer_request.results --- nncf/openvino/optimized_functions/models.py | 22 +++++++++------------ 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/nncf/openvino/optimized_functions/models.py b/nncf/openvino/optimized_functions/models.py index bbb5e94d9df..e741dc68a72 100644 --- a/nncf/openvino/optimized_functions/models.py +++ b/nncf/openvino/optimized_functions/models.py @@ -134,20 +134,16 @@ def _infer_ov_model( raise ValueError(f"Expected input '{input_name}' to be {expected_dtype}. But found: {actual_dtype}.") # Infer the model - # TODO (Nikita Savelyev): Investigate the approach when we always infer via infer request creation + if compiled_model._infer_request is None: + compiled_model._infer_request = compiled_model.create_infer_request() + infer_request = compiled_model._infer_request + inputs = [inp.data for inp in inputs] - if ov_model_params.return_ov_tensors: - infer_request = compiled_model.create_infer_request() - infer_request.infer( - inputs, share_inputs=ov_model_params.share_inputs, share_outputs=ov_model_params.share_outputs - ) - outputs = [infer_request.get_output_tensor(i) for i in range(len(infer_request.results))] - else: - outputs = compiled_model( - inputs, share_inputs=ov_model_params.share_inputs, share_outputs=ov_model_params.share_outputs - ) - outputs = [outputs[i] for i in range(len(outputs))] - outputs = [Tensor(it) for it in outputs] + outputs = infer_request.infer( + inputs, share_inputs=ov_model_params.share_inputs, share_outputs=ov_model_params.share_outputs + ) + outputs = [infer_request.get_output_tensor(i) for i in range(len(outputs))] + outputs = [Tensor(it if ov_model_params.return_ov_tensors else it.data) for it in outputs] if ov_model_params.release_memory: compiled_model.release_memory() From d86cf5443a14fe24a0f6a12ef7e39961bc5b1602 Mon Sep 17 
00:00:00 2001 From: Nikita Savelyev Date: Thu, 23 Jan 2025 17:07:03 +0100 Subject: [PATCH 2/2] Using >= is being deprecated --- nncf/openvino/optimized_functions/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nncf/openvino/optimized_functions/models.py b/nncf/openvino/optimized_functions/models.py index e741dc68a72..dcd59612ba8 100644 --- a/nncf/openvino/optimized_functions/models.py +++ b/nncf/openvino/optimized_functions/models.py @@ -363,7 +363,7 @@ def _build_compress_model( w_max = opset.reduce_max(weight, reduction_axes=reduction_axes, keep_dims=True) w_abs_min, w_max = opset.convert(w_abs_min, ov.Type.f32), opset.convert(w_max, ov.Type.f32) - scale = opset.select(w_abs_min >= w_max, w_abs_min, opset.negative(w_max)) + scale = opset.select(opset.greater_equal(w_abs_min, w_max), w_abs_min, opset.negative(w_max)) scale = divide_op(scale, opset.constant(-level_low, ov.Type.f32)) scale = opset.select(opset.less(opset.abs(scale), eps), eps, scale)