Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Follow up to #2727 #3211

Draft
wants to merge 2 commits into
base: develop
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 10 additions & 14 deletions nncf/openvino/optimized_functions/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,20 +134,16 @@ def _infer_ov_model(
raise ValueError(f"Expected input '{input_name}' to be {expected_dtype}. But found: {actual_dtype}.")

# Infer the model
# TODO (Nikita Savelyev): Investigate the approach when we always infer via infer request creation
if compiled_model._infer_request is None:
compiled_model._infer_request = compiled_model.create_infer_request()
infer_request = compiled_model._infer_request

inputs = [inp.data for inp in inputs]
if ov_model_params.return_ov_tensors:
infer_request = compiled_model.create_infer_request()
infer_request.infer(
inputs, share_inputs=ov_model_params.share_inputs, share_outputs=ov_model_params.share_outputs
)
outputs = [infer_request.get_output_tensor(i) for i in range(len(infer_request.results))]
else:
outputs = compiled_model(
inputs, share_inputs=ov_model_params.share_inputs, share_outputs=ov_model_params.share_outputs
)
outputs = [outputs[i] for i in range(len(outputs))]
outputs = [Tensor(it) for it in outputs]
outputs = infer_request.infer(
inputs, share_inputs=ov_model_params.share_inputs, share_outputs=ov_model_params.share_outputs
)
outputs = [infer_request.get_output_tensor(i) for i in range(len(outputs))]
outputs = [Tensor(it if ov_model_params.return_ov_tensors else it.data) for it in outputs]

if ov_model_params.release_memory:
compiled_model.release_memory()
Expand Down Expand Up @@ -367,7 +363,7 @@ def _build_compress_model(
w_max = opset.reduce_max(weight, reduction_axes=reduction_axes, keep_dims=True)
w_abs_min, w_max = opset.convert(w_abs_min, ov.Type.f32), opset.convert(w_max, ov.Type.f32)

scale = opset.select(w_abs_min >= w_max, w_abs_min, opset.negative(w_max))
scale = opset.select(opset.greater_equal(w_abs_min, w_max), w_abs_min, opset.negative(w_max))
scale = divide_op(scale, opset.constant(-level_low, ov.Type.f32))
scale = opset.select(opset.less(opset.abs(scale), eps), eps, scale)

Expand Down
Loading