From 3fbf0b7ee41b4bafb5aea64ec11df453f50e89ea Mon Sep 17 00:00:00 2001 From: Nikita Savelyev Date: Thu, 23 Jan 2025 17:28:33 +0100 Subject: [PATCH] Use regular division inside Scale Estimation --- nncf/openvino/optimized_functions/functions.py | 12 ++---------- .../weight_compression/scale_estimation.py | 8 -------- .../algorithms/weight_compression/weight_lowering.py | 7 +------ 3 files changed, 3 insertions(+), 24 deletions(-) diff --git a/nncf/openvino/optimized_functions/functions.py b/nncf/openvino/optimized_functions/functions.py index 1a6ca5a3980..02043319cfd 100644 --- a/nncf/openvino/optimized_functions/functions.py +++ b/nncf/openvino/optimized_functions/functions.py @@ -32,7 +32,6 @@ def do_int_quantization( reduction_axes: Optional[ReductionAxes] = None, precomputed_scale: Tensor = None, precomputed_zero_point: Tensor = None, - **kwargs, ) -> Tuple[Tensor, Tensor, Tensor]: """ Quantizes the given weight tensor. @@ -49,10 +48,7 @@ def do_int_quantization( scale_shape = None if precomputed_scale is None else precomputed_scale.shape zero_point_shape = None if precomputed_zero_point is None else precomputed_zero_point.shape - ov_model_params = OVModelParameters( - dynamic_shapes=kwargs.get("dynamic_shapes") is True, - convertable_division=kwargs.get("convertable_division") is True, - ) + ov_model_params = OVModelParameters() ov_model_params.input_dtypes["weight"] = weight.dtype if precomputed_scale is not None: ov_model_params.input_dtypes["scale"] = precomputed_scale.dtype @@ -107,7 +103,6 @@ def quantize_dequantize_weight( precomputed_scale: Optional[Tensor] = None, precomputed_zero_point: Optional[Tensor] = None, return_compressed_weight: Optional[bool] = False, - **kwargs, ) -> Union[Tensor, Tuple[Tensor, Tensor, Tensor, Tensor]]: """ Quantizes the given weight tensor and then dequantizes it back to obtain float32 values. @@ -132,10 +127,7 @@ def quantize_dequantize_weight( scale_shape = precomputed_scale.shape if precomputed_scale is not None else None zero_point_shape = precomputed_zero_point.shape if precomputed_zero_point is not None else None - ov_model_params = OVModelParameters( - dynamic_shapes=kwargs.get("dynamic_shapes") is True, - convertable_division=kwargs.get("convertable_division") is True, - ) + ov_model_params = OVModelParameters() ov_model_params.input_dtypes["weight"] = weight.dtype if precomputed_scale is not None: ov_model_params.input_dtypes["scale"] = precomputed_scale.dtype diff --git a/nncf/quantization/algorithms/weight_compression/scale_estimation.py b/nncf/quantization/algorithms/weight_compression/scale_estimation.py index 1f68e3cbd60..066b529011e 100644 --- a/nncf/quantization/algorithms/weight_compression/scale_estimation.py +++ b/nncf/quantization/algorithms/weight_compression/scale_estimation.py @@ -255,10 +255,6 @@ def calculate_quantization_params( zero_scale = 0.001 zero_mask = zero_scale * zero_mask.astype(original_weight.dtype) - # This is required for alignment with a previous OpenVINO models implementation - # TODO(Nikita Savelyev): remove this - opt_fns_kwargs = dict(dynamic_shapes=False, convertable_division=True) - # iterative rectification of initial scale for i in range(initial_steps): near_to_ideal_scale = estimate_scales(original_weight, target, zero_mask, importance) @@ -273,7 +269,6 @@ def calculate_quantization_params( config, precomputed_scale=near_to_ideal_scale, precomputed_zero_point=zp, - **opt_fns_kwargs, ) q_weights_ = fns.zeros_like(original_weight) + out @@ -308,7 +303,6 @@ def calculate_quantization_params( config, precomputed_scale=near_to_ideal_scale, precomputed_zero_point=zp, - **opt_fns_kwargs, ) compressed_weights = fns.zeros_like(original_weight) + out target, zero_mask = get_target_zero_mask(compressed_weights, zp) @@ -327,7 +321,6 @@ def calculate_quantization_params( config, precomputed_scale=scaled_scale, precomputed_zero_point=zp, - **opt_fns_kwargs, ) compressed_weights = fns.zeros_like(original_weight) + out @@ -345,7 +338,6 @@ def calculate_quantization_params( config, precomputed_scale=near_to_ideal_scale, precomputed_zero_point=zp, - **opt_fns_kwargs, ) q_weights_ = fns.zeros_like(original_weight) + out diff --git a/nncf/quantization/algorithms/weight_compression/weight_lowering.py b/nncf/quantization/algorithms/weight_compression/weight_lowering.py index 012d308087c..bc73364efab 100644 --- a/nncf/quantization/algorithms/weight_compression/weight_lowering.py +++ b/nncf/quantization/algorithms/weight_compression/weight_lowering.py @@ -431,7 +431,6 @@ def do_int_quantization( reduction_axes: Optional[ReductionAxes] = None, precomputed_scale: Tensor = None, precomputed_zero_point: Tensor = None, - **kwargs, ) -> Tuple[Tensor, Tensor, Tensor]: """ Performs integer quantization on the given weight tensor. @@ -461,9 +460,7 @@ def do_int_quantization( if is_openvino_available() and weight.backend in [TensorBackend.ov, TensorBackend.numpy]: from nncf.openvino.optimized_functions import do_int_quantization as do_int_quantization_ov - return do_int_quantization_ov( - weight, config, reduction_axes, precomputed_scale, precomputed_zero_point, **kwargs - ) + return do_int_quantization_ov(weight, config, reduction_axes, precomputed_scale, precomputed_zero_point) if not is_openvino_available() and weight.backend in [TensorBackend.ov, TensorBackend.numpy]: nncf_logger.info_once( "OpenVINO optimizations are disabled. Install OpenVINO to enable them and improve the performance." @@ -496,7 +493,6 @@ def quantize_dequantize_weight( precomputed_scale: Optional[Tensor] = None, precomputed_zero_point: Optional[Tensor] = None, return_compressed_weight: Optional[bool] = False, - **kwargs, ) -> Union[Tensor, Tuple[Tensor, Tensor, Tensor, Tensor]]: """ First quantizes the given weight tensor and then dequantizes it back to obtain float32 values. @@ -522,7 +518,6 @@ def quantize_dequantize_weight( precomputed_scale, precomputed_zero_point, return_compressed_weight, - **kwargs, ) if not is_openvino_available() and weight.backend in [TensorBackend.ov, TensorBackend.numpy]: nncf_logger.info_once(