diff --git a/nncf/common/hardware/config.py b/nncf/common/hardware/config.py index 799261ced0c..1c93f317d43 100644 --- a/nncf/common/hardware/config.py +++ b/nncf/common/hardware/config.py @@ -176,8 +176,13 @@ def get_qconf_from_hw_config_subdict(quantization_subdict: Dict[str, Any]) -> Qu ), "Invalid value of quantizer parameter `level_high`.\ The parameter must be consistent with other parameters!" + narrow_range = quantization_subdict["narrow_range"] return QuantizerConfig( - num_bits=bits, mode=mode, per_channel=is_per_channel, signedness_to_force=signedness_to_force + num_bits=bits, + mode=mode, + per_channel=is_per_channel, + signedness_to_force=signedness_to_force, + narrow_range=narrow_range, ) @staticmethod diff --git a/nncf/common/hardware/configs/cpu.json b/nncf/common/hardware/configs/cpu.json index ae6a6645044..d7313c26f14 100644 --- a/nncf/common/hardware/configs/cpu.json +++ b/nncf/common/hardware/configs/cpu.json @@ -7,7 +7,8 @@ "mode": [ "symmetric" ], - "granularity": "pertensor" + "granularity": "pertensor", + "narrow_range": false }, "q8_a": { "bits": 8, @@ -15,7 +16,8 @@ "symmetric", "asymmetric" ], - "granularity": "pertensor" + "granularity": "pertensor", + "narrow_range": false }, "q8_a_ch": { "bits": 8, @@ -26,19 +28,22 @@ "granularity": [ "perchannel", "pertensor" - ] + ], + "narrow_range": false }, "q8_w_sym": { "bits": 8, "mode": "symmetric", "level_low": -128, "level_high": 127, - "granularity": ["perchannel", "pertensor"] + "granularity": ["perchannel", "pertensor"], + "narrow_range": true }, "q8_w_asym": { "bits": 8, "mode": "asymmetric", - "granularity": ["perchannel", "pertensor"] + "granularity": ["perchannel", "pertensor"], + "narrow_range": false } } }, diff --git a/nncf/common/hardware/configs/gpu.json b/nncf/common/hardware/configs/gpu.json index 39cb1cc6573..3ef23f083df 100644 --- a/nncf/common/hardware/configs/gpu.json +++ b/nncf/common/hardware/configs/gpu.json @@ -8,7 +8,8 @@ "symmetric", "asymmetric" ], - "granularity": 
"pertensor" + "granularity": "pertensor", + "narrow_range": false }, "q8_a_ch": { "bits": 8, @@ -19,7 +20,8 @@ "granularity": [ "perchannel", "pertensor" - ] + ], + "narrow_range": false }, "q8_w_sym": { "bits": 8, @@ -29,7 +31,8 @@ "granularity": [ "perchannel", "pertensor" - ] + ], + "narrow_range": true }, "q8_w_asym": { "bits": 8, @@ -37,7 +40,8 @@ "granularity": [ "perchannel", "pertensor" - ] + ], + "narrow_range": false } } }, diff --git a/nncf/common/hardware/configs/npu.json b/nncf/common/hardware/configs/npu.json index 15af02a9f60..5cfcb3d2045 100644 --- a/nncf/common/hardware/configs/npu.json +++ b/nncf/common/hardware/configs/npu.json @@ -7,7 +7,8 @@ "mode": [ "symmetric" ], - "granularity": "pertensor" + "granularity": "pertensor", + "narrow_range": false }, "q8_a": { "bits": 8, @@ -15,7 +16,8 @@ "symmetric", "asymmetric" ], - "granularity": "pertensor" + "granularity": "pertensor", + "narrow_range": false }, "q8_a_ch": { "bits": 8, @@ -26,30 +28,35 @@ "granularity": [ "perchannel", "pertensor" - ] + ], + "narrow_range": false }, "q8_w_sym": { "bits": 8, "mode": "symmetric", "level_low": -128, "level_high": 127, - "granularity": ["perchannel", "pertensor"] + "granularity": ["perchannel", "pertensor"], + "narrow_range": true }, "q8_w_asym": { "bits": 8, "mode": "asymmetric", - "granularity": ["perchannel", "pertensor"] + "granularity": ["perchannel", "pertensor"], + "narrow_range": false }, // 4-bit configs "q4_tn": { "bits": 4, "mode": "symmetric", - "granularity": "pertensor" + "granularity": "pertensor", + "narrow_range": false }, "q4_ch": { "bits": 4, "mode": "symmetric", - "granularity": "perchannel" + "granularity": "perchannel", + "narrow_range": false }, "q4_w": { "bits": 4, @@ -57,13 +64,15 @@ "granularity": [ "perchannel", "pertensor" - ] + ], + "narrow_range": false }, // 2-bit configs "q2_ch": { "bits": 2, "mode": "symmetric", - "granularity": "perchannel" + "granularity": "perchannel", + "narrow_range": false }, "q2_w": { "bits": 2, @@ 
-71,7 +80,8 @@ "granularity": [ "perchannel", "pertensor" - ] + ], + "narrow_range": false } } }, diff --git a/nncf/common/hardware/configs/template.json b/nncf/common/hardware/configs/template.json index 2e1ccd09b4f..ec581ca0467 100644 --- a/nncf/common/hardware/configs/template.json +++ b/nncf/common/hardware/configs/template.json @@ -16,14 +16,19 @@ "granularity": [ "pertensor", "perchannel" - ] + ], + /* + * Narrow range: if true, NNCF uses 2**num_bits - 1 quantization levels; if false, 2**num_bits + */ + "narrow_range": false }, "q8_sym_tnr_-128_127": { // Alias name for set of hyperparameters "bits": 8, // Number of quantization bits "mode": "symmetric", // Quantization mode "granularity": "pertensor", // Granularity: one scale for output tensor "level_low": -128, // Low quantization level - "level_high": 127 // High quantization level + "level_high": 127, // High quantization level + "narrow_range": false } } }, diff --git a/nncf/common/quantization/config_assignment.py b/nncf/common/quantization/config_assignment.py index 61895aaf0bb..d064a9fdd52 100644 --- a/nncf/common/quantization/config_assignment.py +++ b/nncf/common/quantization/config_assignment.py @@ -47,6 +47,8 @@ def get_scoped_quantizer_config( qconfig.per_channel = config_overrides["per_channel"] if config_overrides.get("signed") is not None: qconfig.signedness_to_force = config_overrides["signed"] + if config_overrides.get("narrow_range") is not None: + qconfig.narrow_range = config_overrides["narrow_range"] return qconfig diff --git a/nncf/common/quantization/quantizer_propagation/graph.py b/nncf/common/quantization/quantizer_propagation/graph.py index e23e2a21bf5..c1b8b260db8 100644 --- a/nncf/common/quantization/quantizer_propagation/graph.py +++ b/nncf/common/quantization/quantizer_propagation/graph.py @@ -1145,6 +1145,10 @@ def is_downstream_quantizer_redundant( (ds_config.per_channel == us_config.per_channel) or (ds_config.per_channel is True and us_config.per_channel is False) ) + + # Strictly prohibit merging 
of config with different narrow_range params + is_redundant = is_redundant and (ds_config.narrow_range == us_config.narrow_range) + return is_redundant def merge_traverse_fn( diff --git a/nncf/common/quantization/quantizer_propagation/solver.py b/nncf/common/quantization/quantizer_propagation/solver.py index c2861841e6a..a7c135dd320 100644 --- a/nncf/common/quantization/quantizer_propagation/solver.py +++ b/nncf/common/quantization/quantizer_propagation/solver.py @@ -159,6 +159,7 @@ def is_final_qconfig_compatible_to_initial(initial_qconfig: QuantizerConfig): final_qconfig.per_channel == initial_qconfig.per_channel and final_qconfig.mode == initial_qconfig.mode and final_qconfig.num_bits == initial_qconfig.num_bits + and final_qconfig.narrow_range == initial_qconfig.narrow_range and ( final_qconfig.signedness_to_force == initial_qconfig.signedness_to_force or initial_qconfig.signedness_to_force is None @@ -296,7 +297,9 @@ class QuantizerPropagationSolver: """ DEFAULT_QUANTIZATION_TYPES = [ - QuantizerConfig(num_bits=8, mode=QuantizationMode.SYMMETRIC, signedness_to_force=None, per_channel=False) + QuantizerConfig( + num_bits=8, mode=QuantizationMode.SYMMETRIC, signedness_to_force=None, per_channel=False, narrow_range=False + ) ] DEFAULT_PROPAGATION_STRATEGY = QuantizerPropagationRule.MERGE_ALL_IN_ONE @@ -1374,7 +1377,7 @@ def get_merged_qconfigs_for_downward_branching_case( Returns a tuple, of which the first node is the qconfig list for the quantizer to be placed above the branching node (i.e. that will affect all of the downward branches), and a list of nodes which are either None (which means that the corresponding branch quantizer has been successfully - merged, or qconfigs list to be set for the corresponding branch quantizer if it cannot be merged (e.g. if + merged), or qconfigs list to be set for the corresponding branch quantizer if it cannot be merged (e.g. 
if requantization to a lower bitwidth has to be done for this branch) :param potential_qconfigs_for_each_branch: For each branch defines the list of available configurations @@ -1492,7 +1495,8 @@ def __disambiguate_config_list( """ The input list should be sorted in descending order of priority. In case some qconfigs in the list have the same priority, this function will resolve the ambiguity in ordering these qconfigs in the final returned - list. + list. Quantization configs in the list cannot have different narrow_range values, so narrow_range does + not participate in the __lt__ method of the QConfigComparator. """ class QConfigComparator: diff --git a/nncf/common/quantization/quantizer_setup.py b/nncf/common/quantization/quantizer_setup.py index ff2f44af96f..180dbfa0b2a 100644 --- a/nncf/common/quantization/quantizer_setup.py +++ b/nncf/common/quantization/quantizer_setup.py @@ -28,7 +28,11 @@ QuantizationPointId = int DEFAULT_QUANTIZER_CONFIG = QuantizerConfig( - num_bits=8, mode=QuantizationMode.SYMMETRIC, signedness_to_force=None, per_channel=False + num_bits=8, + mode=QuantizationMode.SYMMETRIC, + signedness_to_force=None, + per_channel=False, + narrow_range=False, ) diff --git a/nncf/common/quantization/structs.py b/nncf/common/quantization/structs.py index 507f0779653..3d57806f758 100644 --- a/nncf/common/quantization/structs.py +++ b/nncf/common/quantization/structs.py @@ -18,6 +18,7 @@ from nncf.common.graph import NNCFNodeName from nncf.common.utils.api_marker import api from nncf.config.schemata.defaults import QUANTIZATION_BITS +from nncf.config.schemata.defaults import QUANTIZATION_NARROW_RANGE from nncf.config.schemata.defaults import QUANTIZATION_PER_CHANNEL from nncf.parameters import StrEnum from nncf.parameters import TargetDevice @@ -48,6 +49,7 @@ def __init__( mode: QuantizationScheme = QuantizationScheme.SYMMETRIC, signedness_to_force: Optional[bool] = None, per_channel: bool = QUANTIZATION_PER_CHANNEL, + narrow_range: bool = QUANTIZATION_NARROW_RANGE, ): 
""" :param num_bits: Bitwidth of the quantization. @@ -61,6 +63,7 @@ def __init__( self.mode = mode self.signedness_to_force = signedness_to_force self.per_channel = per_channel + self.narrow_range = narrow_range def __eq__(self, other: object) -> bool: if not isinstance(other, QuantizerConfig): @@ -68,11 +71,12 @@ def __eq__(self, other: object) -> bool: return self.__dict__ == other.__dict__ def __str__(self) -> str: - return "B:{bits} M:{mode} SGN:{signedness} PC:{per_channel}".format( + return "B:{bits} M:{mode} SGN:{signedness} PC:{per_channel} NR:{narrow_range}".format( bits=self.num_bits, mode="S" if self.mode == QuantizationScheme.SYMMETRIC else "A", signedness="ANY" if self.signedness_to_force is None else ("S" if self.signedness_to_force else "U"), per_channel="Y" if self.per_channel else "N", + narrow_range="Y" if self.narrow_range else "N", ) def __hash__(self) -> int: @@ -93,26 +97,9 @@ def is_valid_requantization_for(self, other: "QuantizerConfig") -> bool: self.mode is QuantizationScheme.ASYMMETRIC and other.mode is QuantizationScheme.SYMMETRIC, self.signedness_to_force is None and other.signedness_to_force is not None, self.signedness_to_force is True and other.signedness_to_force is False, + self.narrow_range != other.narrow_range, ] - if any(fail_conditions): - return False - return True - - def compatible_with_a_unified_scale_linked_qconfig(self, linked_qconfig: "QuantizerConfig") -> bool: - """ - For two configs to be compatible in a unified scale scenario, all of their fundamental parameters - must be aligned. - - :param linked_qconfig: A QuantizerConfig that is compared against the current config. - :return: A boolean value specifying whether `linked_qconfig` is compatible with the current config in terms - of scale unification. 
- """ - return ( - self.num_bits == linked_qconfig.num_bits - and self.mode == linked_qconfig.mode - and self.signedness_to_force == linked_qconfig.signedness_to_force - and self.per_channel == linked_qconfig.per_channel - ) + return not any(fail_conditions) def is_a_bitwidth_variant(self, other_qconfig: "QuantizerConfig") -> bool: """ @@ -138,6 +125,7 @@ def get_state(self) -> Dict[str, Any]: "mode": self.mode, "signedness_to_force": self.signedness_to_force, "per_channel": self.per_channel, + "narrow_range": self.narrow_range, } @classmethod diff --git a/nncf/config/schemata/defaults.py b/nncf/config/schemata/defaults.py index f3d63a81eea..1aa4a0baab2 100644 --- a/nncf/config/schemata/defaults.py +++ b/nncf/config/schemata/defaults.py @@ -35,6 +35,7 @@ QUANTIZATION_OVERFLOW_FIX = "enable" QUANTIZATION_BITS = 8 QUANTIZATION_PER_CHANNEL = False +QUANTIZATION_NARROW_RANGE = False QUANTIZATION_LOGARITHM_SCALE = False ACTIVATIONS_QUANT_START_EPOCH = 1 diff --git a/nncf/quantization/algorithms/min_max/algorithm.py b/nncf/quantization/algorithms/min_max/algorithm.py index 8dbc6da0d64..fbffe926c8a 100644 --- a/nncf/quantization/algorithms/min_max/algorithm.py +++ b/nncf/quantization/algorithms/min_max/algorithm.py @@ -62,7 +62,6 @@ from nncf.quantization.algorithms.algorithm import Algorithm from nncf.quantization.fake_quantize import calculate_convert_parameters from nncf.quantization.fake_quantize import calculate_quantizer_parameters -from nncf.quantization.fake_quantize import get_quantizer_narrow_range from nncf.quantization.passes import transform_to_inference_graph from nncf.quantization.range_estimator import AggregatorType from nncf.quantization.range_estimator import RangeEstimatorParameters @@ -370,6 +369,8 @@ def _get_quantizer_constraints( constraints["per_channel"] = quantization_params.per_channel if quantization_params.signedness_to_force is not None: constraints["signedness_to_force"] = quantization_params.signedness_to_force + if 
quantization_params.narrow_range is not None: + constraints["narrow_range"] = quantization_params.narrow_range return QuantizationConstraints(**constraints) @@ -992,7 +993,6 @@ def filter_func(point: StatisticPoint) -> bool: raise nncf.InternalError(f"QConfigs for unified scale group {unified_scale_group} are not equal") qconfig = qconfigs[0] q_group = QuantizerGroup.ACTIVATIONS - narrow_range = get_quantizer_narrow_range(qconfig, q_group) if self._mode is not None: destination_type = self._quantization_params[q_group].destination_type parameters = calculate_convert_parameters( @@ -1004,7 +1004,7 @@ def filter_func(point: StatisticPoint) -> bool: ) unified_ops_list.add(quantization_target_point) continue - parameters = calculate_quantizer_parameters(unified_values, qconfig, q_group, narrow_range) + parameters = calculate_quantizer_parameters(unified_values, qconfig, q_group) commands = self._backend_entity.create_unified_scales_quantizers_insertion_commands( graph, unified_scale_group, qconfig, parameters ) @@ -1029,7 +1029,6 @@ def filter_func(point: StatisticPoint) -> bool: quant_group = QuantizerGroup.ACTIVATIONS half_range = quantization_target_point in quantization_points_overflow_fix - narrow_range = get_quantizer_narrow_range(qconfig, quant_group) statistics = tensor_collector.get_statistics() if statistics.min_values is None or statistics.max_values is None: raise nncf.InternalError(f"Statistics were not collected for the node {target_node_name}") @@ -1042,9 +1041,7 @@ def filter_func(point: StatisticPoint) -> bool: quantization_target_point, parameters ) else: - parameters = calculate_quantizer_parameters( - statistics, qconfig, quant_group, narrow_range, half_range - ) + parameters = calculate_quantizer_parameters(statistics, qconfig, quant_group, half_range) command = self._backend_entity.create_quantizer_insertion_command( graph, quantization_target_point, qconfig, parameters ) diff --git a/nncf/quantization/algorithms/min_max/torch_backend.py 
b/nncf/quantization/algorithms/min_max/torch_backend.py index f0882ffa92c..dc0097e080b 100644 --- a/nncf/quantization/algorithms/min_max/torch_backend.py +++ b/nncf/quantization/algorithms/min_max/torch_backend.py @@ -22,7 +22,6 @@ from nncf.common.graph.transformations.commands import TargetType from nncf.common.graph.transformations.commands import TransformationCommand from nncf.common.hardware.config import HWConfig -from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode from nncf.common.quantization.structs import QuantizerConfig from nncf.experimental.common.tensor_statistics.collectors import REDUCERS_MAP from nncf.experimental.common.tensor_statistics.collectors import TensorReducerBase @@ -225,10 +224,9 @@ def _create_quantizer( ) -> BaseQuantizer: mode = quantizer_config.mode quantizer_cls = QUANTIZATION_MODULES.get(mode) - narrow_range = target_type == TargetType.OPERATION_WITH_WEIGHTS and mode == QuantizationMode.SYMMETRIC quantizer_spec = PTQuantizerSpec.from_config( quantizer_config, - narrow_range=narrow_range, + narrow_range=quantizer_config.narrow_range, scale_shape=scale_shape, half_range=False, logarithm_scale=False, diff --git a/nncf/quantization/algorithms/min_max/torch_fx_backend.py b/nncf/quantization/algorithms/min_max/torch_fx_backend.py index 9a406f435dd..4742bb51c16 100644 --- a/nncf/quantization/algorithms/min_max/torch_fx_backend.py +++ b/nncf/quantization/algorithms/min_max/torch_fx_backend.py @@ -22,7 +22,6 @@ from nncf.common.graph.transformations.commands import TargetType from nncf.common.graph.transformations.commands import TransformationCommand from nncf.common.hardware.config import HWConfig -from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode from nncf.common.quantization.structs import QuantizerConfig from nncf.experimental.common.tensor_statistics.collectors import REDUCERS_MAP from nncf.experimental.common.tensor_statistics.collectors import TensorReducerBase @@ 
-199,10 +198,9 @@ def _create_quantizer( ) -> FakeQuantize: mode = quantizer_config.mode quantizer_cls = QUANTIZATION_MODULES.get(mode) - narrow_range = target_type == TargetType.OPERATION_WITH_WEIGHTS and mode == QuantizationMode.SYMMETRIC quantizer_spec = PTQuantizerSpec.from_config( quantizer_config, - narrow_range=narrow_range, + narrow_range=quantizer_config.narrow_range, scale_shape=scale_shape, half_range=False, logarithm_scale=False, diff --git a/nncf/quantization/fake_quantize.py b/nncf/quantization/fake_quantize.py index ead4463dc86..2d7d8320728 100644 --- a/nncf/quantization/fake_quantize.py +++ b/nncf/quantization/fake_quantize.py @@ -209,25 +209,10 @@ def asymmetric_range( return level_low, level_high -def get_quantizer_narrow_range(quantizer_config: QuantizerConfig, quant_group: QuantizerGroup) -> bool: - """ - Returns narrow_range parameter: True if the range of quantized values is reduced by 1 compared to the - naive case, False otherwise. - - :param quantizer_config: Config of the quantization configuration. - :param quant_group: Group of the quantizer. - :return: narrow_range parameter. - """ - if quantizer_config.mode == QuantizationMode.SYMMETRIC: - return quant_group == QuantizerGroup.WEIGHTS - return False - - def calculate_quantizer_parameters( statistics: MinMaxTensorStatistic, quantizer_config: QuantizerConfig, quant_group: QuantizerGroup, - narrow_range: bool, half_range: bool = False, ) -> FakeQuantizeParameters: """ @@ -236,8 +221,6 @@ def calculate_quantizer_parameters( :param statistics: Collected statistics for the quantized insertion. :param quantizer_config: Config of the quantization configuration. :param quantizer_group: Group of the quantizer. - :param narrow_range: True if the range of quantized values is reduced by 1 compared to the - naive case, False otherwise. :param half_range: If True effectively only a half of a quantizer range is used. False - the full range is used. :return: Parameters of the FakeQuantize layer. 
@@ -247,16 +230,23 @@ def calculate_quantizer_parameters( if half_range: input_low, input_high, levels = _calculate_scaled_parameters( - min_values, max_values, quantizer_config, quant_group, narrow_range + min_values, + max_values, + quantizer_config, + quant_group, ) else: num_bits = quantizer_config.num_bits if quantizer_config.mode == QuantizationMode.SYMMETRIC: - level_low, level_high = calculate_symmetric_level_ranges(num_bits, signed=True, narrow_range=narrow_range) + level_low, level_high = calculate_symmetric_level_ranges( + num_bits, signed=True, narrow_range=quantizer_config.narrow_range + ) levels = get_num_levels(level_low, level_high) input_low, input_high = symmetric_range(min_values, max_values, levels, quantizer_config, quant_group) else: - level_low, level_high = calculate_asymmetric_level_ranges(num_bits, narrow_range=narrow_range) + level_low, level_high = calculate_asymmetric_level_ranges( + num_bits, narrow_range=quantizer_config.narrow_range + ) levels = get_num_levels(level_low, level_high) input_low, input_high = asymmetric_range(min_values, max_values, quantizer_config, quant_group) @@ -304,7 +294,6 @@ def _calculate_scaled_parameters( max_values: Tensor, quantizer_config: QuantizerConfig, quant_group: QuantizerGroup, - narrow_range: bool, ) -> Tuple[Tensor, Tensor, int]: """ Calculates FakeQuantize layer attributes scaled to effectively use a half range of the quantization range. @@ -313,8 +302,6 @@ def _calculate_scaled_parameters( :param max_values: Maximum values of statistics for the quantizer. :param quantizer_config: Config of the quantization configuration. :param quantizer_group: Group of the quantizer. - :param narrow_range: True if the range of quantized values is reduced by 1 compared to the - naive case, False otherwise. :return: A Tuple input_low: Tensor with minimum limit for input value. input_high: Tensor with maximum limit for input value. 
@@ -331,7 +318,7 @@ def _calculate_scaled_parameters( input_low, input_high = symmetric_range(min_values, max_values, levels, quantizer_config, quant_group) export_level_low, export_level_high = calculate_symmetric_level_ranges( - num_bits, signed=True, narrow_range=narrow_range + num_bits, signed=True, narrow_range=quantizer_config.narrow_range ) export_levels = get_num_levels(export_level_low, export_level_high) input_high *= (export_levels - 1) / (levels - 1) diff --git a/tests/common/quantization/test_quantizer_propagation_graph.py b/tests/common/quantization/test_quantizer_propagation_graph.py index 60d26e2dbb8..46ef772d159 100644 --- a/tests/common/quantization/test_quantizer_propagation_graph.py +++ b/tests/common/quantization/test_quantizer_propagation_graph.py @@ -986,6 +986,9 @@ def _setup_and_propagate_quantizers(self, qpsg: QPSG) -> QPSG: return qpsg class NoRedundancyState0(RedundantQuantizerMergeTestStruct): + def __init__(self, second_quantizer_q_config: QuantizerConfig): + self._non_compatible_q_config = second_quantizer_q_config + ref_remaining_pq_positions = { InsertionPointGraph.get_post_hook_node_key("2 /C_0"), InsertionPointGraph.get_pre_hook_node_key("5 /F_0"), @@ -1007,7 +1010,7 @@ def _setup_and_propagate_quantizers(self, qpsg: QPSG) -> QPSG: ], ) _ = qpsg.add_propagating_quantizer( - [QuantizerConfig(num_bits=6)], InsertionPointGraph.get_pre_hook_node_key("5 /F_0") + [self._non_compatible_q_config], InsertionPointGraph.get_pre_hook_node_key("5 /F_0") ) return qpsg @@ -1096,7 +1099,8 @@ def _setup_and_propagate_quantizers(self, qpsg: QPSG) -> QPSG: BranchHandlingState0(), MergeState0(), MergeState1(), - NoRedundancyState0(), + NoRedundancyState0(QuantizerConfig(num_bits=6)), + NoRedundancyState0(QuantizerConfig(narrow_range=True)), NoRedundancyState1(), NoRedundancyState2(), ] diff --git a/tests/common/quantization/test_quantizer_propagation_solver.py b/tests/common/quantization/test_quantizer_propagation_solver.py index 
99ac4d91979..72a40e3a509 100644 --- a/tests/common/quantization/test_quantizer_propagation_solver.py +++ b/tests/common/quantization/test_quantizer_propagation_solver.py @@ -837,7 +837,45 @@ def test_setup_initial_quantizers_sdpa(self): None, None ]) - }) + }), + # Check configs with different narrow range params + # are not merged + MergeQConfigTestStruct( + branch_qconfig_lists_before_merge=[ + [QuantizerConfig(narrow_range=True)], + [QuantizerConfig(narrow_range=True)], + [QuantizerConfig(narrow_range=False)], + ], + strategy_vs_solution_dict={ + QuantizerPropagationRule.DO_NOT_MERGE_BRANCHES: MergeQConfigSolution( + merge_qconfig_list=None, + branch_qconfig_lists_after_merge=[ + [QuantizerConfig(narrow_range=True)], + [QuantizerConfig(narrow_range=True)], + [QuantizerConfig(narrow_range=False)], + ], ), + QuantizerPropagationRule.MERGE_IF_ALL_BRANCHES_SAME : MergeQConfigSolution( + merge_qconfig_list=None, + branch_qconfig_lists_after_merge=[ + [QuantizerConfig(narrow_range=True)], + [QuantizerConfig(narrow_range=True)], + [QuantizerConfig(narrow_range=False)], + ], ), + QuantizerPropagationRule.MERGE_WITH_POTENTIAL_REQUANTIZATION: MergeQConfigSolution( + merge_qconfig_list=None, + branch_qconfig_lists_after_merge=[ + [QuantizerConfig(narrow_range=True)], + [QuantizerConfig(narrow_range=True)], + [QuantizerConfig(narrow_range=False)], + ], ), + QuantizerPropagationRule.MERGE_ALL_IN_ONE: MergeQConfigSolution( + merge_qconfig_list=None, + branch_qconfig_lists_after_merge=[ + [QuantizerConfig(narrow_range=True)], + [QuantizerConfig(narrow_range=True)], + [QuantizerConfig(narrow_range=False)], + ], ) + }), ] # fmt: skip @staticmethod @@ -1942,3 +1980,65 @@ def test_metatypes_to_ignore(mocker): solver._add_node_to_ignored.assert_called_once() assert "1 B" in solver._add_node_to_ignored.call_args[0] + + +@pytest.mark.parametrize( + ("requanting_qconf", "base_qconf", "is_valid_requant"), + ( + (QuantizerConfig(), QuantizerConfig(), True), + 
(QuantizerConfig(num_bits=8), QuantizerConfig(num_bits=6), False), + (QuantizerConfig(num_bits=6), QuantizerConfig(num_bits=8), True), + # Technically placing a per-channel quantization after a per-tensor should not break + # anything or limit the set of output values w.r.t to a single per-tensor quantizer. + (QuantizerConfig(num_bits=6, per_channel=True), QuantizerConfig(num_bits=6, per_channel=False), True), + (QuantizerConfig(num_bits=6, per_channel=False), QuantizerConfig(num_bits=6, per_channel=True), True), + (QuantizerConfig(num_bits=5, per_channel=True), QuantizerConfig(num_bits=6, per_channel=False), True), + (QuantizerConfig(num_bits=5, per_channel=False), QuantizerConfig(num_bits=6, per_channel=True), True), + ( + QuantizerConfig(num_bits=5, mode=QuantizationMode.SYMMETRIC), + QuantizerConfig(num_bits=5, mode=QuantizationMode.ASYMMETRIC), + True, + ), + ( + QuantizerConfig(num_bits=5, mode=QuantizationMode.ASYMMETRIC), + QuantizerConfig(num_bits=5, mode=QuantizationMode.SYMMETRIC), + False, + ), + (QuantizerConfig(signedness_to_force=True), QuantizerConfig(), True), + (QuantizerConfig(), QuantizerConfig(signedness_to_force=True), False), + (QuantizerConfig(signedness_to_force=False), QuantizerConfig(), True), + (QuantizerConfig(), QuantizerConfig(signedness_to_force=False), False), + (QuantizerConfig(signedness_to_force=True), QuantizerConfig(signedness_to_force=False), False), + (QuantizerConfig(signedness_to_force=False), QuantizerConfig(signedness_to_force=True), True), + ( + QuantizerConfig(num_bits=4, mode=QuantizationMode.SYMMETRIC, per_channel=False), + QuantizerConfig(num_bits=8, mode=QuantizationMode.SYMMETRIC, per_channel=True), + True, + ), + ( + QuantizerConfig(num_bits=4, mode=QuantizationMode.SYMMETRIC, per_channel=False), + QuantizerConfig(num_bits=8, mode=QuantizationMode.ASYMMETRIC, per_channel=False), + True, + ), + # Neither of the two configs here can requantize the other + ( + QuantizerConfig(num_bits=6, 
mode=QuantizationMode.ASYMMETRIC), + QuantizerConfig(num_bits=8, mode=QuantizationMode.SYMMETRIC), + False, + ), + ( + QuantizerConfig(num_bits=8, mode=QuantizationMode.SYMMETRIC), + QuantizerConfig(num_bits=6, mode=QuantizationMode.ASYMMETRIC), + False, + ), + # Check narrow range requantization rule + ( + QuantizerConfig(narrow_range=True), + QuantizerConfig(narrow_range=False), + False, + ), + ), +) +def test_quantizer_ordering(requanting_qconf: QuantizerConfig, base_qconf: QuantizerConfig, is_valid_requant: bool): + test_result = requanting_qconf.is_valid_requantization_for(base_qconf) + assert test_result == is_valid_requant diff --git a/tests/cross_fw/shared/json.py b/tests/cross_fw/shared/json.py index ab7172a8c52..74822efaa36 100644 --- a/tests/cross_fw/shared/json.py +++ b/tests/cross_fw/shared/json.py @@ -15,6 +15,13 @@ import numpy as np +from nncf.tensor import Tensor + +try: + import torch +except ModuleNotFoundError: + torch = None + def load_json(stats_path: Path): with open(stats_path, "r", encoding="utf8") as json_file: @@ -29,8 +36,12 @@ def default(self, o): return int(o) if isinstance(o, np.floating): return float(o) + if isinstance(o, Tensor): + o = o.data if isinstance(o, np.ndarray): return o.tolist() + if isinstance(o, torch.Tensor): + return o.cpu().detach().numpy().tolist() return json.JSONEncoder.default(self, o) diff --git a/tests/cross_fw/test_templates/fq_params/fq_params.json b/tests/cross_fw/test_templates/fq_params/fq_params.json index 3aeb683c991..6b704bbf4ce 100644 --- a/tests/cross_fw/test_templates/fq_params/fq_params.json +++ b/tests/cross_fw/test_templates/fq_params/fq_params.json @@ -1,24 +1,10 @@ { - "weights_symmetric_sign_None_per_ch_False_narrow_range_True_hf_range_True": { - "levels": 255, - "input_low": -1.994419813156128, - "input_high": 1.994419813156128, - "output_low": -1.994419813156128, - "output_high": 1.994419813156128 - }, - "weights_symmetric_sign_None_per_ch_False_narrow_range_True_hf_range_False": { - 
"levels": 255, - "input_low": -0.997209906578064, - "input_high": 0.997209906578064, - "output_low": -0.997209906578064, - "output_high": 0.997209906578064 - }, "weights_symmetric_sign_None_per_ch_False_narrow_range_False_hf_range_True": { "levels": 256, - "input_low": -2.0022718596646167, - "input_high": 2.0022718596646167, - "output_low": -2.0022718596646167, - "output_high": 2.0022718596646167 + "input_low": -2.002271890640259, + "input_high": 2.002271890640259, + "output_low": -2.002271890640259, + "output_high": 2.002271890640259 }, "weights_symmetric_sign_None_per_ch_False_narrow_range_False_hf_range_False": { "levels": 256, @@ -255,5 +241,248 @@ ] ] ] + }, + "weights_symmetric_sign_None_per_ch_False_narrow_range_True_hf_range_True": { + "levels": 255, + "input_low": -1.994419813156128, + "input_high": 1.994419813156128, + "output_low": -1.994419813156128, + "output_high": 1.994419813156128 + }, + "weights_symmetric_sign_None_per_ch_False_narrow_range_True_hf_range_False": { + "levels": 255, + "input_low": -0.997209906578064, + "input_high": 0.997209906578064, + "output_low": -0.997209906578064, + "output_high": 0.997209906578064 + }, + "weights_asymmetric_sign_None_per_ch_False_narrow_range_True_hf_range_False": { + "levels": 255, + "input_low": 0.0, + "input_high": 0.997209906578064, + "output_low": 0.0, + "output_high": 0.997209906578064 + }, + "activations_symmetric_sign_None_per_ch_False_narrow_range_True_hf_range_False": { + "levels": 255, + "input_low": 0.0, + "input_high": 0.997209906578064, + "output_low": 0.0, + "output_high": 0.997209906578064 + }, + "activations_symmetric_sign_None_per_ch_True_narrow_range_True_hf_range_False": { + "levels": 255, + "input_low": [ + [ + [ + [ + 0.0 + ] + ] + ], + [ + [ + [ + 0.0 + ] + ] + ] + ], + "input_high": [ + [ + [ + [ + 0.997209906578064 + ] + ] + ], + [ + [ + [ + 0.9950965046882629 + ] + ] + ] + ], + "output_low": [ + [ + [ + [ + 0.0 + ] + ] + ], + [ + [ + [ + 0.0 + ] + ] + ] + ], + "output_high": [ + [ + [ 
+ [ + 0.997209906578064 + ] + ] + ], + [ + [ + [ + 0.9950965046882629 + ] + ] + ] + ] + }, + "activations_asymmetric_sign_None_per_ch_False_narrow_range_True_hf_range_False": { + "levels": 255, + "input_low": 0.0, + "input_high": 0.997209906578064, + "output_low": 0.0, + "output_high": 0.997209906578064 + }, + "activations_asymmetric_sign_None_per_ch_True_narrow_range_True_hf_range_False": { + "levels": 255, + "input_low": [ + [ + [ + [ + 0.0 + ] + ] + ], + [ + [ + [ + 0.0 + ] + ] + ] + ], + "input_high": [ + [ + [ + [ + 0.997209906578064 + ] + ] + ], + [ + [ + [ + 0.9950965046882629 + ] + ] + ] + ], + "output_low": [ + [ + [ + [ + 0.0 + ] + ] + ], + [ + [ + [ + 0.0 + ] + ] + ] + ], + "output_high": [ + [ + [ + [ + 0.997209906578064 + ] + ] + ], + [ + [ + [ + 0.9950965046882629 + ] + ] + ] + ] + }, + "activations_asymmetric_sign_True_per_ch_False_narrow_range_True_hf_range_False": { + "levels": 255, + "input_low": 0.0, + "input_high": 0.997209906578064, + "output_low": 0.0, + "output_high": 0.997209906578064 + }, + "activations_asymmetric_sign_True_per_ch_True_narrow_range_True_hf_range_False": { + "levels": 255, + "input_low": [ + [ + [ + [ + 0.0 + ] + ] + ], + [ + [ + [ + 0.0 + ] + ] + ] + ], + "input_high": [ + [ + [ + [ + 0.997209906578064 + ] + ] + ], + [ + [ + [ + 0.9950965046882629 + ] + ] + ] + ], + "output_low": [ + [ + [ + [ + 0.0 + ] + ] + ], + [ + [ + [ + 0.0 + ] + ] + ] + ], + "output_high": [ + [ + [ + [ + 0.997209906578064 + ] + ] + ], + [ + [ + [ + 0.9950965046882629 + ] + ] + ] + ] } } \ No newline at end of file diff --git a/tests/cross_fw/test_templates/test_calculate_quantizer_parameters.py b/tests/cross_fw/test_templates/test_calculate_quantizer_parameters.py index 797f107a1a7..d0dada80f76 100644 --- a/tests/cross_fw/test_templates/test_calculate_quantizer_parameters.py +++ b/tests/cross_fw/test_templates/test_calculate_quantizer_parameters.py @@ -84,7 +84,6 @@ def parse_fq_params_to_dict(fq_params): class CaseFQParams: q_config: 
QuantizerConfig q_group: QuantizerGroup - narrow_range: bool half_range: bool should_fail: bool @@ -94,42 +93,36 @@ class CaseFQParams: CaseFQParams( q_config=QuantizerConfig(num_bits=8, mode=QuantizationMode.SYMMETRIC), q_group=QuantizerGroup.WEIGHTS, - narrow_range=True, half_range=True, should_fail=False, ), CaseFQParams( q_config=QuantizerConfig(num_bits=8, mode=QuantizationMode.SYMMETRIC), q_group=QuantizerGroup.WEIGHTS, - narrow_range=True, half_range=False, should_fail=False, ), CaseFQParams( q_config=QuantizerConfig(num_bits=8, mode=QuantizationMode.SYMMETRIC), q_group=QuantizerGroup.WEIGHTS, - narrow_range=False, half_range=True, should_fail=False, ), CaseFQParams( q_config=QuantizerConfig(num_bits=8, mode=QuantizationMode.SYMMETRIC), q_group=QuantizerGroup.WEIGHTS, - narrow_range=False, half_range=False, should_fail=False, ), CaseFQParams( q_config=QuantizerConfig(num_bits=8, mode=QuantizationMode.ASYMMETRIC), q_group=QuantizerGroup.WEIGHTS, - narrow_range=False, half_range=True, should_fail=True, ), CaseFQParams( q_config=QuantizerConfig(num_bits=8, mode=QuantizationMode.ASYMMETRIC), q_group=QuantizerGroup.WEIGHTS, - narrow_range=False, half_range=False, should_fail=False, ), @@ -137,55 +130,57 @@ class CaseFQParams: CaseFQParams( q_config=QuantizerConfig(num_bits=8, mode=QuantizationMode.SYMMETRIC, per_channel=False), q_group=QuantizerGroup.ACTIVATIONS, - narrow_range=False, half_range=False, should_fail=False, ), CaseFQParams( q_config=QuantizerConfig(num_bits=8, mode=QuantizationMode.SYMMETRIC, per_channel=True), q_group=QuantizerGroup.ACTIVATIONS, - narrow_range=False, half_range=False, should_fail=False, ), CaseFQParams( q_config=QuantizerConfig(num_bits=8, mode=QuantizationMode.ASYMMETRIC, per_channel=False), q_group=QuantizerGroup.ACTIVATIONS, - narrow_range=False, half_range=False, should_fail=False, ), CaseFQParams( q_config=QuantizerConfig(num_bits=8, mode=QuantizationMode.ASYMMETRIC, per_channel=True), q_group=QuantizerGroup.ACTIVATIONS, - 
narrow_range=False, half_range=False, should_fail=False, ), CaseFQParams( q_config=QuantizerConfig( - num_bits=8, mode=QuantizationMode.ASYMMETRIC, signedness_to_force=True, per_channel=False + num_bits=8, + mode=QuantizationMode.ASYMMETRIC, + signedness_to_force=True, + per_channel=False, ), q_group=QuantizerGroup.ACTIVATIONS, - narrow_range=False, half_range=False, should_fail=False, ), CaseFQParams( q_config=QuantizerConfig( - num_bits=8, mode=QuantizationMode.ASYMMETRIC, signedness_to_force=True, per_channel=True + num_bits=8, + mode=QuantizationMode.ASYMMETRIC, + signedness_to_force=True, + per_channel=True, ), q_group=QuantizerGroup.ACTIVATIONS, - narrow_range=False, half_range=False, should_fail=False, ), CaseFQParams( q_config=QuantizerConfig( - num_bits=8, mode=QuantizationMode.ASYMMETRIC, signedness_to_force=True, per_channel=True + num_bits=8, + mode=QuantizationMode.ASYMMETRIC, + signedness_to_force=True, + per_channel=True, ), q_group=QuantizerGroup.ACTIVATIONS, - narrow_range=False, half_range=True, should_fail=True, ), @@ -198,10 +193,11 @@ def to_nncf_tensor(self, t: np.array): raise NotImplementedError @pytest.mark.parametrize("case_to_test", TO_TEST) - def test_calculate_quantizer_parameters(self, case_to_test): + @pytest.mark.parametrize("narrow_range", [False, True], ids=["", "narrow_range"]) + def test_calculate_quantizer_parameters(self, case_to_test, narrow_range): q_config = case_to_test.q_config + q_config.narrow_range = narrow_range quant_group = case_to_test.q_group - narrow_range = case_to_test.narrow_range half_range = case_to_test.half_range rng = np.random.default_rng(0) @@ -222,11 +218,12 @@ def test_calculate_quantizer_parameters(self, case_to_test): max_values=self.to_nncf_tensor(max_values), ) if not case_to_test.should_fail: - fq_params = calculate_quantizer_parameters(statistics, q_config, quant_group, narrow_range, half_range) + fq_params = calculate_quantizer_parameters(statistics, q_config, quant_group, half_range) # 
Uncomment lines below to generate reference for new models. + # Does not work with torch backend! # dump_fq_params(fq_params, quant_group, q_config, narrow_range, half_range) - ref_fq_params = read_ref_fq_params(quant_group, q_config, narrow_range, half_range) + ref_fq_params = read_ref_fq_params(quant_group, q_config, q_config.narrow_range, half_range) compare_fq_parameters(fq_params, ref_fq_params) else: with pytest.raises(nncf.ValidationError): - calculate_quantizer_parameters(statistics, q_config, quant_group, narrow_range, half_range) + calculate_quantizer_parameters(statistics, q_config, quant_group, half_range) diff --git a/tests/torch/ptq/test_calculation_quantizer_params.py b/tests/torch/ptq/test_calculation_quantizer_params.py index 83cc3f36e29..60d6f64a385 100644 --- a/tests/torch/ptq/test_calculation_quantizer_params.py +++ b/tests/torch/ptq/test_calculation_quantizer_params.py @@ -29,7 +29,6 @@ from nncf.quantization.algorithms.min_max.torch_backend import PTMinMaxAlgoBackend from nncf.quantization.fake_quantize import FakeQuantizeParameters from nncf.quantization.fake_quantize import calculate_quantizer_parameters -from nncf.quantization.fake_quantize import get_quantizer_narrow_range from nncf.tensor import Tensor from nncf.tensor import functions as fns from nncf.torch.model_creation import wrap_model @@ -47,8 +46,10 @@ class CaseSymParams: per_channel: bool quant_group: QuantizerGroup ref_scale: np.ndarray + narrow_range: bool +# TODO(dlyakhov): test cases for every narrow_range combination SYM_CASES = ( CaseSymParams( fq_params=FakeQuantizeParameters( @@ -59,6 +60,7 @@ class CaseSymParams: 256, ), per_channel=False, + narrow_range=False, quant_group=QuantizerGroup.ACTIVATIONS, ref_scale=0.49530452, ), @@ -71,6 +73,7 @@ class CaseSymParams: 255, ), per_channel=False, + narrow_range=True, quant_group=QuantizerGroup.WEIGHTS, ref_scale=0.49530452, ), @@ -83,6 +86,7 @@ class CaseSymParams: 256, ), per_channel=True, + narrow_range=False, 
quant_group=QuantizerGroup.ACTIVATIONS, ref_scale=torch.tensor([0.4797816, 0.49920455, 0.48837382]).reshape(1, 3, 1, 1), ), @@ -95,6 +99,7 @@ class CaseSymParams: 255, ), per_channel=True, + narrow_range=True, quant_group=QuantizerGroup.WEIGHTS, ref_scale=torch.tensor([0.48837382, 0.49530452]).reshape(2, 1, 1, 1), ), @@ -106,7 +111,10 @@ def test_quantizer_params_sym(case_to_test: CaseSymParams): per_ch = case_to_test.per_channel fq_params = case_to_test.fq_params quant_group = case_to_test.quant_group - qconfig = QuantizerConfig(num_bits=8, mode=QuantizationMode.SYMMETRIC, per_channel=per_ch) + narrow_range = case_to_test.narrow_range + qconfig = QuantizerConfig( + num_bits=8, mode=QuantizationMode.SYMMETRIC, per_channel=per_ch, narrow_range=narrow_range + ) if not per_ch: scale_shape = [1] @@ -135,6 +143,7 @@ class CaseAsymParams: ref_inp_range: np.ndarray +# TODO(dlyakhov): generate test cases with narrow_range=True ASYM_CASES = ( CaseAsymParams( fq_params=FakeQuantizeParameters( @@ -272,9 +281,12 @@ def calculate_statistics(data, mode, qgroup, half_range=False): min_values=Tensor(torch.tensor(min_values)), max_values=Tensor(torch.tensor(max_values)) ) signedness_to_force = True if qgroup == QuantizerGroup.WEIGHTS else None - qconfig = QuantizerConfig(num_bits=8, mode=mode, per_channel=per_ch, signedness_to_force=signedness_to_force) - narrow_range = get_quantizer_narrow_range(qconfig, qgroup) - fq_params = calculate_quantizer_parameters(statistics, qconfig, qgroup, narrow_range, half_range) + narrow_range = mode == QuantizationMode.SYMMETRIC and qgroup == QuantizerGroup.WEIGHTS + qconfig = QuantizerConfig( + num_bits=8, mode=mode, per_channel=per_ch, signedness_to_force=signedness_to_force, narrow_range=narrow_range + ) + + fq_params = calculate_quantizer_parameters(statistics, qconfig, qgroup, half_range) return {"input_low": fq_params.input_low, "input_high": fq_params.input_high} diff --git a/tests/torch/quantization/test_algo_quantization.py 
b/tests/torch/quantization/test_algo_quantization.py index 0cd1933fbe6..1b242dcb78a 100644 --- a/tests/torch/quantization/test_algo_quantization.py +++ b/tests/torch/quantization/test_algo_quantization.py @@ -26,7 +26,6 @@ from nncf.common.hardware.config import HWConfigType from nncf.common.quantization.structs import NonWeightQuantizerId from nncf.common.quantization.structs import QuantizationScheme as QuantizationMode -from nncf.common.quantization.structs import QuantizerConfig from nncf.common.quantization.structs import WeightQuantizerId from nncf.common.utils.debug import nncf_debug from nncf.torch import create_compressed_model @@ -430,62 +429,6 @@ def test_quantize_inputs(): assert isinstance(quantizer, SymmetricQuantizer) -@pytest.mark.parametrize( - ("requanting_qconf", "base_qconf", "is_valid_requant"), - ( - (QuantizerConfig(), QuantizerConfig(), True), - (QuantizerConfig(num_bits=8), QuantizerConfig(num_bits=6), False), - (QuantizerConfig(num_bits=6), QuantizerConfig(num_bits=8), True), - # Technically placing a per-channel quantization after a per-tensor should not break - # anything or limit the set of output values w.r.t to a single per-tensor quantizer. 
- (QuantizerConfig(num_bits=6, per_channel=True), QuantizerConfig(num_bits=6, per_channel=False), True), - (QuantizerConfig(num_bits=6, per_channel=False), QuantizerConfig(num_bits=6, per_channel=True), True), - (QuantizerConfig(num_bits=5, per_channel=True), QuantizerConfig(num_bits=6, per_channel=False), True), - (QuantizerConfig(num_bits=5, per_channel=False), QuantizerConfig(num_bits=6, per_channel=True), True), - ( - QuantizerConfig(num_bits=5, mode=QuantizationMode.SYMMETRIC), - QuantizerConfig(num_bits=5, mode=QuantizationMode.ASYMMETRIC), - True, - ), - ( - QuantizerConfig(num_bits=5, mode=QuantizationMode.ASYMMETRIC), - QuantizerConfig(num_bits=5, mode=QuantizationMode.SYMMETRIC), - False, - ), - (QuantizerConfig(signedness_to_force=True), QuantizerConfig(), True), - (QuantizerConfig(), QuantizerConfig(signedness_to_force=True), False), - (QuantizerConfig(signedness_to_force=False), QuantizerConfig(), True), - (QuantizerConfig(), QuantizerConfig(signedness_to_force=False), False), - (QuantizerConfig(signedness_to_force=True), QuantizerConfig(signedness_to_force=False), False), - (QuantizerConfig(signedness_to_force=False), QuantizerConfig(signedness_to_force=True), True), - ( - QuantizerConfig(num_bits=4, mode=QuantizationMode.SYMMETRIC, per_channel=False), - QuantizerConfig(num_bits=8, mode=QuantizationMode.SYMMETRIC, per_channel=True), - True, - ), - ( - QuantizerConfig(num_bits=4, mode=QuantizationMode.SYMMETRIC, per_channel=False), - QuantizerConfig(num_bits=8, mode=QuantizationMode.ASYMMETRIC, per_channel=False), - True, - ), - # Neither of the two configs here can requantize the other - ( - QuantizerConfig(num_bits=6, mode=QuantizationMode.ASYMMETRIC), - QuantizerConfig(num_bits=8, mode=QuantizationMode.SYMMETRIC), - False, - ), - ( - QuantizerConfig(num_bits=8, mode=QuantizationMode.SYMMETRIC), - QuantizerConfig(num_bits=6, mode=QuantizationMode.ASYMMETRIC), - False, - ), - ), -) -def test_quantizer_ordering(requanting_qconf: QuantizerConfig, 
base_qconf: QuantizerConfig, is_valid_requant: bool): - test_result = requanting_qconf.is_valid_requantization_for(base_qconf) - assert test_result == is_valid_requant - - class QuantizeOutputsTestModel(nn.Module): def __init__(self): super().__init__()