Skip to content

Commit

Permalink
[HWConfig] narrow_range parameter is introduced in hardware config
Browse files Browse the repository at this point in the history
  • Loading branch information
daniil-lyakhov committed Jan 17, 2025
1 parent 0b80812 commit a9a0c19
Show file tree
Hide file tree
Showing 22 changed files with 496 additions and 188 deletions.
7 changes: 6 additions & 1 deletion nncf/common/hardware/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,8 +176,13 @@ def get_qconf_from_hw_config_subdict(quantization_subdict: Dict[str, Any]) -> Qu
), "Invalid value of quantizer parameter `level_high`.\
The parameter must be consistent with other parameters!"

narrow_range = quantization_subdict["narrow_range"]
return QuantizerConfig(
num_bits=bits, mode=mode, per_channel=is_per_channel, signedness_to_force=signedness_to_force
num_bits=bits,
mode=mode,
per_channel=is_per_channel,
signedness_to_force=signedness_to_force,
narrow_range=narrow_range,
)

@staticmethod
Expand Down
15 changes: 10 additions & 5 deletions nncf/common/hardware/configs/cpu.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,17 @@
"mode": [
"symmetric"
],
"granularity": "pertensor"
"granularity": "pertensor",
"narrow_range": false
},
"q8_a": {
"bits": 8,
"mode": [
"symmetric",
"asymmetric"
],
"granularity": "pertensor"
"granularity": "pertensor",
"narrow_range": false
},
"q8_a_ch": {
"bits": 8,
Expand All @@ -26,19 +28,22 @@
"granularity": [
"perchannel",
"pertensor"
]
],
"narrow_range": false
},
"q8_w_sym": {
"bits": 8,
"mode": "symmetric",
"level_low": -128,
"level_high": 127,
"granularity": ["perchannel", "pertensor"]
"granularity": ["perchannel", "pertensor"],
"narrow_range": true
},
"q8_w_asym": {
"bits": 8,
"mode": "asymmetric",
"granularity": ["perchannel", "pertensor"]
"granularity": ["perchannel", "pertensor"],
"narrow_range": false
}
}
},
Expand Down
12 changes: 8 additions & 4 deletions nncf/common/hardware/configs/gpu.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
"symmetric",
"asymmetric"
],
"granularity": "pertensor"
"granularity": "pertensor",
"narrow_range": false
},
"q8_a_ch": {
"bits": 8,
Expand All @@ -19,7 +20,8 @@
"granularity": [
"perchannel",
"pertensor"
]
],
"narrow_range": false
},
"q8_w_sym": {
"bits": 8,
Expand All @@ -29,15 +31,17 @@
"granularity": [
"perchannel",
"pertensor"
]
],
"narrow_range": true
},
"q8_w_asym": {
"bits": 8,
"mode": "asymmetric",
"granularity": [
"perchannel",
"pertensor"
]
],
"narrow_range": false
}
}
},
Expand Down
30 changes: 20 additions & 10 deletions nncf/common/hardware/configs/npu.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,17 @@
"mode": [
"symmetric"
],
"granularity": "pertensor"
"granularity": "pertensor",
"narrow_range": false
},
"q8_a": {
"bits": 8,
"mode": [
"symmetric",
"asymmetric"
],
"granularity": "pertensor"
"granularity": "pertensor",
"narrow_range": false
},
"q8_a_ch": {
"bits": 8,
Expand All @@ -26,52 +28,60 @@
"granularity": [
"perchannel",
"pertensor"
]
],
"narrow_range": false
},
"q8_w_sym": {
"bits": 8,
"mode": "symmetric",
"level_low": -128,
"level_high": 127,
"granularity": ["perchannel", "pertensor"]
"granularity": ["perchannel", "pertensor"],
"narrow_range": true
},
"q8_w_asym": {
"bits": 8,
"mode": "asymmetric",
"granularity": ["perchannel", "pertensor"]
"granularity": ["perchannel", "pertensor"],
"narrow_range": false
},
// 4-bit configs
"q4_tn": {
"bits": 4,
"mode": "symmetric",
"granularity": "pertensor"
"granularity": "pertensor",
"narrow_range": false
},
"q4_ch": {
"bits": 4,
"mode": "symmetric",
"granularity": "perchannel"
"granularity": "perchannel",
"narrow_range": false
},
"q4_w": {
"bits": 4,
"mode": "symmetric",
"granularity": [
"perchannel",
"pertensor"
]
],
"narrow_range": false
},
// 2-bit configs
"q2_ch": {
"bits": 2,
"mode": "symmetric",
"granularity": "perchannel"
"granularity": "perchannel",
"narrow_range": false
},
"q2_w": {
"bits": 2,
"mode": "symmetric",
"granularity": [
"perchannel",
"pertensor"
]
],
"narrow_range": false
}
}
},
Expand Down
9 changes: 7 additions & 2 deletions nncf/common/hardware/configs/template.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,19 @@
"granularity": [
"pertensor",
"perchannel"
]
],
/*
* Narrow range: whether NNCF should use 2**num_bits - 1 quantization levels instead of 2**num_bits
*/
"narrow_range": false
},
"q8_sym_tnr_-128_127": { // Alias name for set of hyperparameters
"bits": 8, // Number of quantization bits
"mode": "symmetric", // Quantization mode
"granularity": "pertensor", // Granularity: one scale for output tensor
"level_low": -128, // Low quantization level
"level_high": 127 // High quantization level
"level_high": 127, // High quantization level
"narrow_range": false
}
}
},
Expand Down
2 changes: 2 additions & 0 deletions nncf/common/quantization/config_assignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ def get_scoped_quantizer_config(
qconfig.per_channel = config_overrides["per_channel"]
if config_overrides.get("signed") is not None:
qconfig.signedness_to_force = config_overrides["signed"]
if config_overrides.get("narrow_range") is not None:
qconfig.narrow_range = config_overrides["narrow_range"]
return qconfig


Expand Down
4 changes: 4 additions & 0 deletions nncf/common/quantization/quantizer_propagation/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -1145,6 +1145,10 @@ def is_downstream_quantizer_redundant(
(ds_config.per_channel == us_config.per_channel)
or (ds_config.per_channel is True and us_config.per_channel is False)
)

# Strictly prohibit merging of configs with different narrow_range params
is_redundant = is_redundant and (ds_config.narrow_range == us_config.narrow_range)

return is_redundant

def merge_traverse_fn(
Expand Down
10 changes: 7 additions & 3 deletions nncf/common/quantization/quantizer_propagation/solver.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@ def is_final_qconfig_compatible_to_initial(initial_qconfig: QuantizerConfig):
final_qconfig.per_channel == initial_qconfig.per_channel
and final_qconfig.mode == initial_qconfig.mode
and final_qconfig.num_bits == initial_qconfig.num_bits
and final_qconfig.narrow_range == initial_qconfig.narrow_range
and (
final_qconfig.signedness_to_force == initial_qconfig.signedness_to_force
or initial_qconfig.signedness_to_force is None
Expand Down Expand Up @@ -296,7 +297,9 @@ class QuantizerPropagationSolver:
"""

DEFAULT_QUANTIZATION_TYPES = [
QuantizerConfig(num_bits=8, mode=QuantizationMode.SYMMETRIC, signedness_to_force=None, per_channel=False)
QuantizerConfig(
num_bits=8, mode=QuantizationMode.SYMMETRIC, signedness_to_force=None, per_channel=False, narrow_range=False
)
]

DEFAULT_PROPAGATION_STRATEGY = QuantizerPropagationRule.MERGE_ALL_IN_ONE
Expand Down Expand Up @@ -1374,7 +1377,7 @@ def get_merged_qconfigs_for_downward_branching_case(
Returns a tuple, of which the first node is the qconfig list for the quantizer to be placed
above the branching node (i.e. that will affect all of the downward branches), and a list
of nodes which are either None (which means that the corresponding branch quantizer has been successfully
merged, or qconfigs list to be set for the corresponding branch quantizer if it cannot be merged (e.g. if
merged), or qconfigs list to be set for the corresponding branch quantizer if it cannot be merged (e.g. if
requantization to a lower bitwidth has to be done for this branch)
:param potential_qconfigs_for_each_branch: For each branch defines the list of available configurations
Expand Down Expand Up @@ -1492,7 +1495,8 @@ def __disambiguate_config_list(
"""
The input list should be sorted in descending order of priority. In case some qconfigs in the list have the
same priority, this function will resolve the ambiguity in ordering these qconfigs in the final returned
list.
list. Quantization configs in the list cannot have different narrow_range values, so narrow_range does
not participate in the __lt__ method of the QConfigComparator.
"""

class QConfigComparator:
Expand Down
6 changes: 5 additions & 1 deletion nncf/common/quantization/quantizer_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,11 @@
QuantizationPointId = int

DEFAULT_QUANTIZER_CONFIG = QuantizerConfig(
num_bits=8, mode=QuantizationMode.SYMMETRIC, signedness_to_force=None, per_channel=False
num_bits=8,
mode=QuantizationMode.SYMMETRIC,
signedness_to_force=None,
per_channel=False,
narrow_range=False,
)


Expand Down
28 changes: 8 additions & 20 deletions nncf/common/quantization/structs.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from nncf.common.graph import NNCFNodeName
from nncf.common.utils.api_marker import api
from nncf.config.schemata.defaults import QUANTIZATION_BITS
from nncf.config.schemata.defaults import QUANTIZATION_NARROW_RANGE
from nncf.config.schemata.defaults import QUANTIZATION_PER_CHANNEL
from nncf.parameters import StrEnum
from nncf.parameters import TargetDevice
Expand Down Expand Up @@ -48,6 +49,7 @@ def __init__(
mode: QuantizationScheme = QuantizationScheme.SYMMETRIC,
signedness_to_force: Optional[bool] = None,
per_channel: bool = QUANTIZATION_PER_CHANNEL,
narrow_range: bool = QUANTIZATION_NARROW_RANGE,
):
"""
:param num_bits: Bitwidth of the quantization.
Expand All @@ -61,18 +63,20 @@ def __init__(
self.mode = mode
self.signedness_to_force = signedness_to_force
self.per_channel = per_channel
self.narrow_range = narrow_range

def __eq__(self, other: object) -> bool:
    """
    Compare this config with another object.

    :param other: Object to compare against.
    :return: True only if `other` is a QuantizerConfig whose instance attributes
        (`__dict__`) are all equal to this config's attributes; False otherwise.
    """
    # Short-circuit on the type check so `__dict__` is read only for QuantizerConfig objects.
    return isinstance(other, QuantizerConfig) and self.__dict__ == other.__dict__

def __str__(self) -> str:
return "B:{bits} M:{mode} SGN:{signedness} PC:{per_channel}".format(
return "B:{bits} M:{mode} SGN:{signedness} PC:{per_channel} NR:{narrow_range}".format(
bits=self.num_bits,
mode="S" if self.mode == QuantizationScheme.SYMMETRIC else "A",
signedness="ANY" if self.signedness_to_force is None else ("S" if self.signedness_to_force else "U"),
per_channel="Y" if self.per_channel else "N",
narrow_range="Y" if self.narrow_range else "N",
)

def __hash__(self) -> int:
Expand All @@ -93,26 +97,9 @@ def is_valid_requantization_for(self, other: "QuantizerConfig") -> bool:
self.mode is QuantizationScheme.ASYMMETRIC and other.mode is QuantizationScheme.SYMMETRIC,
self.signedness_to_force is None and other.signedness_to_force is not None,
self.signedness_to_force is True and other.signedness_to_force is False,
self.narrow_range != other.narrow_range,
]
if any(fail_conditions):
return False
return True

def compatible_with_a_unified_scale_linked_qconfig(self, linked_qconfig: "QuantizerConfig") -> bool:
"""
For two configs to be compatible in a unified scale scenario, all of their fundamental parameters
must be aligned.
:param linked_qconfig: A QuantizerConfig that is compared against the current config.
:return: A boolean value specifying whether `linked_qconfig` is compatible with the current config in terms
of scale unification.
"""
return (
self.num_bits == linked_qconfig.num_bits
and self.mode == linked_qconfig.mode
and self.signedness_to_force == linked_qconfig.signedness_to_force
and self.per_channel == linked_qconfig.per_channel
)
return not any(fail_conditions)

def is_a_bitwidth_variant(self, other_qconfig: "QuantizerConfig") -> bool:
"""
Expand All @@ -138,6 +125,7 @@ def get_state(self) -> Dict[str, Any]:
"mode": self.mode,
"signedness_to_force": self.signedness_to_force,
"per_channel": self.per_channel,
"narrow_range": self.narrow_range,
}

@classmethod
Expand Down
1 change: 1 addition & 0 deletions nncf/config/schemata/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
QUANTIZATION_OVERFLOW_FIX = "enable"
QUANTIZATION_BITS = 8
QUANTIZATION_PER_CHANNEL = False
QUANTIZATION_NARROW_RANGE = False
QUANTIZATION_LOGARITHM_SCALE = False

ACTIVATIONS_QUANT_START_EPOCH = 1
Expand Down
Loading

0 comments on commit a9a0c19

Please sign in to comment.