Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[HWConfig] narrow_range parameter is introduced in hardware config #3196

Open
wants to merge 6 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion nncf/common/hardware/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,8 +176,13 @@ def get_qconf_from_hw_config_subdict(quantization_subdict: Dict[str, Any]) -> Qu
), "Invalid value of quantizer parameter `level_high`.\
The parameter must be consistent with other parameters!"

narrow_range = quantization_subdict["narrow_range"]
return QuantizerConfig(
num_bits=bits, mode=mode, per_channel=is_per_channel, signedness_to_force=signedness_to_force
num_bits=bits,
mode=mode,
per_channel=is_per_channel,
signedness_to_force=signedness_to_force,
narrow_range=narrow_range,
)

@staticmethod
Expand Down
25 changes: 19 additions & 6 deletions nncf/common/hardware/configs/cpu.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,17 @@
"mode": [
"symmetric"
],
"granularity": "pertensor"
"granularity": "pertensor",
"narrow_range": false
},
"q8_a": {
"bits": 8,
"mode": [
"symmetric",
"asymmetric"
],
"granularity": "pertensor"
"granularity": "pertensor",
"narrow_range": false
},
"q8_a_ch": {
"bits": 8,
Expand All @@ -26,19 +28,30 @@
"granularity": [
"perchannel",
"pertensor"
]
],
"narrow_range": false
},
"q8_w_sym": {
"bits": 8,
"mode": "symmetric",
"level_low": -128,
"level_high": 127,
"granularity": ["perchannel", "pertensor"]
"granularity": ["perchannel", "pertensor"],
"narrow_range": true
},
"q8_w_sym_any_nr": {
"bits": 8,
"mode": "symmetric",
"level_low": -128,
"level_high": 127,
"granularity": ["perchannel", "pertensor"],
"narrow_range": [true, false]
},
"q8_w_asym": {
"bits": 8,
"mode": "asymmetric",
"granularity": ["perchannel", "pertensor"]
"granularity": ["perchannel", "pertensor"],
"narrow_range": false
}
}
},
Expand Down Expand Up @@ -288,7 +301,7 @@
{
"type": "Embedding",
"quantization": {
"weights": ["q8_w_sym", "q8_w_asym"]
"weights": ["q8_w_sym_any_nr", "q8_w_asym"]
}
},
{"type": "EmbeddingBag"}
Expand Down
12 changes: 8 additions & 4 deletions nncf/common/hardware/configs/gpu.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
"symmetric",
"asymmetric"
],
"granularity": "pertensor"
"granularity": "pertensor",
"narrow_range": false
},
"q8_a_ch": {
"bits": 8,
Expand All @@ -19,7 +20,8 @@
"granularity": [
"perchannel",
"pertensor"
]
],
"narrow_range": false
},
"q8_w_sym": {
"bits": 8,
Expand All @@ -29,15 +31,17 @@
"granularity": [
"perchannel",
"pertensor"
]
],
"narrow_range": true
},
"q8_w_asym": {
"bits": 8,
"mode": "asymmetric",
"granularity": [
"perchannel",
"pertensor"
]
],
"narrow_range": false
}
}
},
Expand Down
42 changes: 30 additions & 12 deletions nncf/common/hardware/configs/npu.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,17 @@
"mode": [
"symmetric"
],
"granularity": "pertensor"
"granularity": "pertensor",
"narrow_range": false
},
"q8_a": {
"bits": 8,
"mode": [
"symmetric",
"asymmetric"
],
"granularity": "pertensor"
"granularity": "pertensor",
"narrow_range": false
},
"q8_a_ch": {
"bits": 8,
Expand All @@ -26,52 +28,68 @@
"granularity": [
"perchannel",
"pertensor"
]
],
"narrow_range": false
},
"q8_w_sym": {
"bits": 8,
"mode": "symmetric",
"level_low": -128,
"level_high": 127,
"granularity": ["perchannel", "pertensor"]
"granularity": ["perchannel", "pertensor"],
"narrow_range": true
},
"q8_w_sym_any_nr": {
"bits": 8,
"mode": "symmetric",
"level_low": -128,
"level_high": 127,
"granularity": ["perchannel", "pertensor"],
"narrow_range": [true, false]
},
"q8_w_asym": {
"bits": 8,
"mode": "asymmetric",
"granularity": ["perchannel", "pertensor"]
"granularity": ["perchannel", "pertensor"],
"narrow_range": false
},
// 4-bit configs
"q4_tn": {
"bits": 4,
"mode": "symmetric",
"granularity": "pertensor"
"granularity": "pertensor",
"narrow_range": false
},
"q4_ch": {
"bits": 4,
"mode": "symmetric",
"granularity": "perchannel"
"granularity": "perchannel",
"narrow_range": false
},
"q4_w": {
"bits": 4,
"mode": "symmetric",
"granularity": [
"perchannel",
"pertensor"
]
],
"narrow_range": false
},
// 2-bit configs
"q2_ch": {
"bits": 2,
"mode": "symmetric",
"granularity": "perchannel"
"granularity": "perchannel",
"narrow_range": false
},
"q2_w": {
"bits": 2,
"mode": "symmetric",
"granularity": [
"perchannel",
"pertensor"
]
],
"narrow_range": false
}
}
},
Expand Down Expand Up @@ -382,15 +400,15 @@
"type": "Embedding",
"quantization": {
"weights": [
"q8_w_sym", "q8_w_asym"
"q8_w_sym_any_nr", "q8_w_asym"
]
}
},
{
"type": "EmbeddingBag",
"quantization": {
"weights": [
"q8_w_sym", "q8_w_asym"
"q8_w_sym_any_nr", "q8_w_asym"
]
}
},
Expand Down
9 changes: 7 additions & 2 deletions nncf/common/hardware/configs/template.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,19 @@
"granularity": [
"pertensor",
"perchannel"
]
],
/*
* Narrow range: whether NNCF should use 2**num_bits - 1 quantization levels (true) or the full 2**num_bits levels (false)
*/
"narrow_range": false
},
"q8_sym_tnr_-128_127": { // Alias name for set of hyperparameters
"bits": 8, // Number of quantization bits
"mode": "symmetric", // Quantization mode
"granularity": "pertensor", // Granularity: one scale for output tensor
"level_low": -128, // Low quantization level
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does NNCF support level_low and level_high?

Copy link
Contributor

@ljaljushkin ljaljushkin Jan 28, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't it redundant, since other params define how to calculate them?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"level_high": 127 // High quantization level
"level_high": 127, // High quantization level
"narrow_range": false
}
}
},
Expand Down
2 changes: 2 additions & 0 deletions nncf/common/quantization/config_assignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ def get_scoped_quantizer_config(
qconfig.per_channel = config_overrides["per_channel"]
if config_overrides.get("signed") is not None:
qconfig.signedness_to_force = config_overrides["signed"]
if config_overrides.get("narrow_range") is not None:
qconfig.narrow_range = config_overrides["narrow_range"]
return qconfig


Expand Down
4 changes: 4 additions & 0 deletions nncf/common/quantization/quantizer_propagation/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -1165,6 +1165,10 @@ def is_downstream_quantizer_redundant(
(ds_config.per_channel == us_config.per_channel)
or (ds_config.per_channel is True and us_config.per_channel is False)
)

# Strictly prohibit merging of configs with different narrow_range params
is_redundant = is_redundant and (ds_config.narrow_range == us_config.narrow_range)

return is_redundant

def merge_traverse_fn(
Expand Down
15 changes: 12 additions & 3 deletions nncf/common/quantization/quantizer_propagation/solver.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
from nncf.common.utils.debug import DEBUG_LOG_DIR
from nncf.common.utils.debug import is_debug
from nncf.common.utils.dot_file_rw import write_dot_graph
from nncf.config.schemata.defaults import QUANTIZATION_NARROW_RANGE


class TransitionStatus(Enum):
Expand Down Expand Up @@ -161,6 +162,7 @@ def is_final_qconfig_compatible_to_initial(initial_qconfig: QuantizerConfig) ->
final_qconfig.per_channel == initial_qconfig.per_channel
and final_qconfig.mode == initial_qconfig.mode
and final_qconfig.num_bits == initial_qconfig.num_bits
and final_qconfig.narrow_range == initial_qconfig.narrow_range
and (
final_qconfig.signedness_to_force == initial_qconfig.signedness_to_force
or initial_qconfig.signedness_to_force is None
Expand Down Expand Up @@ -301,7 +303,13 @@ class QuantizerPropagationSolver:
"""

DEFAULT_QUANTIZATION_TYPES = [
QuantizerConfig(num_bits=8, mode=QuantizationMode.SYMMETRIC, signedness_to_force=None, per_channel=False)
QuantizerConfig(
num_bits=8,
mode=QuantizationMode.SYMMETRIC,
signedness_to_force=None,
per_channel=False,
narrow_range=QUANTIZATION_NARROW_RANGE,
)
]

DEFAULT_PROPAGATION_STRATEGY = QuantizerPropagationRule.MERGE_ALL_IN_ONE
Expand Down Expand Up @@ -1373,7 +1381,7 @@ def get_merged_qconfigs_for_downward_branching_case(
Returns a tuple, of which the first node is the qconfig list for the quantizer to be placed
above the branching node (i.e. that will affect all of the downward branches), and a list
of nodes which are either None (which means that the corresponding branch quantizer has been successfully
merged, or qconfigs list to be set for the corresponding branch quantizer if it cannot be merged (e.g. if
merged), or qconfigs list to be set for the corresponding branch quantizer if it cannot be merged (e.g. if
requantization to a lower bitwidth has to be done for this branch)

:param potential_qconfigs_for_each_branch: For each branch defines the list of available configurations
Expand Down Expand Up @@ -1494,7 +1502,8 @@ def __disambiguate_config_list(
"""
The input list should be sorted in descending order of priority. In case some qconfigs in the list have the
same priority, this function will resolve the ambiguity in ordering these qconfigs in the final returned
list.
list. The quantization configs cannot differ in their narrow range parameter, so that parameter
does not participate in the __lt__ method of the QConfigComparator.
"""

class QConfigComparator:
Expand Down
7 changes: 6 additions & 1 deletion nncf/common/quantization/quantizer_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,16 @@
from nncf.common.quantization.structs import UnifiedScaleType
from nncf.common.quantization.structs import WeightQuantizerId
from nncf.common.stateful_classes_registry import CommonStatefulClassesRegistry
from nncf.config.schemata.defaults import QUANTIZATION_NARROW_RANGE

QuantizationPointId = int

DEFAULT_QUANTIZER_CONFIG = QuantizerConfig(
num_bits=8, mode=QuantizationMode.SYMMETRIC, signedness_to_force=None, per_channel=False
num_bits=8,
KodiaqQ marked this conversation as resolved.
Show resolved Hide resolved
mode=QuantizationMode.SYMMETRIC,
signedness_to_force=None,
per_channel=False,
narrow_range=QUANTIZATION_NARROW_RANGE,
)


Expand Down
Loading