From b1df29f0bc860c5aaff0c7b5dd97adc2bd5c659c Mon Sep 17 00:00:00 2001
From: Liubov Talamanova
Date: Thu, 16 May 2024 12:08:31 +0100
Subject: [PATCH] minor fixes

---
 .../weights_compression/Usage.md                |  2 +-
 nncf/parameters.py                              |  4 ++--
 .../algorithms/weight_compression/algorithm.py  |  4 ++--
 nncf/quantization/quantize_model.py             |  6 +++---
 tests/torch/ptq/test_weights_compression.py     | 11 ++++++++---
 5 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/docs/usage/post_training_compression/weights_compression/Usage.md b/docs/usage/post_training_compression/weights_compression/Usage.md
index a27bb2bb50a..8d2f56143f5 100644
--- a/docs/usage/post_training_compression/weights_compression/Usage.md
+++ b/docs/usage/post_training_compression/weights_compression/Usage.md
@@ -9,7 +9,7 @@ The Weights Compression algorithm is aimed at compressing the weights of the mod
 #### Supported modes
 
 By default, weights are compressed asymmetrically to 8-bit integer data type - "INT8_ASYM" mode.
-OpenVINO backend also supports 3 modes of mixed precision weight quantization with a 4-bit data type as a primary precision - INT4_SYM, INT4_ASYM and NF4. The primary precision in case of INT4_SYM mode is signed 4-bit integer and weights are quantized to it [symmetrically](/docs/usage/training_time_compression/other_algorithms/LegacyQuantization.md#symmetric-quantization) without zero point. In case of INT4_ASYM mode - unsigned 4-bit integer, but weight are quantized to it [asymmetrically](/docs/usage/training_time_compression/other_algorithms/LegacyQuantization.md#asymmetric-quantization) with a typical non-fixed zero point. In case of NF4 mode - [nf4](https://arxiv.org/pdf/2305.14314v1.pdf) data type without zero point.
+OpenVINO backend also supports 3 modes of mixed precision weight quantization with a 4-bit data type as a primary precision - INT4_SYM, INT4_ASYM and NF4. The primary precision in case of INT4_SYM mode is signed 4-bit integer and weights are quantized to it [symmetrically](/docs/usage/training_time_compression/other_algorithms/LegacyQuantization.md#symmetric-quantization) without zero point. In case of INT4_ASYM mode - unsigned 4-bit integer and weight are quantized to it [asymmetrically](/docs/usage/training_time_compression/other_algorithms/LegacyQuantization.md#asymmetric-quantization) with a typical non-fixed zero point. In case of NF4 mode - [nf4](https://arxiv.org/pdf/2305.14314v1.pdf) data type without zero point.
 All 4-bit modes have a grouped quantization support, when small group of weights (e.g. 128) in the channel dimension share quantization parameters (scale).
 All embeddings, convolutions and last linear layers are always compressed to 8-bit integer data type. To quantize embeddings and last linear layers to 4-bit, use `all_layers=True`. Percent of the rest layers compressed to 4-bit can be configured by "ratio" parameter. E.g. ratio=0.9 means 90% of layers compressed to the corresponding 4-bit data type and the rest to 8-bit asymmetric integer data type.
 
diff --git a/nncf/parameters.py b/nncf/parameters.py
index e8ef12b4979..7c4db15ac54 100644
--- a/nncf/parameters.py
+++ b/nncf/parameters.py
@@ -68,13 +68,13 @@ class CompressWeightsMode(StrEnum):
     """
     Defines a mode for weight compression.
     :param INT8_SYM: Stands for 8-bit integer symmetric quantization of all weights.
-        Weights are quantized symmetrically with a fixed zero point equals to 128.
+        Weights are quantized symmetrically without zero point.
         https://github.com/openvinotoolkit/nncf/blob/develop/docs/compression_algorithms/Quantization.md#symmetric-quantization
     :param INT8_ASYM: The same as INT8_SYM mode, but weights are quantized to a primary precision asymmetrically
         with a typical non-fixed zero point.
         https://github.com/openvinotoolkit/nncf/blob/develop/docs/compression_algorithms/Quantization.md#asymmetric-quantization
     :param INT4_SYM: Stands for a mixed-precision weights quantization with 4-bit integer as a primary precision.
-        Weights are quantized to a primary precision symmetrically with a fixed zero point equals to 8.
+        Weights are quantized to a primary precision symmetrically without zero point.
         All embeddings and the last layer are always compressed to a backup precision, which is INT8_ASYM,
         by default. All others are quantized whether to 4-bit integer or to a backup precision depending on
         criteria and the given ratio.
diff --git a/nncf/quantization/algorithms/weight_compression/algorithm.py b/nncf/quantization/algorithms/weight_compression/algorithm.py
index 1852ff546ed..3b9eade5ea7 100644
--- a/nncf/quantization/algorithms/weight_compression/algorithm.py
+++ b/nncf/quantization/algorithms/weight_compression/algorithm.py
@@ -70,11 +70,11 @@ def __init__(
         """
         :param mode: Defines a mode for weight compression.
             INT8_SYM stands for 8-bit integer symmetric quantization of all weights.
-                Weights are quantized symmetrically with a fixed zero point equals to 128.
+                Weights are quantized symmetrically without zero point.
             INT8_ASYM is the same as INT8_SYM mode, but weights are quantized to a primary precision asymmetrically
                 with a typical non-fixed zero point.
             INT4_SYM stands for a mixed-precision weights quantization with 4-bit integer as a primary precision.
-                Weights are quantized to a primary precision symmetrically with a fixed zero point equals to 8.
+                Weights are quantized to a primary precision symmetrically without zero point.
             All embeddings and the last layer are always compressed to a backup precision, which is INT8_ASYM,
                 by default. All others are quantized whether to 4-bit integer or to a backup precision depending on
                 criteria and the given ratio.
diff --git a/nncf/quantization/quantize_model.py b/nncf/quantization/quantize_model.py
index fc6f5c07cde..46426d77c21 100644
--- a/nncf/quantization/quantize_model.py
+++ b/nncf/quantization/quantize_model.py
@@ -348,11 +348,11 @@ def compress_weights(
     :param model: A model to be compressed.
     :type model: TModel
     :param mode: Defines a mode for weight compression.
-        INT8_SYM stands for 8-bit integer symmetric quantization of all weights.
+        INT8_SYM stands for 8-bit integer symmetric quantization of all weights without zero point.
         INT8_ASYM is the same as INT8_SYM mode, but weights are quantized to a primary precision asymmetrically
             with a typical non-fixed zero point.
         INT4_SYM stands for a mixed-precision weights quantization with 4-bit integer as a primary precision.
-            Weights are quantized to a primary precision symmetrically with a fixed zero point equals to 8.
+            Weights are quantized to a primary precision symmetrically without zero point.
         All embeddings and the last layer are always compressed to a backup precision, which is INT8_ASYM,
             by default. All others are quantized whether to 4-bit integer or to a backup precision depending on
             criteria and the given ratio.
@@ -393,7 +393,7 @@ def compress_weights(
     """
     if mode == CompressWeightsMode.INT8:
         warning_deprecated(
-            "`CompressWeightsMode.INT8` is deprecated." "Please, use `CompressWeightsMode.INT8_ASYM` as value instead."
+            "`CompressWeightsMode.INT8` is deprecated. Please, use `CompressWeightsMode.INT8_ASYM` as value instead."
         )
         mode = CompressWeightsMode.INT8_ASYM
 
diff --git a/tests/torch/ptq/test_weights_compression.py b/tests/torch/ptq/test_weights_compression.py
index a4d165c9077..30c704e5435 100644
--- a/tests/torch/ptq/test_weights_compression.py
+++ b/tests/torch/ptq/test_weights_compression.py
@@ -17,6 +17,7 @@
 from nncf import SensitivityMetric
 from nncf.quantization import compress_weights
 from nncf.torch import wrap_model
+from nncf.torch.quantization.layers import AsymmetricWeightsDecompressor
 from nncf.torch.quantization.layers import SymmetricWeightsDecompressor
 
 DATA_BASED_SENSITIVITY_METRICS = (
@@ -127,16 +128,20 @@ def test_compress_weights(mode):
     assert n_compressed_weights == n_target_modules
 
 
-def test_compress_weights_functional_model():
+@pytest.mark.parametrize("mode", (CompressWeightsMode.INT8_SYM, CompressWeightsMode.INT8_ASYM))
+def test_compress_weights_functional_model(mode):
     model = FunctionalModel()
+    decompressor_type = (
+        SymmetricWeightsDecompressor if mode == CompressWeightsMode.INT8_SYM else AsymmetricWeightsDecompressor
+    )
 
     input_ids = torch.randint(0, 10, [1, 3, 300, 300])
     wrapped_model = wrap_model(model, example_input=input_ids, trace_parameters=True)
-    compressed_model = compress_weights(wrapped_model, mode=CompressWeightsMode.INT8_SYM)
+    compressed_model = compress_weights(wrapped_model, mode=mode)
 
     n_compressed_weights = 0
     for layer in compressed_model.nncf.external_op.values():
-        if isinstance(layer, SymmetricWeightsDecompressor):
+        if isinstance(layer, decompressor_type):
             n_compressed_weights += 1
     assert n_compressed_weights == 4