[Experimental][TorchFX] quantize_pt2e + X86Quantizer introduction (op…

…envinotoolkit#3121) ### Changes Introduction of `quantize_pt2e` method ### Reason for changes ### Related tickets openvinotoolkit#2766 ### Tests graph tests: `tests/torch/fx/test_quantizer.py`
daniil-lyakhov · Jan 21, 2025 · d1b5229 · d1b5229
1 parent 0b80812
commit d1b5229
Show file tree

Hide file tree

Showing 20 changed files with 10,263 additions and 65 deletions.
diff --git a/nncf/experimental/quantization/algorithms/post_training/__init__.py b/nncf/experimental/quantization/algorithms/post_training/__init__.py
@@ -0,0 +1,10 @@
+# Copyright (c) 2025 Intel Corporation
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#      http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/nncf/experimental/quantization/algorithms/post_training/algorithm.py b/nncf/experimental/quantization/algorithms/post_training/algorithm.py
@@ -0,0 +1,106 @@
+# Copyright (c) 2025 Intel Corporation
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#      http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import itertools
+from typing import Callable, List, Optional, TypeVar
+
+from nncf import Dataset
+from nncf.common.graph.graph import NNCFGraph
+from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer
+from nncf.common.utils.backend import BackendType
+from nncf.experimental.quantization.algorithms.post_training.pipeline import experimental_create_ptq_pipeline
+from nncf.experimental.quantization.quantizers.quantizer import Quantizer
+from nncf.quantization.advanced_parameters import AdvancedBiasCorrectionParameters
+from nncf.quantization.advanced_parameters import AdvancedSmoothQuantParameters
+from nncf.quantization.advanced_parameters import RangeEstimatorParameters
+from nncf.quantization.algorithms.algorithm import Algorithm
+
+TModel = TypeVar("TModel")  # Stand-in for the type of the model being quantized.
+TPass = Callable[[TModel], TModel]  # A callable that takes a model and returns a model.
+
+
+class ExperimentalPostTrainingQuantization(Algorithm):
+    """
+    Implements Experimental Post-Training Quantization algorithm, which basically includes:
+    1) SmoothQuant (optional)
+    2) MinMaxRangeEstimator
+    3) FastBiasCorrection or BiasCorrection (optional)
+    """
+
+    def __init__(
+        self,
+        quantizer: Quantizer,
+        subset_size: int = 300,
+        fast_bias_correction: Optional[bool] = True,
+        smooth_quant: bool = False,
+        bias_correction_params: Optional[AdvancedBiasCorrectionParameters] = None,
+        smooth_quant_params: Optional[AdvancedSmoothQuantParameters] = None,
+        activations_range_estimator_params: Optional[RangeEstimatorParameters] = None,
+        weights_range_estimator_params: Optional[RangeEstimatorParameters] = None,
+        batchwise_statistics: bool = False,
+    ):
+        """
+        :param quantizer: Quantizer to use in MinMaxRangeEstimator algorithm.
+        :param subset_size: Size of a subset to calculate activations
+            statistics used for quantization.
+        :param fast_bias_correction: Setting this option to `False` enables a different
+            bias correction method which is more accurate, in general, and takes
+            more time but requires less memory. None disables the bias correction algorithm.
+        :param smooth_quant: Setting this option to `True` enables the SmoothQuant algorithm.
+        :param bias_correction_params: Contains advanced parameters for fine-tuning bias correction algorithm.
+        :param smooth_quant_params: Contains advanced alpha parameters for SmoothQuant algorithm.
+        :param activations_range_estimator_params: Contains parameters for estimating the range
+            of activations of the model.
+        :param weights_range_estimator_params: Contains parameters for estimating the range
+            of weights of the model.
+        :param batchwise_statistics: Determines whether quantizer statistics should be calculated
+            for each item of the batch or for the entire batch, default is False.
+        """
+        self._pipeline = experimental_create_ptq_pipeline(
+            quantizer=quantizer,
+            subset_size=subset_size,
+            fast_bias_correction=fast_bias_correction,
+            smooth_quant=smooth_quant,
+            bias_correction_params=bias_correction_params,
+            smooth_quant_params=smooth_quant_params,
+            activations_range_estimator_params=activations_range_estimator_params,
+            weights_range_estimator_params=weights_range_estimator_params,
+            batchwise_statistics=batchwise_statistics,
+        )
+
+    @property
+    def available_backends(self) -> List[BackendType]:
+        backends = set(BackendType)
+        for algorithm in itertools.chain.from_iterable(self._pipeline.pipeline_steps):
+            backends = backends.intersection(algorithm.available_backends)
+        return list(backends)
+
+    def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPointsContainer:
+        return self._pipeline.get_statistic_points_for_step(0, model, graph)  # 0 = first pipeline step
+
+    def apply(
+        self,
+        model: TModel,
+        graph: NNCFGraph,
+        statistic_points: Optional[StatisticPointsContainer] = None,
+        dataset: Optional[Dataset] = None,
+    ) -> TModel:
+        if dataset is None and len(self._pipeline.pipeline_steps) > 1:
+            raise ValueError(
+                "A dataset is required for the post-training quantization "
+                "algorithm to collect statistics for intermediate models."
+            )
+
+        step_index_to_statistics = None
+        if statistic_points:
+            step_index_to_statistics = {0: statistic_points}  # given statistics are registered for step 0 only
+
+        return self._pipeline.run_from_step(model, dataset, graph, 0, step_index_to_statistics)
diff --git a/nncf/experimental/quantization/algorithms/post_training/pipeline.py b/nncf/experimental/quantization/algorithms/post_training/pipeline.py
@@ -0,0 +1,117 @@
+# Copyright (c) 2025 Intel Corporation
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#      http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Optional, TypeVar
+
+from nncf.experimental.quantization.algorithms.range_estimator.algorithm import MinMaxRangeEstimator
+from nncf.experimental.quantization.quantizers.quantizer import Quantizer
+from nncf.quantization.advanced_parameters import AdvancedBiasCorrectionParameters
+from nncf.quantization.advanced_parameters import AdvancedSmoothQuantParameters
+from nncf.quantization.advanced_parameters import RangeEstimatorParameters
+from nncf.quantization.algorithms.bias_correction.algorithm import BIAS_CORRECTION_THRESHOLD
+from nncf.quantization.algorithms.bias_correction.algorithm import BiasCorrection
+from nncf.quantization.algorithms.fast_bias_correction.algorithm import FAST_BIAS_CORRECTION_THRESHOLD
+from nncf.quantization.algorithms.fast_bias_correction.algorithm import FastBiasCorrection
+from nncf.quantization.algorithms.pipeline import Pipeline
+from nncf.quantization.algorithms.smooth_quant.algorithm import SmoothQuant
+
+TModel = TypeVar("TModel")  # Generic placeholder for a model type.
+
+
+def experimental_create_ptq_pipeline(
+    quantizer: Quantizer,
+    subset_size: int = 300,
+    fast_bias_correction: Optional[bool] = True,
+    smooth_quant: bool = False,
+    bias_correction_params: Optional[AdvancedBiasCorrectionParameters] = None,
+    smooth_quant_params: Optional[AdvancedSmoothQuantParameters] = None,
+    activations_range_estimator_params: Optional[RangeEstimatorParameters] = None,
+    weights_range_estimator_params: Optional[RangeEstimatorParameters] = None,
+    batchwise_statistics: bool = False,
+) -> Pipeline:
+    """
+    Creates an experimental post-training quantization pipeline.
+
+    The experimental post-training quantization pipeline includes the following steps:
+        1) SmoothQuant (optional)
+        2) MinMaxRangeEstimator
+        3) FastBiasCorrection or BiasCorrection (optional)
+
+    :param quantizer: Quantizer to use in MinMaxRangeEstimator algorithm.
+    :param subset_size: Size of a subset to calculate activations
+        statistics used for quantization.
+    :param fast_bias_correction: Setting this option to `False` enables a different
+        bias correction method which is more accurate, in general, and takes
+        more time but requires less memory. None disables the bias correction algorithm.
+    :param smooth_quant: Setting this option to `True` enables the SmoothQuant algorithm.
+    :param bias_correction_params: Contains advanced parameters for fine-tuning bias correction algorithm.
+    :param smooth_quant_params: Contains advanced alpha parameters for SmoothQuant algorithm.
+    :param activations_range_estimator_params: Contains parameters for estimating the range
+        of activations of the model.
+    :param weights_range_estimator_params: Contains parameters for estimating the range
+        of weights of the model.
+    :param batchwise_statistics: Determines whether quantizer statistics should be calculated
+        for each item of the batch or for the entire batch, default is False.
+    :return: An experimental post-training quantization pipeline.
+    """
+
+    # Build the post-training quantization pipeline.
+    pipeline_steps = []
+
+    if smooth_quant_params is None:
+        smooth_quant_params = AdvancedSmoothQuantParameters()
+
+    if smooth_quant and (smooth_quant_params.convolution >= 0 or smooth_quant_params.matmul >= 0):
+        alpha_map = {"convolution": smooth_quant_params.convolution, "matmul": smooth_quant_params.matmul}
+        pipeline_steps.append([SmoothQuant(subset_size, False, alpha_map=alpha_map)])
+
+    # Add the `MinMaxRangeEstimator` algorithm as a new step, placed after SmoothQuant when that step is enabled.
+    pipeline_steps.append(
+        [
+            MinMaxRangeEstimator(
+                quantizer=quantizer,
+                subset_size=subset_size,
+                inplace_statistics=False,
+                batchwise_statistics=batchwise_statistics,
+                activations_range_estimator_params=activations_range_estimator_params,
+                weights_range_estimator_params=weights_range_estimator_params,
+            )
+        ]
+    )
+
+    if fast_bias_correction is not None:
+        # Add the `FastBiasCorrection` or `BiasCorrection` as additional algorithm
+        # inside the last step of the pipeline. It is added after `MinMaxRangeEstimator`
+        # algorithm.
+        if fast_bias_correction:
+            threshold = FAST_BIAS_CORRECTION_THRESHOLD
+            bias_correction_subset_size = subset_size
+            bias_correction_cls = FastBiasCorrection
+        else:
+            threshold = BIAS_CORRECTION_THRESHOLD
+            bias_correction_subset_size = max(int(subset_size * 0.2), 1)  # 20% of subset_size, but at least 1
+            bias_correction_cls = BiasCorrection
+
+        if bias_correction_params is None:
+            bias_correction_params = AdvancedBiasCorrectionParameters()
+
+        if bias_correction_params.threshold is not None:
+            threshold = bias_correction_params.threshold
+
+        pipeline_steps[-1].append(
+            bias_correction_cls(
+                bias_correction_subset_size,
+                threshold,
+                bias_correction_params.apply_for_all_nodes,
+            )
+        )
+
+    return Pipeline(pipeline_steps)
diff --git a/nncf/experimental/quantization/algorithms/range_estimator/algorithm.py b/nncf/experimental/quantization/algorithms/range_estimator/algorithm.py
@@ -0,0 +1,83 @@
+# Copyright (c) 2025 Intel Corporation
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#      http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import List, Optional, TypeVar
+
+from nncf import Dataset
+from nncf.common.graph.graph import NNCFGraph
+from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer
+from nncf.common.utils.backend import BackendType
+from nncf.experimental.quantization.quantizers.quantizer import Quantizer
+from nncf.quantization.algorithms.algorithm import Algorithm
+from nncf.quantization.algorithms.min_max.algorithm import MinMaxQuantization
+from nncf.quantization.range_estimator import RangeEstimatorParameters
+
+TModel = TypeVar("TModel")  # Stand-in for the type of the model passed to the algorithm.
+
+
+class MinMaxRangeEstimator(Algorithm):
+    def __init__(
+        self,
+        quantizer: Quantizer,
+        subset_size: int = 300,
+        inplace_statistics: bool = True,
+        batchwise_statistics: bool = False,
+        activations_range_estimator_params: Optional[RangeEstimatorParameters] = None,
+        weights_range_estimator_params: Optional[RangeEstimatorParameters] = None,
+    ):
+        """
+        :param quantizer: Instance of Quantizer to retrieve a quantization config
+            for the given model.
+        :param subset_size: Size of a subset to calculate activations statistics used
+            for quantization, defaults to 300.
+        :param inplace_statistics: Defines whether to calculate quantizers statistics
+            by backend graph operations or by default Python implementation, defaults
+            to True.
+        :param batchwise_statistics: Determines whether quantizer statistics should be calculated
+            for each item of the batch or for the entire batch, default is False.
+        :param activations_range_estimator_params: Quantization range estimation
+            parameters for activation.
+        :param weights_range_estimator_params: Quantization range estimation parameters
+            for weights.
+        """
+        self._quantizer = quantizer
+        self._min_max_algo = MinMaxQuantization(
+            subset_size=subset_size,
+            inplace_statistics=inplace_statistics,
+            batchwise_statistics=batchwise_statistics,
+            activations_range_estimator_params=activations_range_estimator_params,
+            weights_range_estimator_params=weights_range_estimator_params,
+        )
+
+    @property
+    def available_backends(self) -> List[BackendType]:
+        return [BackendType.TORCH_FX]
+
+    def apply(
+        self,
+        model: TModel,
+        graph: NNCFGraph,
+        statistic_points: Optional[StatisticPointsContainer] = None,
+        dataset: Optional[Dataset] = None,  # unused here: not forwarded to the wrapped MinMaxQuantization
+    ) -> TModel:
+        if self._min_max_algo._quantization_target_points_to_qconfig is None:
+            raise RuntimeError(
+                "Statistic points are not available."
+                " Please call `get_statistic_points` before calling the `apply` method."
+            )
+        return self._min_max_algo.apply(model=model, graph=graph, statistic_points=statistic_points)
+
+    def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPointsContainer:
+        quantizer_setup = self._quantizer.get_quantization_setup(model, graph)
+        self._min_max_algo._set_backend_entity(model)
+        self._min_max_algo._init_cache()
+        self._min_max_algo.fill_quantization_target_points(quantizer_setup, graph)
+        return self._min_max_algo.get_cached_statistic_points(model, graph)
diff --git a/nncf/experimental/quantization/quantizers/__init__.py b/nncf/experimental/quantization/quantizers/__init__.py
@@ -0,0 +1,10 @@
+# Copyright (c) 2025 Intel Corporation
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#      http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/nncf/experimental/quantization/quantizers/quantizer.py b/nncf/experimental/quantization/quantizers/quantizer.py
@@ -0,0 +1,45 @@
+# Copyright (c) 2025 Intel Corporation
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#      http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from abc import ABC
+from abc import abstractmethod
+from typing import TypeVar
+
+from nncf.common.graph.graph import NNCFGraph
+from nncf.common.quantization.quantizer_setup import SingleConfigQuantizerSetup
+
+TModel = TypeVar("TModel")  # Stand-in for the backend-specific model type.
+
+
+class Quantizer(ABC):
+    """
+    Quantizer is an interface for the RangeEstimator algorithm
+    which specifies all the required methods to retrieve quantization setup from the given model.
+    """
+
+    @abstractmethod
+    def transform_prior_quantization(self, model: TModel) -> TModel:
+        """
+        Transforms the given model in-place with the necessary modifications required prior to quantization.
+
+        :param model: Backend-specific model to be transformed.
+        :return: Transformed backend-specific model.
+        """
+
+    @abstractmethod
+    def get_quantization_setup(self, model: TModel, nncf_graph: NNCFGraph) -> SingleConfigQuantizerSetup:
+        """
+        Builds SingleConfigQuantizerSetup for the given model.
+
+        :param model: Backend-specific model, for which Quantization Target Points are being sought.
+        :param nncf_graph: NNCFGraph instance.
+        :return: SingleConfigQuantizerSetup for the given model.
+        """