From 2d1403ef4519cfc42ab04b151a9699180aef4c4e Mon Sep 17 00:00:00 2001 From: Alexander Suslov Date: Thu, 27 Jul 2023 20:59:20 +0400 Subject: [PATCH] reduced the number of graphs rebuilt --- nncf/common/tensor_statistics/aggregator.py | 11 +++-- .../torch/quantization/quantize_model.py | 6 +-- nncf/onnx/graph/model_utils.py | 6 +-- nncf/onnx/quantization/quantize_model.py | 4 +- nncf/onnx/statistics/aggregator.py | 11 +++-- nncf/openvino/graph/model_utils.py | 18 ++++---- nncf/openvino/quantization/quantize_model.py | 4 +- nncf/openvino/statistics/aggregator.py | 13 +++--- nncf/quantization/algorithms/algorithm.py | 42 ++++++++----------- .../algorithms/bias_correction/algorithm.py | 15 ++++--- .../algorithms/bias_correction/backend.py | 6 ++- .../bias_correction/onnx_backend.py | 6 +-- .../bias_correction/openvino_backend.py | 8 ++-- .../algorithms/channel_alignment/algorithm.py | 21 ++++------ .../fast_bias_correction/algorithm.py | 32 ++++++-------- .../algorithms/min_max/algorithm.py | 23 +++++----- .../algorithms/post_training/algorithm.py | 29 +++++++++---- .../algorithms/smooth_quant/algorithm.py | 18 ++++---- nncf/quantization/passes.py | 10 ++--- nncf/torch/statistics/aggregator.py | 7 ++-- tests/common/test_statistics_aggregator.py | 16 ++++--- tests/onnx/quantization/common.py | 6 ++- tests/onnx/quantization/test_ptq_params.py | 15 ++++++- .../test_fq_params_calculation.py | 8 ++-- .../native/quantization/test_graphs.py | 8 ++-- .../native/quantization/test_ptq_params.py | 23 +++++++--- .../test_templates/test_bias_correction.py | 4 +- .../test_templates/test_channel_alignment.py | 4 +- .../test_fast_bias_correction.py | 4 +- .../test_templates/test_ptq_params.py | 2 +- .../test_templates/test_smooth_quant.py | 4 +- .../ptq/test_calculation_quantizer_params.py | 6 +-- tests/torch/ptq/test_fq_params_calculation.py | 2 +- tests/torch/ptq/test_graphs.py | 2 +- tests/torch/ptq/test_ptq_params.py | 12 +++++- 35 files changed, 225 insertions(+), 181 
deletions(-) diff --git a/nncf/common/tensor_statistics/aggregator.py b/nncf/common/tensor_statistics/aggregator.py index 38028f6b240..3253b1d63ff 100644 --- a/nncf/common/tensor_statistics/aggregator.py +++ b/nncf/common/tensor_statistics/aggregator.py @@ -17,6 +17,7 @@ from nncf.common.factory import EngineFactory from nncf.common.factory import ModelTransformerFactory +from nncf.common.graph.graph import NNCFGraph from nncf.common.graph.transformations.layout import TransformationLayout from nncf.common.tensor import NNCFTensor from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer @@ -36,19 +37,20 @@ def __init__(self, dataset: Dataset): self.stat_subset_size = None self.statistic_points = StatisticPointsContainer() - def collect_statistics(self, model: TModel) -> None: + def collect_statistics(self, model: TModel, graph: NNCFGraph) -> None: """ Collects statistics for registered StatisticPoints. The statistics are stored in self.statistic_points. - :param model: backend-specific model instance + :param model: Backend-specific model instance. + :param graph: Model graph. 
""" if not self.statistic_points: return model_transformer = ModelTransformerFactory.create(model) - merged_statistics = self._get_merged_statistic_points(self.statistic_points, model) + merged_statistics = self._get_merged_statistic_points(self.statistic_points, model, graph) transformation_layout = self._get_transformation_layout_extra_outputs(merged_statistics) model_with_outputs = model_transformer.transform(transformation_layout) engine = EngineFactory.create(model_with_outputs) @@ -105,7 +107,7 @@ def _get_transformation_layout_extra_outputs( @staticmethod @abstractmethod def _get_merged_statistic_points( - statistic_points: StatisticPointsContainer, model: TModel + statistic_points: StatisticPointsContainer, model: TModel, graph: NNCFGraph ) -> StatisticPointsContainer: """ Creates a new StatisticPointContainer that has no duplicated tensor collectors for one @@ -115,6 +117,7 @@ def _get_merged_statistic_points( :param statistic_points: Registered statistic points with possible tensor collectors duplicates. :param model: Backend-specific target model. + :param graph: Model graph. :return: Merged statistic points container bounded with given statistic point container. 
""" diff --git a/nncf/experimental/torch/quantization/quantize_model.py b/nncf/experimental/torch/quantization/quantize_model.py index c52a79d1ed0..c97ab9c9675 100644 --- a/nncf/experimental/torch/quantization/quantize_model.py +++ b/nncf/experimental/torch/quantization/quantize_model.py @@ -115,9 +115,9 @@ def quantize_impl( advanced_parameters=advanced_parameters, ) - quantized_model = quantization_algorithm.apply(nncf_network, dataset=calibration_dataset) - - # TODO (asuslov): quantized_model = quantized_model.strip() + quantized_model = quantization_algorithm.apply( + nncf_network, nncf_network.nncf.get_graph(), dataset=calibration_dataset + ) quantized_model.nncf.disable_dynamic_graph_building() diff --git a/nncf/onnx/graph/model_utils.py b/nncf/onnx/graph/model_utils.py index f6e9002edae..62facd50614 100644 --- a/nncf/onnx/graph/model_utils.py +++ b/nncf/onnx/graph/model_utils.py @@ -13,7 +13,7 @@ import onnx from nncf.common.factory import ModelTransformerFactory -from nncf.common.factory import NNCFGraphFactory +from nncf.common.graph.graph import NNCFGraph from nncf.common.graph.transformations.commands import TargetType from nncf.common.graph.transformations.layout import TransformationLayout from nncf.onnx.graph.metatypes.onnx_metatypes import ONNXDequantizeLinearMetatype @@ -22,7 +22,7 @@ from nncf.onnx.graph.transformations.commands import ONNXTargetPoint -def remove_fq_from_inputs(model: onnx.ModelProto) -> onnx.ModelProto: +def remove_fq_from_inputs(model: onnx.ModelProto, nncf_graph: NNCFGraph) -> onnx.ModelProto: """ This method removes the activation Quantizer nodes from the model. It's needed for the further bias shift calculation that relates on quantized weights. @@ -31,8 +31,6 @@ def remove_fq_from_inputs(model: onnx.ModelProto) -> onnx.ModelProto: :return: onnx.ModelProto instance without activation Quantizer nodes. 
""" transformation_layout = TransformationLayout() - nncf_graph = NNCFGraphFactory.create(model) - model_transformer = ModelTransformerFactory.create(model) seen_nodes = [] diff --git a/nncf/onnx/quantization/quantize_model.py b/nncf/onnx/quantization/quantize_model.py index c3bb568c62c..a88a9213f36 100644 --- a/nncf/onnx/quantization/quantize_model.py +++ b/nncf/onnx/quantization/quantize_model.py @@ -16,6 +16,7 @@ from nncf.common.logging.logger import nncf_logger from nncf.common.quantization.structs import QuantizationPreset from nncf.data import Dataset +from nncf.onnx.graph.nncf_graph_builder import GraphConverter from nncf.parameters import ModelType from nncf.parameters import TargetDevice from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters @@ -65,6 +66,7 @@ def quantize_impl( advanced_parameters=advanced_parameters, ) - quantized_model = quantization_algorithm.apply(model, dataset=calibration_dataset) + graph = GraphConverter.create_nncf_graph(model) + quantized_model = quantization_algorithm.apply(model, graph, dataset=calibration_dataset) return quantized_model diff --git a/nncf/onnx/statistics/aggregator.py b/nncf/onnx/statistics/aggregator.py index e48267ac136..e3435382b5d 100644 --- a/nncf/onnx/statistics/aggregator.py +++ b/nncf/onnx/statistics/aggregator.py @@ -14,8 +14,8 @@ import numpy as np import onnx -from nncf.common.factory import NNCFGraphFactory from nncf.common.factory import TModel +from nncf.common.graph.graph import NNCFGraph from nncf.common.graph.transformations.commands import TargetType from nncf.common.graph.transformations.layout import TransformationLayout from nncf.common.tensor_statistics.aggregator import StatisticsAggregator @@ -28,12 +28,11 @@ class ONNXStatisticsAggregator(StatisticsAggregator): - def collect_statistics(self, model: onnx.ModelProto) -> None: - self._nncf_graph = NNCFGraphFactory.create(model) - self.input_edges_mapping = get_input_edges_mapping(self._nncf_graph) + def 
collect_statistics(self, model: onnx.ModelProto, graph: NNCFGraph) -> None: + self.input_edges_mapping = get_input_edges_mapping(graph) self._onnx_graph = ONNXGraph(model) self._registered_weights = set() - super().collect_statistics(model) + super().collect_statistics(model, graph) def _register_statistics( self, outputs: Dict[str, ONNXNNCFTensor], statistic_points: StatisticPointsContainer @@ -71,7 +70,7 @@ def _get_transformation_layout_extra_outputs( @staticmethod def _get_merged_statistic_points( - statistic_points: StatisticPointsContainer, model: TModel + statistic_points: StatisticPointsContainer, model: TModel, graph: NNCFGraph ) -> StatisticPointsContainer: # TODO: mirgate to experimental statistic collector and use common merging algorithm return statistic_points diff --git a/nncf/openvino/graph/model_utils.py b/nncf/openvino/graph/model_utils.py index aa35d1ed34a..aac584c321b 100644 --- a/nncf/openvino/graph/model_utils.py +++ b/nncf/openvino/graph/model_utils.py @@ -13,7 +13,7 @@ import openvino.runtime as ov from nncf.common.factory import ModelTransformerFactory -from nncf.common.factory import NNCFGraphFactory +from nncf.common.graph.graph import NNCFGraph from nncf.common.graph.transformations.layout import TransformationLayout from nncf.openvino.graph.metatypes.common import FAKE_QUANTIZE_OPERATIONS from nncf.openvino.graph.metatypes.openvino_metatypes import OVConvolutionBackpropDataMetatype @@ -25,11 +25,12 @@ from nncf.openvino.graph.transformations.command_creation import OVCommandCreator -def insert_null_biases(model: ov.Model) -> ov.Model: +def insert_null_biases(model: ov.Model, graph: NNCFGraph) -> ov.Model: """ This method finds and inserts zero biases for the layers that should have it. :param model: ov.Model instance. + :param graph: Model graph. 
:return: Updated ov.Model instance with zero biases """ types_to_insert_bias = [ @@ -39,9 +40,8 @@ def insert_null_biases(model: ov.Model) -> ov.Model: OVConvolutionBackpropDataMetatype, OVGroupConvolutionBackpropDataMetatype, ] - nncf_graph = NNCFGraphFactory.create(model) - nodes_without_biases = nncf_graph.get_nodes_by_metatypes(types_to_insert_bias) - nodes_without_biases = [node for node in nodes_without_biases if not is_node_with_bias(node, nncf_graph)] + nodes_without_biases = graph.get_nodes_by_metatypes(types_to_insert_bias) + nodes_without_biases = [node for node in nodes_without_biases if not is_node_with_bias(node, graph)] transformation_layout = TransformationLayout() model_transformer = ModelTransformerFactory.create(model) for node_without_bias in nodes_without_biases: @@ -50,7 +50,7 @@ def insert_null_biases(model: ov.Model) -> ov.Model: return model_transformer.transform(transformation_layout) -def remove_fq_from_inputs(model: ov.Model) -> ov.Model: +def remove_fq_from_inputs(model: ov.Model, graph: NNCFGraph) -> ov.Model: """ This method removes the activation Fake Quantize nodes from the model. It's needed for the further bias shift calculation that relates on quantized weights. @@ -59,12 +59,10 @@ def remove_fq_from_inputs(model: ov.Model) -> ov.Model: :return: ov.Model instance without activation Fake Quantize nodes. 
""" transformation_layout = TransformationLayout() - nncf_graph = NNCFGraphFactory.create(model) - model_transformer = ModelTransformerFactory.create(model) seen_nodes = [] - nodes_queue = deque(nncf_graph.get_input_nodes()) + nodes_queue = deque(graph.get_input_nodes()) while nodes_queue: current_node = nodes_queue.popleft() current_node_name = current_node.node_name @@ -76,6 +74,6 @@ def remove_fq_from_inputs(model: ov.Model) -> ov.Model: if current_node.metatype in FAKE_QUANTIZE_OPERATIONS: command = OVCommandCreator.create_command_to_remove_quantizer(current_node) transformation_layout.register(command) - nodes_queue.extend(nncf_graph.get_next_nodes(current_node)) + nodes_queue.extend(graph.get_next_nodes(current_node)) return model_transformer.transform(transformation_layout) diff --git a/nncf/openvino/quantization/quantize_model.py b/nncf/openvino/quantization/quantize_model.py index c1003b5903f..e681e8723aa 100644 --- a/nncf/openvino/quantization/quantize_model.py +++ b/nncf/openvino/quantization/quantize_model.py @@ -19,6 +19,7 @@ from nncf.common.logging import nncf_logger from nncf.common.quantization.structs import QuantizationPreset from nncf.data import Dataset +from nncf.openvino.graph.nncf_graph_builder import GraphConverter from nncf.openvino.quantization.backend_parameters import BackendParameters from nncf.openvino.quantization.backend_parameters import is_weight_compression_needed from nncf.parameters import DropType @@ -111,7 +112,8 @@ def native_quantize_impl( advanced_parameters=advanced_parameters, ) - quantized_model = quantization_algorithm.apply(model, dataset=calibration_dataset) + graph = GraphConverter.create_nncf_graph(model) + quantized_model = quantization_algorithm.apply(model, graph, dataset=calibration_dataset) if is_weight_compression_needed(advanced_parameters): compress_quantize_weights_transformation(quantized_model) diff --git a/nncf/openvino/statistics/aggregator.py b/nncf/openvino/statistics/aggregator.py index 
c69d4073836..7fd5e26c72d 100644 --- a/nncf/openvino/statistics/aggregator.py +++ b/nncf/openvino/statistics/aggregator.py @@ -15,6 +15,7 @@ import numpy as np import openvino.runtime as ov +from nncf.common.graph.graph import NNCFGraph from nncf.common.graph.transformations.commands import TargetType from nncf.common.graph.transformations.layout import TransformationLayout from nncf.common.tensor_statistics.aggregator import StatisticsAggregator @@ -22,16 +23,15 @@ from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer from nncf.experimental.common.tensor_statistics.collectors import MergedTensorCollector from nncf.experimental.common.tensor_statistics.collectors import TensorCollector -from nncf.openvino.graph.nncf_graph_builder import GraphConverter from nncf.openvino.graph.transformations.commands import OVInplaceFnInsertionCommand from nncf.openvino.graph.transformations.commands import OVOutputInsertionCommand from nncf.openvino.tensor import OVNNCFTensor class OVStatisticsAggregator(StatisticsAggregator): - def collect_statistics(self, model: ov.Model) -> None: + def collect_statistics(self, model: ov.Model, graph: NNCFGraph) -> None: self._name_to_node_mapping = {op.get_friendly_name(): op for op in model.get_ops()} - super().collect_statistics(model) + super().collect_statistics(model, graph) def _register_statistics( self, outputs: Dict[str, OVNNCFTensor], statistic_points: StatisticPointsContainer @@ -75,17 +75,16 @@ def _get_transformation_layout_extra_outputs( @staticmethod # TODO(dlyakhov) Move this to common part def _get_merged_statistic_points( - statistic_points: StatisticPointsContainer, model: ov.Model + statistic_points: StatisticPointsContainer, model: ov.Model, graph: NNCFGraph ) -> StatisticPointsContainer: - nncf_graph = GraphConverter.create_nncf_graph(model) merged_statistic_points = StatisticPointsContainer() target_type_to_tensor_collector_map = defaultdict(lambda: defaultdict(list)) for target_node_name, 
_statistic_points in statistic_points.data.items(): for statistic_point in _statistic_points: target_point = statistic_point.target_point if target_point.type in [TargetType.PRE_LAYER_OPERATION, TargetType.OPERATION_WITH_WEIGHTS]: - node = nncf_graph.get_node_by_name(target_node_name) - target_input_edge = nncf_graph.get_input_edges(node)[target_point.port_id] + node = graph.get_node_by_name(target_node_name) + target_input_edge = graph.get_input_edges(node)[target_point.port_id] target_type = TargetType.POST_LAYER_OPERATION _target_node_name = target_input_edge.from_node.node_name diff --git a/nncf/quantization/algorithms/algorithm.py b/nncf/quantization/algorithms/algorithm.py index b4c7a7fd604..e7086a8082d 100644 --- a/nncf/quantization/algorithms/algorithm.py +++ b/nncf/quantization/algorithms/algorithm.py @@ -14,18 +14,13 @@ from typing import Dict, Optional, TypeVar from nncf import Dataset +from nncf.common.graph.graph import NNCFGraph from nncf.common.tensor_statistics.statistic_point import StatisticPointsContainer from nncf.common.utils.backend import BackendType TModel = TypeVar("TModel") -class AlgorithmParameters(ABC): - """ - Base class for Post-Training algorithm parameters. - """ - - class Algorithm(ABC): """ Base class for all Post-Training algorithms. @@ -35,38 +30,35 @@ class Algorithm(ABC): @abstractmethod def available_backends(self) -> Dict[str, BackendType]: """ - Returns dictionary of the available backends for the algorithm + Returns dictionary of the available backends for the algorithm. - :return: Dict of backends supported by the algorithm + :return: Dict of backends supported by the algorithm. """ + @abstractmethod def apply( self, model: TModel, + graph: NNCFGraph, statistic_points: Optional[StatisticPointsContainer] = None, dataset: Optional[Dataset] = None, - ) -> TModel: - """ - Checks that statistic point exists, sets model into transformer - and applies the algorithm to the model. 
- :param model: model for applying algorithm - :param engine: engine for the model execution - :param statistic_points: StatisticPointsContainer - :return: model after algorithm - """ - # TODO (asuslov): add validation statistic_points - return self._apply(model, statistic_points=statistic_points, dataset=dataset) - - @abstractmethod - def _apply( - self, model: TModel, statistic_points: StatisticPointsContainer, dataset: Optional[Dataset] = None ) -> TModel: """ Applies the algorithm to the model. + + :param model: Model for applying algorithm. + :param graph: Model graph. + :param statistic_points: Statistic points with collected statistics values. + :param dataset: A representative dataset for the calibration process. + :return: A resulting model. """ @abstractmethod - def get_statistic_points(self, model: TModel) -> StatisticPointsContainer: + def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPointsContainer: """ - Returns activation layers, for which StatisticsCollector should collect statistics. + Returns statistic points, for which StatisticsCollector should collect statistics. + + :param model: Model for statistics collection. + :param graph: Model graph. + :return: Statistic points, for which StatisticsCollector should collect statistics.
""" diff --git a/nncf/quantization/algorithms/bias_correction/algorithm.py b/nncf/quantization/algorithms/bias_correction/algorithm.py index 634a1322e1b..805ceeaac61 100644 --- a/nncf/quantization/algorithms/bias_correction/algorithm.py +++ b/nncf/quantization/algorithms/bias_correction/algorithm.py @@ -127,19 +127,21 @@ def _set_backend_entity(self, model: TModel) -> None: "Cannot return backend-specific entity because {} is not supported!".format(model_backend) ) - def _apply( + def apply( self, model: TModel, + graph: NNCFGraph, statistic_points: Optional[StatisticPointsContainer] = None, dataset: Optional[Dataset] = None, ) -> TModel: self._set_backend_entity(model) - model = self._backend_entity.insert_null_biases(model) + model = self._backend_entity.insert_null_biases(model, graph) main_transformations_layout = TransformationLayout() main_model_transformer = ModelTransformerFactory.create(model) model_copy = copy_model(model) - model_copy = self._backend_entity.remove_fq_from_inputs(model_copy) + graph_copy = NNCFGraphFactory.create(model_copy) + model_copy = self._backend_entity.remove_fq_from_inputs(model_copy, graph_copy) nncf_graph = NNCFGraphFactory.create(model_copy) nodes_with_bias = [] @@ -479,10 +481,11 @@ def output_filter_func(point): output_fp.extend(tensor_collector.get_statistics().mean_values) return np.array(output_fp) - def get_statistic_points(self, model: TModel) -> StatisticPointsContainer: + def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPointsContainer: self._set_backend_entity(model) - model_copy = self._backend_entity.remove_fq_from_inputs(copy_model(model)) - model_copy = self._backend_entity.insert_null_biases(model_copy) + model_copy = self._backend_entity.remove_fq_from_inputs(copy_model(model), graph) + graph_copy = NNCFGraphFactory.create(model_copy) + model_copy = self._backend_entity.insert_null_biases(model_copy, graph_copy) nncf_graph = NNCFGraphFactory.create(model_copy) statistic_container = 
StatisticPointsContainer() diff --git a/nncf/quantization/algorithms/bias_correction/backend.py b/nncf/quantization/algorithms/bias_correction/backend.py index 301cf61d3cf..c9d42899b87 100644 --- a/nncf/quantization/algorithms/bias_correction/backend.py +++ b/nncf/quantization/algorithms/bias_correction/backend.py @@ -196,21 +196,23 @@ def is_node_with_bias(node: NNCFNode, nncf_graph: NNCFGraph) -> bool: @staticmethod @abstractmethod - def remove_fq_from_inputs(model: TModel) -> TModel: + def remove_fq_from_inputs(model: TModel, nncf_graph: NNCFGraph) -> TModel: """ This method removes the activation Fake Quantize nodes (or Quantize-Dequantize pairs) from the model. It's needed for the further bias shift calculation that relates on quantized weights. :param model: TModel instance. + :param nncf_graph: NNCFGraph instance. :return: TModel without activation Fake Quantize nodes (or Quantize-Dequantize pairs). """ @staticmethod @abstractmethod - def insert_null_biases(model: TModel) -> TModel: + def insert_null_biases(model: TModel, nncf_graph: NNCFGraph) -> TModel: """ This method finds and inserts zero biases for the layers that should have it. :param model: TModel instance. + :param nncf_graph: NNCFGraph instance. 
:return: TModel instance with zero biases """ diff --git a/nncf/quantization/algorithms/bias_correction/onnx_backend.py b/nncf/quantization/algorithms/bias_correction/onnx_backend.py index 5124b49def5..847e7dfcb00 100644 --- a/nncf/quantization/algorithms/bias_correction/onnx_backend.py +++ b/nncf/quantization/algorithms/bias_correction/onnx_backend.py @@ -121,9 +121,9 @@ def is_node_with_bias(node: NNCFNode, nncf_graph: NNCFGraph) -> bool: return is_node_with_bias(node) @staticmethod - def remove_fq_from_inputs(model: onnx.ModelProto) -> onnx.ModelProto: - return remove_fq_from_inputs(model) + def remove_fq_from_inputs(model: onnx.ModelProto, nncf_graph: NNCFGraph) -> onnx.ModelProto: + return remove_fq_from_inputs(model, nncf_graph) @staticmethod - def insert_null_biases(model: onnx.ModelProto) -> onnx.ModelProto: + def insert_null_biases(model: onnx.ModelProto, nncf_graph: NNCFGraph) -> onnx.ModelProto: return model diff --git a/nncf/quantization/algorithms/bias_correction/openvino_backend.py b/nncf/quantization/algorithms/bias_correction/openvino_backend.py index 5745a0b5017..37424b6b100 100644 --- a/nncf/quantization/algorithms/bias_correction/openvino_backend.py +++ b/nncf/quantization/algorithms/bias_correction/openvino_backend.py @@ -134,9 +134,9 @@ def is_node_with_bias(node: NNCFNode, nncf_graph: NNCFGraph) -> bool: return is_node_with_bias(node, nncf_graph) @staticmethod - def remove_fq_from_inputs(model: ov.Model) -> ov.Model: - return remove_fq_from_inputs(model) + def remove_fq_from_inputs(model: ov.Model, nncf_graph: NNCFGraph) -> ov.Model: + return remove_fq_from_inputs(model, nncf_graph) @staticmethod - def insert_null_biases(model: ov.Model) -> ov.Model: - return insert_null_biases(model) + def insert_null_biases(model: ov.Model, nncf_graph: NNCFGraph) -> ov.Model: + return insert_null_biases(model, nncf_graph) diff --git a/nncf/quantization/algorithms/channel_alignment/algorithm.py b/nncf/quantization/algorithms/channel_alignment/algorithm.py 
index 5f9eed9965c..ed545f9512a 100644 --- a/nncf/quantization/algorithms/channel_alignment/algorithm.py +++ b/nncf/quantization/algorithms/channel_alignment/algorithm.py @@ -17,7 +17,6 @@ from nncf import Dataset from nncf.common.factory import CommandCreatorFactory from nncf.common.factory import ModelTransformerFactory -from nncf.common.factory import NNCFGraphFactory from nncf.common.graph.graph import NNCFGraph from nncf.common.graph.graph import NNCFNode from nncf.common.graph.patterns import GraphPattern @@ -74,7 +73,6 @@ def __init__( self.subset_size = subset_size self.inplace_statistics = inplace_statistics self.backend_params = backend_params - self._original_nncf_graph = None self._backend_entity = None self._quantile = 1e-4 self._algorithm_key = f"CA_{hash(self)}" @@ -95,29 +93,29 @@ def _set_backend_entity(self, model: TModel) -> None: self._backend_entity = OVChannelAlignmentAlgoBackend() - def _apply( + def apply( self, model: TModel, + graph: NNCFGraph, statistic_points: Optional[StatisticPointsContainer] = None, dataset: Optional[Dataset] = None, ) -> TModel: self._set_backend_entity(model) - nncf_graph = NNCFGraphFactory.create(model) if self._original_nncf_graph is None else self._original_nncf_graph model_transformer = ModelTransformerFactory.create(model) transformation_layout = TransformationLayout() def filter_func(point: StatisticPoint) -> bool: return self._algorithm_key in point.algorithm_to_tensor_collectors and point.target_point == target_point - for conv_in, add_in, conv_out in tqdm(self._get_node_pairs(nncf_graph), desc="Channel alignment"): + for conv_in, add_in, conv_out in tqdm(self._get_node_pairs(graph), desc="Channel alignment"): target_point, node_in = self._get_target_point_and_node_in(conv_in, add_in) tensor_collectors = list( statistic_points.get_algo_statistics_for_node(node_in.node_name, filter_func, self._algorithm_key) ) assert len(tensor_collectors) == 1 stat = tensor_collectors[0].get_statistics() - conv_in_cont = 
ConvParamsContainer(conv_in, model, nncf_graph, self._backend_entity) - conv_out_cont = ConvParamsContainer(conv_out, model, nncf_graph, self._backend_entity) + conv_in_cont = ConvParamsContainer(conv_in, model, graph, self._backend_entity) + conv_out_cont = ConvParamsContainer(conv_out, model, graph, self._backend_entity) if conv_in_cont.has_bias() and conv_out_cont.has_bias(): amean = (stat.max_values + stat.min_values) * 0.5 @@ -153,7 +151,7 @@ def filter_func(point: StatisticPoint) -> bool: if container.stated_bias.is_modified(): transformation_layout.register( - command_creator.create_command_to_update_bias(container.op, container.bias, nncf_graph), + command_creator.create_command_to_update_bias(container.op, container.bias, graph), ) transformed_model = model_transformer.transform(transformation_layout) @@ -371,15 +369,14 @@ def _get_target_point_and_node_in(self, conv_in, add_in) -> Tuple[TargetPoint, N node_in, ) - def get_statistic_points(self, model: TModel) -> StatisticPointsContainer: + def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPointsContainer: self._set_backend_entity(model) - self._original_nncf_graph = NNCFGraphFactory.create(model) statistic_container = StatisticPointsContainer() - for conv_in, add_in, _ in self._get_node_pairs(self._original_nncf_graph): + for conv_in, add_in, _ in self._get_node_pairs(graph): target_point, node_in = self._get_target_point_and_node_in(conv_in, add_in) channel_axis = conv_in.metatype.output_channel_axis - reduction_shape = list(range(len(self._original_nncf_graph.get_output_edges(node_in)[0].tensor_shape))) + reduction_shape = list(range(len(graph.get_output_edges(node_in)[0].tensor_shape))) reduction_shape.remove(channel_axis) statistic_collector = self._backend_entity.get_statistic_collector( diff --git a/nncf/quantization/algorithms/fast_bias_correction/algorithm.py b/nncf/quantization/algorithms/fast_bias_correction/algorithm.py index 8b9ca20c463..4a294b5a0f7 100644 --- 
a/nncf/quantization/algorithms/fast_bias_correction/algorithm.py +++ b/nncf/quantization/algorithms/fast_bias_correction/algorithm.py @@ -16,7 +16,7 @@ from nncf import Dataset from nncf.common.factory import EngineFactory from nncf.common.factory import ModelTransformerFactory -from nncf.common.factory import NNCFGraphFactory +from nncf.common.graph.graph import NNCFGraph from nncf.common.graph.model_transformer import ModelTransformer from nncf.common.graph.transformations.commands import TargetPoint from nncf.common.graph.transformations.commands import TargetType @@ -82,7 +82,6 @@ def __init__( self.apply_for_all_nodes = apply_for_all_nodes self.inplace_statistics = inplace_statistics self.backend_params = backend_params - self.nncf_graph = None self._backend_entity = None self._algorithm_key = f"FBC_{hash(self)}" @@ -119,21 +118,21 @@ def _set_backend_entity(self, model: TModel) -> None: "Cannot return backend-specific entity because {} is not supported!".format(model_backend) ) - def _apply( + def apply( self, model: TModel, + graph: NNCFGraph, statistic_points: Optional[StatisticPointsContainer] = None, dataset: Optional[Dataset] = None, ) -> TModel: self._set_backend_entity(model) - nncf_graph = NNCFGraphFactory.create(model) model_transformer = ModelTransformerFactory.create(model) node_and_bias_value = [ - (node, self._backend_entity.get_bias_value(node, nncf_graph, model)) - for node in nncf_graph.get_all_nodes() - if self._backend_entity.is_node_with_bias(node, nncf_graph) + (node, self._backend_entity.get_bias_value(node, graph, model)) + for node in graph.get_all_nodes() + if self._backend_entity.is_node_with_bias(node, graph) ] # Fill `node_and_new_bias_value` list. 
It is a correspondence between nodes @@ -143,13 +142,11 @@ def _apply( for node, bias_value in tqdm(node_and_bias_value, desc="Applying Fast Bias correction"): node_name = node.node_name - if not self._backend_entity.is_quantized_weights(node, nncf_graph): + if not self._backend_entity.is_quantized_weights(node, graph): nncf_logger.debug(f"Skipping node {node_name} because weights were not quantized") continue - in_node_name, out_node_name = self._backend_entity.get_node_names_for_input_output_statistics( - node, nncf_graph - ) + in_node_name, out_node_name = self._backend_entity.get_node_names_for_input_output_statistics(node, graph) input_fp, input_shape = self._get_fp_inputs(statistic_points, in_node_name) output_fp = self._get_fp_outputs(statistic_points, out_node_name) @@ -184,9 +181,7 @@ def _apply( # Create commands of bias correction and apply them to the model. transformation_layout = TransformationLayout() for node, bias_value in node_and_new_bias_value: - transformation_layout.register( - self._backend_entity.create_bias_correction_command(node, bias_value, nncf_graph) - ) + transformation_layout.register(self._backend_entity.create_bias_correction_command(node, bias_value, graph)) transformed_model = model_transformer.transform(transformation_layout) return transformed_model @@ -309,19 +304,16 @@ def _get_bias_shift( bias_shift = self._backend_entity.post_process_output_data(output_fp) - q_outputs return bias_shift - def get_statistic_points(self, model: TModel) -> StatisticPointsContainer: + def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPointsContainer: self._set_backend_entity(model) - nncf_graph = NNCFGraphFactory.create(model) nodes_with_bias = [ - node for node in nncf_graph.get_all_nodes() if self._backend_entity.is_node_with_bias(node, nncf_graph) + node for node in graph.get_all_nodes() if self._backend_entity.is_node_with_bias(node, graph) ] statistic_container = StatisticPointsContainer() for node in 
nodes_with_bias: input_port_id, output_port_id = self._backend_entity.get_activation_port_ids_for_bias_node(node) - in_node_name, out_node_name = self._backend_entity.get_node_names_for_input_output_statistics( - node, nncf_graph - ) + in_node_name, out_node_name = self._backend_entity.get_node_names_for_input_output_statistics(node, graph) pre_layer_statistic_point = self._backend_entity.target_point( TargetType.PRE_LAYER_OPERATION, in_node_name, input_port_id diff --git a/nncf/quantization/algorithms/min_max/algorithm.py b/nncf/quantization/algorithms/min_max/algorithm.py index 922e31b5ec1..ef1aa7baa1b 100644 --- a/nncf/quantization/algorithms/min_max/algorithm.py +++ b/nncf/quantization/algorithms/min_max/algorithm.py @@ -18,7 +18,6 @@ from nncf import Dataset from nncf.common.factory import ModelTransformerFactory -from nncf.common.factory import NNCFGraphFactory from nncf.common.graph.graph import NNCFGraph from nncf.common.graph.graph import NNCFNode from nncf.common.graph.operator_metatypes import OperatorMetatype @@ -607,18 +606,18 @@ def _get_quantization_points_overflow_fix( output.update(nodes) return output - def _apply( + def apply( self, model: TModel, + graph: NNCFGraph, statistic_points: Optional[StatisticPointsContainer] = None, dataset: Optional[Dataset] = None, ) -> TModel: transformation_layout = TransformationLayout() - nncf_graph = NNCFGraphFactory.create(model) model_transformer = ModelTransformerFactory.create(model) - quantization_target_points, unified_scale_groups = self._get_quantization_target_points(model, nncf_graph) + quantization_target_points, unified_scale_groups = self._get_quantization_target_points(model, graph) quantization_points_overflow_fix = self._get_quantization_points_overflow_fix( - self._overflow_fix, quantization_target_points, nncf_graph + self._overflow_fix, quantization_target_points, graph ) weight_layer_names = set() @@ -645,7 +644,7 @@ def filter_func(point: StatisticPoint) -> bool: narrow_range = 
get_quantizer_narrow_range(qconfig, q_group) parameters = calculate_quantizer_parameters(unified_values, qconfig, q_group, narrow_range) command = self._backend_entity.create_quantizer_insertion_command( - nncf_graph, quantization_target_point, qconfig, parameters + graph, quantization_target_point, qconfig, parameters ) transformation_layout.register(command) unified_ops_list.add(quantization_target_point) @@ -658,7 +657,7 @@ def filter_func(point: StatisticPoint) -> bool: target_node_name, filter_func, self._algorithm_key ): if quantization_target_point.is_weight_target_point(): - weights_name = self._backend_entity.get_weight_name(nncf_graph, quantization_target_point) + weights_name = self._backend_entity.get_weight_name(graph, quantization_target_point) if not self._backend_entity.should_quantize_weight(weights_name, weight_layer_names): continue weight_layer_names.add(weights_name) @@ -671,19 +670,18 @@ def filter_func(point: StatisticPoint) -> bool: statistics = tensor_collector.get_statistics() parameters = calculate_quantizer_parameters(statistics, qconfig, quant_group, narrow_range, half_range) command = self._backend_entity.create_quantizer_insertion_command( - nncf_graph, quantization_target_point, qconfig, parameters + graph, quantization_target_point, qconfig, parameters ) transformation_layout.register(command) quantized_model = model_transformer.transform(transformation_layout) return quantized_model - def get_statistic_points(self, model: TModel) -> StatisticPointsContainer: + def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPointsContainer: self._set_backend_entity(model) - nncf_graph = NNCFGraphFactory.create(model) - quantization_target_points, _ = self._get_quantization_target_points(model, nncf_graph) + quantization_target_points, _ = self._get_quantization_target_points(model, graph) output = StatisticPointsContainer() for quantization_target_point, qconfig in quantization_target_points.items(): nncf_logger.debug(
@@ -694,7 +692,7 @@ def get_statistic_points(self, model: TModel) -> StatisticPointsContainer: if quantization_target_point.is_weight_target_point(): # Weight statistics is constant, so only one collection is enough. num_samples = 1 - stat_collector = self._get_stat_collector(nncf_graph, quantization_target_point, qconfig, num_samples) + stat_collector = self._get_stat_collector(graph, quantization_target_point, qconfig, num_samples) output.add_statistic_point( StatisticPoint( target_point=quantization_target_point, diff --git a/nncf/quantization/algorithms/post_training/algorithm.py b/nncf/quantization/algorithms/post_training/algorithm.py index c99c9acf844..39c2132f0d6 100644 --- a/nncf/quantization/algorithms/post_training/algorithm.py +++ b/nncf/quantization/algorithms/post_training/algorithm.py @@ -13,6 +13,8 @@ from typing import Callable, Dict, List, Optional, TypeVar from nncf import Dataset +from nncf.common.factory import NNCFGraphFactory +from nncf.common.graph.graph import NNCFGraph from nncf.common.logging import nncf_logger from nncf.common.quantization.structs import QuantizationPreset from nncf.common.tensor_statistics.aggregator import StatisticsAggregator @@ -189,13 +191,15 @@ def _create_statistics_aggregator(self, dataset: Dataset, backend: BackendType) return PTStatisticsAggregator(dataset) return None - def _apply( + def apply( self, model: TModel, + graph: NNCFGraph, statistic_points: Optional[StatisticPointsContainer] = None, dataset: Optional[Dataset] = None, ) -> TModel: modified_model = copy_model(model) + modified_model_graph = graph backend = get_backend(modified_model) if statistic_points is None: @@ -211,22 +215,29 @@ def _apply( continue for pre_pass in first_stage_algorithm.pre_passes: - modified_model = pre_pass(modified_model) + modified_model = pre_pass(modified_model, modified_model_graph) + modified_model_graph = NNCFGraphFactory.create(modified_model) statistics_aggregator = self._create_statistics_aggregator(dataset, backend)
- algo_statistic_points = algorithm.get_statistic_points(modified_model) + algo_statistic_points = algorithm.get_statistic_points(modified_model, modified_model_graph) statistics_aggregator.register_statistic_points(algo_statistic_points) - statistics_aggregator.collect_statistics(modified_model) - modified_model = algorithm.apply(modified_model, statistics_aggregator.statistic_points) + statistics_aggregator.collect_statistics(modified_model, modified_model_graph) + modified_model = algorithm.apply( + modified_model, modified_model_graph, statistics_aggregator.statistic_points + ) + modified_model_graph = NNCFGraphFactory.create(modified_model) statistics_aggregator = self._create_statistics_aggregator(dataset, backend) for algorithm in self.algorithms: - algo_statistic_points = algorithm.get_statistic_points(modified_model) + algo_statistic_points = algorithm.get_statistic_points(modified_model, modified_model_graph) statistics_aggregator.register_statistic_points(algo_statistic_points) - statistics_aggregator.collect_statistics(modified_model) + statistics_aggregator.collect_statistics(modified_model, modified_model_graph) statistic_points = statistics_aggregator.statistic_points - for algorithm in self.algorithms: - modified_model = algorithm.apply(modified_model, statistic_points) + for algorithm in self.algorithms[:-1]: + modified_model = algorithm.apply(modified_model, modified_model_graph, statistic_points) + modified_model_graph = NNCFGraphFactory.create(modified_model) + modified_model = self.algorithms[-1].apply(modified_model, modified_model_graph, statistic_points) + return modified_model diff --git a/nncf/quantization/algorithms/smooth_quant/algorithm.py b/nncf/quantization/algorithms/smooth_quant/algorithm.py index 7921c3167ec..0815af828b0 100644 --- a/nncf/quantization/algorithms/smooth_quant/algorithm.py +++ b/nncf/quantization/algorithms/smooth_quant/algorithm.py @@ -27,7 +27,6 @@ from nncf import Dataset from nncf.common.factory import 
ModelTransformerFactory -from nncf.common.factory import NNCFGraphFactory from nncf.common.graph.graph import NNCFGraph from nncf.common.graph.transformations.commands import TargetType from nncf.common.graph.transformations.layout import TransformationLayout @@ -89,9 +88,10 @@ def _set_backend_entity(self, model: TModel) -> None: "Cannot return backend-specific entity because {} is not supported!".format(model_backend) ) - def _apply( + def apply( self, model: TModel, + graph: NNCFGraph, statistic_points: Optional[StatisticPointsContainer] = None, dataset: Optional[Dataset] = None, ) -> TModel: @@ -99,12 +99,13 @@ def _apply( nncf_logger.info("Skipping SmoothQuant algorithm because alfa parameter is negative.") return model - nncf_graph = NNCFGraphFactory.create(model) - nodes_to_smooth_data = self._get_nodes_to_smooth_data(nncf_graph) + self._set_backend_entity(model) + + nodes_to_smooth_data = self._get_nodes_to_smooth_data(graph) model_transformer = ModelTransformerFactory.create(model) transformation_layout = TransformationLayout() - node_groups = self._group_nodes_by_source(nodes_to_smooth_data, nncf_graph) + node_groups = self._group_nodes_by_source(nodes_to_smooth_data, graph) for group_id, nodes in tqdm(node_groups.items(), desc="Applying Smooth Quant"): best_ratio = 0.0 @@ -191,7 +192,7 @@ def filter_func(point: StatisticPoint) -> bool: statistics_for_node.append(tensor_collector.get_statistics()[STATISTIC_BRANCH_KEY]) return statistics_for_node - def get_statistic_points(self, model: TModel) -> StatisticPointsContainer: + def get_statistic_points(self, model: TModel, graph: NNCFGraph) -> StatisticPointsContainer: statistic_container = StatisticPointsContainer() if self._alpha < 0: @@ -201,9 +202,8 @@ def get_statistic_points(self, model: TModel) -> StatisticPointsContainer: return statistic_container self._set_backend_entity(model) - nncf_graph = NNCFGraphFactory.create(model) - nodes_to_smooth_data = self._get_nodes_to_smooth_data(nncf_graph) + 
nodes_to_smooth_data = self._get_nodes_to_smooth_data(graph) for node_data in nodes_to_smooth_data: node_to_smooth = node_data["node_to_smooth"] @@ -213,7 +213,7 @@ def get_statistic_points(self, model: TModel) -> StatisticPointsContainer: port_id=node_data["input_act_port"], ) input_reduction_shape = self._backend_entity.calculate_input_reduction_shape( - nncf_graph, node_to_smooth, node_data["input_act_port"] + graph, node_to_smooth, node_data["input_act_port"] ) stat_collector = self._backend_entity.get_abs_max_channel_collector( self._subset_size, input_reduction_shape, self._inplace_statistics, STATISTIC_BRANCH_KEY diff --git a/nncf/quantization/passes.py b/nncf/quantization/passes.py index 6b0f8b83a6d..e6af74c8271 100644 --- a/nncf/quantization/passes.py +++ b/nncf/quantization/passes.py @@ -17,6 +17,8 @@ from nncf.common.utils.backend import BackendType from nncf.common.utils.backend import get_backend +TModel = TypeVar("TModel") + def transform_to_inference_graph( nncf_graph: NNCFGraph, @@ -120,19 +122,17 @@ def filter_constant_nodes( return nncf_graph -TModel = TypeVar("TModel") - - -def insert_null_biases_pass(model: TModel) -> TModel: +def insert_null_biases_pass(model: TModel, graph: NNCFGraph) -> TModel: """ This pass finds and inserts zero biases to the given model for the layers that should have it. :param model: Model instance. + :param graph: NNCFGraph instance. 
:return: Updated Model instance with zero biases """ model_backend = get_backend(model) if model_backend == BackendType.OPENVINO: from nncf.openvino.graph.model_utils import insert_null_biases - return insert_null_biases(model) + return insert_null_biases(model, graph) return model diff --git a/nncf/torch/statistics/aggregator.py b/nncf/torch/statistics/aggregator.py index 4c57699cd3c..6c2c48256c6 100644 --- a/nncf/torch/statistics/aggregator.py +++ b/nncf/torch/statistics/aggregator.py @@ -15,6 +15,7 @@ import torch from nncf.common.factory import TModel +from nncf.common.graph.graph import NNCFGraph from nncf.common.graph.transformations.commands import TransformationPriority from nncf.common.graph.transformations.layout import TransformationLayout from nncf.common.tensor_statistics.aggregator import StatisticPointsContainer @@ -25,10 +26,10 @@ class PTStatisticsAggregator(StatisticsAggregator): - def collect_statistics(self, model: NNCFNetwork) -> None: + def collect_statistics(self, model: NNCFNetwork, graph: NNCFGraph) -> None: with torch.no_grad(): with model.nncf.temporary_clean_view() as intermediate_model: - super().collect_statistics(intermediate_model) + super().collect_statistics(intermediate_model, graph) def _register_statistics( self, outputs: Dict[str, PTNNCFTensor], statistic_points: StatisticPointsContainer @@ -59,7 +60,7 @@ def _get_transformation_layout_extra_outputs( @staticmethod def _get_merged_statistic_points( - statistic_points: StatisticPointsContainer, model: TModel + statistic_points: StatisticPointsContainer, model: TModel, graph: NNCFGraph ) -> StatisticPointsContainer: # TODO: mirgate to experimental statistic collector and use common merging algorithm return statistic_points diff --git a/tests/common/test_statistics_aggregator.py b/tests/common/test_statistics_aggregator.py index feb210fbf9f..cd0545a4580 100644 --- a/tests/common/test_statistics_aggregator.py +++ b/tests/common/test_statistics_aggregator.py @@ -404,7 +404,8 @@ def 
test_statistics_aggregator_min_max( dataset = self.get_dataset(dataset_samples) statistics_aggregator = self.get_statistics_aggregator(dataset) statistics_aggregator.register_statistic_points(statistics_points) - statistics_aggregator.collect_statistics(model) + graph = NNCFGraphFactory.create(model) + statistics_aggregator.collect_statistics(model, graph) def filter_func(point): return ( @@ -591,7 +592,8 @@ def test_statistics_aggregator_bias_correction( statistics_aggregator = self.get_statistics_aggregator(dataset) statistics_aggregator.register_statistic_points(statistics_points) model = self.get_backend_model(dataset_samples) - statistics_aggregator.collect_statistics(model) + graph = NNCFGraphFactory.create(model) + statistics_aggregator.collect_statistics(model, graph) def filter_func(point): return ( @@ -665,7 +667,8 @@ def test_statistics_merging_simple(self, dataset_samples, inplace_statistics, st dataset = self.get_dataset(dataset_samples) statistics_aggregator = self.get_statistics_aggregator(dataset) statistics_aggregator.register_statistic_points(statistics_points) - statistics_aggregator.collect_statistics(model) + graph = NNCFGraphFactory.create(model) + statistics_aggregator.collect_statistics(model, graph) tensor_collectors = list(statistics_points.get_tensor_collectors()) assert len(tensor_collectors) == 3 @@ -771,7 +774,7 @@ def test_statistic_merging(self, test_params, key, dataset_samples, inplace_stat dataset = self.get_dataset(dataset_samples) statistics_aggregator = self.get_statistics_aggregator(dataset) # pylint: disable=protected-access - merged_statistics = statistics_aggregator._get_merged_statistic_points(statistics_points, model) + merged_statistics = statistics_aggregator._get_merged_statistic_points(statistics_points, model, nncf_graph) merged_stats_checkers_map = { "split_concat": self._check_split_concat_merged_stats, "shared_conv": self._check_shared_convs_merged_stats, @@ -779,7 +782,7 @@ def test_statistic_merging(self, 
test_params, key, dataset_samples, inplace_stat merged_stats_checkers_map[key](merged_statistics) statistics_aggregator.register_statistic_points(statistics_points) - statistics_aggregator.collect_statistics(model) + statistics_aggregator.collect_statistics(model, nncf_graph) for collector, ref in collectors_and_refs: stat = collector.get_statistics() @@ -842,7 +845,8 @@ def product_dict(**kwargs): statistics_aggregator = self.get_statistics_aggregator(dataset) statistics_aggregator.register_statistic_points(statistics_points) # Run statistic collection to check output names matches reduer names - statistics_aggregator.collect_statistics(model) + graph = NNCFGraphFactory.create(model) + statistics_aggregator.collect_statistics(model, graph) @pytest.mark.parametrize( "statistic_point_params", diff --git a/tests/onnx/quantization/common.py b/tests/onnx/quantization/common.py index 1b2ab0cf25a..1d3464882fe 100644 --- a/tests/onnx/quantization/common.py +++ b/tests/onnx/quantization/common.py @@ -95,6 +95,7 @@ def min_max_quantize_model( ) -> onnx.ModelProto: if convert_model_opset: original_model = convert_opset_version(original_model) + graph = GraphConverter.create_nncf_graph(original_model) dataset = get_random_dataset_for_test(original_model, dataset_has_batch_size) quantization_params = {} if quantization_params is None else quantization_params @@ -104,7 +105,7 @@ def min_max_quantize_model( post_training_quantization = PostTrainingQuantization(subset_size=1, **quantization_params) - quantized_model = post_training_quantization.apply(original_model, dataset=dataset) + quantized_model = post_training_quantization.apply(original_model, graph, dataset=dataset) return quantized_model @@ -116,10 +117,11 @@ def ptq_quantize_model( ) -> onnx.ModelProto: if convert_model_opset: original_model = convert_opset_version(original_model) + graph = GraphConverter.create_nncf_graph(original_model) dataset = get_random_dataset_for_test(original_model, dataset_has_batch_size) 
quantization_params = {} if quantization_params is None else quantization_params post_training_quantization = PostTrainingQuantization(subset_size=1, **quantization_params) - quantized_model = post_training_quantization.apply(original_model, dataset=dataset) + quantized_model = post_training_quantization.apply(original_model, graph, dataset=dataset) return quantized_model diff --git a/tests/onnx/quantization/test_ptq_params.py b/tests/onnx/quantization/test_ptq_params.py index 863af678f81..9bc23b1410b 100644 --- a/tests/onnx/quantization/test_ptq_params.py +++ b/tests/onnx/quantization/test_ptq_params.py @@ -18,6 +18,7 @@ from nncf.onnx.graph.metatypes.onnx_metatypes import ONNXConvolutionMetatype from nncf.onnx.graph.metatypes.onnx_metatypes import ONNXGemmMetatype from nncf.onnx.graph.metatypes.onnx_metatypes import ONNXSoftmaxMetatype +from nncf.onnx.graph.nncf_graph_builder import GraphConverter from nncf.onnx.graph.nncf_graph_builder import ONNXLayerAttributes from nncf.onnx.graph.transformations.commands import ONNXTargetPoint from nncf.onnx.statistics.collectors import ONNXMeanMinMaxStatisticCollector @@ -84,10 +85,20 @@ def metatypes_mapping(self): @pytest.fixture(scope="session") def test_params(self): + linear_model = LinearModel().onnx_model + linear_model_graph = GraphConverter.create_nncf_graph(linear_model) + depthwise_model = OneDepthwiseConvolutionalModel().onnx_model + depthwise_model_graph = GraphConverter.create_nncf_graph(depthwise_model) + return { - "test_range_estimator_per_tensor": {"model": LinearModel().onnx_model, "stat_points_num": 5}, + "test_range_estimator_per_tensor": { + "model": linear_model, + "nncf_graph": linear_model_graph, + "stat_points_num": 5, + }, "test_range_estimator_per_channel": { - "model": OneDepthwiseConvolutionalModel().onnx_model, + "model": depthwise_model, + "nncf_graph": depthwise_model_graph, "stat_points_num": 2, }, "test_quantize_outputs": { diff --git 
a/tests/openvino/native/quantization/test_fq_params_calculation.py b/tests/openvino/native/quantization/test_fq_params_calculation.py index 405f5b2ddf6..c28b415ad59 100644 --- a/tests/openvino/native/quantization/test_fq_params_calculation.py +++ b/tests/openvino/native/quantization/test_fq_params_calculation.py @@ -16,6 +16,7 @@ import pytest from nncf.common.quantization.structs import QuantizationPreset +from nncf.openvino.graph.nncf_graph_builder import GraphConverter from nncf.openvino.statistics.aggregator import OVStatisticsAggregator from nncf.quantization.advanced_parameters import OverflowFix from nncf.quantization.algorithms.min_max.algorithm import MinMaxQuantization @@ -56,13 +57,14 @@ def get_fq_nodes_stats_algo(model): # pylint: disable=protected-access def quantize_model(ov_model, q_params): dataset = get_dataset_for_test(ov_model) + graph = GraphConverter.create_nncf_graph(ov_model) min_max_algo = MinMaxQuantization(subset_size=1, **q_params) statistics_aggregator = OVStatisticsAggregator(dataset) - statistic_points = min_max_algo.get_statistic_points(ov_model) + statistic_points = min_max_algo.get_statistic_points(ov_model, graph) statistics_aggregator.register_statistic_points(statistic_points) - statistics_aggregator.collect_statistics(ov_model) - quantized_model = min_max_algo._apply(ov_model, statistics_aggregator.statistic_points) + statistics_aggregator.collect_statistics(ov_model, graph) + quantized_model = min_max_algo.apply(ov_model, graph, statistics_aggregator.statistic_points) return quantized_model diff --git a/tests/openvino/native/quantization/test_graphs.py b/tests/openvino/native/quantization/test_graphs.py index 232c20a7fdc..373ee9cfd9d 100644 --- a/tests/openvino/native/quantization/test_graphs.py +++ b/tests/openvino/native/quantization/test_graphs.py @@ -16,6 +16,7 @@ import pytest from nncf.common.quantization.structs import QuantizationPreset +from nncf.openvino.graph.nncf_graph_builder import GraphConverter from 
nncf.openvino.statistics.aggregator import OVStatisticsAggregator from nncf.parameters import ModelType from nncf.parameters import TargetDevice @@ -128,13 +129,14 @@ def test_omz_models_sq_placement(model_name_params, tmp_path): # pylint: disable=protected-access def smooth_quant_model(ov_model: ov.Model, q_params: Dict, quantize=True): dataset = get_dataset_for_test(ov_model) + graph = GraphConverter.create_nncf_graph(ov_model) smooth_quant_algo = SmoothQuant(subset_size=1) statistics_aggregator = OVStatisticsAggregator(dataset) - statistic_points = smooth_quant_algo.get_statistic_points(ov_model) + statistic_points = smooth_quant_algo.get_statistic_points(ov_model, graph) statistics_aggregator.register_statistic_points(statistic_points) - statistics_aggregator.collect_statistics(ov_model) - modified_model = smooth_quant_algo._apply(ov_model, statistics_aggregator.statistic_points) + statistics_aggregator.collect_statistics(ov_model, graph) + modified_model = smooth_quant_algo.apply(ov_model, graph, statistics_aggregator.statistic_points) if quantize: modified_model = quantize_model(modified_model, q_params) diff --git a/tests/openvino/native/quantization/test_ptq_params.py b/tests/openvino/native/quantization/test_ptq_params.py index c94f730a1ed..3552915d523 100644 --- a/tests/openvino/native/quantization/test_ptq_params.py +++ b/tests/openvino/native/quantization/test_ptq_params.py @@ -91,16 +91,29 @@ def metatypes_mapping(self): @pytest.fixture(scope="session") def test_params(self): + linear_model = LinearModel().ov_model + linear_model_graph = GraphConverter.create_nncf_graph(linear_model) + depthwise_model = DepthwiseConv4DModel().ov_model + depthwise_model_graph = GraphConverter.create_nncf_graph(depthwise_model) + return { - "test_range_estimator_per_tensor": {"model": LinearModel().ov_model, "stat_points_num": 2}, - "test_range_estimator_per_channel": {"model": DepthwiseConv4DModel().ov_model, "stat_points_num": 2}, + "test_range_estimator_per_tensor": { 
+ "model": linear_model, + "nncf_graph": linear_model_graph, + "stat_points_num": 2, + }, + "test_range_estimator_per_channel": { + "model": depthwise_model, + "nncf_graph": depthwise_model_graph, + "stat_points_num": 2, + }, "test_quantize_outputs": { - "nncf_graph": GraphConverter.create_nncf_graph(LinearModel().ov_model), + "nncf_graph": linear_model_graph, "hw_patterns": get_hw_patterns(), "ignored_patterns": get_ignored_patterns(), }, "test_ignored_scopes": { - "nncf_graph": GraphConverter.create_nncf_graph(LinearModel().ov_model), + "nncf_graph": linear_model_graph, "hw_patterns": get_hw_patterns(), "ignored_patterns": get_ignored_patterns(), }, @@ -110,7 +123,7 @@ def test_params(self): "ignored_patterns": get_ignored_patterns(), }, "test_validate_scope": { - "nncf_graph": GraphConverter.create_nncf_graph(LinearModel().ov_model), + "nncf_graph": linear_model_graph, "ignored_patterns": get_ignored_patterns(), }, } diff --git a/tests/post_training/test_templates/test_bias_correction.py b/tests/post_training/test_templates/test_bias_correction.py index 725849726ff..f7c9346cb3c 100644 --- a/tests/post_training/test_templates/test_bias_correction.py +++ b/tests/post_training/test_templates/test_bias_correction.py @@ -14,6 +14,7 @@ import pytest +from nncf.common.factory import NNCFGraphFactory from nncf.data import Dataset from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters from nncf.quantization.advanced_parameters import OverflowFix @@ -118,7 +119,8 @@ def test_update_bias(self, model_cls, ref_biases, tmpdir): dataset = Dataset(self.get_dataset(model_cls.INPUT_SIZE), self.get_transform_fn()) quantization_algorithm = self.get_quantization_algorithm() - quantized_model = quantization_algorithm.apply(model, dataset=dataset) + graph = NNCFGraphFactory.create(model) + quantized_model = quantization_algorithm.apply(model, graph, dataset=dataset) mapped_ref_biases = self.map_references(ref_biases) self.check_bias(quantized_model, 
mapped_ref_biases) diff --git a/tests/post_training/test_templates/test_channel_alignment.py b/tests/post_training/test_templates/test_channel_alignment.py index c87e9afd841..6279bd1f0e8 100644 --- a/tests/post_training/test_templates/test_channel_alignment.py +++ b/tests/post_training/test_templates/test_channel_alignment.py @@ -355,7 +355,7 @@ class MockBackend(backend_cls): ref_bias_in_after_scale_align, ) ) - algorithm._apply(None, statistic_points) + algorithm.apply(None, nncf_graph, statistic_points) align_means_called = 1 if num_biases == 2 else 0 assert algorithm._align_means.call_count == align_means_called @@ -443,7 +443,7 @@ class MockBackend(backend_cls): MockBackend.get_statistic_collector = mocker.MagicMock(return_value=ref_stat_collector) algorithm._backend_entity = MockBackend - statistic_container = algorithm.get_statistic_points(None) + statistic_container = algorithm.get_statistic_points(None, nncf_graph) backend_cls = self.get_backend_cls() target_node_name = "/Add_1_0" if num_biases else "/Conv_1_0" diff --git a/tests/post_training/test_templates/test_fast_bias_correction.py b/tests/post_training/test_templates/test_fast_bias_correction.py index 1cf2172246f..b972ce851cd 100644 --- a/tests/post_training/test_templates/test_fast_bias_correction.py +++ b/tests/post_training/test_templates/test_fast_bias_correction.py @@ -14,6 +14,7 @@ import pytest +from nncf.common.factory import NNCFGraphFactory from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters from nncf.quantization.advanced_parameters import OverflowFix from nncf.quantization.algorithms.fast_bias_correction.algorithm import FastBiasCorrection @@ -114,6 +115,7 @@ def test_update_bias(self, model_cls, ref_bias, tmpdir): dataset = get_static_dataset(model_cls.INPUT_SIZE, self.get_transform_fn(), self.fn_to_type) quantization_algorithm = self.get_quantization_algorithm() - quantized_model = quantization_algorithm.apply(model, dataset=dataset) + graph = 
NNCFGraphFactory.create(model) + quantized_model = quantization_algorithm.apply(model, graph, dataset=dataset) self.check_bias(quantized_model, ref_bias) diff --git a/tests/post_training/test_templates/test_ptq_params.py b/tests/post_training/test_templates/test_ptq_params.py index 16413c02bcf..eb91aa770eb 100644 --- a/tests/post_training/test_templates/test_ptq_params.py +++ b/tests/post_training/test_templates/test_ptq_params.py @@ -138,7 +138,7 @@ def test_range_estimator_per_tensor(self, test_params, range_estimator_params): assert min_max_algo._range_estimator_params[QuantizerGroup.ACTIVATIONS] == range_estimator_params params = test_params["test_range_estimator_per_tensor"] - stat_points = min_max_algo.get_statistic_points(params["model"]) + stat_points = min_max_algo.get_statistic_points(params["model"], params["nncf_graph"]) assert len(stat_points) == params["stat_points_num"] for _, stat_point in stat_points.items(): diff --git a/tests/post_training/test_templates/test_smooth_quant.py b/tests/post_training/test_templates/test_smooth_quant.py index 971eaf9781c..6bddc8b9acb 100644 --- a/tests/post_training/test_templates/test_smooth_quant.py +++ b/tests/post_training/test_templates/test_smooth_quant.py @@ -14,6 +14,7 @@ import pytest +from nncf.common.factory import NNCFGraphFactory from nncf.experimental.common.tensor_statistics.collectors import AbsMaxReducer from nncf.experimental.common.tensor_statistics.collectors import MaxAggregator from nncf.parameters import ModelType @@ -85,7 +86,8 @@ def test_smooth_quant_algo(self, model_cls, reference_values, tmpdir): dataset = get_static_dataset(model_cls.INPUT_SIZE, self.get_transform_fn(), self.fn_to_type) quantization_algorithm = self.get_quantization_algorithm() - quantized_model = quantization_algorithm.apply(model, dataset=dataset) + graph = NNCFGraphFactory.create(model) + quantized_model = quantization_algorithm.apply(model, graph, dataset=dataset) self.check_scales(quantized_model, reference_values) 
diff --git a/tests/torch/ptq/test_calculation_quantizer_params.py b/tests/torch/ptq/test_calculation_quantizer_params.py index 79b7f050131..cec246f1ad7 100644 --- a/tests/torch/ptq/test_calculation_quantizer_params.py +++ b/tests/torch/ptq/test_calculation_quantizer_params.py @@ -315,10 +315,10 @@ def test_quantizer_parameters_export(tmp_path: Path): nncf_config = NNCFConfig({"input_info": {"sample_size": [1, 3, 32, 32]}}) nncf_network = create_nncf_network(model, nncf_config) - statistic_points = min_max_algo.get_statistic_points(nncf_network) + statistic_points = min_max_algo.get_statistic_points(nncf_network, nncf_network.nncf.get_graph()) statistics_aggregator.register_statistic_points(statistic_points) - statistics_aggregator.collect_statistics(model) - torch_quantized_model = min_max_algo._apply(model, statistics_aggregator.statistic_points) + statistics_aggregator.collect_statistics(model, nncf_network.nncf.get_graph()) + torch_quantized_model = min_max_algo.apply(model, nncf_network.nncf.get_graph(), statistics_aggregator.statistic_points) path = str(tmp_path / "torch_ptq_model.onnx") torch.onnx.export( diff --git a/tests/torch/ptq/test_fq_params_calculation.py b/tests/torch/ptq/test_fq_params_calculation.py index 1c52f1beb2e..974f42588d8 100644 --- a/tests/torch/ptq/test_fq_params_calculation.py +++ b/tests/torch/ptq/test_fq_params_calculation.py @@ -59,7 +59,7 @@ def transform_fn(sample): original_model.eval() nncf_network = create_nncf_network(original_model, config) - quantized_model = post_training_quantization.apply(nncf_network, dataset=dataset) + quantized_model = post_training_quantization.apply(nncf_network, nncf_network.nncf.get_graph(), dataset=dataset) return quantized_model diff --git a/tests/torch/ptq/test_graphs.py b/tests/torch/ptq/test_graphs.py index 0291caa7ff1..bdcc53f5978 100644 --- a/tests/torch/ptq/test_graphs.py +++ b/tests/torch/ptq/test_graphs.py @@ -98,6 +98,6 @@ def test_min_max_classification_quantized_graphs(desc: ModelDesc, quantization_p
quantization_parameters["advanced_parameters"] = AdvancedQuantizationParameters(disable_bias_correction=True) quantization_algorithm = PostTrainingQuantization(**quantization_parameters) - quantized_model = quantization_algorithm.apply(nncf_network, dataset=None) + quantized_model = quantization_algorithm.apply(nncf_network, nncf_network.nncf.get_graph(), dataset=None) check_graph(quantized_model.nncf.get_graph(), desc.dot_filename, graph_dir) diff --git a/tests/torch/ptq/test_ptq_params.py b/tests/torch/ptq/test_ptq_params.py index f0fdea71676..7d657d81441 100644 --- a/tests/torch/ptq/test_ptq_params.py +++ b/tests/torch/ptq/test_ptq_params.py @@ -130,10 +130,18 @@ def metatypes_mapping(self): @pytest.fixture(scope="session") def test_params(self): + linear_model = LinearTestModel().get_nncf_network() + depthwise_model = OneDepthwiseConvModel().get_nncf_network() + return { - "test_range_estimator_per_tensor": {"model": LinearTestModel().get_nncf_network(), "stat_points_num": 5}, + "test_range_estimator_per_tensor": { + "model": linear_model, + "nncf_graph": linear_model.nncf.get_graph(), + "stat_points_num": 5, + }, "test_range_estimator_per_channel": { - "model": OneDepthwiseConvModel().get_nncf_network(), + "model": depthwise_model, + "nncf_graph": depthwise_model.nncf.get_graph(), + "stat_points_num": 2, }, "test_quantize_outputs": {