Skip to content

Commit

Permalink
Comments
Browse files Browse the repository at this point in the history
  • Loading branch information
daniil-lyakhov committed Jan 17, 2025
1 parent 0bf1e1e commit 9511c7c
Show file tree
Hide file tree
Showing 10 changed files with 9,125 additions and 8,715 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from torch.ao.quantization.observer import PerChannelMinMaxObserver
from torch.ao.quantization.quantizer.quantizer import QuantizationAnnotation as InductorQAnotation
from torch.ao.quantization.quantizer.quantizer import QuantizationSpec as InductorQuantizationSpec
from torch.ao.quantization.quantizer.quantizer import Quantizer as InductorQuantizer
from torch.ao.quantization.quantizer.quantizer import Quantizer

from nncf.common.graph.graph import NNCFGraph
from nncf.common.quantization.quantizer_propagation.solver import QuantizerPropagationRule
Expand All @@ -26,7 +26,7 @@
from nncf.common.quantization.structs import QuantizationPreset
from nncf.common.quantization.structs import QuantizationScheme
from nncf.common.quantization.structs import QuantizerConfig as NNCFQuantizerConfig
from nncf.experimental.common.quantization.algorithms.quantizer.base_quantizer import NNCFQuantizer
from nncf.experimental.quantization.algorithms.quantizer.base_quantizer import Quantizer as NNCFQuantizer
from nncf.experimental.torch.fx.nncf_graph_builder import GraphConverter
from nncf.experimental.torch.fx.node_utils import get_graph_node_by_name
from nncf.experimental.torch.fx.transformations import fold_constant_except_qdq
Expand All @@ -42,7 +42,7 @@
QUANT_ANNOTATION_KEY = "quantization_annotation"


class OpenVINOQuantizer(InductorQuantizer, NNCFQuantizer):
class OpenVINOQuantizer(Quantizer):
def __init__(
self,
mode: Optional[QuantizationMode] = None,
Expand Down Expand Up @@ -169,3 +169,11 @@ def validate(self, model: torch.fx.GraphModule) -> None:
def transform_for_annotation(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule:
fold_constant_except_qdq(model)
return model


class OpenVINOQuantizerAdapter(NNCFQuantizer):
    """
    Adapter that presents an OpenVINOQuantizer through the NNCFQuantizer interface.

    The adapter holds a reference to the wrapped quantizer and forwards
    `get_quantization_setup` calls to it without modifying the arguments
    or the result.
    """

    def __init__(self, quantizer: OpenVINOQuantizer):
        """
        :param quantizer: The OpenVINOQuantizer instance that calls are delegated to.
        """
        self._quantizer = quantizer

    def get_quantization_setup(self, model: torch.fx.GraphModule, nncf_graph: NNCFGraph) -> SingleConfigQuantizerSetup:
        """
        Obtain the quantization setup from the wrapped quantizer.

        :param model: The torch.fx graph module passed through to the wrapped quantizer.
        :param nncf_graph: The NNCFGraph passed through to the wrapped quantizer.
        :return: The value returned by the wrapped quantizer's `get_quantization_setup`.
        """
        wrapped = self._quantizer
        setup = wrapped.get_quantization_setup(model, nncf_graph)
        return setup
7 changes: 6 additions & 1 deletion nncf/experimental/torch/fx/quantization/quantize_pt2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,12 @@
from nncf.common.factory import NNCFGraphFactory
from nncf.common.logging import nncf_logger
from nncf.data import Dataset
from nncf.experimental.common.quantization.algorithms.quantizer.openvino_quantizer import OpenVINOQuantizerAdapter
from nncf.experimental.quantization.algorithms.post_training.algorithm import ExperimentalPostTrainingQuantization
from nncf.experimental.quantization.quantizers.torch_ao_adapter import TorchAOQuantizerAdapter
from nncf.experimental.torch.fx.constant_folding import constant_fold
from nncf.experimental.torch.fx.transformations import QUANTIZE_NODE_TARGETS
from nncf.experimental.torch.fx.transformations import compress_post_quantize_transformation
from nncf.quantization.advanced_parameters import AdvancedBiasCorrectionParameters
from nncf.quantization.advanced_parameters import AdvancedSmoothQuantParameters
from nncf.quantization.advanced_parameters import RangeEstimatorParameters
Expand Down Expand Up @@ -114,7 +116,10 @@ def quantize_pt2e(
quantized_model = GraphModule(quantized_model, quantized_model.graph)

if fold_quantize:
constant_fold(quantized_model, _quant_node_constraint)
if isinstance(quantizer, OpenVINOQuantizerAdapter):
compress_post_quantize_transformation(quantized_model)
else:
constant_fold(quantized_model, _quant_node_constraint)

pm = PassManager([DuplicateDQPass()])

Expand Down

Large diffs are not rendered by default.

Large diffs are not rendered by default.

9,952 changes: 5,077 additions & 4,875 deletions tests/torch/data/reference_graphs/fx/experimental/OpenVINOQuantizer/swin_v2_s.dot

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -7,39 +7,43 @@ strict digraph {
"5 embedding_0_0_nncf_smooth_quant_0" [id=5, type=call_module];
"6 quantize_per_tensor_default" [id=6, type=quantize_per_tensor];
"7 dequantize_per_tensor_default" [id=7, type=dequantize_per_tensor];
"8 linear_scale_0" [id=8, type=get_attr];
"9 linear_zero_point_0" [id=9, type=get_attr];
"10 _frozen_param0" [id=10, type=get_attr];
"11 dequantize_per_channel_default" [id=11, type=dequantize_per_channel];
"12 linear" [id=12, type=linear];
"13 linear_0_0_nncf_smooth_quant_0" [id=13, type=call_module];
"14 quantize_per_tensor_default_1" [id=14, type=quantize_per_tensor];
"15 dequantize_per_tensor_default_1" [id=15, type=dequantize_per_tensor];
"16 linear_1_scale_0" [id=16, type=get_attr];
"17 linear_1_zero_point_0" [id=17, type=get_attr];
"18 _frozen_param1" [id=18, type=get_attr];
"19 dequantize_per_channel_default_1" [id=19, type=dequantize_per_channel];
"20 linear_1" [id=20, type=linear];
"21 output" [id=21, type=output];
"8 scale_updated_constant0" [id=8, type=get_attr];
"9 compressed_weight_updated_constant0" [id=9, type=get_attr];
"10 mul_tensor" [id=10, type=mul];
"11 zero_point_updated_constant0" [id=11, type=get_attr];
"12 sub_tensor" [id=12, type=sub];
"13 linear" [id=13, type=linear];
"14 linear_0_0_nncf_smooth_quant_0" [id=14, type=call_module];
"15 quantize_per_tensor_default_1" [id=15, type=quantize_per_tensor];
"16 dequantize_per_tensor_default_1" [id=16, type=dequantize_per_tensor];
"17 scale_updated_constant1" [id=17, type=get_attr];
"18 compressed_weight_updated_constant1" [id=18, type=get_attr];
"19 mul_tensor_1" [id=19, type=mul];
"20 zero_point_updated_constant1" [id=20, type=get_attr];
"21 sub_tensor_1" [id=21, type=sub];
"22 linear_1" [id=22, type=linear];
"23 output" [id=23, type=output];
"0 wte_weight" -> "4 embedding" [label="(10, 5)", style=solid];
"1 linear_bias" -> "12 linear" [label="(5,)", style=solid];
"2 lm_head_bias" -> "20 linear_1" [label="(10,)", style=solid];
"1 linear_bias" -> "13 linear" [label="(5,)", style=solid];
"2 lm_head_bias" -> "22 linear_1" [label="(10,)", style=solid];
"3 input_ids" -> "4 embedding" [label="(5,)", style=solid];
"4 embedding" -> "5 embedding_0_0_nncf_smooth_quant_0" [label="(5, 5)", style=solid];
"5 embedding_0_0_nncf_smooth_quant_0" -> "6 quantize_per_tensor_default" [label="(5, 5)", style=solid];
"6 quantize_per_tensor_default" -> "7 dequantize_per_tensor_default" [label="(5, 5)", style=solid];
"7 dequantize_per_tensor_default" -> "12 linear" [label="(5, 5)", style=solid];
"8 linear_scale_0" -> "11 dequantize_per_channel_default" [label="(5,)", style=solid];
"9 linear_zero_point_0" -> "11 dequantize_per_channel_default" [label="(5,)", style=solid];
"10 _frozen_param0" -> "11 dequantize_per_channel_default" [label="(5, 5)", style=solid];
"11 dequantize_per_channel_default" -> "12 linear" [label="(5, 5)", style=solid];
"12 linear" -> "13 linear_0_0_nncf_smooth_quant_0" [label="(5, 5)", style=solid];
"13 linear_0_0_nncf_smooth_quant_0" -> "14 quantize_per_tensor_default_1" [label="(5, 5)", style=solid];
"14 quantize_per_tensor_default_1" -> "15 dequantize_per_tensor_default_1" [label="(5, 5)", style=solid];
"15 dequantize_per_tensor_default_1" -> "20 linear_1" [label="(5, 5)", style=solid];
"16 linear_1_scale_0" -> "19 dequantize_per_channel_default_1" [label="(10,)", style=solid];
"17 linear_1_zero_point_0" -> "19 dequantize_per_channel_default_1" [label="(10,)", style=solid];
"18 _frozen_param1" -> "19 dequantize_per_channel_default_1" [label="(10, 5)", style=solid];
"19 dequantize_per_channel_default_1" -> "20 linear_1" [label="(10, 5)", style=solid];
"20 linear_1" -> "21 output" [label="(5, 10)", style=solid];
"7 dequantize_per_tensor_default" -> "13 linear" [label="(5, 5)", style=solid];
"8 scale_updated_constant0" -> "10 mul_tensor" [label="(5, 1)", style=solid];
"9 compressed_weight_updated_constant0" -> "10 mul_tensor" [label="(5, 5)", style=solid];
"10 mul_tensor" -> "12 sub_tensor" [label="(5, 5)", style=solid];
"11 zero_point_updated_constant0" -> "12 sub_tensor" [label="(5, 1)", style=solid];
"12 sub_tensor" -> "13 linear" [label="(5, 5)", style=solid];
"13 linear" -> "14 linear_0_0_nncf_smooth_quant_0" [label="(5, 5)", style=solid];
"14 linear_0_0_nncf_smooth_quant_0" -> "15 quantize_per_tensor_default_1" [label="(5, 5)", style=solid];
"15 quantize_per_tensor_default_1" -> "16 dequantize_per_tensor_default_1" [label="(5, 5)", style=solid];
"16 dequantize_per_tensor_default_1" -> "22 linear_1" [label="(5, 5)", style=solid];
"17 scale_updated_constant1" -> "19 mul_tensor_1" [label="(10, 1)", style=solid];
"18 compressed_weight_updated_constant1" -> "19 mul_tensor_1" [label="(10, 5)", style=solid];
"19 mul_tensor_1" -> "21 sub_tensor_1" [label="(10, 5)", style=solid];
"20 zero_point_updated_constant1" -> "21 sub_tensor_1" [label="(10, 1)", style=solid];
"21 sub_tensor_1" -> "22 linear_1" [label="(10, 5)", style=solid];
"22 linear_1" -> "23 output" [label="(5, 10)", style=solid];
}
Loading

0 comments on commit 9511c7c

Please sign in to comment.