Embedding qconfig list is extended for CPU devices / tests fixes

openvinotoolkit · Jan 20, 2025 · c8b1761 · c8b1761
1 parent a9a0c19
commit c8b1761
Show file tree

Hide file tree

Showing 4 changed files with 50 additions and 15 deletions.
diff --git a/nncf/common/hardware/configs/cpu.json b/nncf/common/hardware/configs/cpu.json
@@ -293,7 +293,7 @@
         {
             "type": "Embedding",
             "quantization": {
-                "weights": ["q8_w_sym", "q8_w_asym"]
+                "weights": ["q8_w_sym", "q8_w_asym", "q8_a", "q8_a_sym", "q8_a_ch"]
             }
         },
         {"type": "EmbeddingBag"}

diff --git a/tests/cross_fw/test_templates/test_quantizer_config.py b/tests/cross_fw/test_templates/test_quantizer_config.py
@@ -158,10 +158,10 @@ def test_default_quantizer_config(self, single_conv_nncf_graph):
         )
 
         weight_default_config = QuantizerConfig(
-            mode=QuantizationMode.SYMMETRIC, num_bits=8, signedness_to_force=True, per_channel=True
+            mode=QuantizationMode.SYMMETRIC, num_bits=8, signedness_to_force=True, per_channel=True, narrow_range=True
         )
         activation_default_config = QuantizerConfig(
-            mode=QuantizationMode.SYMMETRIC, num_bits=8, signedness_to_force=None, per_channel=False
+            mode=QuantizationMode.SYMMETRIC, num_bits=8, signedness_to_force=None, per_channel=False, narrow_range=False
         )
 
         assert len(q_setup.quantization_points) == 2
@@ -244,10 +244,10 @@ def test_depthwise_conv_default_quantizer_config(self, depthwise_conv_nncf_graph
         )
 
         weight_default_config = QuantizerConfig(
-            mode=QuantizationMode.SYMMETRIC, num_bits=8, signedness_to_force=True, per_channel=True
+            mode=QuantizationMode.SYMMETRIC, num_bits=8, signedness_to_force=True, per_channel=True, narrow_range=True
         )
         activation_default_config = QuantizerConfig(
-            mode=QuantizationMode.SYMMETRIC, num_bits=8, signedness_to_force=None, per_channel=True
+            mode=QuantizationMode.SYMMETRIC, num_bits=8, signedness_to_force=None, per_channel=True, narrow_range=False
         )
 
         assert len(q_setup.quantization_points) == 2
@@ -263,37 +263,73 @@ def test_depthwise_conv_default_quantizer_config(self, depthwise_conv_nncf_graph
             4: {
                 "qip": {"target_node_name": "/K_0", "input_port_id": None},
                 "qip_class": "ActivationQuantizationInsertionPoint",
-                "qconfig": {"num_bits": 8, "mode": "symmetric", "signedness_to_force": None, "per_channel": False},
+                "qconfig": {
+                    "num_bits": 8,
+                    "mode": "symmetric",
+                    "signedness_to_force": None,
+                    "per_channel": False,
+                    "narrow_range": False,
+                },
                 "directly_quantized_operator_node_names": ["/K_Q_0"],
             },
             5: {
                 "qip": {"target_node_name": "/Q_0", "input_port_id": None},
                 "qip_class": "ActivationQuantizationInsertionPoint",
-                "qconfig": {"num_bits": 8, "mode": "symmetric", "signedness_to_force": None, "per_channel": False},
+                "qconfig": {
+                    "num_bits": 8,
+                    "mode": "symmetric",
+                    "signedness_to_force": None,
+                    "per_channel": False,
+                    "narrow_range": False,
+                },
                 "directly_quantized_operator_node_names": ["/K_Q_0"],
             },
             6: {
                 "qip": {"target_node_name": "/Input_1_0", "input_port_id": None},
                 "qip_class": "ActivationQuantizationInsertionPoint",
-                "qconfig": {"num_bits": 8, "mode": "asymmetric", "signedness_to_force": None, "per_channel": False},
+                "qconfig": {
+                    "num_bits": 8,
+                    "mode": "asymmetric",
+                    "signedness_to_force": None,
+                    "per_channel": False,
+                    "narrow_range": False,
+                },
                 "directly_quantized_operator_node_names": ["/K_0", "/Q_0", "/V_0"],
             },
             8: {
                 "qip": {"target_node_name": "/K_0"},
                 "qip_class": "WeightQuantizationInsertionPoint",
-                "qconfig": {"num_bits": 8, "mode": "symmetric", "signedness_to_force": True, "per_channel": True},
+                "qconfig": {
+                    "num_bits": 8,
+                    "mode": "symmetric",
+                    "signedness_to_force": True,
+                    "per_channel": True,
+                    "narrow_range": True,
+                },
                 "directly_quantized_operator_node_names": ["/K_0"],
             },
             9: {
                 "qip": {"target_node_name": "/Q_0"},
                 "qip_class": "WeightQuantizationInsertionPoint",
-                "qconfig": {"num_bits": 8, "mode": "symmetric", "signedness_to_force": True, "per_channel": True},
+                "qconfig": {
+                    "num_bits": 8,
+                    "mode": "symmetric",
+                    "signedness_to_force": True,
+                    "per_channel": True,
+                    "narrow_range": True,
+                },
                 "directly_quantized_operator_node_names": ["/Q_0"],
             },
             10: {
                 "qip": {"target_node_name": "/V_0"},
                 "qip_class": "WeightQuantizationInsertionPoint",
-                "qconfig": {"num_bits": 8, "mode": "symmetric", "signedness_to_force": True, "per_channel": True},
+                "qconfig": {
+                    "num_bits": 8,
+                    "mode": "symmetric",
+                    "signedness_to_force": True,
+                    "per_channel": True,
+                    "narrow_range": True,
+                },
                 "directly_quantized_operator_node_names": ["/V_0"],
             },
         },

diff --git a/tests/onnx/data/reference_scales/embedding_model_performance.json b/tests/onnx/data/reference_scales/embedding_model_performance.json
@@ -1,7 +1,7 @@
 {
     "QuantizeLinear_Identity_Y_1": {
-        "scale": 0.007852046750485897,
-        "zero_point": 0
+        "scale": 0.007821254432201385,
+        "zero_point": -1
     },
     "QuantizeLinear_W_1": {
         "scale": [

diff --git a/tests/onnx/quantization/test_qdq_params_calculation.py b/tests/onnx/quantization/test_qdq_params_calculation.py
@@ -109,8 +109,7 @@ def test_scales(model, preset):
     ref_stats_path = REFERENCE_SCALES_DIR / ref_stats_name
 
     # Uncomment lines below to generate reference for new models.
-    # from tests.shared.helpers import dump_to_json
-
+    # from tests.cross_fw.shared.json import dump_to_json
     # dump_to_json(ref_stats_path, q_nodes_params)
 
     ref_nodes_params = load_json(ref_stats_path)