More references

openvinotoolkit · Jan 31, 2025 · 987c51d · 987c51d
1 parent 470e05f
commit 987c51d
Show file tree

Hide file tree

Showing 20 changed files with 1,706 additions and 1,709 deletions.
diff --git a/nncf/common/hardware/configs/cpu.json b/nncf/common/hardware/configs/cpu.json
@@ -39,6 +39,14 @@
                 "granularity": ["perchannel", "pertensor"],
                 "narrow_range": true
             },
+            "q8_w_sym_any_nr": {
+                "bits": 8,
+                "mode": "symmetric",
+                "level_low": -128,
+                "level_high": 127,
+                "granularity": ["perchannel", "pertensor"],
+                "narrow_range": [true, false]
+            },
             "q8_w_asym": {
                 "bits": 8,
                 "mode": "asymmetric",
@@ -293,7 +301,7 @@
         {
             "type": "Embedding",
             "quantization": {
-                "weights": ["q8_w_sym", "q8_w_asym", "q8_a", "q8_a_sym", "q8_a_ch"]
+                "weights": ["q8_w_sym_any_nr", "q8_w_asym"]
             }
         },
         {"type": "EmbeddingBag"}

diff --git a/nncf/common/hardware/configs/npu.json b/nncf/common/hardware/configs/npu.json
@@ -39,6 +39,14 @@
                 "granularity": ["perchannel", "pertensor"],
                 "narrow_range": true
             },
+            "q8_w_sym_any_nr": {
+                "bits": 8,
+                "mode": "symmetric",
+                "level_low": -128,
+                "level_high": 127,
+                "granularity": ["perchannel", "pertensor"],
+                "narrow_range": [true, false]
+            },
             "q8_w_asym": {
                 "bits": 8,
                 "mode": "asymmetric",
@@ -392,7 +400,7 @@
             "type": "Embedding",
             "quantization": {
                 "weights": [
-                    "q8_w_sym", "q8_w_asym", "q8_a", "q8_a_sym", "q8_a_ch"
+                    "q8_w_sym_any_nr", "q8_w_asym"
                 ]
             }
         },

diff --git a/nncf/common/quantization/structs.py b/nncf/common/quantization/structs.py
@@ -58,6 +58,8 @@ def __init__(
             None if the signed/unsigned attribute should be determined based on the incoming activation
             statistics during range initialization.
         :param per_channel: True for per-channel quantization, False for per-tensor.
+        :param narrow_range: True if the range of quantized values should be narrowed as compared to the
+            naive case, False if all 2^`num_bits` quantization levels should be used.
         """
         self.num_bits = num_bits
         self.mode = mode

diff --git a/tests/cross_fw/test_templates/fq_params/fq_params.json b/tests/cross_fw/test_templates/fq_params/fq_params.json
@@ -1,30 +1,9 @@
 {
-    "weights_symmetric_sign_True_per_ch_False_narrow_range_True_hf_range_True": {
-        "levels": 255,
-        "input_low": -1.994419813156128,
-        "input_high": 1.994419813156128,
-        "output_low": -1.994419813156128,
-        "output_high": 1.994419813156128
-    },
-    "weights_symmetric_sign_True_per_ch_False_narrow_range_True_hf_range_False": {
-        "levels": 255,
-        "input_low": -0.997209906578064,
-        "input_high": 0.997209906578064,
-        "output_low": -0.997209906578064,
-        "output_high": 0.997209906578064
-    },
     "weights_symmetric_sign_True_per_ch_False_narrow_range_False_hf_range_True": {
         "levels": 256,
         "input_low": -2.0340540409088135,
-        "input_high": 2.0022718596646167,
-        "output_low": -2.0340540409088135,
-        "output_high": 2.0022718596646167
-    },
-    "weights_symmetric_sign_None_per_ch_False_narrow_range_False_hf_range_True": {
-        "levels": 256,
-        "input_low": -2.002271890640259,
         "input_high": 2.002271890640259,
-        "output_low": -2.002271890640259,
+        "output_low": -2.0340540409088135,
         "output_high": 2.002271890640259
     },
     "weights_symmetric_sign_True_per_ch_False_narrow_range_False_hf_range_False": {
@@ -263,14 +242,14 @@
             ]
         ]
     },
-    "weights_symmetric_sign_None_per_ch_False_narrow_range_True_hf_range_True": {
+    "weights_symmetric_sign_True_per_ch_False_narrow_range_True_hf_range_True": {
         "levels": 255,
         "input_low": -1.994419813156128,
         "input_high": 1.994419813156128,
         "output_low": -1.994419813156128,
         "output_high": 1.994419813156128
     },
-    "weights_symmetric_sign_None_per_ch_False_narrow_range_True_hf_range_False": {
+    "weights_symmetric_sign_True_per_ch_False_narrow_range_True_hf_range_False": {
         "levels": 255,
         "input_low": -0.997209906578064,
         "input_high": 0.997209906578064,

diff --git a/tests/onnx/data/reference_scales/embedding_model_performance.json b/tests/onnx/data/reference_scales/embedding_model_performance.json
@@ -1,7 +1,7 @@
 {
     "QuantizeLinear_Identity_Y_1": {
-        "scale": 0.007821254432201385,
-        "zero_point": -1
+        "scale": 0.007852046750485897,
+        "zero_point": 0
     },
     "QuantizeLinear_W_1": {
         "scale": [

diff --git a/tests/openvino/native/data/2024.1/reference_scales/UnifiedEmbeddingModel_performance.json b/tests/openvino/native/data/2024.1/reference_scales/UnifiedEmbeddingModel_performance.json
@@ -22,9 +22,9 @@
         ]
     },
     "MatMul_1/fq_output_0": {
-        "input_low": 0.0,
+        "input_low": -0.9424352049827576,
         "input_high": 0.9350724220275879,
-        "output_low": 0.0,
+        "output_low": -0.9424352049827576,
         "output_high": 0.9350724220275879
     },
     "MatMul_1/fq_weights_1": {
@@ -152,9 +152,9 @@
         "output_high": 0.6369616985321045
     },
     "Gather_1/fq_weights_0": {
-        "input_low": 0.0,
+        "input_low": -0.9424352049827576,
         "input_high": 0.9350724220275879,
-        "output_low": 0.0,
+        "output_low": -0.9424352049827576,
         "output_high": 0.9350724220275879
     }
 }