[CPU] Add Clamp for FakeConvertDecomposition
xuchen-intel committed Jan 24, 2025
1 parent 9232859 commit a6b2559
Showing 4 changed files with 37 additions and 5 deletions.
@@ -7,6 +7,7 @@
#include "itt.hpp"
#include "openvino/core/rt_info.hpp"
#include "openvino/op/add.hpp"
#include "openvino/op/clamp.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/op/convert.hpp"
#include "openvino/op/divide.hpp"
@@ -41,20 +42,30 @@ ov::pass::FakeConvertDecomposition::FakeConvertDecomposition() {
data = decomp_ops.add(data.get_node_shared_ptr());
}

// Align with clamp behavior of FakeConvert in ngraph reference
const auto lower_bound = fake_convert_node->get_destination_element_type() == ov::element::f8e4m3
? std::numeric_limits<ov::float8_e4m3>::lowest()
: std::numeric_limits<ov::float8_e5m2>::lowest();
const auto upper_bound = fake_convert_node->get_destination_element_type() == ov::element::f8e4m3
? std::numeric_limits<ov::float8_e4m3>::max()
: std::numeric_limits<ov::float8_e5m2>::max();

std::shared_ptr<Node> result;
const auto scale = decomp_ops.make<ov::op::v1::Multiply>(data, input_scale);
if (fake_convert_node->get_input_size() == 2) {
const auto clamp = std::make_shared<ov::op::v0::Clamp>(scale, lower_bound, upper_bound);
const auto downconvert =
decomp_ops.make<ov::op::v0::Convert>(scale, fake_convert_node->get_destination_element_type());
decomp_ops.make<ov::op::v0::Convert>(clamp, fake_convert_node->get_destination_element_type());
const auto upconvert = decomp_ops.make<ov::op::v0::Convert>(downconvert, input_type);

result = decomp_ops.make<ov::op::v1::Divide>(upconvert, input_scale);
} else {
const Output<Node> input_shift{fake_convert_node->input_value(2)};
const auto shift = decomp_ops.make<ov::op::v1::Subtract>(scale, input_shift);

const auto clamp = std::make_shared<ov::op::v0::Clamp>(shift, lower_bound, upper_bound);
const auto downconvert =
decomp_ops.make<ov::op::v0::Convert>(shift, fake_convert_node->get_destination_element_type());
decomp_ops.make<ov::op::v0::Convert>(clamp, fake_convert_node->get_destination_element_type());
const auto upconvert = decomp_ops.make<ov::op::v0::Convert>(downconvert, input_type);

const auto deshift = decomp_ops.make<ov::op::v1::Add>(upconvert, input_shift);
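
For reference, a minimal standalone C++ sketch of the decomposed arithmetic with the added Clamp (this is not the OpenVINO pass itself: plain float stands in for the graph ops, the f8 down/up-conversion rounding step is omitted, and the input, scale, and shift values are made up; the ±448 bounds are the standard f8e4m3 finite limits that the pass takes from std::numeric_limits<ov::float8_e4m3>, for f8e5m2 they would be ±57344):

#include <algorithm>
#include <iostream>

int main() {
    const float lower_bound = -448.0f;  // f8e4m3 lowest finite value
    const float upper_bound = 448.0f;   // f8e4m3 largest finite value

    const float x = 600.0f;    // example input that would overflow the f8 range
    const float scale = 1.0f;  // example per-tensor scale
    const float shift = 0.0f;  // example per-tensor shift

    // scale -> subtract shift -> clamp -> (convert to f8 and back) -> add shift -> divide
    const float scaled = x * scale - shift;
    const float clamped = std::clamp(scaled, lower_bound, upper_bound);
    const float result = (clamped + shift) / scale;

    std::cout << "clamped to " << clamped << ", result " << result << '\n';
    return 0;
}
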
3 changes: 2 additions & 1 deletion src/core/reference/src/op/fake_convert.cpp
@@ -47,7 +47,8 @@ void emulate_f8e5m2_on_fp16(const float16* const arg_f, float16* out_f, size_t c
// 101, 110, 111 - round up > 0x0080
val_bit_repr += (((rnmask > 0x0080) || (rnmask_tie == rne_tie)) << lshift);
}
val_bit_repr &= mask_mant; /* truncation */
val_bit_repr &= mask_mant; /* truncation */
val_bit_repr -= (((val_bit_repr & 0x7F00) == fp16_inf) << lshift); /* clamp */
out_u[i] = val_bit_repr;
}
}
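
A standalone sketch of the added /* clamp */ step in the f8e5m2 emulation, under the assumption that fp16_inf is the 0x7C00 fp16 infinity pattern and lshift is 8 (10 fp16 mantissa bits minus the 2 kept E5M2 bits), as in the surrounding function: when the exponent plus the two kept mantissa bits match the infinity pattern, subtracting one E5M2 ulp pulls the value back to 0x7B00, the largest finite E5M2 value (57344).

#include <cstdint>
#include <cstdio>

int main() {
    const uint16_t fp16_inf = 0x7C00;  // fp16 infinity bit pattern (assumed)
    const int lshift = 8;              // 10 fp16 mantissa bits - 2 kept E5M2 bits

    uint16_t val_bit_repr = 0x7C00;    // a value whose kept bits rounded up to infinity
    // If the exponent and the two kept mantissa bits equal the infinity pattern,
    // step one E5M2 ulp down; otherwise the subtrahend is zero and this is a no-op.
    val_bit_repr -= (((val_bit_repr & 0x7F00) == fp16_inf) << lshift);

    std::printf("clamped bit pattern: 0x%04X\n", static_cast<unsigned>(val_bit_repr));
    return 0;
}
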
@@ -175,6 +175,9 @@ std::vector<std::string> disabledTestPatterns() {
R"(.*ConvertCPULayerTest.*outFmts=(nhwc|nChw8c|nChw16c).*)",
// Issue: MFDNN-12917. The oneDNN emitter of conversion from fp32 to fp8 has rounding issue.
R"(.*ConvertCPULayerTest.*(\[1.1.1080.1920\]|\(2.17.5.4\))_.*_inputPRC=f32_targetPRC=f8e4m3_.*)",
// Issue: 123320
// Input precision bf16 is converted to fp32 by logic in core_config.cpp during ngraph reference test.
R"(.*FakeConvertLayerTest.*dataPrecision=bf16.*)",
// Need to generate sequence exactly in the i64 data type. Enable in scope of i64 enabling.
R"(.*RandomUniformLayerTestCPU.*OutPrc=i64.*)",
// Issue: 123815 (Tests are sensitive to available thread count on testing machines)
@@ -4,6 +4,8 @@

#include "shared_test_classes/single_op/fake_convert.hpp"

#include <random>

#include "openvino/opsets/opset1.hpp"
#include "openvino/opsets/opset13.hpp"

@@ -52,9 +54,24 @@ void FakeConvertLayerTest::SetUp() {

init_input_shapes(data_shapes);

std::vector<float> scale_values(ov::shape_size(scale_shape));
std::vector<float> shift_values(ov::shape_size(shift_shape));
std::mt19937 gen(0);
std::uniform_real_distribution<float> dis(0, static_cast<float>(ov::shape_size(scale_shape)));
for (auto& scale_value : scale_values)
scale_value = dis(gen);
for (auto& shift_value : shift_values)
shift_value = dis(gen);

if (data_prec == ov::element::f16) {
configuration.insert(ov::hint::inference_precision(ov::element::f16));
} else if (data_prec == ov::element::bf16) {
configuration.insert(ov::hint::inference_precision(ov::element::bf16));
}

const auto data = std::make_shared<opset1::Parameter>(data_prec, inputDynamicShapes.front());
const auto scale = std::make_shared<opset1::Constant>(data_prec, scale_shape);
const auto shift = std::make_shared<opset1::Constant>(data_prec, shift_shape);
const auto scale = std::make_shared<opset1::Constant>(data_prec, scale_shape, scale_values);
const auto shift = std::make_shared<opset1::Constant>(data_prec, shift_shape, shift_values);

const auto fake_convert = default_shift ? std::make_shared<opset13::FakeConvert>(data, scale, dst_prec)
: std::make_shared<opset13::FakeConvert>(data, scale, shift, dst_prec);
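
The constant-filling pattern added above can be illustrated with a small standalone sketch (the shape size of 4 is made up; ov::shape_size(scale_shape) provides it in the real test): seeding std::mt19937 with 0 keeps the generated scale/shift constants identical across runs, so the test remains reproducible while still exercising non-trivial values.

#include <iostream>
#include <random>
#include <vector>

int main() {
    const size_t shape_size = 4;  // stands in for ov::shape_size(scale_shape)
    std::vector<float> scale_values(shape_size);

    std::mt19937 gen(0);  // fixed seed -> identical values on every run
    std::uniform_real_distribution<float> dis(0.0f, static_cast<float>(shape_size));
    for (auto& v : scale_values)
        v = dis(gen);

    for (float v : scale_values)
        std::cout << v << ' ';
    std::cout << '\n';
    return 0;
}
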
