From 7572b2d4717e5eaa2ec67032f42b1d879ec004d8 Mon Sep 17 00:00:00 2001
From: xuchen-intel <chen.xu@intel.com>
Date: Thu, 23 Jan 2025 06:08:06 +0100
Subject: [PATCH 1/3] [CPU] Add Clamp for FakeConvertDecomposition

---
 .../fake_convert_decomposition.cpp            | 15 +++++++++++--
 src/core/reference/src/op/fake_convert.cpp    |  3 ++-
 .../skip_tests_config.cpp                     |  3 +++
 .../src/single_op/fake_convert.cpp            | 21 +++++++++++++++++--
 4 files changed, 37 insertions(+), 5 deletions(-)
diff --git a/src/common/transformations/src/transformations/op_conversions/fake_convert_decomposition.cpp b/src/common/transformations/src/transformations/op_conversions/fake_convert_decomposition.cpp
index 7f0a44df6a151d..000257652fb1f1 100644
--- a/src/common/transformations/src/transformations/op_conversions/fake_convert_decomposition.cpp
+++ b/src/common/transformations/src/transformations/op_conversions/fake_convert_decomposition.cpp
@@ -7,6 +7,7 @@
 #include "itt.hpp"
 #include "openvino/core/rt_info.hpp"
 #include "openvino/op/add.hpp"
+#include "openvino/op/clamp.hpp"
 #include "openvino/op/constant.hpp"
 #include "openvino/op/convert.hpp"
 #include "openvino/op/divide.hpp"
@@ -41,11 +42,20 @@ ov::pass::FakeConvertDecomposition::FakeConvertDecomposition() {
             data = decomp_ops.add(data.get_node_shared_ptr());
         }
 
+        // Align with clamp behavior of FakeConvert in ngraph reference
+        const auto lower_bound = fake_convert_node->get_destination_element_type() == ov::element::f8e4m3
+                                     ? std::numeric_limits<ov::float8_e4m3>::lowest()
+                                     : std::numeric_limits<ov::float8_e5m2>::lowest();
+        const auto upper_bound = fake_convert_node->get_destination_element_type() == ov::element::f8e4m3
+                                     ? std::numeric_limits<ov::float8_e4m3>::max()
+                                     : std::numeric_limits<ov::float8_e5m2>::max();
+
         std::shared_ptr<Node> result;
         const auto scale = decomp_ops.make<ov::op::v1::Multiply>(data, input_scale);
         if (fake_convert_node->get_input_size() == 2) {
+            const auto clamp = std::make_shared<ov::op::v0::Clamp>(scale, lower_bound, upper_bound);
             const auto downconvert =
-                decomp_ops.make<ov::op::v0::Convert>(scale, fake_convert_node->get_destination_element_type());
+                decomp_ops.make<ov::op::v0::Convert>(clamp, fake_convert_node->get_destination_element_type());
             const auto upconvert = decomp_ops.make<ov::op::v0::Convert>(downconvert, input_type);
 
             result = decomp_ops.make<ov::op::v1::Divide>(upconvert, input_scale);
@@ -53,8 +63,9 @@ ov::pass::FakeConvertDecomposition::FakeConvertDecomposition() {
             const Output<Node> input_shift{fake_convert_node->input_value(2)};
             const auto shift = decomp_ops.make<ov::op::v1::Subtract>(scale, input_shift);
 
+            const auto clamp = std::make_shared<ov::op::v0::Clamp>(shift, lower_bound, upper_bound);
             const auto downconvert =
-                decomp_ops.make<ov::op::v0::Convert>(shift, fake_convert_node->get_destination_element_type());
+                decomp_ops.make<ov::op::v0::Convert>(clamp, fake_convert_node->get_destination_element_type());
             const auto upconvert = decomp_ops.make<ov::op::v0::Convert>(downconvert, input_type);
 
             const auto deshift = decomp_ops.make<ov::op::v1::Add>(upconvert, input_shift);
diff --git a/src/core/reference/src/op/fake_convert.cpp b/src/core/reference/src/op/fake_convert.cpp
index cf7c20beb7de5c..4222af7568b07b 100644
--- a/src/core/reference/src/op/fake_convert.cpp
+++ b/src/core/reference/src/op/fake_convert.cpp
@@ -47,7 +47,8 @@ void emulate_f8e5m2_on_fp16(const float16* const arg_f, float16* out_f, size_t c
             // 101, 110, 111 - round up > 0x0080
             val_bit_repr += (((rnmask > 0x0080) || (rnmask_tie == rne_tie)) << lshift);
         }
-        val_bit_repr &= mask_mant; /* truncation */
+        val_bit_repr &= mask_mant;                                         /* truncation */
+        val_bit_repr -= (((val_bit_repr & 0x7F00) == fp16_inf) << lshift); /* clamp */
         out_u[i] = val_bit_repr;
     }
 }
diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp
index 4eb4fa819e3224..f8b8f4b59422fc 100644
--- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp
+++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp
@@ -175,6 +175,9 @@ std::vector<std::string> disabledTestPatterns() {
         R"(.*ConvertCPULayerTest.*outFmts=(nhwc|nChw8c|nChw16c).*)",
         // Issue: MFDNN-12917. The oneDNN emitter of conversion from fp32 to fp8 has rounding issue.
         R"(.*ConvertCPULayerTest.*(\[1.1.1080.1920\]|\(2.17.5.4\))_.*_inputPRC=f32_targetPRC=f8e4m3_.*)",
+        // Issue: 123320
+        // Input precision bf16 is converted to fp32 by logic in core_config.cpp during ngraph reference test.
+        R"(.*FakeConvertLayerTest.*dataPrecision=bf16.*)",
         // Need to generate sequence exactly in the i64 data type. Enable in scope of i64 enabling.
         R"(.*RandomUniformLayerTestCPU.*OutPrc=i64.*)",
         // Issue: 123815 (Tests are sensintive to available thread count on testing machines)
diff --git a/src/tests/functional/shared_test_classes/src/single_op/fake_convert.cpp b/src/tests/functional/shared_test_classes/src/single_op/fake_convert.cpp
index d207a8dabfb883..d571c38d41be1f 100644
--- a/src/tests/functional/shared_test_classes/src/single_op/fake_convert.cpp
+++ b/src/tests/functional/shared_test_classes/src/single_op/fake_convert.cpp
@@ -4,6 +4,8 @@
 
 #include "shared_test_classes/single_op/fake_convert.hpp"
 
+#include <random>
+
 #include "openvino/opsets/opset1.hpp"
 #include "openvino/opsets/opset13.hpp"
 
@@ -52,9 +54,24 @@ void FakeConvertLayerTest::SetUp() {
 
     init_input_shapes(data_shapes);
 
+    std::vector<float> scale_values(ov::shape_size(scale_shape));
+    std::vector<float> shift_values(ov::shape_size(shift_shape));
+    std::mt19937 gen(0);
+    std::uniform_real_distribution<float> dis(0, static_cast<float>(ov::shape_size(scale_shape)));
+    for (auto& scale_value : scale_values)
+        scale_value = dis(gen);
+    for (auto& shift_value : shift_values)
+        shift_value = dis(gen);
+
+    if (data_prec == ov::element::f16) {
+        configuration.insert(ov::hint::inference_precision(ov::element::f16));
+    } else if (data_prec == ov::element::bf16) {
+        configuration.insert(ov::hint::inference_precision(ov::element::bf16));
+    }
+
     const auto data = std::make_shared<opset1::Parameter>(data_prec, inputDynamicShapes.front());
-    const auto scale = std::make_shared<opset1::Constant>(data_prec, scale_shape);
-    const auto shift = std::make_shared<opset1::Constant>(data_prec, shift_shape);
+    const auto scale = std::make_shared<opset1::Constant>(data_prec, scale_shape, scale_values);
+    const auto shift = std::make_shared<opset1::Constant>(data_prec, shift_shape, shift_values);
 
     const auto fake_convert = default_shift ? std::make_shared<opset13::FakeConvert>(data, scale, dst_prec)
                                             : std::make_shared<opset13::FakeConvert>(data, scale, shift, dst_prec);

From c54f4279025772abcf06774f70632def29b35b1d Mon Sep 17 00:00:00 2001
From: xuchen-intel <chen.xu@intel.com>
Date: Sun, 26 Jan 2025 07:45:09 +0100
Subject: [PATCH 2/3] Retain non-clamp behavior for Convert layer

---
 .../openvino/reference/fake_convert.hpp       | 10 ++++++----
 src/core/reference/src/op/fake_convert.cpp    | 20 +++++++------------
 src/core/src/type/float8_e5m2.cpp             |  2 +-
 src/core/tests/eval.cpp                       | 11 ++++++----
 src/core/tests/float8_e5m2.cpp                | 12 +++++------
 5 files changed, 27 insertions(+), 28 deletions(-)

diff --git a/src/core/reference/include/openvino/reference/fake_convert.hpp b/src/core/reference/include/openvino/reference/fake_convert.hpp
index 8a0e8a94c91844..3cec7e8aa7f27c 100644
--- a/src/core/reference/include/openvino/reference/fake_convert.hpp
+++ b/src/core/reference/include/openvino/reference/fake_convert.hpp
@@ -18,23 +18,25 @@ namespace func {
  *
  * @param arg_f       Pointer to the input data.
  * @param out_f       Pointer to the otuput data.
- * @param count     Number of elements in the data input.
+ * @param count       Number of elements in the data input.
+ * @param use_clamp   If true, a result that rounds to +/-infinity is saturated to the largest finite f8e5m2 magnitude.
  */
-void emulate_f8e5m2_on_fp16(const float16* const arg_f, float16* out_f, size_t count);
+void emulate_f8e5m2_on_fp16(const float16* const arg_f, float16* out_f, size_t count, bool use_clamp = true);
 
 /**
  * @brief Emulation of conversion fp16 value to f8e4m3 format
  *
  * @param arg_f       Pointer to the input data.
  * @param out_f       Pointer to the otuput data.
- * @param count     Number of elements in the data input.
+ * @param count       Number of elements in the data input.
+ * @param use_clamp   Whether clamping is applied during the conversion (default: true).
  *
  * Exponent denormal values 0 -7
  * Exponent normal values 1..15 -6..8 (7 - exponent)
  * Exponent NaN values 15 8
  *
  */
-void emulate_f8e4m3_on_fp16(const float16* arg_f, float16* out_f, size_t count);
+void emulate_f8e4m3_on_fp16(const float16* arg_f, float16* out_f, size_t count, bool use_clamp = true);
 }  // namespace func
 
 namespace fake_convert_details {
diff --git a/src/core/reference/src/op/fake_convert.cpp b/src/core/reference/src/op/fake_convert.cpp
index 4222af7568b07b..98e5ef7046e86b 100644
--- a/src/core/reference/src/op/fake_convert.cpp
+++ b/src/core/reference/src/op/fake_convert.cpp
@@ -7,7 +7,7 @@
 namespace ov {
 namespace reference {
 namespace func {
-void emulate_f8e5m2_on_fp16(const float16* const arg_f, float16* out_f, size_t count) {
+void emulate_f8e5m2_on_fp16(const float16* const arg_f, float16* out_f, size_t count, bool use_clamp) {
     const auto arg_u = reinterpret_cast<const uint16_t*>(arg_f);
     auto out_u = reinterpret_cast<uint16_t*>(out_f);
     uint16_t val_bit_repr;
@@ -24,13 +24,6 @@ void emulate_f8e5m2_on_fp16(const float16* const arg_f, float16* out_f, size_t c
     for (size_t i = 0; i < count; ++i) {
         /// converts float number to half precision in round-to-nearest-even mode and returns half with converted value.
         val_bit_repr = arg_u[i];
-        /// 0x7c00 = 0111110000000000 - exponent mask
-        /// s 11111 xxx xxxx xxxx - is nan (if some x is 1) or inf (if all x is 0)
-        /// 0x7800 is 0111100000000000 and 0x400 is 0000010000000000
-        /// number is not normal if all exponent is 1 or 0
-        /// 0x7f00 is 0 11111 1100000000
-        /// 0x7b00 is 0 11110 1100000000
-        const bool can_round = ((val_bit_repr & 0x7F00) < 0x7B00) ? true : false;
         /// s 11111 xxx xxxx xxxx - is nan (if some x is 1) or inf (if all x is 0)
         const bool is_naninf = ((val_bit_repr & fp16_inf) == fp16_inf) ? true : false;
         /* nearest rounding masks */
@@ -39,7 +32,7 @@ void emulate_f8e5m2_on_fp16(const float16* const arg_f, float16* out_f, size_t c
         /// rne_tie - 0x180 is      0 00000 0110000000 or 384.0
         uint16_t rnmask_tie = (val_bit_repr & rne_tie);
 
-        if (!is_naninf && can_round) {
+        if (!is_naninf) {
             /* round to nearest even, if rne_mask is enabled */
             /* 0 00000 0010000000, find grs patterns */
             // 0xx - do nothing
@@ -47,8 +40,10 @@ void emulate_f8e5m2_on_fp16(const float16* const arg_f, float16* out_f, size_t c
             // 101, 110, 111 - round up > 0x0080
             val_bit_repr += (((rnmask > 0x0080) || (rnmask_tie == rne_tie)) << lshift);
         }
-        val_bit_repr &= mask_mant;                                         /* truncation */
-        val_bit_repr -= (((val_bit_repr & 0x7F00) == fp16_inf) << lshift); /* clamp */
+        val_bit_repr &= mask_mant; /* truncation */
+        if (use_clamp) {
+            val_bit_repr -= (((val_bit_repr & 0x7F00) == fp16_inf) << lshift); /* clamp */
+        }
         out_u[i] = val_bit_repr;
     }
 }
@@ -65,12 +60,11 @@ void emulate_f8e5m2_on_fp16(const float16* const arg_f, float16* out_f, size_t c
  * Exponent NaN values 15 8
  *
  */
-void emulate_f8e4m3_on_fp16(const float16* arg_f, float16* out_f, size_t count) {
+void emulate_f8e4m3_on_fp16(const float16* arg_f, float16* out_f, size_t count, bool use_clamp) {
     const auto arg_u = reinterpret_cast<const uint16_t*>(arg_f);
     auto out_u = reinterpret_cast<uint16_t*>(out_f);
     uint16_t val_bit_repr;
 
-    constexpr auto use_clamp = true;
     constexpr auto exp_bits = 5;
     constexpr auto mbits = 9;
     constexpr auto non_mant_bits = exp_bits + 1;  /// exponent + sign
diff --git a/src/core/src/type/float8_e5m2.cpp b/src/core/src/type/float8_e5m2.cpp
index 177c79471d6c12..59f64f8c55d0d7 100644
--- a/src/core/src/type/float8_e5m2.cpp
+++ b/src/core/src/type/float8_e5m2.cpp
@@ -28,7 +28,7 @@ constexpr uint8_t f8e5m2_m_mask = 0x03;  // f8e5m2 mantissa bit mask
 
 uint8_t f32_to_f8e5m2_bits(const float value) {
     auto f16 = static_cast<float16>(value);
-    reference::func::emulate_f8e5m2_on_fp16(&f16, &f16, 1);
+    reference::func::emulate_f8e5m2_on_fp16(&f16, &f16, 1, false);
     return static_cast<uint8_t>((f16.to_bits() >> byte_shift));
 }
 }  // namespace
diff --git a/src/core/tests/eval.cpp b/src/core/tests/eval.cpp
index c521e9d1aa3bee..3e05409de47208 100644
--- a/src/core/tests/eval.cpp
+++ b/src/core/tests/eval.cpp
@@ -3569,10 +3569,13 @@ TEST(eval, evaluate_fake_convert_f32_to_f8e5m2_big_scale_1) {
     EXPECT_EQ(result.get_element_type(), et);
     EXPECT_EQ(result.get_shape(), data_shape);
 
-    constexpr auto inf = std::numeric_limits<float>::infinity();
-    EXPECT_THAT(
-        read_vector<float>(result),
-        Pointwise(FloatEq(), std::vector<float>{fp8::MAX_F8E5M2 / 2.f, fp8::MAX_F8E5M2, fp8::MAX_F8E5M2, inf, inf}));
+    EXPECT_THAT(read_vector<float>(result),
+                Pointwise(FloatEq(),
+                          std::vector<float>{fp8::MAX_F8E5M2 / 2.f,
+                                             fp8::MAX_F8E5M2,
+                                             fp8::MAX_F8E5M2,
+                                             fp8::MAX_F8E5M2,
+                                             fp8::MAX_F8E5M2}));
 }
 
 TEST(eval, evaluate_fake_convert_f32_matching_f8_to_f8e5m2_scale_1) {
diff --git a/src/core/tests/float8_e5m2.cpp b/src/core/tests/float8_e5m2.cpp
index 41c10d75736de4..6631db425af4c1 100644
--- a/src/core/tests/float8_e5m2.cpp
+++ b/src/core/tests/float8_e5m2.cpp
@@ -191,28 +191,28 @@ TEST(F8E5M2Test, f8e5m2_num_limits_exp) {
     EXPECT_EQ(max_exp10, 4);
 }
 
-TEST(F8E5M2Test, f32_ge_f8_max_round_to_inf) {
+TEST(F8E5M2Test, f32_as_f16_inf_gt_f8_max_round_to_inf) {
     const auto f8 = ov::float8_e5m2(65520.0f);
 
     EXPECT_EQ(f8.to_bits(), 0b01111100);
 }
 
-TEST(F8E5M2Test, f32_ge_f8_max_round_to_max) {
+TEST(F8E5M2Test, f32_gt_f16_max_gt_f8_max_round_to_inf) {
     const auto f8 = ov::float8_e5m2(65519.9f);
 
-    EXPECT_EQ(f8.to_bits(), 0b01111011);
+    EXPECT_EQ(f8.to_bits(), 0b01111100);
 }
 
-TEST(F8E5M2Test, f32_ge_f8_max_round_to_minus_inf) {
+TEST(F8E5M2Test, f32_as_f16_minus_inf_lt_f8_lowest_round_to_minus_inf) {
     const auto f8 = ov::float8_e5m2(-65520.0f);
 
     EXPECT_EQ(f8.to_bits(), 0b11111100);
 }
 
-TEST(F8E5M2Test, f32_ge_f8_max_round_to_lowest) {
+TEST(F8E5M2Test, f32_lt_f16_lowest_lt_f8_lowest_round_to_minus_inf) {
     const auto f8 = ov::float8_e5m2(-65519.9f);
 
-    EXPECT_EQ(f8.to_bits(), 0b11111011);
+    EXPECT_EQ(f8.to_bits(), 0b11111100);
 }
 
 template <class TContainer>

From 5589947e99be96a342d3d34a29abea4ba8fdc2ea Mon Sep 17 00:00:00 2001
From: xuchen-intel <chen.xu@intel.com>
Date: Mon, 27 Jan 2025 03:13:04 +0100
Subject: [PATCH 3/3] Revise FakeConvertDecomposition transformation test
 accordingly

---
 .../op_conversions/fake_convert_decomposition.cpp   | 12 ++++++------
 .../fake_convert_decomposition_test.cpp             | 13 +++++++++++--
 2 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/src/common/transformations/src/transformations/op_conversions/fake_convert_decomposition.cpp b/src/common/transformations/src/transformations/op_conversions/fake_convert_decomposition.cpp
index 000257652fb1f1..2df0f8867f1414 100644
--- a/src/common/transformations/src/transformations/op_conversions/fake_convert_decomposition.cpp
+++ b/src/common/transformations/src/transformations/op_conversions/fake_convert_decomposition.cpp
@@ -44,16 +44,16 @@ ov::pass::FakeConvertDecomposition::FakeConvertDecomposition() {
 
         // Align with clamp behavior of FakeConvert in ngraph reference
         const auto lower_bound = fake_convert_node->get_destination_element_type() == ov::element::f8e4m3
-                                     ? std::numeric_limits<ov::float8_e4m3>::lowest()
-                                     : std::numeric_limits<ov::float8_e5m2>::lowest();
+                                     ? static_cast<float>(std::numeric_limits<ov::float8_e4m3>::lowest())
+                                     : static_cast<float>(std::numeric_limits<ov::float8_e5m2>::lowest());
         const auto upper_bound = fake_convert_node->get_destination_element_type() == ov::element::f8e4m3
-                                     ? std::numeric_limits<ov::float8_e4m3>::max()
-                                     : std::numeric_limits<ov::float8_e5m2>::max();
+                                     ? static_cast<float>(std::numeric_limits<ov::float8_e4m3>::max())
+                                     : static_cast<float>(std::numeric_limits<ov::float8_e5m2>::max());
 
         std::shared_ptr<Node> result;
         const auto scale = decomp_ops.make<ov::op::v1::Multiply>(data, input_scale);
         if (fake_convert_node->get_input_size() == 2) {
-            const auto clamp = std::make_shared<ov::op::v0::Clamp>(scale, lower_bound, upper_bound);
+            const auto clamp = decomp_ops.make<ov::op::v0::Clamp>(scale, lower_bound, upper_bound);
             const auto downconvert =
                 decomp_ops.make<ov::op::v0::Convert>(clamp, fake_convert_node->get_destination_element_type());
             const auto upconvert = decomp_ops.make<ov::op::v0::Convert>(downconvert, input_type);
@@ -63,7 +63,7 @@ ov::pass::FakeConvertDecomposition::FakeConvertDecomposition() {
             const Output<Node> input_shift{fake_convert_node->input_value(2)};
             const auto shift = decomp_ops.make<ov::op::v1::Subtract>(scale, input_shift);
 
-            const auto clamp = std::make_shared<ov::op::v0::Clamp>(shift, lower_bound, upper_bound);
+            const auto clamp = decomp_ops.make<ov::op::v0::Clamp>(shift, lower_bound, upper_bound);
             const auto downconvert =
                 decomp_ops.make<ov::op::v0::Convert>(clamp, fake_convert_node->get_destination_element_type());
             const auto upconvert = decomp_ops.make<ov::op::v0::Convert>(downconvert, input_type);
diff --git a/src/common/transformations/tests/op_conversions/fake_convert_decomposition_test.cpp b/src/common/transformations/tests/op_conversions/fake_convert_decomposition_test.cpp
index 33b167ace11e24..63b1e5d7eac36b 100644
--- a/src/common/transformations/tests/op_conversions/fake_convert_decomposition_test.cpp
+++ b/src/common/transformations/tests/op_conversions/fake_convert_decomposition_test.cpp
@@ -80,17 +80,26 @@ TEST_P(FakeConvertDecompositionTest, CompareFunctions) {
         params.push_back(input_data);
         std::shared_ptr<Node> data = input_data;
 
+        const auto lower_bound = dst_prec == ov::element::f8e4m3
+                                 ? static_cast<float>(std::numeric_limits<ov::float8_e4m3>::lowest())
+                                 : static_cast<float>(std::numeric_limits<ov::float8_e5m2>::lowest());
+        const auto upper_bound = dst_prec == ov::element::f8e4m3
+                                 ? static_cast<float>(std::numeric_limits<ov::float8_e4m3>::max())
+                                 : static_cast<float>(std::numeric_limits<ov::float8_e5m2>::max());
+
         std::shared_ptr<Node> result;
         const auto scale = std::make_shared<ov::op::v1::Multiply>(data, input_scale);
         if (default_shift) {
-            const auto downconvert = std::make_shared<ov::op::v0::Convert>(scale, dst_prec);
+            const auto clamp = std::make_shared<ov::op::v0::Clamp>(scale, lower_bound, upper_bound);
+            const auto downconvert = std::make_shared<ov::op::v0::Convert>(clamp, dst_prec);
             const auto upconvert = std::make_shared<ov::op::v0::Convert>(downconvert, data_prec);
 
             result = std::make_shared<ov::op::v1::Divide>(upconvert, input_scale);
         } else {
             const auto shift = std::make_shared<ov::op::v1::Subtract>(scale, input_shift);
 
-            const auto downconvert = std::make_shared<ov::op::v0::Convert>(shift, dst_prec);
+            const auto clamp = std::make_shared<ov::op::v0::Clamp>(shift, lower_bound, upper_bound);
+            const auto downconvert = std::make_shared<ov::op::v0::Convert>(clamp, dst_prec);
             const auto upconvert = std::make_shared<ov::op::v0::Convert>(downconvert, data_prec);
 
             const auto deshift = std::make_shared<ov::op::v1::Add>(upconvert, input_shift);