From 7572b2d4717e5eaa2ec67032f42b1d879ec004d8 Mon Sep 17 00:00:00 2001 From: xuchen-intel Date: Thu, 23 Jan 2025 06:08:06 +0100 Subject: [PATCH 1/3] [CPU] Add Clamp for FakeConvertDecomposition --- .../fake_convert_decomposition.cpp | 15 +++++++++++-- src/core/reference/src/op/fake_convert.cpp | 3 ++- .../skip_tests_config.cpp | 3 +++ .../src/single_op/fake_convert.cpp | 21 +++++++++++++++++-- 4 files changed, 37 insertions(+), 5 deletions(-) diff --git a/src/common/transformations/src/transformations/op_conversions/fake_convert_decomposition.cpp b/src/common/transformations/src/transformations/op_conversions/fake_convert_decomposition.cpp index 7f0a44df6a151d..000257652fb1f1 100644 --- a/src/common/transformations/src/transformations/op_conversions/fake_convert_decomposition.cpp +++ b/src/common/transformations/src/transformations/op_conversions/fake_convert_decomposition.cpp @@ -7,6 +7,7 @@ #include "itt.hpp" #include "openvino/core/rt_info.hpp" #include "openvino/op/add.hpp" +#include "openvino/op/clamp.hpp" #include "openvino/op/constant.hpp" #include "openvino/op/convert.hpp" #include "openvino/op/divide.hpp" @@ -41,11 +42,20 @@ ov::pass::FakeConvertDecomposition::FakeConvertDecomposition() { data = decomp_ops.add(data.get_node_shared_ptr()); } + // Align with clamp behavior of FakeConvert in ngraph reference + const auto lower_bound = fake_convert_node->get_destination_element_type() == ov::element::f8e4m3 + ? std::numeric_limits::lowest() + : std::numeric_limits::lowest(); + const auto upper_bound = fake_convert_node->get_destination_element_type() == ov::element::f8e4m3 + ? std::numeric_limits::max() + : std::numeric_limits::max(); + std::shared_ptr result; const auto scale = decomp_ops.make(data, input_scale); if (fake_convert_node->get_input_size() == 2) { + const auto clamp = std::make_shared(scale, lower_bound, upper_bound); const auto downconvert = - decomp_ops.make(scale, fake_convert_node->get_destination_element_type()); + decomp_ops.make(clamp, fake_convert_node->get_destination_element_type()); const auto upconvert = decomp_ops.make(downconvert, input_type); result = decomp_ops.make(upconvert, input_scale); @@ -53,8 +63,9 @@ ov::pass::FakeConvertDecomposition::FakeConvertDecomposition() { const Output input_shift{fake_convert_node->input_value(2)}; const auto shift = decomp_ops.make(scale, input_shift); + const auto clamp = std::make_shared(shift, lower_bound, upper_bound); const auto downconvert = - decomp_ops.make(shift, fake_convert_node->get_destination_element_type()); + decomp_ops.make(clamp, fake_convert_node->get_destination_element_type()); const auto upconvert = decomp_ops.make(downconvert, input_type); const auto deshift = decomp_ops.make(upconvert, input_shift); diff --git a/src/core/reference/src/op/fake_convert.cpp b/src/core/reference/src/op/fake_convert.cpp index cf7c20beb7de5c..4222af7568b07b 100644 --- a/src/core/reference/src/op/fake_convert.cpp +++ b/src/core/reference/src/op/fake_convert.cpp @@ -47,7 +47,8 @@ void emulate_f8e5m2_on_fp16(const float16* const arg_f, float16* out_f, size_t c // 101, 110, 111 - round up > 0x0080 val_bit_repr += (((rnmask > 0x0080) || (rnmask_tie == rne_tie)) << lshift); } - val_bit_repr &= mask_mant; /* truncation */ + val_bit_repr &= mask_mant; /* truncation */ + val_bit_repr -= (((val_bit_repr & 0x7F00) == fp16_inf) << lshift); /* clamp */ out_u[i] = val_bit_repr; } } diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp index 4eb4fa819e3224..f8b8f4b59422fc 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -175,6 +175,9 @@ std::vector disabledTestPatterns() { R"(.*ConvertCPULayerTest.*outFmts=(nhwc|nChw8c|nChw16c).*)", // Issue: MFDNN-12917. The oneDNN emitter of conversion from fp32 to fp8 has rounding issue. R"(.*ConvertCPULayerTest.*(\[1.1.1080.1920\]|\(2.17.5.4\))_.*_inputPRC=f32_targetPRC=f8e4m3_.*)", + // Issue: 123320 + // Input precision bf16 is converted to fp32 by logic in core_config.cpp during ngraph reference test. + R"(.*FakeConvertLayerTest.*dataPrecision=bf16.*)", // Need to generate sequence exactly in the i64 data type. Enable in scope of i64 enabling. R"(.*RandomUniformLayerTestCPU.*OutPrc=i64.*)", // Issue: 123815 (Tests are sensintive to available thread count on testing machines) diff --git a/src/tests/functional/shared_test_classes/src/single_op/fake_convert.cpp b/src/tests/functional/shared_test_classes/src/single_op/fake_convert.cpp index d207a8dabfb883..d571c38d41be1f 100644 --- a/src/tests/functional/shared_test_classes/src/single_op/fake_convert.cpp +++ b/src/tests/functional/shared_test_classes/src/single_op/fake_convert.cpp @@ -4,6 +4,8 @@ #include "shared_test_classes/single_op/fake_convert.hpp" +#include + #include "openvino/opsets/opset1.hpp" #include "openvino/opsets/opset13.hpp" @@ -52,9 +54,24 @@ void FakeConvertLayerTest::SetUp() { init_input_shapes(data_shapes); + std::vector scale_values(ov::shape_size(scale_shape)); + std::vector shift_values(ov::shape_size(shift_shape)); + std::mt19937 gen(0); + std::uniform_real_distribution dis(0, static_cast(ov::shape_size(scale_shape))); + for (auto& scale_value : scale_values) + scale_value = dis(gen); + for (auto& shift_value : shift_values) + shift_value = dis(gen); + + if (data_prec == ov::element::f16) { + configuration.insert(ov::hint::inference_precision(ov::element::f16)); + } else if (data_prec == ov::element::bf16) { + configuration.insert(ov::hint::inference_precision(ov::element::bf16)); + } + const auto data = std::make_shared(data_prec, inputDynamicShapes.front()); - const auto scale = std::make_shared(data_prec, scale_shape); - const auto shift = std::make_shared(data_prec, shift_shape); + const auto scale = std::make_shared(data_prec, scale_shape, scale_values); + const auto shift = std::make_shared(data_prec, shift_shape, shift_values); const auto fake_convert = default_shift ? std::make_shared(data, scale, dst_prec) : std::make_shared(data, scale, shift, dst_prec); From c54f4279025772abcf06774f70632def29b35b1d Mon Sep 17 00:00:00 2001 From: xuchen-intel Date: Sun, 26 Jan 2025 07:45:09 +0100 Subject: [PATCH 2/3] Retain non-clamp behavior for Convert layer --- .../openvino/reference/fake_convert.hpp | 10 ++++++---- src/core/reference/src/op/fake_convert.cpp | 20 +++++++------------ src/core/src/type/float8_e5m2.cpp | 2 +- src/core/tests/eval.cpp | 11 ++++++---- src/core/tests/float8_e5m2.cpp | 12 +++++------ 5 files changed, 27 insertions(+), 28 deletions(-) diff --git a/src/core/reference/include/openvino/reference/fake_convert.hpp b/src/core/reference/include/openvino/reference/fake_convert.hpp index 8a0e8a94c91844..3cec7e8aa7f27c 100644 --- a/src/core/reference/include/openvino/reference/fake_convert.hpp +++ b/src/core/reference/include/openvino/reference/fake_convert.hpp @@ -18,23 +18,25 @@ namespace func { * * @param arg_f Pointer to the input data. * @param out_f Pointer to the otuput data. - * @param count Number of elements in the data input. + * @param count Number of elements in the data input. + * @param use_clamp If use clamp. */ -void emulate_f8e5m2_on_fp16(const float16* const arg_f, float16* out_f, size_t count); +void emulate_f8e5m2_on_fp16(const float16* const arg_f, float16* out_f, size_t count, bool use_clamp = true); /** * @brief Emulation of conversion fp16 value to f8e4m3 format * * @param arg_f Pointer to the input data. * @param out_f Pointer to the otuput data. - * @param count Number of elements in the data input. + * @param count Number of elements in the data input. + * @param use_clamp If use clamp. * * Exponent denormal values 0 -7 * Exponent normal values 1..15 -6..8 (7 - exponent) * Exponent NaN values 15 8 * */ -void emulate_f8e4m3_on_fp16(const float16* arg_f, float16* out_f, size_t count); +void emulate_f8e4m3_on_fp16(const float16* arg_f, float16* out_f, size_t count, bool use_clamp = true); } // namespace func namespace fake_convert_details { diff --git a/src/core/reference/src/op/fake_convert.cpp b/src/core/reference/src/op/fake_convert.cpp index 4222af7568b07b..98e5ef7046e86b 100644 --- a/src/core/reference/src/op/fake_convert.cpp +++ b/src/core/reference/src/op/fake_convert.cpp @@ -7,7 +7,7 @@ namespace ov { namespace reference { namespace func { -void emulate_f8e5m2_on_fp16(const float16* const arg_f, float16* out_f, size_t count) { +void emulate_f8e5m2_on_fp16(const float16* const arg_f, float16* out_f, size_t count, bool use_clamp) { const auto arg_u = reinterpret_cast(arg_f); auto out_u = reinterpret_cast(out_f); uint16_t val_bit_repr; @@ -24,13 +24,6 @@ void emulate_f8e5m2_on_fp16(const float16* const arg_f, float16* out_f, size_t c for (size_t i = 0; i < count; ++i) { /// converts float number to half precision in round-to-nearest-even mode and returns half with converted value. val_bit_repr = arg_u[i]; - /// 0x7c00 = 0111110000000000 - exponent mask - /// s 11111 xxx xxxx xxxx - is nan (if some x is 1) or inf (if all x is 0) - /// 0x7800 is 0111100000000000 and 0x400 is 0000010000000000 - /// number is not normal if all exponent is 1 or 0 - /// 0x7f00 is 0 11111 1100000000 - /// 0x7b00 is 0 11110 1100000000 - const bool can_round = ((val_bit_repr & 0x7F00) < 0x7B00) ? true : false; /// s 11111 xxx xxxx xxxx - is nan (if some x is 1) or inf (if all x is 0) const bool is_naninf = ((val_bit_repr & fp16_inf) == fp16_inf) ? true : false; /* nearest rounding masks */ @@ -39,7 +32,7 @@ void emulate_f8e5m2_on_fp16(const float16* const arg_f, float16* out_f, size_t c /// rne_tie - 0x180 is 0 00000 0110000000 or 384.0 uint16_t rnmask_tie = (val_bit_repr & rne_tie); - if (!is_naninf && can_round) { + if (!is_naninf) { /* round to nearest even, if rne_mask is enabled */ /* 0 00000 0010000000, find grs patterns */ // 0xx - do nothing @@ -47,8 +40,10 @@ void emulate_f8e5m2_on_fp16(const float16* const arg_f, float16* out_f, size_t c // 101, 110, 111 - round up > 0x0080 val_bit_repr += (((rnmask > 0x0080) || (rnmask_tie == rne_tie)) << lshift); } - val_bit_repr &= mask_mant; /* truncation */ - val_bit_repr -= (((val_bit_repr & 0x7F00) == fp16_inf) << lshift); /* clamp */ + val_bit_repr &= mask_mant; /* truncation */ + if (use_clamp) { + val_bit_repr -= (((val_bit_repr & 0x7F00) == fp16_inf) << lshift); /* clamp */ + } out_u[i] = val_bit_repr; } } @@ -65,12 +60,11 @@ void emulate_f8e5m2_on_fp16(const float16* const arg_f, float16* out_f, size_t c * Exponent NaN values 15 8 * */ -void emulate_f8e4m3_on_fp16(const float16* arg_f, float16* out_f, size_t count) { +void emulate_f8e4m3_on_fp16(const float16* arg_f, float16* out_f, size_t count, bool use_clamp) { const auto arg_u = reinterpret_cast(arg_f); auto out_u = reinterpret_cast(out_f); uint16_t val_bit_repr; - constexpr auto use_clamp = true; constexpr auto exp_bits = 5; constexpr auto mbits = 9; constexpr auto non_mant_bits = exp_bits + 1; /// exponent + sign diff --git a/src/core/src/type/float8_e5m2.cpp b/src/core/src/type/float8_e5m2.cpp index 177c79471d6c12..59f64f8c55d0d7 100644 --- a/src/core/src/type/float8_e5m2.cpp +++ b/src/core/src/type/float8_e5m2.cpp @@ -28,7 +28,7 @@ constexpr uint8_t f8e5m2_m_mask = 0x03; // f8e5m2 mantissa bit mask uint8_t f32_to_f8e5m2_bits(const float value) { auto f16 = static_cast(value); - reference::func::emulate_f8e5m2_on_fp16(&f16, &f16, 1); + reference::func::emulate_f8e5m2_on_fp16(&f16, &f16, 1, false); return static_cast((f16.to_bits() >> byte_shift)); } } // namespace diff --git a/src/core/tests/eval.cpp b/src/core/tests/eval.cpp index c521e9d1aa3bee..3e05409de47208 100644 --- a/src/core/tests/eval.cpp +++ b/src/core/tests/eval.cpp @@ -3569,10 +3569,13 @@ TEST(eval, evaluate_fake_convert_f32_to_f8e5m2_big_scale_1) { EXPECT_EQ(result.get_element_type(), et); EXPECT_EQ(result.get_shape(), data_shape); - constexpr auto inf = std::numeric_limits::infinity(); - EXPECT_THAT( - read_vector(result), - Pointwise(FloatEq(), std::vector{fp8::MAX_F8E5M2 / 2.f, fp8::MAX_F8E5M2, fp8::MAX_F8E5M2, inf, inf})); + EXPECT_THAT(read_vector(result), + Pointwise(FloatEq(), + std::vector{fp8::MAX_F8E5M2 / 2.f, + fp8::MAX_F8E5M2, + fp8::MAX_F8E5M2, + fp8::MAX_F8E5M2, + fp8::MAX_F8E5M2})); } TEST(eval, evaluate_fake_convert_f32_matching_f8_to_f8e5m2_scale_1) { diff --git a/src/core/tests/float8_e5m2.cpp b/src/core/tests/float8_e5m2.cpp index 41c10d75736de4..6631db425af4c1 100644 --- a/src/core/tests/float8_e5m2.cpp +++ b/src/core/tests/float8_e5m2.cpp @@ -191,28 +191,28 @@ TEST(F8E5M2Test, f8e5m2_num_limits_exp) { EXPECT_EQ(max_exp10, 4); } -TEST(F8E5M2Test, f32_ge_f8_max_round_to_inf) { +TEST(F8E5M2Test, f32_as_f16_inf_gt_f8_max_round_to_inf) { const auto f8 = ov::float8_e5m2(65520.0f); EXPECT_EQ(f8.to_bits(), 0b01111100); } -TEST(F8E5M2Test, f32_ge_f8_max_round_to_max) { +TEST(F8E5M2Test, f32_gt_f16_max_gt_f8_max_round_to_inf) { const auto f8 = ov::float8_e5m2(65519.9f); - EXPECT_EQ(f8.to_bits(), 0b01111011); + EXPECT_EQ(f8.to_bits(), 0b01111100); } -TEST(F8E5M2Test, f32_ge_f8_max_round_to_minus_inf) { +TEST(F8E5M2Test, f32_as_f16_minus_inf_lt_f8_lowest_round_to_minus_inf) { const auto f8 = ov::float8_e5m2(-65520.0f); EXPECT_EQ(f8.to_bits(), 0b11111100); } -TEST(F8E5M2Test, f32_ge_f8_max_round_to_lowest) { +TEST(F8E5M2Test, f32_lt_f16_lowest_lt_f8_lowest_round_to_minus_inf) { const auto f8 = ov::float8_e5m2(-65519.9f); - EXPECT_EQ(f8.to_bits(), 0b11111011); + EXPECT_EQ(f8.to_bits(), 0b11111100); } template From 5589947e99be96a342d3d34a29abea4ba8fdc2ea Mon Sep 17 00:00:00 2001 From: xuchen-intel Date: Mon, 27 Jan 2025 03:13:04 +0100 Subject: [PATCH 3/3] Revise FakeConvertDecomposition transformation test accordingly --- .../op_conversions/fake_convert_decomposition.cpp | 12 ++++++------ .../fake_convert_decomposition_test.cpp | 13 +++++++++++-- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/src/common/transformations/src/transformations/op_conversions/fake_convert_decomposition.cpp b/src/common/transformations/src/transformations/op_conversions/fake_convert_decomposition.cpp index 000257652fb1f1..2df0f8867f1414 100644 --- a/src/common/transformations/src/transformations/op_conversions/fake_convert_decomposition.cpp +++ b/src/common/transformations/src/transformations/op_conversions/fake_convert_decomposition.cpp @@ -44,16 +44,16 @@ ov::pass::FakeConvertDecomposition::FakeConvertDecomposition() { // Align with clamp behavior of FakeConvert in ngraph reference const auto lower_bound = fake_convert_node->get_destination_element_type() == ov::element::f8e4m3 - ? std::numeric_limits::lowest() - : std::numeric_limits::lowest(); + ? static_cast(std::numeric_limits::lowest()) + : static_cast(std::numeric_limits::lowest()); const auto upper_bound = fake_convert_node->get_destination_element_type() == ov::element::f8e4m3 - ? std::numeric_limits::max() - : std::numeric_limits::max(); + ? static_cast(std::numeric_limits::max()) + : static_cast(std::numeric_limits::max()); std::shared_ptr result; const auto scale = decomp_ops.make(data, input_scale); if (fake_convert_node->get_input_size() == 2) { - const auto clamp = std::make_shared(scale, lower_bound, upper_bound); + const auto clamp = decomp_ops.make(scale, lower_bound, upper_bound); const auto downconvert = decomp_ops.make(clamp, fake_convert_node->get_destination_element_type()); const auto upconvert = decomp_ops.make(downconvert, input_type); @@ -63,7 +63,7 @@ ov::pass::FakeConvertDecomposition::FakeConvertDecomposition() { const Output input_shift{fake_convert_node->input_value(2)}; const auto shift = decomp_ops.make(scale, input_shift); - const auto clamp = std::make_shared(shift, lower_bound, upper_bound); + const auto clamp = decomp_ops.make(shift, lower_bound, upper_bound); const auto downconvert = decomp_ops.make(clamp, fake_convert_node->get_destination_element_type()); const auto upconvert = decomp_ops.make(downconvert, input_type); diff --git a/src/common/transformations/tests/op_conversions/fake_convert_decomposition_test.cpp b/src/common/transformations/tests/op_conversions/fake_convert_decomposition_test.cpp index 33b167ace11e24..63b1e5d7eac36b 100644 --- a/src/common/transformations/tests/op_conversions/fake_convert_decomposition_test.cpp +++ b/src/common/transformations/tests/op_conversions/fake_convert_decomposition_test.cpp @@ -80,17 +80,26 @@ TEST_P(FakeConvertDecompositionTest, CompareFunctions) { params.push_back(input_data); std::shared_ptr data = input_data; + const auto lower_bound = dst_prec == ov::element::f8e4m3 + ? static_cast(std::numeric_limits::lowest()) + : static_cast(std::numeric_limits::lowest()); + const auto upper_bound = dst_prec == ov::element::f8e4m3 + ? static_cast(std::numeric_limits::max()) + : static_cast(std::numeric_limits::max()); + std::shared_ptr result; const auto scale = std::make_shared(data, input_scale); if (default_shift) { - const auto downconvert = std::make_shared(scale, dst_prec); + const auto clamp = std::make_shared(scale, lower_bound, upper_bound); + const auto downconvert = std::make_shared(clamp, dst_prec); const auto upconvert = std::make_shared(downconvert, data_prec); result = std::make_shared(upconvert, input_scale); } else { const auto shift = std::make_shared(scale, input_shift); - const auto downconvert = std::make_shared(shift, dst_prec); + const auto clamp = std::make_shared(shift, lower_bound, upper_bound); + const auto downconvert = std::make_shared(clamp, dst_prec); const auto upconvert = std::make_shared(downconvert, data_prec); const auto deshift = std::make_shared(upconvert, input_shift);