Skip to content

Commit

Permalink
[CPU] Add Clamp for FakeConvertDecomposition
Browse files Browse the repository at this point in the history
  • Loading branch information
xuchen-intel committed Jan 24, 2025
1 parent 5469038 commit 876d289
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "itt.hpp"
#include "openvino/core/rt_info.hpp"
#include "openvino/op/add.hpp"
#include "openvino/op/clamp.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/op/convert.hpp"
#include "openvino/op/divide.hpp"
Expand Down Expand Up @@ -41,20 +42,28 @@ ov::pass::FakeConvertDecomposition::FakeConvertDecomposition() {
data = decomp_ops.add(data.get_node_shared_ptr());
}

// Align with clamp behavior of FakeConvert in ngraph reference
const auto lower_bound = fake_convert_node->get_destination_element_type() == ov::element::f8e4m3 ?
std::numeric_limits<ov::float8_e4m3>::lowest() : std::numeric_limits<ov::float8_e5m2>::lowest();
const auto upper_bound = fake_convert_node->get_destination_element_type() == ov::element::f8e4m3 ?
std::numeric_limits<ov::float8_e4m3>::max() : std::numeric_limits<ov::float8_e5m2>::max();

std::shared_ptr<Node> result;
const auto scale = decomp_ops.make<ov::op::v1::Multiply>(data, input_scale);
if (fake_convert_node->get_input_size() == 2) {
const auto clamp = std::make_shared<ov::op::v0::Clamp>(scale, lower_bound, upper_bound);
const auto downconvert =
decomp_ops.make<ov::op::v0::Convert>(scale, fake_convert_node->get_destination_element_type());
decomp_ops.make<ov::op::v0::Convert>(clamp, fake_convert_node->get_destination_element_type());
const auto upconvert = decomp_ops.make<ov::op::v0::Convert>(downconvert, input_type);

result = decomp_ops.make<ov::op::v1::Divide>(upconvert, input_scale);
} else {
const Output<Node> input_shift{fake_convert_node->input_value(2)};
const auto shift = decomp_ops.make<ov::op::v1::Subtract>(scale, input_shift);

const auto clamp = std::make_shared<ov::op::v0::Clamp>(shift, lower_bound, upper_bound);
const auto downconvert =
decomp_ops.make<ov::op::v0::Convert>(shift, fake_convert_node->get_destination_element_type());
decomp_ops.make<ov::op::v0::Convert>(clamp, fake_convert_node->get_destination_element_type());
const auto upconvert = decomp_ops.make<ov::op::v0::Convert>(downconvert, input_type);

const auto deshift = decomp_ops.make<ov::op::v1::Add>(upconvert, input_shift);
Expand Down
1 change: 1 addition & 0 deletions src/core/reference/src/op/fake_convert.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ void emulate_f8e5m2_on_fp16(const float16* const arg_f, float16* out_f, size_t c
val_bit_repr += (((rnmask > 0x0080) || (rnmask_tie == rne_tie)) << lshift);
}
val_bit_repr &= mask_mant; /* truncation */
val_bit_repr -= (((val_bit_repr & 0x7F00) == fp16_inf) << lshift); /* clamp */
out_u[i] = val_bit_repr;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,9 @@ std::vector<std::string> disabledTestPatterns() {
R"(.*ConvertCPULayerTest.*outFmts=(nhwc|nChw8c|nChw16c).*)",
// Issue: MFDNN-12917. The oneDNN emitter of conversion from fp32 to fp8 has a rounding issue.
R"(.*ConvertCPULayerTest.*(\[1.1.1080.1920\]|\(2.17.5.4\))_.*_inputPRC=f32_targetPRC=f8e4m3_.*)",
// Issue: 123320
// Input precision bf16 is converted to fp32 by logic in core_config.cpp during ngraph reference test.
R"(.*FakeConvertLayerTest.*dataPrecision=bf16.*)",
// Need to generate sequence exactly in the i64 data type. Enable in scope of i64 enabling.
R"(.*RandomUniformLayerTestCPU.*OutPrc=i64.*)",
// Issue: 123815 (Tests are sensitive to available thread count on testing machines)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
#include "openvino/opsets/opset1.hpp"
#include "openvino/opsets/opset13.hpp"

#include <random>

namespace ov {
namespace test {
std::string FakeConvertLayerTest::getTestCaseName(const testing::TestParamInfo<FakeConvertParams>& obj) {
Expand Down Expand Up @@ -52,9 +54,24 @@ void FakeConvertLayerTest::SetUp() {

init_input_shapes(data_shapes);

std::vector<float> scale_values(ov::shape_size(scale_shape));
std::vector<float> shift_values(ov::shape_size(shift_shape));
std::mt19937 gen(0);
std::uniform_real_distribution<float> dis(0, static_cast<float>(ov::shape_size(scale_shape)));
for (auto& scale_value : scale_values)
scale_value = dis(gen);
for (auto& shift_value : shift_values)
shift_value = dis(gen);

if (data_prec == ov::element::f16) {
configuration.insert(ov::hint::inference_precision(ov::element::f16));
} else if (data_prec == ov::element::bf16) {
configuration.insert(ov::hint::inference_precision(ov::element::bf16));
}

const auto data = std::make_shared<opset1::Parameter>(data_prec, inputDynamicShapes.front());
const auto scale = std::make_shared<opset1::Constant>(data_prec, scale_shape);
const auto shift = std::make_shared<opset1::Constant>(data_prec, shift_shape);
const auto scale = std::make_shared<opset1::Constant>(data_prec, scale_shape, scale_values);
const auto shift = std::make_shared<opset1::Constant>(data_prec, shift_shape, shift_values);

const auto fake_convert = default_shift ? std::make_shared<opset13::FakeConvert>(data, scale, dst_prec)
: std::make_shared<opset13::FakeConvert>(data, scale, shift, dst_prec);
Expand Down

0 comments on commit 876d289

Please sign in to comment.