Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[GPU] updates to use ConcatTransformation of LPT for activations scaling #28757

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ namespace activations_scaling {

class TRANSFORMATIONS_API ScaleDownSingleLayer;
class TRANSFORMATIONS_API EliminateScalarMul;
class TRANSFORMATIONS_API MulConcatTransformation;
class TRANSFORMATIONS_API MulShareTransformation;
class TRANSFORMATIONS_API MoveDownScalarMul;

Expand Down Expand Up @@ -56,29 +55,6 @@ class ov::pass::activations_scaling::EliminateScalarMul : public ov::pass::Match
EliminateScalarMul();
};

// Matcher pass that moves a common scalar scale from the inputs of a Concat to its output.
// Every Concat input has to be a Multiply with a scalar Constant operand. Each branch constant
// is divided by the constant of the last branch (const_c in the diagram) and a single Multiply
// by that constant is inserted after the Concat.
//
// input_a   const_a   input_b   const_b   input_c   const_c
//    \        /          \        /          \        /
//    Multiply_a          Multiply_b          Multiply_c
//          \                 |                 /
//           \                |                /
//            ---------- Concat ------------
// ==>
//          (const_a            (const_b            (const_c
// input_a  /const_c)  input_b  /const_c)  input_c  /const_c)
//    \        /          \        /          \        /
//    Multiply_a          Multiply_b          Multiply_c
//          \                 |                 /
//           \                |                /
//            ---------- Concat ------------
//                        |   const_c
//                        |   /
//                       Multiply
class ov::pass::activations_scaling::MulConcatTransformation : public ov::pass::MatcherPass {
public:
OPENVINO_MATCHER_PASS_RTTI("MulConcatTransformation", "0");
MulConcatTransformation();
};

// input input
// / \ |
// Norm Mul ==> Mul (expect to be fused into the input layer)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
#include "low_precision/network_helper.hpp"
#include "openvino/core/rt_info.hpp"
#include "openvino/op/add.hpp"
#include "openvino/op/concat.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/op/convert.hpp"
#include "openvino/op/convolution.hpp"
Expand Down Expand Up @@ -212,92 +211,6 @@ ov::pass::activations_scaling::EliminateScalarMul::EliminateScalarMul() {
this->register_matcher(m, callback);
}

// Registers a matcher on every Concat node. When all Concat inputs are produced by a Multiply
// that has a scalar Constant operand of one common element type, the callback:
//   1. replaces each producer Multiply with a new Multiply whose constant is
//      (original constant / constant of the last Concat input), and
//   2. inserts one Multiply by the constant of the last Concat input after the Concat and
//      rewires the former Concat consumers to it.
// The callback returns false (graph untouched) when any precondition fails, true otherwise.
ov::pass::activations_scaling::MulConcatTransformation::MulConcatTransformation() {
MATCHER_SCOPE(MulConcatTransformation);

// The pattern is a bare Concat; all further checks are done inside the callback.
auto concat_m = wrap_type<ov::op::v0::Concat>();

ov::matcher_pass_callback callback = [=](pattern::Matcher& m) {
const auto& pattern_map = m.get_pattern_value_map();

OPENVINO_ASSERT(pattern_map.count(concat_m), "Not found any Concat layer");

auto concat = pattern_map.at(concat_m).get_node_shared_ptr();

// Leave the node alone when the pass callback asks to skip it.
if (transformation_callback(concat)) {
return false;
}

// check if all inputs are Multiply with scalar operand
// After this loop last_dep_const holds the constant operand of the LAST Concat input; it is
// the value every branch constant is divided by and the scale re-applied after the Concat.
ov::Output<ov::Node> last_dep_const = {};
ov::element::Type last_dep_const_type = ov::element::undefined;
for (auto& input : concat->inputs()) {
auto dep_node = ov::as_type_ptr<ov::op::v1::Multiply>(input.get_source_output().get_node_shared_ptr());
if (!dep_node) {
return false;
}
auto dep_const0 =
ov::as_type_ptr<ov::op::v0::Constant>(dep_node->input(0).get_source_output().get_node_shared_ptr());
auto dep_const1 =
ov::as_type_ptr<ov::op::v0::Constant>(dep_node->input(1).get_source_output().get_node_shared_ptr());
if (!dep_const0 && !dep_const1) {
return false;
}
// Input 0 is preferred as the constant operand when both inputs are Constants.
last_dep_const =
dep_const0 ? dep_node->input(0).get_source_output() : dep_node->input(1).get_source_output();
// is_scalar_node is defined outside this block; presumably it accepts single-element constants.
if (!is_scalar_node(last_dep_const))
return false;
// All branch constants must share one element type.
if (last_dep_const_type != ov::element::undefined &&
last_dep_const_type != last_dep_const.get_element_type())
return false;
last_dep_const_type = last_dep_const.get_element_type();
}

// Snapshot the Concat consumers BEFORE the trailing Multiply is created below, so that the new
// Multiply (which itself consumes concat->output(0)) is not rewired onto itself.
auto target_inputs = concat->get_output_target_inputs(0);

// Rebuild every producer Multiply with its constant divided by last_dep_const.
for (auto& input : concat->inputs()) {
auto dep_node = input.get_source_output().get_node_shared_ptr();
auto dep_input0 = dep_node->input(0).get_source_output().get_node();
// Same operand choice as in the validation loop: input 0 is the constant if it is a Constant.
size_t const_index = ov::is_type<ov::op::v0::Constant>(dep_input0) ? 0 : 1;
size_t activation_index = ov::is_type<ov::op::v0::Constant>(dep_input0) ? 1 : 0;

auto dep_type = dep_node->get_output_element_type(0);
// The TemporaryReplaceOutputType temporaries are created inside the constructor call
// expression; presumably they must stay alive until the node is constructed - keep them inline.
// NOTE(review): no check here prevents last_dep_const from being zero, in which case the
// folded Divide would not produce a finite scale - confirm callers guarantee non-zero scales.
auto new_mul = std::make_shared<ov::op::TypeRelaxed<ov::op::v1::Multiply>>(
std::vector<element::Type>{dep_type, dep_type},
std::vector<element::Type>{dep_type},
ov::op::TemporaryReplaceOutputType(dep_node->input(activation_index).get_source_output(), dep_type)
.get(),
ov::op::TemporaryReplaceOutputType(
ov::op::util::eltwise_fold<ov::op::v1::Divide>(dep_node->input(const_index).get_source_output(),
last_dep_const),
dep_type)
.get());
new_mul->set_friendly_name(dep_node->get_friendly_name() + "_c");
ov::copy_runtime_info(dep_node, new_mul);

// Only this Concat input is redirected; other consumers of the old Multiply keep using it.
input.replace_source_output(new_mul);
}

// Re-apply the factored-out scale once, on the Concat output.
auto concat_type = concat->get_output_element_type(0);
auto new_mul = std::make_shared<ov::op::TypeRelaxed<ov::op::v1::Multiply>>(
std::vector<element::Type>{concat_type, concat_type},
std::vector<element::Type>{concat_type},
ov::op::TemporaryReplaceOutputType(concat->output(0), concat_type).get(),
ov::op::TemporaryReplaceOutputType(last_dep_const, concat_type).get());
new_mul->set_friendly_name(concat->get_friendly_name() + "_c");
ov::copy_runtime_info(concat, new_mul);

// Redirect the consumers captured earlier to the new trailing Multiply.
for (auto& in : target_inputs) {
in.replace_source_output(new_mul);
}

return true;
};

// NOTE(review): the matcher name is passed as a literal although MATCHER_SCOPE is used above;
// confirm whether the name provided by that macro should be used instead.
auto m = std::make_shared<ov::pass::pattern::Matcher>(concat_m, "MulConcatTransformation");
this->register_matcher(m, callback);
}

ov::pass::activations_scaling::MulShareTransformation::MulShareTransformation() {
MATCHER_SCOPE(MulShareTransformation);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
#include "common_test_utils/graph_comparator.hpp"
#include "common_test_utils/ov_test_utils.hpp"
#include "openvino/op/add.hpp"
#include "openvino/op/concat.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/op/convolution.hpp"
#include "openvino/op/group_normalization.hpp"
Expand Down Expand Up @@ -98,38 +97,6 @@ TEST_F(TransformationTestsF, EliminateScalarMulTest) {
}
}

// Checks MulConcatTransformation on a two-branch Concat whose inputs are both scaled by 10:
// the reference graph keeps per-branch Multiply nodes with a scale of 1 and adds a single
// Multiply by 10 after the Concat.
TEST_F(TransformationTestsF, ConcatTransformationTest) {
    // Model under test: (param * 10, param * 10) -> Concat(axis 0) -> Convert(f32) -> Result.
    {
        const auto lhs_param =
            std::make_shared<ov::op::v0::Parameter>(ov::element::f16, ov::PartialShape{6, 12, 10, 24});
        const auto lhs_scale = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{1}, {10});
        const auto lhs_mul = std::make_shared<ov::op::v1::Multiply>(lhs_param, lhs_scale);

        const auto rhs_param =
            std::make_shared<ov::op::v0::Parameter>(ov::element::f16, ov::PartialShape{6, 12, 10, 24});
        const auto rhs_scale = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{1}, {10});
        const auto rhs_mul = std::make_shared<ov::op::v1::Multiply>(rhs_param, rhs_scale);

        const auto joined = std::make_shared<ov::op::v0::Concat>(OutputVector{lhs_mul, rhs_mul}, 0);
        const auto to_f32 = std::make_shared<ov::op::v0::Convert>(joined, ov::element::f32);
        const auto output = std::make_shared<ov::op::v0::Result>(to_f32);

        model = std::make_shared<ov::Model>(ov::ResultVector{output}, ov::ParameterVector{lhs_param, rhs_param});
        manager.register_pass<ov::pass::activations_scaling::MulConcatTransformation>();
    }

    // Reference: (param * 1, param * 1) -> Concat(axis 0) -> Multiply by 10 -> Convert(f32) -> Result.
    {
        const auto lhs_param =
            std::make_shared<ov::op::v0::Parameter>(ov::element::f16, ov::PartialShape{6, 12, 10, 24});
        const auto lhs_scale = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{1}, {1});
        const auto lhs_mul = std::make_shared<ov::op::v1::Multiply>(lhs_param, lhs_scale);

        const auto rhs_param =
            std::make_shared<ov::op::v0::Parameter>(ov::element::f16, ov::PartialShape{6, 12, 10, 24});
        const auto rhs_scale = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{1}, {1});
        const auto rhs_mul = std::make_shared<ov::op::v1::Multiply>(rhs_param, rhs_scale);

        const auto joined = std::make_shared<ov::op::v0::Concat>(OutputVector{lhs_mul, rhs_mul}, 0);
        const auto common_scale = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{1}, {10});
        const auto rescaled = std::make_shared<ov::op::v1::Multiply>(joined, common_scale);
        const auto to_f32 = std::make_shared<ov::op::v0::Convert>(rescaled, ov::element::f32);
        const auto output = std::make_shared<ov::op::v0::Result>(to_f32);

        model_ref = std::make_shared<ov::Model>(ov::ResultVector{output}, ov::ParameterVector{lhs_param, rhs_param});
    }
}

TEST_F(TransformationTestsF, MoveDownScalarMulTest) {
{
auto input0 = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, ov::PartialShape{6, 12, 10, 24});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -942,7 +942,6 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
pass_config->disable<GroupConvolutionTransformation>();
pass_config->disable<MatMulTransformation>();
pass_config->disable<MVNTransformation>();
pass_config->disable<ConcatTransformation>();

pass_config->set_callback<FoldConvertTransformation>(
[](const std::shared_ptr<const ov::Node> &node) -> bool {
Expand All @@ -961,7 +960,6 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
auto params = LayerTransformation::Params(false, infer_precision, {infer_precision}, true, true);
auto lpt_pass = manager.register_pass<LowPrecision>(supportedPrecisions, perTensorQuantization, params);
lpt_pass->add_main<ov::pass::activations_scaling::EliminateScalarMul>();
lpt_pass->add_main<ov::pass::activations_scaling::MulConcatTransformation>();
lpt_pass->add_main<ov::pass::activations_scaling::MoveDownScalarMul>();

// Move up remained scalar-multiply layers
Expand Down
Loading