diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/transformations_pipeline.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/transformations_pipeline.hpp index 29e2b88d1642f6..3ff3ebce5919d5 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/transformations_pipeline.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/transformations_pipeline.hpp @@ -11,6 +11,7 @@ #include "intel_gpu/runtime/execution_config.hpp" #include "intel_gpu/runtime/device.hpp" +#include "transformations/convert_precision.hpp" namespace ov { namespace intel_gpu { @@ -22,6 +23,8 @@ class TransformationsPipeline { void apply(std::shared_ptr func); private: + static bool fuse_type_to_convert(const std::shared_ptr& node, const precisions_map& precisions); + const ExecutionConfig& config; std::shared_ptr m_context; cldnn::device_info device_info; diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index a2bdac78fcb805..7a9c9d475b662f 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -12,6 +12,7 @@ #include #include +#include "openvino/opsets/opset10.hpp" #include "intel_gpu/plugin/transformations_pipeline.hpp" #include "intel_gpu/runtime/debug_configuration.hpp" #include "intel_gpu/runtime/itt.hpp" @@ -282,6 +283,49 @@ extern bool query_microkernels_supported(cldnn::engine& e, const cldnn::Executio namespace ov { namespace intel_gpu { +/* Type-fusion callback used when remapping element types: returns false if node does not cast to the expected op type (presumably Convert; the template argument is not visible here) or if its output element type has no entry in precisions; otherwise retargets the conversion to the mapped type (replacing the node in the boolean case) and returns true. */ bool TransformationsPipeline::fuse_type_to_convert(const std::shared_ptr& node, const precisions_map& precisions) { + auto convert = ov::as_type_ptr(node); + if (!convert) + return false; + const auto& from = node->get_output_element_type(0); + auto it = precisions.find(from); + if (it == precisions.end()) + return false; + const auto& to = it->second; + + if (convert->get_convert_element_type() == ov::element::boolean && to.is_integral_number()) { + // For 
Convert node, converting precision from numerical data types to boolean will lead to mathematical // error, because here the boolean output precision is replaced by u8: // - a floating point value such as 0.01 converts to 1 for boolean but to 0 for u8, so a Ceil must be inserted. // - both float and int values should be clamped to the interval [0; 1] to mimic bool cast behavior, i.e. // 0 is false, 1 is true // - for the clamping to be correct, an Abs op should be inserted before Clamp // Thus Abs, Ceil and Clamp nodes are added before the Convert node in this scenario. ov::pass::NodeRegistry reg; const auto& in_prec = convert->get_input_element_type(0); auto parent_node = convert->input_value(0).get_node_shared_ptr(); /* NOTE(review): only the producer node is kept here, not the output index carried by input_value(0) - confirm this is intended when the producer has several outputs. */ auto item = precisions.find(in_prec); if (item != precisions.end()) { // Add convert node for unsupported precision, such as FP64 or INT64 parent_node = reg.make(parent_node, item->second); } if (in_prec.is_signed()) { parent_node = reg.make(parent_node); } if (in_prec.is_real()) { parent_node = reg.make(parent_node); } parent_node = reg.make(parent_node, 0, 1); const auto new_convert = reg.make(parent_node, to); new_convert->set_friendly_name(convert->get_friendly_name()); ov::copy_runtime_info(convert, reg.get()); ov::replace_node(convert, new_convert); return true; + } + convert->set_convert_element_type(to); + return true; +} + void TransformationsPipeline::apply(std::shared_ptr func) { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "TransformationsPipeline::apply"); using const_node_ptr = const std::shared_ptr; @@ -404,6 +448,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { const bool keep_precision_sensitive_in_fp32_1 = true; const bool convert_input_output_precision = false; const bool store_original_precision_as_rt_attribute = true; + manager.register_pass(fp_convert_precision_map, empty_fuse_map, keep_precision_sensitive_in_fp32_1, @@ -412,6 +457,18 @@ void 
TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); + { // When an f16 input is converted to boolean (which is remapped to u8 here), add abs + ceiling + clamp before the convert. + type_to_fuse_map type_to_fuse = {{ov::opset10::Convert::get_type_info_static(), fuse_type_to_convert}}; + precisions_map convert_precision_map = {{ov::element::boolean, ov::element::u8}}; + manager.register_pass(convert_precision_map, + type_to_fuse, + false, + false, + false); + + manager.register_pass(); + } + pass_config->set_callback([&](const std::shared_ptr node){ GPU_DEBUG_IF(cldnn::debug_configuration::get_instance()->enable_sdpa != -1) { GPU_DEBUG_CODE(return cldnn::debug_configuration::get_instance()->enable_sdpa == 1);