Skip to content

Commit

Permalink
[GPU] add convert precision with type_to_fuse_map which converts boolean to u8
Browse files Browse the repository at this point in the history
  • Loading branch information
ahnyoung-paul committed Jan 24, 2025
1 parent 5469038 commit 5b559ea
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

#include "intel_gpu/runtime/execution_config.hpp"
#include "intel_gpu/runtime/device.hpp"
#include "transformations/convert_precision.hpp"

namespace ov {
namespace intel_gpu {
Expand All @@ -22,6 +23,8 @@ class TransformationsPipeline {
void apply(std::shared_ptr<ov::Model> func);

private:
static bool fuse_type_to_convert(const std::shared_ptr<ov::Node>& node, const precisions_map& precisions);

const ExecutionConfig& config;
std::shared_ptr<RemoteContextImpl> m_context;
cldnn::device_info device_info;
Expand Down
57 changes: 57 additions & 0 deletions src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <tuple>
#include <vector>

#include "openvino/opsets/opset10.hpp"
#include "intel_gpu/plugin/transformations_pipeline.hpp"
#include "intel_gpu/runtime/debug_configuration.hpp"
#include "intel_gpu/runtime/itt.hpp"
Expand Down Expand Up @@ -282,6 +283,49 @@ extern bool query_microkernels_supported(cldnn::engine& e, const cldnn::Executio
namespace ov {
namespace intel_gpu {

bool TransformationsPipeline::fuse_type_to_convert(const std::shared_ptr<ov::Node>& node, const precisions_map& precisions) {
auto convert = ov::as_type_ptr<ov::opset10::Convert>(node);
if (!convert)
return false;
const auto& from = node->get_output_element_type(0);
auto it = precisions.find(from);
if (it == precisions.end())
return false;
const auto& to = it->second;

if (convert->get_convert_element_type() == ov::element::boolean && to.is_integral_number()) {
// For Convert node, converting precision from numerical data types to boolean will lead to mathematical
// error, because here the output precision boolean is replaced by u8:
// - floating point value 0.01 is converted to be 1 for boolean, but 0 for u8 - need to insert Ceil.
// - either float or int values should be clipped with the interval [0; 1] to mimic bool cast behavior, i.e.
// 0 - is false, 1 - is true
// - to perform clamping correctly an Abs op should be inserted before Clamp
// Thus an Abs, Ceil and Clamp nodes should be added before the Convert node for this scenario.
ov::pass::NodeRegistry reg;
const auto& in_prec = convert->get_input_element_type(0);
auto parent_node = convert->input_value(0).get_node_shared_ptr();
auto item = precisions.find(in_prec);
if (item != precisions.end()) {
// Add convert node for unsupported precision, such as FP64 or INT64
parent_node = reg.make<ov::opset10::Convert>(parent_node, item->second);
}
if (in_prec.is_signed()) {
parent_node = reg.make<ov::opset10::Abs>(parent_node);
}
if (in_prec.is_real()) {
parent_node = reg.make<ov::opset10::Ceiling>(parent_node);
}
parent_node = reg.make<ov::opset10::Clamp>(parent_node, 0, 1);
const auto new_convert = reg.make<ov::opset10::Convert>(parent_node, to);
new_convert->set_friendly_name(convert->get_friendly_name());
ov::copy_runtime_info(convert, reg.get());
ov::replace_node(convert, new_convert);
return true;
}
convert->set_convert_element_type(to);
return true;
}

void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "TransformationsPipeline::apply");
using const_node_ptr = const std::shared_ptr<const ov::Node>;
Expand Down Expand Up @@ -404,6 +448,7 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
const bool keep_precision_sensitive_in_fp32_1 = true;
const bool convert_input_output_precision = false;
const bool store_original_precision_as_rt_attribute = true;

manager.register_pass<ov::pass::ConvertPrecision>(fp_convert_precision_map,
empty_fuse_map,
keep_precision_sensitive_in_fp32_1,
Expand All @@ -412,6 +457,18 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {

manager.register_pass<ov::pass::CommonOptimizations>();

    { // A boolean output is lowered to u8; to keep bool-cast semantics for (e.g. f16) inputs, Abs + Ceiling + Clamp are inserted before the Convert.
type_to_fuse_map type_to_fuse = {{ov::opset10::Convert::get_type_info_static(), fuse_type_to_convert}};
precisions_map convert_precision_map = {{ov::element::boolean, ov::element::u8}};
manager.register_pass<ov::pass::ConvertPrecision>(convert_precision_map,
type_to_fuse,
false,
false,
false);

manager.register_pass<ov::pass::EliminateConvert>();
}

pass_config->set_callback<ov::pass::ScaledDotProductAttentionDecomposition>([&](const std::shared_ptr<const ov::Node> node){
GPU_DEBUG_IF(cldnn::debug_configuration::get_instance()->enable_sdpa != -1) {
GPU_DEBUG_CODE(return cldnn::debug_configuration::get_instance()->enable_sdpa == 1);
Expand Down

0 comments on commit 5b559ea

Please sign in to comment.