From f772b8f2af2100592a89a316daf52237acb234b3 Mon Sep 17 00:00:00 2001
From: Vladimir Paramuzov
Date: Wed, 4 Dec 2024 11:28:44 +0400
Subject: [PATCH] [GPU] Parse runtime_options from model RT info and apply to config

Signed-off-by: Vladimir Paramuzov
---
Review notes (this section is ignored when the patch is applied):
- Template parameter/argument lists that were missing from the patch text were
  restored (e.g. as<T>(), std::shared_ptr<ov::Model>); please confirm them
  against the original commit.
- apply_rt_info() now performs a single map lookup instead of find() + at().
- Hunk line counts and the diffstat were updated for the added comments.

 .../intel_gpu/runtime/execution_config.hpp    | 18 ++++
 src/plugins/intel_gpu/src/plugin/plugin.cpp   |  2 +
 .../src/runtime/execution_config.cpp          | 14 +++
 .../tests/functional/behavior/properties.cpp  | 90 +++++++++++++++++++
 4 files changed, 124 insertions(+)
 create mode 100644 src/plugins/intel_gpu/tests/functional/behavior/properties.cpp

diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp
index 0af98bf1e952d0..3e854e4c9c5ada 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp
+++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp
@@ -138,6 +138,12 @@ class ExecutionConfig {
 
     void apply_user_properties(const cldnn::device_info& info);
 
+    // Applies the supported options found under the "runtime_options" key of the model RT info.
+    // RT info values have lower priority than values set by the user via core.set_property() or passed to
+    // compile_model(), so this method must be called after all user properties have been set,
+    // but before apply_user_properties().
+    void apply_rt_info(const ov::RTMap& rt_info);
+
     std::string to_string() const;
 
 protected:
@@ -147,6 +153,18 @@ class ExecutionConfig {
     void apply_priority_hints(const cldnn::device_info& info);
     void apply_debug_options(const cldnn::device_info& info);
 
+    // Sets `property` as a user property from the entry of `rt_info` keyed by the property name.
+    // Nothing happens if is_set_by_user(property) is already true or if `rt_info` has no such entry.
+    template <typename T, ov::PropertyMutability mutability>
+    void apply_rt_info_property(const ov::Property<T, mutability>& property, const ov::RTMap& rt_info) {
+        if (!is_set_by_user(property)) {
+            auto rt_info_val = rt_info.find(property.name());
+            if (rt_info_val != rt_info.end()) {
+                set_user_property(property(rt_info_val->second.template as<T>()));
+            }
+        }
+    }
+
 private:
     ov::AnyMap internal_properties;
     ov::AnyMap user_properties;
diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp
index 7d010a9b590e2e..058e540f9a420e 100644
--- a/src/plugins/intel_gpu/src/plugin/plugin.cpp
+++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp
@@ -189,6 +189,7 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr<
 
     ExecutionConfig config = m_configs_map.at(device_id);
     config.set_user_property(orig_config);
+    config.apply_rt_info(model->get_rt_info());
     config.apply_user_properties(context->get_engine().get_device_info());
 
     set_cache_info(model, config);
@@ -278,6 +279,7 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr&
 
     ExecutionConfig config = m_configs_map.at(device_id);
     config.set_user_property(orig_config);
+    config.apply_rt_info(model->get_rt_info());
     config.apply_user_properties(ctx->get_engine().get_device_info());
 
     ProgramBuilder prog(ctx->get_engine(), config);
diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp
index 44758f73289edb..38683dd64cefd9 100644
--- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp
+++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp
@@ -254,6 +254,20 @@ void ExecutionConfig::apply_user_properties(const cldnn::device_info& info) {
     user_properties.clear();
 }
 
+void ExecutionConfig::apply_rt_info(const ov::RTMap& rt_info) {
+    // Single lookup: nothing to do when the RT info has no "runtime_options" entry.
+    const auto runtime_options_it = rt_info.find("runtime_options");
+    if (runtime_options_it == rt_info.end()) {
+        return;
+    }
+
+    const auto& runtime_options = runtime_options_it->second.as<ov::AnyMap>();
+    // Only these hints are read from RT info; each one is skipped when the user has already set it.
+    apply_rt_info_property(ov::hint::kv_cache_precision, runtime_options);
+    apply_rt_info_property(ov::hint::dynamic_quantization_group_size, runtime_options);
+    apply_rt_info_property(ov::hint::activations_scale_factor, runtime_options);
+}
+
 std::string ExecutionConfig::to_string() const {
     std::stringstream s;
     s << "internal properties:\n";
diff --git a/src/plugins/intel_gpu/tests/functional/behavior/properties.cpp b/src/plugins/intel_gpu/tests/functional/behavior/properties.cpp
new file mode 100644
index 00000000000000..4f635c8522bac5
--- /dev/null
+++ b/src/plugins/intel_gpu/tests/functional/behavior/properties.cpp
@@ -0,0 +1,90 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "openvino/runtime/properties.hpp"
+#include "base/ov_behavior_test_utils.hpp"
+#include "openvino/runtime/core.hpp"
+#include "common_test_utils/subgraph_builders/conv_pool_relu.hpp"
+
+namespace {
+
+// Fixture that creates a fresh conv-pool-relu model before every test.
+class TestPropertiesGPU : public ::testing::Test {
+public:
+    std::shared_ptr<ov::Model> model;
+
+    void SetUp() override {
+        SKIP_IF_CURRENT_TEST_IS_DISABLED();
+        model = ov::test::utils::make_conv_pool_relu();
+    }
+};
+
+// Without user overrides, the compiled model reports the values stored under "runtime_options" in RT info.
+TEST_F(TestPropertiesGPU, RTInfoPropertiesWithDefault) {
+    ov::Core core;
+    ov::Any type;
+    ov::Any size;
+    ov::Any scale;
+    ov::CompiledModel compiled_model;
+    model->set_rt_info("f16", "runtime_options", ov::hint::kv_cache_precision.name());
+    model->set_rt_info("0", "runtime_options", ov::hint::dynamic_quantization_group_size.name());
+    model->set_rt_info("8.0", "runtime_options", ov::hint::activations_scale_factor.name());
+
+    OV_ASSERT_NO_THROW(compiled_model = core.compile_model(model, ov::test::utils::DEVICE_GPU));
+    OV_ASSERT_NO_THROW(type = compiled_model.get_property(ov::hint::kv_cache_precision));
+    OV_ASSERT_NO_THROW(size = compiled_model.get_property(ov::hint::dynamic_quantization_group_size));
+    OV_ASSERT_NO_THROW(scale = compiled_model.get_property(ov::hint::activations_scale_factor));
+    ASSERT_EQ(type.as<ov::element::Type>(), ov::element::f16);
+    ASSERT_EQ(size.as<uint64_t>(), 0);
+    ASSERT_EQ(scale.as<float>(), 8.0f);
+}
+
+// Values set through core.set_property() take precedence over the RT info values.
+TEST_F(TestPropertiesGPU, RTInfoPropertiesWithUserValuesFromCore) {
+    ov::Core core;
+    ov::Any type;
+    ov::Any size;
+    ov::Any scale;
+    ov::CompiledModel compiled_model;
+    model->set_rt_info("f16", "runtime_options", ov::hint::kv_cache_precision.name());
+    model->set_rt_info("0", "runtime_options", ov::hint::dynamic_quantization_group_size.name());
+    model->set_rt_info("8.0", "runtime_options", ov::hint::activations_scale_factor.name());
+    core.set_property(ov::hint::kv_cache_precision(ov::element::u8));
+    core.set_property(ov::hint::dynamic_quantization_group_size(16));
+    core.set_property(ov::hint::activations_scale_factor(4.0f));
+
+    OV_ASSERT_NO_THROW(compiled_model = core.compile_model(model, ov::test::utils::DEVICE_GPU));
+    OV_ASSERT_NO_THROW(type = compiled_model.get_property(ov::hint::kv_cache_precision));
+    OV_ASSERT_NO_THROW(size = compiled_model.get_property(ov::hint::dynamic_quantization_group_size));
+    OV_ASSERT_NO_THROW(scale = compiled_model.get_property(ov::hint::activations_scale_factor));
+    ASSERT_EQ(type.as<ov::element::Type>(), ov::element::u8);
+    ASSERT_EQ(size.as<uint64_t>(), 16);
+    ASSERT_EQ(scale.as<float>(), 4.0f);
+}
+
+// Values passed in the compile_model() config take precedence over the RT info values.
+TEST_F(TestPropertiesGPU, RTInfoPropertiesWithUserValuesFromCompileModel) {
+    ov::Core core;
+    ov::Any type;
+    ov::Any size;
+    ov::Any scale;
+    ov::CompiledModel compiled_model;
+    model->set_rt_info("f16", "runtime_options", ov::hint::kv_cache_precision.name());
+    model->set_rt_info("0", "runtime_options", ov::hint::dynamic_quantization_group_size.name());
+    model->set_rt_info("8.0", "runtime_options", ov::hint::activations_scale_factor.name());
+    ov::AnyMap config;
+    config[ov::hint::kv_cache_precision.name()] = "u8";
+    config[ov::hint::dynamic_quantization_group_size.name()] = "16";
+    config[ov::hint::activations_scale_factor.name()] = "4.0";
+
+    OV_ASSERT_NO_THROW(compiled_model = core.compile_model(model, ov::test::utils::DEVICE_GPU, config));
+    OV_ASSERT_NO_THROW(type = compiled_model.get_property(ov::hint::kv_cache_precision));
+    OV_ASSERT_NO_THROW(size = compiled_model.get_property(ov::hint::dynamic_quantization_group_size));
+    OV_ASSERT_NO_THROW(scale = compiled_model.get_property(ov::hint::activations_scale_factor));
+    ASSERT_EQ(type.as<ov::element::Type>(), ov::element::u8);
+    ASSERT_EQ(size.as<uint64_t>(), 16);
+    ASSERT_EQ(scale.as<float>(), 4.0f);
+}
+
+}  // namespace