From 8b02601fdacd21f4dfdbde65f3cc4e3548a6ce8a Mon Sep 17 00:00:00 2001
From: "Kim, Eddy"
Date: Thu, 23 Jan 2025 19:02:26 +0900
Subject: [PATCH] disabled activations scaling for LLMs on all platforms

---
 src/plugins/intel_gpu/src/plugin/plugin.cpp            | 4 +++-
 src/plugins/intel_gpu/src/runtime/execution_config.cpp | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp
index 4058b38dd78584..81265c26a3c98e 100644
--- a/src/plugins/intel_gpu/src/plugin/plugin.cpp
+++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp
@@ -28,6 +28,7 @@
 #include "openvino/core/deprecated.hpp"
 #include "openvino/op/gather.hpp"
 #include "openvino/op/concat.hpp"
+#include "openvino/op/paged_attention.hpp"
 #include "openvino/pass/manager.hpp"
 #include "openvino/pass/pattern/op/wrap_type.hpp"
 #include "openvino/pass/pattern/op/or.hpp"
@@ -84,7 +85,8 @@ const auto is_llm = [](const std::shared_ptr<const ov::Model>& model) -> bool {
     auto kvcache_matcher = std::make_shared<ov::pass::pattern::Matcher>(present, "KVCacheMatcher");
 
     for (auto& op : model->get_ordered_ops()) {
-        if (kvcache_matcher->match(op)) {
+        if (kvcache_matcher->match(op) ||
+            ov::is_type<ov::op::PagedAttentionExtension>(op)) {
             return true;
         }
     }
diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp
index 4ba78c74ee7597..71adc5032491f9 100644
--- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp
+++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp
@@ -276,7 +276,7 @@ void ExecutionConfig::apply_rt_info(const cldnn::device_info& info, const ov::RT
     if (!info.supports_immad) {
         apply_rt_info_property(ov::hint::kv_cache_precision, rt_info);
     }
-    if (!info.supports_immad || !is_llm)
+    if (!is_llm)
         apply_rt_info_property(ov::hint::activations_scale_factor, rt_info);
     apply_rt_info_property(ov::hint::dynamic_quantization_group_size, rt_info);
 }