Skip to content

Commit

Permalink
[GPU] disabling activations scaling for LLMs on all platforms (#28633)
Browse files Browse the repository at this point in the history
### Details:
- Backport of #28632 to 2025.0
  • Loading branch information
e-ddykim authored Jan 23, 2025
1 parent ed5255d commit e67c6cd
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 2 deletions.
4 changes: 3 additions & 1 deletion src/plugins/intel_gpu/src/plugin/plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include "openvino/core/deprecated.hpp"
#include "openvino/op/gather.hpp"
#include "openvino/op/concat.hpp"
#include "openvino/op/paged_attention.hpp"
#include "openvino/pass/manager.hpp"
#include "openvino/pass/pattern/op/wrap_type.hpp"
#include "openvino/pass/pattern/op/or.hpp"
Expand Down Expand Up @@ -84,7 +85,8 @@ const auto is_llm = [](const std::shared_ptr<const ov::Model>& model) -> bool {
auto kvcache_matcher = std::make_shared<ov::pass::pattern::Matcher>(present, "KVCacheMatcher");

for (auto& op : model->get_ordered_ops()) {
if (kvcache_matcher->match(op)) {
if (kvcache_matcher->match(op) ||
ov::is_type<ov::op::PagedAttentionExtension>(op)) {
return true;
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/plugins/intel_gpu/src/runtime/execution_config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,7 @@ void ExecutionConfig::apply_rt_info(const cldnn::device_info& info, const ov::RT
if (!info.supports_immad) {
apply_rt_info_property(ov::hint::kv_cache_precision, rt_info);
}
if (!info.supports_immad || !is_llm)
if (!is_llm)
apply_rt_info_property(ov::hint::activations_scale_factor, rt_info);
apply_rt_info_property(ov::hint::dynamic_quantization_group_size, rt_info);
}
Expand Down

0 comments on commit e67c6cd

Please sign in to comment.