From d4e5cb930dda1ff6196c438c942d500180305a3b Mon Sep 17 00:00:00 2001
From: Sergey Shlyapnikov
Date: Thu, 23 Jan 2025 18:01:29 +0400
Subject: [PATCH] [GPU] Fix synchronization in PagedAttention operation when KV-cache rotation is enabled but skipped for the current iteration

---
 src/plugins/intel_gpu/src/graph/impls/ocl/paged_attention.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/paged_attention.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/paged_attention.cpp
index 1bcd4b0bb10fe2..0f285f57e9d18f 100644
--- a/src/plugins/intel_gpu/src/graph/impls/ocl/paged_attention.cpp
+++ b/src/plugins/intel_gpu/src/graph/impls/ocl/paged_attention.cpp
@@ -346,7 +346,7 @@ struct paged_attention_impl : multi_stage_primitive {
         std::vector res_events;
         std::vector dep_events = events;
 
-        if (has_rotated_blocks) {
+        if (has_rotated_blocks && !_kernels_data[Stage::KV_CACHE_ROTATE].kernels[0].skip_execution) {
             execute_stage(dep_events, instance, res_events, Stage::KV_CACHE_ROTATE, is_mixed_mode);
             dep_events = res_events;
         }