From fc4964cd8c44b3369d37203429fb83c9b894d700 Mon Sep 17 00:00:00 2001 From: Wang Wangwang Date: Tue, 7 Jan 2025 11:45:03 +0800 Subject: [PATCH 1/3] Fix reorder ops in cpu implementation with cl_mem --- src/plugins/intel_gpu/src/graph/impls/cpu/reorder.cpp | 2 +- src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.hpp | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/reorder.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/reorder.cpp index fc7eb5329c362a..24d0df3c1116f9 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/reorder.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/reorder.cpp @@ -56,7 +56,7 @@ struct reorder_impl : public typed_primitive_impl { auto output_mem_ptr = instance.output_memory_ptr(); cldnn::mem_lock input_lock(input_mem_ptr, stream); - cldnn::mem_lock output_lock(output_mem_ptr, stream); + cldnn::mem_lock output_lock(output_mem_ptr, stream); input_host_tensors.push_back(make_tensor(params->input_layouts[0], input_lock.data())); output_host_tensors.push_back(make_tensor(params->output_layouts[0], output_lock.data())); diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.hpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.hpp index e37518de3982a8..c3a3f126c6ed10 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.hpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.hpp @@ -39,6 +39,9 @@ struct gpu_buffer : public lockable_gpu_mem, public memory { assert(0 == _lock_count); return _buffer; } + void* buffer_ptr() const override { + return _buffer.get(); + } event::ptr copy_from(stream& stream, const void* data_ptr, size_t src_offset, size_t dst_offset, size_t size, bool blocking) override; event::ptr copy_from(stream& stream, const memory& src_mem, size_t src_offset, size_t dst_offset, size_t size, bool blocking) override; From 550d39a4489f04076a223dfb1cde3d3daaca3372 Mon Sep 17 00:00:00 2001 From: Wang Wangwang Date: Tue, 7 Jan 2025 14:14:38 +0800 Subject: [PATCH 2/3] Update src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.hpp Co-authored-by: River Li --- src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.hpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.hpp index c3a3f126c6ed10..9aeedd07eefe4e 100644 --- a/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.hpp +++ b/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.hpp @@ -40,7 +40,7 @@ struct gpu_buffer : public lockable_gpu_mem, public memory { return _buffer; } void* buffer_ptr() const override { - return _buffer.get(); + return get_buffer().get(); } event::ptr copy_from(stream& stream, const void* data_ptr, size_t src_offset, size_t dst_offset, size_t size, bool blocking) override; From 1b4468aa6da4920a4fb7dc9727a39f6bb07e9dd8 Mon Sep 17 00:00:00 2001 From: Wang Wangwang Date: Thu, 23 Jan 2025 14:39:32 +0800 Subject: [PATCH 3/3] Update access modifiers for CPU impls --- src/plugins/intel_gpu/src/graph/impls/cpu/broadcast.cpp | 2 +- src/plugins/intel_gpu/src/graph/impls/cpu/concat.cpp | 2 +- src/plugins/intel_gpu/src/graph/impls/cpu/fake_convert.cpp | 2 +- src/plugins/intel_gpu/src/graph/impls/cpu/gather.cpp | 2 +- src/plugins/intel_gpu/src/graph/impls/cpu/range.cpp | 2 +- src/plugins/intel_gpu/src/graph/impls/cpu/scatter_update.cpp | 2 +- src/plugins/intel_gpu/src/graph/impls/cpu/select.cpp | 2 +- src/plugins/intel_gpu/src/graph/impls/cpu/tile.cpp | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/broadcast.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/broadcast.cpp index bb159019053124..f0f5929fa767da 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/broadcast.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/broadcast.cpp @@ -99,7 +99,7 @@ struct broadcast_impl : public typed_primitive_impl { auto output_mem_ptr = instance.output_memory_ptr(); - cldnn::mem_lock output_lock(output_mem_ptr, stream); + cldnn::mem_lock output_lock(output_mem_ptr, stream); output_host_tensors.push_back(make_tensor(params->output_layouts[0], output_lock.data())); OPENVINO_ASSERT(op->evaluate(output_host_tensors, input_host_tensors), diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/concat.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/concat.cpp index c542ab21a20183..7b9d9a70c99eb5 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/concat.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/concat.cpp @@ -80,7 +80,7 @@ struct concatenation_impl : public typed_primitive_impl { auto output_mem_ptr = instance.output_memory_ptr(); - cldnn::mem_lock output_lock(output_mem_ptr, stream); + cldnn::mem_lock output_lock(output_mem_ptr, stream); output_host_tensors.push_back(make_tensor(params->output_layouts[0], output_lock.data())); diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/fake_convert.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/fake_convert.cpp index a5f94741c40bf5..bcbf05e5af7593 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/fake_convert.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/fake_convert.cpp @@ -74,7 +74,7 @@ struct fake_convert_impl : public typed_primitive_impl { auto output_mem_ptr = instance.output_memory_ptr(); - cldnn::mem_lock output_lock(output_mem_ptr, stream); + cldnn::mem_lock output_lock(output_mem_ptr, stream); for (size_t i = 0; i < input_mem_ptrs.size(); i++) input_host_tensors.push_back(make_tensor(params->input_layouts[i], input_mem_ptrs[i]->lock(stream, mem_lock_type::read))); diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/gather.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/gather.cpp index 130ee6f8181cf0..ae25e630a3d2aa 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/gather.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/gather.cpp @@ -82,7 +82,7 @@ struct gather_impl : public typed_primitive_impl { auto output_mem_ptr = instance.output_memory_ptr(); - cldnn::mem_lock output_lock(output_mem_ptr, stream); + cldnn::mem_lock output_lock(output_mem_ptr, stream); for (size_t i = 0; i < input_mem_ptrs.size(); i++) input_host_tensors.push_back(make_tensor(params->input_layouts[i], input_mem_ptrs[i]->lock(stream, mem_lock_type::read))); diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/range.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/range.cpp index 9d4aafb812cbc9..9cadc1d412921b 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/range.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/range.cpp @@ -55,7 +55,7 @@ struct range_impl : public typed_primitive_impl { auto output_mem_ptr = instance.output_memory_ptr(); - cldnn::mem_lock output_lock(output_mem_ptr, stream); + cldnn::mem_lock output_lock(output_mem_ptr, stream); for (size_t i = 0; i < input_mem_ptrs.size(); i++) input_host_tensors.push_back(make_tensor(params->input_layouts[i], input_mem_ptrs[i]->lock(stream, mem_lock_type::read))); diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/scatter_update.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/scatter_update.cpp index 7e2b2beadf74c0..3953e0ab0234ae 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/scatter_update.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/scatter_update.cpp @@ -71,7 +71,7 @@ struct scatter_update_impl : public typed_primitive_impl { auto output_mem_ptr = instance.output_memory_ptr(); - cldnn::mem_lock output_lock(output_mem_ptr, stream); + cldnn::mem_lock output_lock(output_mem_ptr, stream); for (size_t i = 0; i < input_mem_ptrs.size(); i++) input_host_tensors.push_back(make_tensor(params->input_layouts[i], input_mem_ptrs[i]->lock(stream, mem_lock_type::read))); diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/select.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/select.cpp index 7c9c501bedf8de..26692703295c94 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/select.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/select.cpp @@ -76,7 +76,7 @@ struct select_impl : public typed_primitive_impl