Skip to content

Commit

Permalink
ocl: modularization of OpenCL kernel code and preparation for CP2K's …
Browse files Browse the repository at this point in the history
…Offload/DBM/DBT

* Exposed interface, and fixed implementation (c_dbcsr_acc_opencl_info_devptr).
* Fixed c_dbcsr_acc_dev_mem_deallocate (svm_interop).
* Moved reusable OpenCL code into common header files.
* Resolved first-level include files (acc_opencl.sh).
* Git-ignore generated files.
* Updated tuned parameters.
  • Loading branch information
hfp committed Jan 9, 2024
1 parent 41dc085 commit e2c80a1
Show file tree
Hide file tree
Showing 6 changed files with 765 additions and 734 deletions.
4 changes: 4 additions & 0 deletions src/acc/opencl/acc_opencl.h
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,9 @@ typedef struct c_dbcsr_acc_opencl_info_hostptr_t {
} c_dbcsr_acc_opencl_info_hostptr_t;
c_dbcsr_acc_opencl_info_hostptr_t* c_dbcsr_acc_opencl_info_hostptr(void* memory);

/** Determines cl_mem object and offset of memory. */
void* c_dbcsr_acc_opencl_info_devptr(const void* memory, const size_t* amount, size_t* offset);

/** Information about streams (c_dbcsr_acc_stream_create). */
typedef struct c_dbcsr_acc_opencl_info_stream_t {
void* pointer;
Expand Down Expand Up @@ -328,6 +331,7 @@ int c_dbcsr_acc_opencl_set_active_device(int thread_id, int device_id);
/** Get preferred multiple and max. size of workgroup (kernel- or device-specific). */
int c_dbcsr_acc_opencl_wgsize(cl_device_id device, cl_kernel kernel, size_t* max_value, size_t* preferred_multiple);
/** Assemble various flags for calling clBuildProgram into the given buffer.*/
/** Combines build-params and build-options, some optional flags (try_build_options), and applies language std. (cl_std). */
int c_dbcsr_acc_opencl_build_flags(const char build_params[], const char build_options[], const char try_build_options[],
const char cl_std[], char buffer[], size_t buffer_size);
/**
Expand Down
18 changes: 10 additions & 8 deletions src/acc/opencl/acc_opencl.sh
Original file line number Diff line number Diff line change
Expand Up @@ -119,15 +119,17 @@ then
fi
fi
trap 'trap_exit' EXIT
RNAME=$(${BASENAME} "$(${DIRNAME} "$1")")
ANAME=$(${TR} '[:lower:]' '[:upper:]' <<<"${RNAME}")
NFILES_OCL=0
for CLFILE in ${*:1:${#@}-1}; do
if [ "${CLFILE##*.}" = "cl" ]; then
if [ -e "${CLFILE}" ]; then
BNAME=$(${BASENAME} "${CLFILE}" .cl)
UNAME=$(${TR} '[:lower:]' '[:upper:]' <<<"${BNAME}")
SNAME=OPENCL_LIBSMM_STRING_${UNAME}
VNAME=opencl_libsmm_source_${BNAME}
MNAME=OPENCL_LIBSMM_SOURCE_${UNAME}
CNAME=$(${BASENAME} "${CLFILE}" .cl | ${SED} "s/${RNAME}_//")
BNAME=$(${TR} '[:lower:]' '[:upper:]' <<<"${CNAME}")
SNAME=OPENCL_${ANAME}_STRING_${BNAME}
VNAME=opencl_${RNAME}_source_${CNAME}
MNAME=OPENCL_${ANAME}_SOURCE_${BNAME}
if [ "0" != "$((0<(NFILES_OCL)))" ]; then
echo
elif [ "${BANNER}" ] && [ "0" != "${BANNER}" ]; then
Expand Down Expand Up @@ -190,9 +192,9 @@ then
done
DEVPAT="s/${DELIM}..*//"
DEVICES=$(for CSVFILE in "${CSVFILES[@]}"; do ${SED} "1d;/^[[:space:]]*$/d;${DEVPAT}" "${CSVFILE}"; done | ${SORT} -u)
SNAME=OPENCL_LIBSMM_STRING_PARAMS_SMM
VNAME=opencl_libsmm_params_smm
DNAME=opencl_libsmm_devices
SNAME=OPENCL_${ANAME}_STRING_PARAMS_SMM
VNAME=opencl_${RNAME}_params_smm
DNAME=opencl_${RNAME}_devices
MNAME=$(${TR} '[:lower:]' '[:upper:]' <<<"${VNAME}")
NNAME=$(${TR} '[:lower:]' '[:upper:]' <<<"${DNAME}")
if [ "${DEVICES}" ]; then
Expand Down
72 changes: 48 additions & 24 deletions src/acc/opencl/acc_opencl_mem.c
Original file line number Diff line number Diff line change
Expand Up @@ -61,34 +61,37 @@ c_dbcsr_acc_opencl_info_hostptr_t* c_dbcsr_acc_opencl_info_hostptr(void* memory)
}


void* c_dbcsr_acc_opencl_info_devptr(const void* memory, size_t* offset) {
void* c_dbcsr_acc_opencl_info_devptr(const void* memory, const size_t* amount, size_t* offset) {
void* result = NULL;
# if defined(ACC_OPENCL_MEM_OFFSET) && LIBXSMM_VERSION4(1, 17, 0, 0) < LIBXSMM_VERSION_NUMBER && \
defined(ACC_OPENCL_HANDLES_MAXCOUNT) && (0 < ACC_OPENCL_HANDLES_MAXCOUNT)
if (NULL != c_dbcsr_acc_opencl_config.clmems && NULL != memory) {
const char* const buffer = (const char*)memory;
char* base = NULL;
const size_t size = ACC_OPENCL_HANDLES_MAXCOUNT * c_dbcsr_acc_opencl_config.nthreads;
size_t i = c_dbcsr_acc_opencl_config.nclmems;
for (; i < size; ++i) {
const size_t n = ACC_OPENCL_HANDLES_MAXCOUNT * c_dbcsr_acc_opencl_config.nthreads;
size_t i = c_dbcsr_acc_opencl_config.nclmems, hit = (size_t)-1;
for (; i < n; ++i) {
void** const handle = c_dbcsr_acc_opencl_config.clmems[i];
char* const mem = (char*)(NULL != handle ? *handle : NULL);
if (NULL != mem) {
if (mem == buffer) { /* fast-path */
result = c_dbcsr_acc_opencl_config.clmems[i];
if (NULL != offset) *offset = 0;
break;
}
else if (base < mem && mem < buffer && NULL != offset) {
result = c_dbcsr_acc_opencl_config.clmems[i];
*offset = buffer - base;
base = mem;
if (mem == buffer) { /* fast-path */
if (NULL != offset) *offset = 0;
result = handle;
break;
}
else {
size_t d = buffer - mem, s = 0;
if (d < hit && NULL != offset &&
(NULL == amount ||
(CL_SUCCESS == clGetMemObjectInfo((cl_mem)mem, CL_MEM_SIZE, sizeof(size_t), &s, NULL) && (*amount + d) <= s)))
{
*offset = hit = d;
result = handle;
}
}
}
}
# else
LIBXSMM_UNUSED(memory);
LIBXSMM_UNUSED(amount);
LIBXSMM_UNUSED(offset);
# endif
return result;
Expand Down Expand Up @@ -261,7 +264,7 @@ int c_dbcsr_acc_dev_mem_allocate(void** dev_mem, size_t nbytes) {
if (NULL != handle) {
*handle = buffer;
# if defined(ACC_OPENCL_MEM_DEBUG)
printf("c_dbcsr_acc_dev_mem_allocate: %p @ %p\n", buffer, handle);
printf("c_dbcsr_acc_dev_mem_allocate: %p size=%llu\n", buffer, (unsigned long long)nbytes);
# endif
}
else result = EXIT_FAILURE;
Expand Down Expand Up @@ -303,16 +306,18 @@ int c_dbcsr_acc_dev_mem_deallocate(void* dev_mem) {
# pragma omp critical(c_dbcsr_acc_dev_mem_deallocate)
# endif
{
void** handle = c_dbcsr_acc_opencl_info_devptr(dev_mem, NULL /*offset*/);
void** handle = c_dbcsr_acc_opencl_info_devptr(dev_mem, NULL /*amount*/, NULL /*offset*/);
if (NULL != handle) {
void** const pfree = c_dbcsr_acc_opencl_config.clmems[c_dbcsr_acc_opencl_config.nclmems];
libxsmm_pfree(pfree, c_dbcsr_acc_opencl_config.clmems, &c_dbcsr_acc_opencl_config.nclmems);
*handle = *pfree;
# if defined(ACC_OPENCL_MEM_DEBUG)
printf("c_dbcsr_acc_dev_mem_deallocate: %p @ %p\n", buffer, handle);
printf("c_dbcsr_acc_dev_mem_deallocate: %p\n", buffer);
# endif
}
# if !defined(NDEBUG) || defined(ACC_OPENCL_MEM_DEBUG)
else result = EXIT_FAILURE;
# endif
}
# endif
# if defined(CL_VERSION_2_0)
Expand Down Expand Up @@ -373,11 +378,15 @@ int c_dbcsr_acc_memcpy_h2d(const void* host_mem, void* dev_mem, size_t nbytes, v
defined(ACC_OPENCL_HANDLES_MAXCOUNT) && (0 < ACC_OPENCL_HANDLES_MAXCOUNT)
assert(NULL != c_dbcsr_acc_opencl_config.clmems);
{
void* const handle = c_dbcsr_acc_opencl_info_devptr(dev_mem, &offset);
void* const handle = c_dbcsr_acc_opencl_info_devptr(dev_mem, &nbytes, &offset);
if (NULL != handle) buffer = *(cl_mem*)handle;
# if !defined(NDEBUG) || defined(ACC_OPENCL_MEM_DEBUG)
else result = EXIT_FAILURE;
# endif
}
# if !defined(NDEBUG) || defined(ACC_OPENCL_MEM_DEBUG)
if (EXIT_SUCCESS == result)
# endif
# endif
{
/*const*/ cl_command_queue queue = *ACC_OPENCL_STREAM(
Expand Down Expand Up @@ -416,11 +425,15 @@ int c_dbcsr_acc_memcpy_d2h(const void* dev_mem, void* host_mem, size_t nbytes, v
defined(ACC_OPENCL_HANDLES_MAXCOUNT) && (0 < ACC_OPENCL_HANDLES_MAXCOUNT)
assert(NULL != c_dbcsr_acc_opencl_config.clmems);
{
void* const handle = c_dbcsr_acc_opencl_info_devptr(dev_mem, &offset);
void* const handle = c_dbcsr_acc_opencl_info_devptr(dev_mem, &nbytes, &offset);
if (NULL != handle) buffer = *(cl_mem*)handle;
# if !defined(NDEBUG) || defined(ACC_OPENCL_MEM_DEBUG)
else result = EXIT_FAILURE;
# endif
}
# if !defined(NDEBUG) || defined(ACC_OPENCL_MEM_DEBUG)
if (EXIT_SUCCESS == result)
# endif
# endif
{
/*const*/ cl_command_queue queue = *ACC_OPENCL_STREAM(
Expand Down Expand Up @@ -469,15 +482,22 @@ int c_dbcsr_acc_memcpy_d2d(const void* devmem_src, void* devmem_dst, size_t nbyt
defined(ACC_OPENCL_HANDLES_MAXCOUNT) && (0 < ACC_OPENCL_HANDLES_MAXCOUNT)
assert(NULL != c_dbcsr_acc_opencl_config.clmems);
{
void* const handle_src = c_dbcsr_acc_opencl_info_devptr(devmem_src, &src_offset);
void* const handle_dst = c_dbcsr_acc_opencl_info_devptr(devmem_dst, &dst_offset);
void* const handle_src = c_dbcsr_acc_opencl_info_devptr(devmem_src, &nbytes, &src_offset);
void* const handle_dst = c_dbcsr_acc_opencl_info_devptr(devmem_dst, &nbytes, &dst_offset);
if (NULL != handle_src) src = *(cl_mem*)handle_src;
# if !defined(NDEBUG) || defined(ACC_OPENCL_MEM_DEBUG)
else result = EXIT_FAILURE;
# endif
if (NULL != handle_dst) dst = *(cl_mem*)handle_dst;
# if !defined(NDEBUG) || defined(ACC_OPENCL_MEM_DEBUG)
else result = EXIT_FAILURE;
# endif
}
# if !defined(NDEBUG) || defined(ACC_OPENCL_MEM_DEBUG)
if (EXIT_SUCCESS == result)
# endif
# endif
if (EXIT_SUCCESS == result) {
{
/*const*/ cl_command_queue queue = *ACC_OPENCL_STREAM(
# if defined(ACC_OPENCL_STREAM_NULL)
NULL == stream ? c_dbcsr_acc_opencl_stream_default() :
Expand Down Expand Up @@ -538,11 +558,15 @@ int c_dbcsr_acc_opencl_memset(void* dev_mem, int value, size_t offset, size_t nb
# if defined(ACC_OPENCL_MEM_OFFSET) && LIBXSMM_VERSION4(1, 17, 0, 0) < LIBXSMM_VERSION_NUMBER && \
defined(ACC_OPENCL_HANDLES_MAXCOUNT) && (0 < ACC_OPENCL_HANDLES_MAXCOUNT)
if (0 == offset && NULL != c_dbcsr_acc_opencl_config.clmems) {
void* const handle = c_dbcsr_acc_opencl_info_devptr(dev_mem, &offset);
void* const handle = c_dbcsr_acc_opencl_info_devptr(dev_mem, &nbytes, &offset);
if (NULL != handle) buffer = *(cl_mem*)handle;
# if !defined(NDEBUG) || defined(ACC_OPENCL_MEM_DEBUG)
else result = EXIT_FAILURE;
# endif
}
# if !defined(NDEBUG) || defined(ACC_OPENCL_MEM_DEBUG)
if (EXIT_SUCCESS == result)
# endif
# endif
{
/*const*/ cl_command_queue queue = *ACC_OPENCL_STREAM(
Expand Down
1 change: 1 addition & 0 deletions src/acc/opencl/smm/.gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
opencl_kernels.h
.with_gpu
34 changes: 17 additions & 17 deletions src/acc/opencl/smm/opencl_libsmm.c
Original file line number Diff line number Diff line change
Expand Up @@ -444,8 +444,8 @@ int libsmm_acc_init(void) {
memset(&perfest, 0, sizeof(perfest));
if (NULL == env_params || '0' != *env_params) {
char buffer[ACC_OPENCL_BUFFERSIZE], bufname[ACC_OPENCL_BUFFERSIZE], control = '0';
# if defined(OPENCL_LIBSMM_DEVICES)
const int ndevices_params = (int)(sizeof(OPENCL_LIBSMM_DEVICES) / sizeof(*OPENCL_LIBSMM_DEVICES));
# if defined(OPENCL_KERNELS_DEVICES)
const int ndevices_params = (int)(sizeof(OPENCL_KERNELS_DEVICES) / sizeof(*OPENCL_KERNELS_DEVICES));
unsigned int ntuned = 0;
# endif
opencl_libsmm_smm_t config;
Expand All @@ -470,7 +470,7 @@ int libsmm_acc_init(void) {
result = EXIT_FAILURE;
break;
}
# if defined(OPENCL_LIBSMM_DEVICES)
# if defined(OPENCL_KERNELS_DEVICES)
else ++ntuned;
# endif
}
Expand All @@ -494,9 +494,9 @@ int libsmm_acc_init(void) {
}
else control = '2';
}
# if defined(OPENCL_LIBSMM_PARAMS_SMM) && defined(OPENCL_LIBSMM_DEVICES)
# if defined(OPENCL_KERNELS_PARAMS_SMM) && defined(OPENCL_KERNELS_DEVICES)
if (EXIT_SUCCESS == result && '1' != control) {
const char *line = OPENCL_LIBSMM_PARAMS_SMM, *next;
const char *line = OPENCL_KERNELS_PARAMS_SMM, *next;
# if LIBXSMM_VERSION4(1, 17, 0, 0) < LIBXSMM_VERSION_NUMBER
cl_device_id active_id = NULL;
unsigned int active_uid;
Expand All @@ -508,11 +508,11 @@ int libsmm_acc_init(void) {
{
int i = 0, best = 0;
for (; i < ndevices_params; ++i) {
const int score = libxsmm_strimatch(bufname, OPENCL_LIBSMM_DEVICES[i], NULL);
const int score = libxsmm_strimatch(bufname, OPENCL_KERNELS_DEVICES[i], NULL);
unsigned int uid;
if (best < score ||
((best == score) &&
EXIT_SUCCESS == c_dbcsr_acc_opencl_device_uid(NULL /*device*/, OPENCL_LIBSMM_DEVICES[i], &uid) &&
EXIT_SUCCESS == c_dbcsr_acc_opencl_device_uid(NULL /*device*/, OPENCL_KERNELS_DEVICES[i], &uid) &&
uid == active_uid))
{
active_match = i;
Expand All @@ -534,7 +534,7 @@ int libsmm_acc_init(void) {
opencl_libsmm_smm_t* config_init;
const int i = atoi(bufname);
if (0 >= ndevices_params || 0 == c_dbcsr_acc_opencl_config.devmatch || 0 > i || ndevices_params <= i ||
EXIT_SUCCESS != c_dbcsr_acc_opencl_device_uid(NULL /*device*/, OPENCL_LIBSMM_DEVICES[i], &key.devuid))
EXIT_SUCCESS != c_dbcsr_acc_opencl_device_uid(NULL /*device*/, OPENCL_KERNELS_DEVICES[i], &key.devuid))
{
key.devuid = 0;
}
Expand All @@ -559,7 +559,7 @@ int libsmm_acc_init(void) {
EXIT_SUCCESS == c_dbcsr_acc_opencl_device_name(active_id, bufname, ACC_OPENCL_BUFFERSIZE, NULL /*platform*/,
0 /*platform_maxlen*/, /*cleanup*/ 0))
{
fprintf(stderr, "INFO ACC/LIBSMM: PARAMS of \"%s\" used for \"%s\"\n", OPENCL_LIBSMM_DEVICES[i], bufname);
fprintf(stderr, "INFO ACC/LIBSMM: PARAMS of \"%s\" used for \"%s\"\n", OPENCL_KERNELS_DEVICES[i], bufname);
info = 1;
}
}
Expand All @@ -584,7 +584,7 @@ int libsmm_acc_init(void) {
key.devuid = 0;
if (NULL != libxsmm_xregister(&key, sizeof(key), sizeof(config), &config)) {
c_dbcsr_acc_opencl_config.devmatch = 0; /* disable device-match */
# if defined(OPENCL_LIBSMM_DEVICES)
# if defined(OPENCL_KERNELS_DEVICES)
ntuned = LIBXSMM_MAX(ntuned, 1); /* no distinction of overridden or new */
# endif
}
Expand All @@ -594,15 +594,15 @@ int libsmm_acc_init(void) {
fprintf(stderr, "WARN LIBSMM: failed to open parameter file!\n");
}
}
# if defined(OPENCL_LIBSMM_DEVICES)
# if defined(OPENCL_KERNELS_DEVICES)
if (0 != c_dbcsr_acc_opencl_config.verbosity && 0 != ntuned) {
fprintf(stderr, "INFO ACC/LIBSMM: PARAMS in %u set%s loaded targeting ", ntuned, 1 != ntuned ? "s" : "");
if (0 != c_dbcsr_acc_opencl_config.devmatch) {
fprintf(stderr, "%i device%s\n", ndevices_params, 1 != ndevices_params ? "s" : "");
if (1 < c_dbcsr_acc_opencl_config.verbosity || 0 > c_dbcsr_acc_opencl_config.verbosity) {
unsigned int i = 0;
for (; i < (unsigned int)ndevices_params; ++i) {
fprintf(stderr, "INFO ACC/LIBSMM: PARAMS -> \"%s\"\n", OPENCL_LIBSMM_DEVICES[i]);
fprintf(stderr, "INFO ACC/LIBSMM: PARAMS -> \"%s\"\n", OPENCL_KERNELS_DEVICES[i]);
}
}
}
Expand Down Expand Up @@ -763,7 +763,7 @@ c_dbcsr_acc_bool_t libsmm_acc_is_thread_safe(void) {
int libsmm_acc_transpose(const int* dev_trs_stack, int offset, int stack_size, void* dev_data, libsmm_acc_data_t datatype, int m,
int n, int max_kernel_dim, void* stream) {
int result = EXIT_SUCCESS;
# if !defined(OPENCL_LIBSMM_SOURCE_TRANSPOSE)
# if !defined(OPENCL_KERNELS_SOURCE_TRANSPOSE)
result = EXIT_FAILURE;
# else
const int mn = m * n;
Expand Down Expand Up @@ -845,7 +845,7 @@ int libsmm_acc_transpose(const int* dev_trs_stack, int offset, int stack_size, v
}
}
if ('\0' != *tname && 0 < nchar && (int)sizeof(build_params) > nchar) {
result = c_dbcsr_acc_opencl_kernel(0 /*source_is_file*/, OPENCL_LIBSMM_SOURCE_TRANSPOSE, fname, build_params, buffer,
result = c_dbcsr_acc_opencl_kernel(0 /*source_is_file*/, OPENCL_KERNELS_SOURCE_TRANSPOSE, fname, build_params, buffer,
NULL /*try*/, NULL /*try_ok*/, NULL /*extnames*/, 0 /*num_exts*/, &new_config.kernel);
if (EXIT_SUCCESS == result) {
result = c_dbcsr_acc_opencl_wgsize(active_device, new_config.kernel, &wgsize_max, NULL /*prefmult*/);
Expand All @@ -856,7 +856,7 @@ int libsmm_acc_transpose(const int* dev_trs_stack, int offset, int stack_size, v
nchar = LIBXSMM_SNPRINTF(
build_params, sizeof(build_params), param_format, cmem, inplace, fname, m, n, (int)new_config.wgsize, tname);
if (0 < nchar && (int)sizeof(build_params) > nchar) {
result = c_dbcsr_acc_opencl_kernel(0 /*source_is_file*/, OPENCL_LIBSMM_SOURCE_TRANSPOSE, fname, build_params,
result = c_dbcsr_acc_opencl_kernel(0 /*source_is_file*/, OPENCL_KERNELS_SOURCE_TRANSPOSE, fname, build_params,
buffer, NULL /*try*/, NULL /*try_ok*/, NULL /*extnames*/, 0 /*num_exts*/, &new_config.kernel);
}
else result = EXIT_FAILURE;
Expand Down Expand Up @@ -1118,7 +1118,7 @@ int libsmm_acc_process(const int* host_param_stack, const int* dev_param_stack,
c_dbcsr_acc_bool_t def_mnk, void* stream, void* c_stream) {
int result = EXIT_SUCCESS;
const int nparams = 3;
# if !defined(OPENCL_LIBSMM_SOURCE_MULTIPLY)
# if !defined(OPENCL_KERNELS_SOURCE_MULTIPLY)
result = EXIT_FAILURE;
# else
LIBXSMM_UNUSED(c_stream); /* TODO */
Expand Down Expand Up @@ -1565,7 +1565,7 @@ int libsmm_acc_process(const int* host_param_stack, const int* dev_param_stack,
if (EXIT_SUCCESS == result) {
const char* const env_kernel = getenv("OPENCL_LIBSMM_SMM_KERNEL");
result = c_dbcsr_acc_opencl_kernel(NULL == env_kernel ? 0 : 1,
NULL == env_kernel ? OPENCL_LIBSMM_SOURCE_MULTIPLY : env_kernel, fname, build_params, buffer, NULL /*cl_try*/,
NULL == env_kernel ? OPENCL_KERNELS_SOURCE_MULTIPLY : env_kernel, fname, build_params, buffer, NULL /*cl_try*/,
NULL /*cl_try_ok*/, extensions, sizeof(extensions) / sizeof(*extensions), new_config.kernel + kernel_idx);
if (EXIT_SUCCESS == result) {
size_t wgsize_max_kernel = wgsize_max;
Expand Down
Loading

0 comments on commit e2c80a1

Please sign in to comment.