Skip to content

Commit

Permalink
ocl: adjusted finalization-flow of OpenCL backend
Browse files Browse the repository at this point in the history
* Fixed case when ACC_OPENCL_MEM_DEVPTR is turned off at compile-time.
* Ensure the termination message appears at most once (cleanup).
* Introduced ACC_OPENCL_EVENT_CHAIN and ACC_OPENCL_EVENT_WAIT.
* Removed compile-time option for ACC_OPENCL_MEM_CPYSYNC.
* Removed compile-time setting (ACC_OPENCL_STREAM_NULL).
* Introduced WA-levels to distinguish certain WAs.
* Ensure final cleanup (atexit).
  • Loading branch information
hfp committed Mar 6, 2024
1 parent 24f24f2 commit ab07d39
Show file tree
Hide file tree
Showing 5 changed files with 100 additions and 125 deletions.
115 changes: 60 additions & 55 deletions src/acc/opencl/acc_opencl.c
Original file line number Diff line number Diff line change
Expand Up @@ -138,15 +138,52 @@ int c_dbcsr_acc_opencl_order_devices(const void* dev_a, const void* dev_b) {
}


LIBXSMM_ATTRIBUTE_CTOR void c_dbcsr_acc_opencl_init(void) {
/* Attempt to automatically initialize the backend: the result of
 * c_dbcsr_acc_init() is compared against EXIT_SUCCESS and the outcome
 * is handed to ACC_OPENCL_EXPECT. */
ACC_OPENCL_EXPECT(EXIT_SUCCESS == c_dbcsr_acc_init());
}
/*
 * Attempt to automatically initialize the backend: calls c_dbcsr_acc_init()
 * and hands the comparison against EXIT_SUCCESS to ACC_OPENCL_EXPECT.
 * NOTE(review): LIBXSMM_ATTRIBUTE_CTOR presumably registers this function
 * as a load-time constructor -- confirm against the LIBXSMM macro definition.
 */
LIBXSMM_ATTRIBUTE_CTOR void c_dbcsr_acc_opencl_init(void) { ACC_OPENCL_EXPECT(EXIT_SUCCESS == c_dbcsr_acc_init()); }


/*
 * Attempt to automatically finalize the backend.
 *
 * If at least one device is registered (ndevices != 0), this releases what
 * is held in c_dbcsr_acc_opencl_config: the device slots, the private
 * stream's command queue, the context, the locks, and the bookkeeping
 * buffers. Afterwards the whole configuration structure is zeroed, so
 * ndevices reads as zero and a repeated call skips the teardown.
 * Takes no arguments and returns nothing; OpenCL return codes of the
 * queue/context release are deliberately ignored.
 * NOTE(review): LIBXSMM_ATTRIBUTE_DTOR presumably registers this function
 * as an unload-time destructor -- confirm against the LIBXSMM macro definition.
 */
LIBXSMM_ATTRIBUTE_DTOR void c_dbcsr_acc_opencl_finalize(void) {
/* attempt to automatically finalize backend */
/* NOTE(review): this listing shows diff lines without +/- markers; the call
 * below may belong to the removed (pre-commit) body -- confirm against the
 * repository before relying on it. */
ACC_OPENCL_EXPECT(EXIT_SUCCESS == c_dbcsr_acc_finalize());
/* sanity check (debug builds): device count must stay below the table capacity */
assert(c_dbcsr_acc_opencl_config.ndevices < ACC_OPENCL_MAXNDEVS);
if (0 != c_dbcsr_acc_opencl_config.ndevices) {
int i;
/* walk the entire device table (not just ndevices entries) and clear every used slot */
for (i = 0; i < ACC_OPENCL_MAXNDEVS; ++i) {
const cl_device_id device_id = c_dbcsr_acc_opencl_config.devices[i];
if (NULL != device_id) {
/* the device is only explicitly released for OpenCL 1.2+ headers in _DEBUG builds */
# if defined(CL_VERSION_1_2) && defined(_DEBUG)
ACC_OPENCL_EXPECT(EXIT_SUCCESS == clReleaseDevice(device_id));
# endif
/* c_dbcsr_acc_opencl_create_context scans for non-NULL devices */
c_dbcsr_acc_opencl_config.devices[i] = NULL;
}
}
if (NULL != c_dbcsr_acc_opencl_config.device.stream.queue) { /* release private stream */
clReleaseCommandQueue(c_dbcsr_acc_opencl_config.device.stream.queue); /* ignore return code */
}
if (NULL != c_dbcsr_acc_opencl_config.device.context) {
/* take a local copy and reset the field before the context is released */
const cl_context context = c_dbcsr_acc_opencl_config.device.context;
c_dbcsr_acc_opencl_config.device.context = NULL;
clReleaseContext(context); /* ignore return code */
}
/* locks are stored ACC_OPENCL_CACHELINE bytes apart inside c_dbcsr_acc_opencl_locks */
for (i = 0; i < ACC_OPENCL_NLOCKS; ++i) { /* destroy locks */
ACC_OPENCL_DESTROY((ACC_OPENCL_LOCKTYPE*)(c_dbcsr_acc_opencl_locks + ACC_OPENCL_CACHELINE * i));
}
/* release/reset buffers */
/* the memory-pointer registry is only freed when ACC_OPENCL_MEM_DEVPTR is enabled */
# if defined(ACC_OPENCL_MEM_DEVPTR)
free(c_dbcsr_acc_opencl_config.memptrs);
free(c_dbcsr_acc_opencl_config.memptr_data);
# endif
free(c_dbcsr_acc_opencl_config.streams);
free(c_dbcsr_acc_opencl_config.stream_data);
free(c_dbcsr_acc_opencl_config.events);
free(c_dbcsr_acc_opencl_config.event_data);
/* clear entire configuration structure */
/* (this also resets the freed pointers, the released queue handle, and ndevices) */
memset(&c_dbcsr_acc_opencl_config, 0, sizeof(c_dbcsr_acc_opencl_config));
# if defined(ACC_OPENCL_CACHE_DID)
c_dbcsr_acc_opencl_active_id = 0; /* reset cached active device-ID */
# endif
/* LIBXSMM is finalized last, after all backend resources were released */
libxsmm_finalize();
}
}


Expand Down Expand Up @@ -178,7 +215,7 @@ int c_dbcsr_acc_init(void) {
const int nccs = (NULL == env_nccs ? ACC_OPENCL_NCCS : atoi(env_nccs));
# endif
const char *const env_neo = getenv("NEOReadDebugKeys"), *const env_wa = getenv("ACC_OPENCL_WA");
const int neo = (NULL == env_neo ? 1 : atoi(env_neo)), wa = neo * (NULL == env_wa ? 1 : atoi(env_wa));
const int neo = (NULL == env_neo ? 1 : atoi(env_neo)), wa = neo * (NULL == env_wa ? 2 : atoi(env_wa));
# if defined(ACC_OPENCL_ASYNC)
const char* const env_async = (ACC_OPENCL_ASYNC);
const int async_default = 3;
Expand Down Expand Up @@ -257,15 +294,13 @@ int c_dbcsr_acc_init(void) {
# endif
if (0 != wa) { /* environment is populated before touching the compute runtime */
static char* key_value[] = {
"NEOReadDebugKeys=1", "DirectSubmissionOverrideBlitterSupport=0", "EnableRecoverablePageFaults=0"};
for (i = 0; i < sizeof(key_value) / sizeof(*key_value); ++i) {
const char* const sep = strchr(key_value[i], '=');
const size_t n = (NULL != sep ? (sep - key_value[i]) : 0);
if (0 < n && n < ACC_OPENCL_BUFFERSIZE) {
memcpy(buffer, key_value[i], n);
buffer[n] = '\0';
ACC_OPENCL_EXPECT(NULL != getenv(buffer) || 0 == LIBXSMM_PUTENV(key_value[i]));
}
"NEOReadDebugKeys=1", "EnableRecoverablePageFaults=0", "DirectSubmissionOverrideBlitterSupport=0"};
if (NULL == env_neo) ACC_OPENCL_EXPECT(0 == LIBXSMM_PUTENV(key_value[0]));
if (NULL == getenv("EnableRecoverablePageFaults")) {
ACC_OPENCL_EXPECT(0 == LIBXSMM_PUTENV(key_value[1]));
}
if (NULL == getenv("DirectSubmissionOverrideBlitterSupport") && 2 <= wa) {
ACC_OPENCL_EXPECT(0 == LIBXSMM_PUTENV(key_value[2]));
}
}
# if defined(ACC_OPENCL_CACHE_DIR)
Expand Down Expand Up @@ -612,15 +647,15 @@ int c_dbcsr_acc_finalize(void) {
# else
int result = EXIT_SUCCESS;
# endif
static void (*cleanup)(void) = c_dbcsr_acc_opencl_finalize;
# if defined(__DBCSR_ACC) && defined(ACC_OPENCL_PROFILE)
int routine_handle;
static const char* const routine_name_ptr = LIBXSMM_FUNCNAME;
static const int routine_name_len = (int)sizeof(LIBXSMM_FUNCNAME) - 1;
c_dbcsr_timeset((const char**)&routine_name_ptr, &routine_name_len, &routine_handle);
# endif
if (0 != c_dbcsr_acc_opencl_config.ndevices) {
int i;
assert(c_dbcsr_acc_opencl_config.ndevices < ACC_OPENCL_MAXNDEVS);
assert(c_dbcsr_acc_opencl_config.ndevices < ACC_OPENCL_MAXNDEVS);
if (0 != c_dbcsr_acc_opencl_config.ndevices && NULL != cleanup) {
if (0 != c_dbcsr_acc_opencl_config.verbosity) {
cl_device_id device = NULL;
int d;
Expand All @@ -642,42 +677,8 @@ int c_dbcsr_acc_finalize(void) {
*/
if (EXIT_SUCCESS == result) result = libsmm_acc_finalize();
# endif
libxsmm_finalize();
for (i = 0; i < ACC_OPENCL_MAXNDEVS; ++i) {
const cl_device_id device_id = c_dbcsr_acc_opencl_config.devices[i];
if (NULL != device_id) {
# if defined(CL_VERSION_1_2) && defined(_DEBUG)
ACC_OPENCL_CHECK(clReleaseDevice(device_id), "release device", result);
# endif
/* c_dbcsr_acc_opencl_create_context scans for non-NULL devices */
c_dbcsr_acc_opencl_config.devices[i] = NULL;
}
}
if (NULL != c_dbcsr_acc_opencl_config.device.stream.queue) { /* release private stream */
clReleaseCommandQueue(c_dbcsr_acc_opencl_config.device.stream.queue); /* ignore return code */
}
if (NULL != c_dbcsr_acc_opencl_config.device.context) {
const cl_context context = c_dbcsr_acc_opencl_config.device.context;
c_dbcsr_acc_opencl_config.device.context = NULL;
clReleaseContext(context); /* ignore return code */
}
for (i = 0; i < ACC_OPENCL_NLOCKS; ++i) { /* destroy locks */
ACC_OPENCL_DESTROY((ACC_OPENCL_LOCKTYPE*)(c_dbcsr_acc_opencl_locks + ACC_OPENCL_CACHELINE * i));
}
/* release/reset buffers */
# if defined(ACC_OPENCL_MEM_DEVPTR)
free(c_dbcsr_acc_opencl_config.memptrs);
free(c_dbcsr_acc_opencl_config.memptr_data);
# endif
free(c_dbcsr_acc_opencl_config.streams);
free(c_dbcsr_acc_opencl_config.stream_data);
free(c_dbcsr_acc_opencl_config.events);
free(c_dbcsr_acc_opencl_config.event_data);
/* clear entire configuration structure */
memset(&c_dbcsr_acc_opencl_config, 0, sizeof(c_dbcsr_acc_opencl_config));
# if defined(ACC_OPENCL_CACHE_DID)
c_dbcsr_acc_opencl_active_id = 0; /* reset cached active device-ID */
# endif
if (EXIT_SUCCESS == result) result = atexit(cleanup);
cleanup = NULL;
}
# if defined(__DBCSR_ACC) && defined(ACC_OPENCL_PROFILE)
c_dbcsr_timestop(&routine_handle);
Expand Down Expand Up @@ -997,8 +998,10 @@ int c_dbcsr_acc_opencl_set_active_device(ACC_OPENCL_LOCKTYPE* lock, int device_i
CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0 /* terminator */
};
# endif
# if defined(ACC_OPENCL_MEM_DEVPTR)
cl_platform_id platform = NULL;
cl_bitfield bitfield = 0;
# endif
c_dbcsr_acc_opencl_config.device.intel = (EXIT_SUCCESS ==
c_dbcsr_acc_opencl_device_vendor(active_id, "intel", 0 /*use_platform_name*/));
c_dbcsr_acc_opencl_config.device.nv = (EXIT_SUCCESS ==
Expand Down Expand Up @@ -1026,6 +1029,7 @@ int c_dbcsr_acc_opencl_set_active_device(ACC_OPENCL_LOCKTYPE* lock, int device_i
{
c_dbcsr_acc_opencl_config.device.unified = CL_FALSE;
}
# if defined(ACC_OPENCL_MEM_DEVPTR)
if (0 != (4 & c_dbcsr_acc_opencl_config.xhints) && 2 <= *c_dbcsr_acc_opencl_config.device.std_level &&
0 != c_dbcsr_acc_opencl_config.device.intel && 0 == c_dbcsr_acc_opencl_config.device.unified &&
EXIT_SUCCESS == clGetDeviceInfo(active_id, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &platform, NULL) &&
Expand All @@ -1046,6 +1050,7 @@ int c_dbcsr_acc_opencl_set_active_device(ACC_OPENCL_LOCKTYPE* lock, int device_i
ptr = clGetExtensionFunctionAddressForPlatform(platform, "clMemFreeINTEL");
LIBXSMM_ASSIGN127(&c_dbcsr_acc_opencl_config.device.clMemFreeINTEL, &ptr);
}
# endif
# if defined(ACC_OPENCL_CMDAGR)
if (0 != c_dbcsr_acc_opencl_config.device.intel) { /* device vendor (above) can now be used */
int result_cmdagr = EXIT_SUCCESS;
Expand Down
22 changes: 11 additions & 11 deletions src/acc/opencl/acc_opencl.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,6 @@
#ifndef ACC_OPENCL_H
#define ACC_OPENCL_H

#if defined(__OFFLOAD_OPENCL) && !defined(__OPENCL)
# define __OPENCL
#endif

#if defined(__OPENCL)
# if !defined(CL_TARGET_OPENCL_VERSION)
# define CL_TARGET_OPENCL_VERSION 220
Expand Down Expand Up @@ -108,11 +104,7 @@
# define ACC_OPENCL_STREAM_PRIORITIES
# endif
#endif
/* Stream-argument (ACC-interface) can be NULL (synchronous) */
#if !defined(ACC_OPENCL_STREAM_NULL) && 1
# define ACC_OPENCL_STREAM_NULL
#endif
/* Support arithmetic for device-pointers (DBM) */
/* Support arithmetic for device-pointers */
#if !defined(ACC_OPENCL_MEM_DEVPTR) && 1
# define ACC_OPENCL_MEM_DEVPTR
#endif
Expand Down Expand Up @@ -169,6 +161,11 @@
clCreateCommandQueue(CTX, DEV, (cl_command_queue_properties)(NULL != (PROPS) ? ((PROPS)[1]) : 0), RESULT)
#endif

/* Support for other libraries, e.g., CP2K's DBM/DBT */
#if defined(ACC_OPENCL_MEM_DEVPTR) && defined(__OFFLOAD_OPENCL) && !defined(__OPENCL)
# define __OPENCL
#endif

#if LIBXSMM_VERSION4(1, 17, 0, 0) < LIBXSMM_VERSION_NUMBER
# define ACC_OPENCL_EXPECT(EXPR) LIBXSMM_EXPECT(EXPR)
# define LIBXSMM_STRISTR libxsmm_stristr
Expand Down Expand Up @@ -252,7 +249,10 @@ typedef struct c_dbcsr_acc_opencl_stream_t {
typedef struct c_dbcsr_acc_opencl_device_t {
/** Activated device context. */
cl_context context;
/** Stream for internal purpose. */
/**
* Stream for internal purpose, e.g., stream-argument
* (ACC-interface) can be NULL (synchronous)
*/
c_dbcsr_acc_opencl_stream_t stream;
/** OpenCL compiler flag (language standard). */
char std_flag[16];
Expand Down Expand Up @@ -353,7 +353,7 @@ int c_dbcsr_acc_opencl_info_devptr(
c_dbcsr_acc_opencl_info_memptr_t* info, const void* memory, size_t elsize, const size_t* amount, size_t* offset);
/** Finds an existing stream for the given thread-ID (or NULL). */
const c_dbcsr_acc_opencl_stream_t* c_dbcsr_acc_opencl_stream(ACC_OPENCL_LOCKTYPE* lock, int thread_id);
/** Determines default-stream (see ACC_OPENCL_STREAM_NULL). */
/** Determines default-stream (see c_dbcsr_acc_opencl_device_t::stream). */
const c_dbcsr_acc_opencl_stream_t* c_dbcsr_acc_opencl_stream_default(void);
/** Like c_dbcsr_acc_memset_zero, but supporting an arbitrary value used as initialization pattern. */
int c_dbcsr_acc_opencl_memset(void* dev_mem, int value, size_t offset, size_t nbytes, void* stream);
Expand Down
40 changes: 25 additions & 15 deletions src/acc/opencl/acc_opencl_event.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@
# if !defined(ACC_OPENCL_EVENT_FLUSH) && 0
# define ACC_OPENCL_EVENT_FLUSH
# endif
# if !defined(ACC_OPENCL_EVENT_CHAIN) && 0
# define ACC_OPENCL_EVENT_CHAIN
# endif
# if !defined(ACC_OPENCL_EVENT_WAIT) && 0
# define ACC_OPENCL_EVENT_WAIT
# endif


# if defined(__cplusplus)
Expand Down Expand Up @@ -76,21 +82,19 @@ int c_dbcsr_acc_stream_wait_event(void* stream, void* event) { /* wait for an ev
static const int routine_name_len = (int)sizeof(LIBXSMM_FUNCNAME) - 1;
c_dbcsr_timeset((const char**)&routine_name_ptr, &routine_name_len, &routine_handle);
# endif
# if defined(ACC_OPENCL_STREAM_NULL)
str = (NULL != stream ? ACC_OPENCL_STREAM(stream) : c_dbcsr_acc_opencl_stream_default());
# else
str = ACC_OPENCL_STREAM(stream);
# endif
assert(NULL != str && NULL != str->queue && NULL != event);
clevent = *ACC_OPENCL_EVENT(event);
if (NULL != clevent) {
# if defined(CL_VERSION_1_2)
cl_event clevent_result = NULL;
result = clEnqueueBarrierWithWaitList(str->queue, 1, &clevent, &clevent_result);
if (EXIT_SUCCESS == result) {
# if defined(ACC_OPENCL_EVENT_CHAIN)
result = clReleaseEvent(clevent);
assert(NULL != clevent_result);
*(cl_event*)event = (EXIT_SUCCESS == result ? clevent_result : NULL);
# endif
}
else
# else
Expand Down Expand Up @@ -122,23 +126,29 @@ int c_dbcsr_acc_event_record(void* event, void* stream) {
static const int routine_name_len = (int)sizeof(LIBXSMM_FUNCNAME) - 1;
c_dbcsr_timeset((const char**)&routine_name_ptr, &routine_name_len, &routine_handle);
# endif
# if defined(ACC_OPENCL_STREAM_NULL)
str = (NULL != stream ? ACC_OPENCL_STREAM(stream) : c_dbcsr_acc_opencl_stream_default());
# else
str = ACC_OPENCL_STREAM(stream);
# endif
assert(NULL != str && NULL != str->queue && NULL != event);
clevent = *ACC_OPENCL_EVENT(event);
# if defined(ACC_OPENCL_EVENT_FLUSH)
result = clFlush(str->queue);
if (EXIT_SUCCESS == result)
# endif
{
# if defined(CL_VERSION_1_2)
result = (NULL == clevent ? clEnqueueMarkerWithWaitList(str->queue, 0, NULL, &clevent_result)
: clEnqueueMarkerWithWaitList(str->queue, 1, &clevent, &clevent_result));
# if defined(ACC_OPENCL_EVENT_WAIT)
if (NULL != clevent) result = clEnqueueMarkerWithWaitList(str->queue, 1, &clevent, &clevent_result);
else
# endif
{
result = clEnqueueMarkerWithWaitList(str->queue, 0, NULL, &clevent_result);
}
# else
if (NULL != clevent) result = clEnqueueWaitForEvents(str->queue, 1, &clevent);
if (EXIT_SUCCESS == result) result = clEnqueueMarker(str->queue, &clevent_result);
# endif
# if defined(ACC_OPENCL_EVENT_FLUSH)
if (EXIT_SUCCESS == result) result = clFlush(str->queue);
# if defined(ACC_OPENCL_EVENT_WAIT)
if (NULL != clevent) result = clEnqueueWaitForEvents(str->queue, 1, &clevent);
# endif
if (EXIT_SUCCESS == result) result = clEnqueueMarker(str->queue, &clevent_result);
# endif
}
if (NULL != clevent) {
const int result_release = clReleaseEvent(clevent);
if (EXIT_SUCCESS == result) result = result_release;
Expand Down
Loading

0 comments on commit ab07d39

Please sign in to comment.