Skip to content

Commit

Permalink
ocl: updated CMake dependencies for OpenCL kernel
Browse files Browse the repository at this point in the history
* Made a NULL queue a soft error in c_dbcsr_acc_opencl_device_synchronize (streams without a queue are skipped instead of synchronized).
* Support calling c_dbcsr_acc_stream_sync with a NULL stream, which selects the default stream (if ACC_OPENCL_STREAM_NULL is defined).
* Made a default definition of MAD (fma) available in opencl_common.h instead of passing -DMAD=fma as a build parameter.
* Fixed c_dbcsr_acc_stream_destroy (the destroyed stream's own slot is now cleared when no compaction takes place).
* Updated tuned parameters.
  • Loading branch information
hfp committed Jan 24, 2024
1 parent f150622 commit e4f2120
Show file tree
Hide file tree
Showing 7 changed files with 185 additions and 170 deletions.
6 changes: 4 additions & 2 deletions src/acc/opencl/acc_opencl.c
Original file line number Diff line number Diff line change
Expand Up @@ -1077,8 +1077,10 @@ int c_dbcsr_acc_opencl_device_synchronize(int thread_id) {
for (; i < c_dbcsr_acc_opencl_config.nstreams; ++i) {
void* const stream = streams[i];
if (NULL != stream) {
result = c_dbcsr_acc_stream_sync(stream);
if (EXIT_SUCCESS != result) break;
if (NULL != *ACC_OPENCL_STREAM(stream)) { /* soft-error? */
result = c_dbcsr_acc_stream_sync(stream);
if (EXIT_SUCCESS != result) break;
}
}
# if defined(ACC_OPENCL_STREAM_COMPACT)
else break;
Expand Down
36 changes: 18 additions & 18 deletions src/acc/opencl/acc_opencl_mem.c
Original file line number Diff line number Diff line change
Expand Up @@ -126,11 +126,11 @@ int c_dbcsr_acc_host_mem_allocate(void** host_mem, size_t nbytes, void* stream)
NULL == host_ptr ? CL_MEM_ALLOC_HOST_PTR : CL_MEM_USE_HOST_PTR, nbytes, host_ptr, &result);
assert(CL_SUCCESS == result || NULL == memory);
if (CL_SUCCESS == result) {
/*const*/ cl_command_queue queue = *ACC_OPENCL_STREAM(
# if defined(ACC_OPENCL_STREAM_NULL)
NULL == stream ? c_dbcsr_acc_opencl_stream_default() :
cl_command_queue queue = *ACC_OPENCL_STREAM(NULL != stream ? stream : c_dbcsr_acc_opencl_stream_default());
# else
cl_command_queue queue = *ACC_OPENCL_STREAM(stream);
# endif
stream);
void* const mapped = clEnqueueMapBuffer(
queue, memory, CL_TRUE /*blocking*/, CL_MAP_READ | CL_MAP_WRITE, 0 /*offset*/, nbytes, 0, NULL, NULL, &result);
assert(CL_SUCCESS == result || NULL == mapped);
Expand Down Expand Up @@ -182,11 +182,11 @@ int c_dbcsr_acc_host_mem_deallocate(void* host_mem, void* stream) {
c_dbcsr_acc_opencl_info_hostptr_t* const meminfo = c_dbcsr_acc_opencl_info_hostptr(host_mem);
if (NULL != meminfo->memory) {
const c_dbcsr_acc_opencl_info_hostptr_t info = *meminfo; /* copy meminfo prior to unmap */
/*const*/ cl_command_queue queue = *ACC_OPENCL_STREAM(
# if defined(ACC_OPENCL_STREAM_NULL)
NULL == stream ? c_dbcsr_acc_opencl_stream_default() :
cl_command_queue queue = *ACC_OPENCL_STREAM(NULL != stream ? stream : c_dbcsr_acc_opencl_stream_default());
# else
cl_command_queue queue = *ACC_OPENCL_STREAM(stream);
# endif
stream);
int result_release;
result = clEnqueueUnmapMemObject(queue, info.memory, info.mapped, 0, NULL, NULL);
# if defined(CL_VERSION_2_0)
Expand Down Expand Up @@ -395,11 +395,11 @@ int c_dbcsr_acc_memcpy_h2d(const void* host_mem, void* dev_mem, size_t nbytes, v
# endif
# endif
{
/*const*/ cl_command_queue queue = *ACC_OPENCL_STREAM(
# if defined(ACC_OPENCL_STREAM_NULL)
NULL == stream ? c_dbcsr_acc_opencl_stream_default() :
cl_command_queue queue = *ACC_OPENCL_STREAM(NULL != stream ? stream : c_dbcsr_acc_opencl_stream_default());
# else
cl_command_queue queue = *ACC_OPENCL_STREAM(stream);
# endif
stream);
result = clEnqueueWriteBuffer(
queue, buffer, 0 == (1 & c_dbcsr_acc_opencl_config.async), offset, nbytes, host_mem, 0, NULL, NULL);
# if defined(ACC_OPENCL_STREAM_NULL)
Expand Down Expand Up @@ -444,11 +444,11 @@ int c_dbcsr_acc_memcpy_d2h(const void* dev_mem, void* host_mem, size_t nbytes, v
# endif
# endif
{
/*const*/ cl_command_queue queue = *ACC_OPENCL_STREAM(
# if defined(ACC_OPENCL_STREAM_NULL)
NULL == stream ? c_dbcsr_acc_opencl_stream_default() :
cl_command_queue queue = *ACC_OPENCL_STREAM(NULL != stream ? stream : c_dbcsr_acc_opencl_stream_default());
# else
cl_command_queue queue = *ACC_OPENCL_STREAM(stream);
# endif
stream);
result = clEnqueueReadBuffer(
queue, buffer, 0 == (2 & c_dbcsr_acc_opencl_config.async), offset, nbytes, host_mem, 0, NULL, NULL);
if (CL_SUCCESS == result) {
Expand Down Expand Up @@ -506,11 +506,11 @@ int c_dbcsr_acc_memcpy_d2d(const void* devmem_src, void* devmem_dst, size_t nbyt
# endif
# endif
{
/*const*/ cl_command_queue queue = *ACC_OPENCL_STREAM(
# if defined(ACC_OPENCL_STREAM_NULL)
NULL == stream ? c_dbcsr_acc_opencl_stream_default() :
cl_command_queue queue = *ACC_OPENCL_STREAM(NULL != stream ? stream : c_dbcsr_acc_opencl_stream_default());
# else
cl_command_queue queue = *ACC_OPENCL_STREAM(stream);
# endif
stream);
if (0 == (2 & c_dbcsr_acc_opencl_config.devcopy)) {
result = clEnqueueCopyBuffer(queue, src, dst, src_offset, dst_offset, nbytes, 0, NULL, NULL);
}
Expand Down Expand Up @@ -579,11 +579,11 @@ int c_dbcsr_acc_opencl_memset(void* dev_mem, int value, size_t offset, size_t nb
# endif
# endif
{
/*const*/ cl_command_queue queue = *ACC_OPENCL_STREAM(
# if defined(ACC_OPENCL_STREAM_NULL)
NULL == stream ? c_dbcsr_acc_opencl_stream_default() :
cl_command_queue queue = *ACC_OPENCL_STREAM(NULL != stream ? stream : c_dbcsr_acc_opencl_stream_default());
# else
cl_command_queue queue = *ACC_OPENCL_STREAM(stream);
# endif
stream);
if (0 == (1 & c_dbcsr_acc_opencl_config.devcopy)) {
static LIBXSMM_TLS cl_long pattern = 0;
size_t size_of_pattern = 1;
Expand Down
15 changes: 10 additions & 5 deletions src/acc/opencl/acc_opencl_stream.c
Original file line number Diff line number Diff line change
Expand Up @@ -248,13 +248,13 @@ int c_dbcsr_acc_stream_destroy(void* stream) {
void** const streams = c_dbcsr_acc_opencl_config.streams + tid * c_dbcsr_acc_opencl_config.nstreams;
for (i = 0; i < c_dbcsr_acc_opencl_config.nstreams; ++i) {
if (stream == streams[i]) {
int k = i;
# if defined(ACC_OPENCL_STREAM_COMPACT)
const int j = i + 1, k = c_dbcsr_acc_opencl_config.nstreams - j;
const int j = i + 1;
if (j < c_dbcsr_acc_opencl_config.nstreams && NULL != streams[j]) { /* compacting streams is not thread-safe */
k = c_dbcsr_acc_opencl_config.nstreams - j;
memmove(streams + i, streams + j, sizeof(void*) * k);
}
# else
const int k = i;
# endif
streams[k] = NULL;
tid = c_dbcsr_acc_opencl_config.nthreads; /* leave outer loop */
Expand Down Expand Up @@ -321,6 +321,7 @@ int c_dbcsr_acc_stream_priority_range(int* least, int* greatest) {


int c_dbcsr_acc_stream_sync(void* stream) {
cl_command_queue queue = NULL;
int result = EXIT_SUCCESS;
# if defined(ACC_OPENCL_STREAM_PRIORITIES)
const int* const priority = NULL;
Expand All @@ -331,8 +332,12 @@ int c_dbcsr_acc_stream_sync(void* stream) {
static const int routine_name_len = (int)sizeof(LIBXSMM_FUNCNAME) - 1;
c_dbcsr_timeset((const char**)&routine_name_ptr, &routine_name_len, &routine_handle);
# endif
assert(NULL != stream);
result = clFinish(*ACC_OPENCL_STREAM(stream));
# if defined(ACC_OPENCL_STREAM_NULL)
queue = *ACC_OPENCL_STREAM(NULL != stream ? stream : c_dbcsr_acc_opencl_stream_default());
# else
queue = *ACC_OPENCL_STREAM(stream);
# endif
result = clFinish(queue);
# if defined(__DBCSR_ACC) && defined(ACC_OPENCL_PROFILE)
c_dbcsr_timestop(&routine_handle);
# endif
Expand Down
11 changes: 9 additions & 2 deletions src/acc/opencl/common/opencl_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,15 @@
# define UNROLL_FORCE(N)
#endif

#define MIN(A, B) ((A) < (B) ? (A) : (B))
#define MAX(A, B) ((A) < (B) ? (B) : (A))
#if !defined(MIN)
# define MIN(A, B) ((A) < (B) ? (A) : (B))
#endif
#if !defined(MAX)
# define MAX(A, B) ((A) < (B) ? (B) : (A))
#endif
#if !defined(MAD)
# define MAD fma
#endif

#if !defined(LU) || (-1 == LU)
# define UNROLL_OUTER(N)
Expand Down
3 changes: 2 additions & 1 deletion src/acc/opencl/smm/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
set(LIBSMM_ACC_HEADER_KERNELS ${CMAKE_CURRENT_SOURCE_DIR}/opencl_kernels.h)

set(SMM_ACC_KERNEL_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/../acc_opencl.sh)
set(SMM_ACC_COMMON ../common/opencl_atomics.h ../common/opencl_common.h)
set(SMM_ACC_KERNELS kernels/multiply.cl kernels/transpose.cl)
list(TRANSFORM SMM_ACC_KERNELS PREPEND ${CMAKE_CURRENT_SOURCE_DIR}/)

Expand All @@ -18,7 +19,7 @@ add_custom_target(
parameters ALL
COMMAND ${SMM_ACC_KERNEL_SCRIPT} ${SMM_ACC_KERNELS} ${SMM_ACC_PARAMS}
${LIBSMM_ACC_HEADER_KERNELS}
DEPENDS ${SMM_ACC_KERNEL_SCRIPT} ${SMM_ACC_KERNELS}
DEPENDS ${SMM_ACC_KERNEL_SCRIPT} ${SMM_ACC_KERNELS} ${SMM_ACC_COMMON}
BYPRODUCTS ${LIBSMM_ACC_HEADER_KERNELS}
COMMENT "ACC/LIBSMM OpenCL: collecting tuned kernel parameters...")

Expand Down
2 changes: 1 addition & 1 deletion src/acc/opencl/smm/opencl_libsmm.c
Original file line number Diff line number Diff line change
Expand Up @@ -1376,7 +1376,7 @@ int libsmm_acc_process(const int* host_param_stack, const int* dev_param_stack,
const int slm_c = (1 != new_config.ac ? 0 : (LIBXSMM_ISPOT(m_max * typesize) + 1));
/* compose build parameters and flags */
nchar = LIBXSMM_SNPRINTF(build_params, sizeof(build_params),
"-DMAD=fma -DT=%s -DINTEL=%u -DGLOBAL=%s -DSWG=%i -DSGS=%i -DFN=%s -DREPEAT=%i -DLU=%i "
"-DT=%s -DINTEL=%u -DGLOBAL=%s -DSWG=%i -DSGS=%i -DFN=%s -DREPEAT=%i -DLU=%i "
"-DSM=%i -DSN=%i -DSK=%i -DBS=%i -DVL=%i %s -DBM=%i -DBN=%i -DBK=%i "
"%s %s %s %s %s %s %s %s ", /* space! */
tname, 0 != devinfo->intel ? devinfo->uid : 0, cmem, (int)new_config.wgsize[kernel_idx], (int)sgs, fname,
Expand Down
Loading

0 comments on commit e4f2120

Please sign in to comment.