Skip to content

Commit

Permalink
ocl: leverage constant minibatch-size
Browse files Browse the repository at this point in the history
* Allowed filtering of values written back to global memory (debug).
* Introduced constant minibatch-size (-DBSC).
* Refined/revised loop-unroll (BSC).
  • Loading branch information
hfp committed Oct 19, 2023
1 parent 671a081 commit 304698b
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 15 deletions.
22 changes: 14 additions & 8 deletions src/acc/opencl/smm/kernels/multiply.cl
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ __attribute__((overloadable)) T atomic_add(GLOBAL_VOLATILE(T) *, T);
# endif
# endif
#endif
/* Wrapper used at the kernel's CDX(...) += CNM(...) write-back sites;
 * forwards both arguments unchanged to ATOMIC_ADD_GLOBAL. */
#define ACCUMULATE(A, B) ATOMIC_ADD_GLOBAL(A, B)

#if !defined(cl_intel_global_float_atomics) || (1 != TN)
# if defined(ATOMIC32_ADD64)
Expand Down Expand Up @@ -282,6 +283,7 @@ FN(global T* restrict cdata, GLOBAL const T* restrict adata, GLOBAL const T* res
for (short m = 0; m < SM; ++m) cnm[m] = ZERO;
# endif
# if defined(SLM_P)
UNROLL_FORCE(3 * BS)
for (int i = idx; i < (3 * batchsize); i += SWG) params[i] = pbase[i] - 1;
# endif
# if defined(BARRIER) && (MAX(1, SGS) < SWG) && (defined(SLM_C) || defined(SLM_P))
Expand All @@ -291,7 +293,11 @@ FN(global T* restrict cdata, GLOBAL const T* restrict adata, GLOBAL const T* res
if (WRK <= idx) return;
# endif
c0 = params[2] - IDXBASE;
# if defined(BSC) && (1 != BK) && (1 != UM)
UNROLL_OUTER(REPEAT * BS)
# else
UNROLL_FORCE(1)
# endif
# if (1 < REPEAT)
for (int item = 0; item < (REPEAT * batchsize); ++item) {
const int i = item % batchsize;
Expand Down Expand Up @@ -413,7 +419,7 @@ FN(global T* restrict cdata, GLOBAL const T* restrict adata, GLOBAL const T* res
if (ZERO != CNM(idx, bn))
# endif
{
ATOMIC_ADD_GLOBAL(&CDX(m, bn + n0), CNM(idx, bn));
ACCUMULATE(&CDX(m, bn + n0), CNM(idx, bn));
CNM(idx, bn) = ZERO; /* reset */
}
}
Expand Down Expand Up @@ -471,7 +477,7 @@ FN(global T* restrict cdata, GLOBAL const T* restrict adata, GLOBAL const T* res
if (ZERO != CNM(idx, bm))
# endif
{
ATOMIC_ADD_GLOBAL(&CDX(bm + m0, n), CNM(idx, bm));
ACCUMULATE(&CDX(bm + m0, n), CNM(idx, bm));
CNM(idx, bm) = ZERO; /* reset */
}
}
Expand Down Expand Up @@ -520,7 +526,7 @@ FN(global T* restrict cdata, GLOBAL const T* restrict adata, GLOBAL const T* res
if (ZERO != CNM(idx, bm))
# endif
{
ATOMIC_ADD_GLOBAL(&CDX(bm + m0, n), CNM(idx, bm));
ACCUMULATE(&CDX(bm + m0, n), CNM(idx, bm));
CNM(idx, bm) = ZERO; /* reset */
}
}
Expand Down Expand Up @@ -578,7 +584,7 @@ FN(global T* restrict cdata, GLOBAL const T* restrict adata, GLOBAL const T* res
if (ZERO != CNM(idx, m))
# endif
{
ATOMIC_ADD_GLOBAL(&CDX(m, idx), CNM(idx, m));
ACCUMULATE(&CDX(m, idx), CNM(idx, m));
CNM(idx, m) = ZERO; /* reset */
}
}
Expand Down Expand Up @@ -620,7 +626,7 @@ FN(global T* restrict cdata, GLOBAL const T* restrict adata, GLOBAL const T* res
if (ZERO != CNM(idx, u))
# endif
{
ATOMIC_ADD_GLOBAL(&CDX(u + m, idx), CNM(idx, u));
ACCUMULATE(&CDX(u + m, idx), CNM(idx, u));
CNM(idx, u) = ZERO; /* reset */
}
# endif
Expand Down Expand Up @@ -658,7 +664,7 @@ FN(global T* restrict cdata, GLOBAL const T* restrict adata, GLOBAL const T* res
if (ZERO != CNM(idx, u))
# endif
{
ATOMIC_ADD_GLOBAL(&CDX(u + m, idx), CNM(idx, u));
ACCUMULATE(&CDX(u + m, idx), CNM(idx, u));
CNM(idx, u) = ZERO; /* reset */
}
# endif
Expand Down Expand Up @@ -695,7 +701,7 @@ FN(global T* restrict cdata, GLOBAL const T* restrict adata, GLOBAL const T* res
# if defined(ATOMIC_INC_NZ)
if (ZERO != CNM(nc, mc)) {
# endif
ATOMIC_ADD_GLOBAL(&CDX(m, n), CNM(nc, mc));
ACCUMULATE(&CDX(m, n), CNM(nc, mc));
CNM(nc, mc) = ZERO; /* reset */
# if defined(ATOMIC_INC_NZ)
}
Expand Down Expand Up @@ -727,7 +733,7 @@ FN(global T* restrict cdata, GLOBAL const T* restrict adata, GLOBAL const T* res
if (ZERO != CNM(idx, m))
# endif
{
ATOMIC_ADD_GLOBAL(&CDX(m, idx), CNM(idx, m));
ACCUMULATE(&CDX(m, idx), CNM(idx, m));
CNM(idx, m) = ZERO; /* reset */
}
}
Expand Down
14 changes: 7 additions & 7 deletions src/acc/opencl/smm/opencl_libsmm.c
Original file line number Diff line number Diff line change
Expand Up @@ -1531,23 +1531,23 @@ int libsmm_acc_process(const int* host_param_stack, const int* dev_param_stack,
/* compose build parameters and flags */
nchar = LIBXSMM_SNPRINTF(build_params, sizeof(build_params),
"-DMAD=fma -DINTEL=%u -DGLOBAL=%s -DSWG=%i -DSGS=%i -DFN=%s -DREPEAT=%i -DLU=%i "
"-DSM=%i -DSN=%i -DSK=%i -DBS=%i -DBM=%i -DBN=%i -DBK=%i -DT=%s -DTN=%i "
"-DSM=%i -DSN=%i -DSK=%i -DBS=%i %s -DBM=%i -DBN=%i -DBK=%i -DT=%s -DTN=%i "
"%s %s %s %s %s %s %s %s %s %s -D\"ATOMIC_ADD_GLOBAL(A,B)=%s\" %s %s",
0 != devinfo->intel ? devinfo->uid : 0, cmem, (int)new_config.wgsize[kernel_idx], (int)sgs, fname,
NULL == env_nrepeat ? 1 : atoi(env_nrepeat), new_config.lu, m_max, n_max, k_max, bs, new_config.bm, new_config.bn,
new_config.bk, tname, datatype, 0 == new_config.nz ? "" : "-DATOMIC_INC_NZ", 0 == new_config.al ? "" : "-DAL",
NULL == env_nrepeat ? 1 : atoi(env_nrepeat), new_config.lu, m_max, n_max, k_max, bs,
bs == new_config.bs ? "-DBSC" : "", new_config.bm, new_config.bn, new_config.bk, tname, datatype,
0 == new_config.nz ? "" : "-DATOMIC_INC_NZ", 0 == new_config.al ? "" : "-DAL",
0 == new_config.tb ? "" : "-DTRACK_B", 0 != new_config.tc ? "-DTRACK_C" : "", 0 == new_config.ap ? "" : "-DSLM_P",
0 == new_config.aa ? "" : (1 == new_config.aa ? "-DSLM_A=1" : (2 == new_config.aa ? "-DSLM_A=2" : "-DREG_A")),
0 == new_config.ab ? "" : (1 == new_config.ab ? "-DSLM_B=1" : (2 == new_config.ab ? "-DSLM_B=2" : "-DREG_B")),
0 == new_config.ac ? "" : (1 == new_config.ac ? "-DSLM_C=1" : "-DSLM_C=2"), atomic_type, atomic_ops, atomic_exp,
atomic_expr2, barrier_expr);
if (0 < nchar && (int)sizeof(build_params) > nchar) {
const char* const cl_debug = (
# if !defined(NDBGDEV)
const char* const cl_debug = ((0 != devinfo->intel && CL_DEVICE_TYPE_CPU != device_type) ? "-gline-tables-only"
: "");
# else
const char* const cl_debug = "";
(0 != devinfo->intel && CL_DEVICE_TYPE_CPU != device_type) ? "-gline-tables-only" :
# endif
"");
nchar = LIBXSMM_SNPRINTF(buffer, sizeof(buffer), "-cl-fast-relaxed-math -cl-denorms-are-zero %s %s %s",
NULL == env_cl ? "" : env_cl, (0 == new_config.flags || 0 == devinfo->intel) ? "" : intel_xf, cl_debug);
if (0 >= nchar || (int)sizeof(buffer) <= nchar) result = EXIT_FAILURE;
Expand Down

0 comments on commit 304698b

Please sign in to comment.