Skip to content

Commit

Permalink
fix profiling execute_multipass
Browse files Browse the repository at this point in the history
- fix clGetDeviceInfo(CL_DEVICE_MAX_WORK_ITEM_SIZES) by passing the
proper buffer size (3 * sizeof(size_t) instead of sizeof(cl_uint))

- clamp all localThreads elements with regard to CL_DEVICE_MAX_WORK_GROUP_SIZE

- fix the size used to create/read the output buffer

Fix #2238
  • Loading branch information
rjodinchr committed Jan 22, 2025
1 parent d058dfd commit 3d0cd94
Showing 1 changed file with 24 additions and 10 deletions.
34 changes: 24 additions & 10 deletions test_conformance/profiling/execute_multipass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
//
#include "harness/compat.h"

#include <algorithm>
#include <stdio.h>
#include <string.h>
#include <time.h>
Expand Down Expand Up @@ -97,23 +98,35 @@ static int run_kernel( cl_device_id device, cl_context context, cl_command_queue
cl_ulong queueStart, submitStart, writeStart, writeEnd;
size_t threads[3];
size_t localThreads[3];
size_t maxWorkgroupSize;
int err = 0;

// set thread dimensions
threads[0] = w;
threads[1] = h;
threads[2] = d;

err = clGetDeviceInfo( device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof( cl_uint ), (size_t*)localThreads, NULL );
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
3 * sizeof(size_t), (size_t *)localThreads, NULL);
if (err)
{
localThreads[0] = 256; localThreads[1] = 1; localThreads[2] = 1;
err = 0;
log_error("clGetDeviceInfo(CL_DEVICE_MAX_WORK_ITEM_SIZES) failed\n");
return -1;
}
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t),
&maxWorkgroupSize, NULL);
if (err)
{
log_error("clGetDeviceInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE) failed\n");
return -1;
}
if( localThreads[0] > threads[0] )
localThreads[0] = threads[0];
if( localThreads[1] > threads[1] )
localThreads[1] = threads[1];
localThreads[0] =
std::min({ localThreads[0], threads[0], maxWorkgroupSize });
localThreads[1] = std::min(
{ localThreads[1], threads[1], maxWorkgroupSize / localThreads[0] });
localThreads[2] =
std::min({ localThreads[2], threads[2],
maxWorkgroupSize / (localThreads[0] * localThreads[1]) });

cl_sampler sampler = clCreateSampler( context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err );
if( err ){
Expand All @@ -131,9 +144,9 @@ static int run_kernel( cl_device_id device, cl_context context, cl_command_queue
}

// allocate an array memory object to load the filter weights
size_t outptr_size = sizeof(cl_uchar) * w * h * d * nChannels;
memobjs[1] =
clCreateBuffer(context, CL_MEM_READ_WRITE,
sizeof(cl_float) * w * h * d * nChannels, NULL, &err);
clCreateBuffer(context, CL_MEM_READ_WRITE, outptr_size, NULL, &err);
if( memobjs[1] == (cl_mem)0 ){
log_error( " unable to create array using clCreateBuffer\n" );
clReleaseMemObject( memobjs[0] );
Expand Down Expand Up @@ -237,7 +250,8 @@ static int run_kernel( cl_device_id device, cl_context context, cl_command_queue
}

// read output image
err = clEnqueueReadBuffer(queue, memobjs[1], CL_TRUE, 0, w*h*d*nChannels*4, outptr, 0, NULL, NULL);
err = clEnqueueReadBuffer(queue, memobjs[1], CL_TRUE, 0, outptr_size,
outptr, 0, NULL, NULL);
if( err != CL_SUCCESS ){
print_error( err, "clReadImage failed\n" );
clReleaseKernel( kernel[0] );
Expand Down

0 comments on commit 3d0cd94

Please sign in to comment.