Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

allocations: Make buffer kernel more efficient for multiple allocations #2235

Open
wants to merge 1 commit into
base: main
Choose a base branch
from

Conversation

lakshmih
Copy link

  • Fix malloc for 'access_string' and 'kernel_string'.
  • Fix typo in 'number_of_work_itmes'.

- Fix malloc for 'access_string' and 'kernel_string'.
- Fix typo in 'number_of_work_itmes'.
@lakshmih
Copy link
Author

This is how the change would modify the kernel source (shown here for the four-buffer case):

Old:

/*
 * sample_test: each work-item adds up its slice of elements from four input
 * buffers and stores the single uint total in result[tid].
 *
 * buffer0..buffer3  input arrays; only read.
 * result            output array; element tid receives this work-item's sum.
 * array_sizes       array_sizes[k] is the exclusive upper bound on indices
 *                   read from buffer k.
 * per_item          number of consecutive indices covered by one work-item.
 *
 * In this form all four buffers are walked by one shared loop, so every
 * iteration evaluates four bounds checks (one per buffer).
 */
__kernel void sample_test( __global uint *buffer0,  __global uint *buffer1,  __global uint *buffer2,  __global uint *buffer3,  __global uint *result, __global ulong *array_sizes, uint per_item)
{
	int tid = get_global_id(0);
	/* Running sum across all four buffers. */
	uint r = 0;
	ulong i;
	/* Slice is [tid*per_item, (tid+1)*per_item); operands are cast to ulong before multiplying. */
	for(i=(ulong)tid*(ulong)per_item; i<(ulong)(1+tid)*(ulong)per_item; i++) {
		/* Index i is only read from a buffer when it is below that buffer's size. */
		if (i<array_sizes[0]) r += buffer0[i];
		if (i<array_sizes[1]) r += buffer1[i];
		if (i<array_sizes[2]) r += buffer2[i];
		if (i<array_sizes[3]) r += buffer3[i];
	}
	result[tid] = r;
}

New:

/*
 * sample_test: each work-item adds up its slice of elements from four input
 * buffers and stores the single uint total in result[tid].
 *
 * buffer0..buffer3  input arrays; only read.
 * result            output array; element tid receives this work-item's sum.
 * array_sizes       array_sizes[k] is the exclusive upper bound on indices
 *                   read from buffer k.
 * per_item          number of consecutive indices covered by one work-item.
 *
 * In this form each buffer gets its own loop. The loop's end index is
 * clamped to the buffer size once, up front, so the loop body contains no
 * per-element bounds check.
 */
__kernel void sample_test( __global uint *buffer0,  __global uint *buffer1,  __global uint *buffer2,  __global uint *buffer3,  __global uint *result, __global ulong *array_sizes, uint per_item)
{
	int tid = get_global_id(0);
	/* Running sum across all four buffers. */
	uint r = 0;
	ulong i;
	/*
	 * endK = min(slice end, size of buffer K). When the slice start is at or
	 * past the buffer size, endK <= start and the loop below does not execute.
	 */
	ulong end0 = min((ulong)(1+tid)*(ulong)per_item, array_sizes[0]);
	for(i=(ulong)tid*(ulong)per_item; i<end0; i++) {
		r += buffer0[i];
	}
	/* Same slice start, clamped against buffer1's size. */
	ulong end1 = min((ulong)(1+tid)*(ulong)per_item, array_sizes[1]);
	for(i=(ulong)tid*(ulong)per_item; i<end1; i++) {
		r += buffer1[i];
	}
	/* Same slice start, clamped against buffer2's size. */
	ulong end2 = min((ulong)(1+tid)*(ulong)per_item, array_sizes[2]);
	for(i=(ulong)tid*(ulong)per_item; i<end2; i++) {
		r += buffer2[i];
	}
	/* Same slice start, clamped against buffer3's size. */
	ulong end3 = min((ulong)(1+tid)*(ulong)per_item, array_sizes[3]);
	for(i=(ulong)tid*(ulong)per_item; i<end3; i++) {
		r += buffer3[i];
	}
	result[tid] = r;
}

@lakshmih lakshmih requested review from kpet and bashbaug January 23, 2025 16:56
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

1 participant