gpt4 book ai didi

c - OpenCL 中的 NDRangeKernel 函数中的局部大小错误

转载 作者:行者123 更新时间:2023-11-30 15:58:14 26 4
gpt4 key购买 nike

当我乘以 2 来增加局部大小值时,尽管 globalsize/localsize 是整数,但出现错误。我无法弄清楚问题出在哪里。

    Kernel:
__kernel void add(__global float *a,
__global float *b,
__global float *answer,
__local float *shared,
__local float *result)
{
int gid = get_global_id(0);
int lid = get_local_id(0);
int lsize = get_local_size(0);

float tempa, tempb;
shared[lid] = a[gid];
shared[lid + lsize] = b[gid];
barrier(CLK_LOCAL_MEM_FENCE);

for(int k = 0; k < lsize; k++){
tempa = shared[lid + k];
tempb = shared[lid + lsize + k];
result[lid + k] = tempa + tempb;
}
barrier(CLK_LOCAL_MEM_FENCE);

answer[gid] = result[lid];
}

假设本地大小为 2^n 。如果 n < 5,则程序正常运行,否则程序崩溃。C 中的主机代码:

size_t global_work_size = n;    //n = 1000000
size_t local_work_size = 32;
size_t sharedSize = (2 * local_work_size) * sizeof(float);
size_t resultSize = local_work_size * sizeof(float);

err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &cmDevBufInA); //HERE
err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &cmDevBufInB);
err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &cmDevBufOut);
err |= clSetKernelArg(kernel, 3, sharedSize, NULL);
err |= clSetKernelArg(kernel, 4, resultSize, NULL);
assert(err == CL_SUCCESS);

//EXECUTION AND READ
cl_event calculation;


err = clEnqueueNDRangeKernel(cmd_queue, kernel, 1, NULL, &global_work_size, &local_work_size,0, NULL, &calculation);
assert(err == CL_SUCCESS);
clFinish(cmd_queue);

最佳答案

“lid + lsize + k”的最大值是多少 tempb = 共享[盖子 + lsize + k];

盖子 = 31 lsize = 32 k = 32

但共享分配为

size_t 共享大小 = (2 * local_work_size) * sizeof(float);

关于c - OpenCL 中的 NDRangeKernel 函数中的局部大小错误,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/9909352/

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com