gpt4 book ai didi

c - 使用 OpenCL 处理的大型阵列的错误信息

转载 作者:太空宇宙 更新时间:2023-11-04 04:01:51 26 4
gpt4 key购买 nike

当我使用大型数组 (1000 x 10000) 时,我遇到了内核执行问题,它没有在正确的位置写入信息。但是对于小数组,没有问题,我检索到正确的结果。对于内核执行,我使用 ATI Mobility RADEON HD 4300 系列的 GPU。

C 代码示例是:

#include <stdio.h>
#include <stdlib.h>

#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif

#define MAX_SOURCE_SIZE (0x100000)
#define MAX_SIZE 108
#define NCOLS 1000
#define NROWS 10000

int main(void) {
char* source_name = "mykernel.cl";
char* source_code;
size_t source_size;
cl_platform_id platformId = NULL;
cl_uint nbplatforms;
cl_device_id deviceId = NULL;
cl_uint nbdevices;
cl_context context = NULL;
cl_int errcode;
cl_command_queue commandQueue = NULL;
cl_program program;
size_t global_work_size[2];
size_t local_work_size[2];

FILE* fh;

//Retrieving platform information
errcode = clGetPlatformIDs(1, &platformId, &nbplatforms);

//Retrieving device (GPU) information
errcode = clGetDeviceIDs(platformId, CL_DEVICE_TYPE_GPU, 1, &deviceId, &nbdevices);

//Creation of a working context
context = clCreateContext(NULL, 1, &deviceId, NULL, NULL, &errcode);

commandQueue = clCreateCommandQueue(context, deviceId, 0, &errcode);

//Opening and reading the kernel source file
if((fh = fopen(source_name, "r")) == NULL){
fprintf(stderr, "Failed to open the file containing the kernel source !\n");
exit(EXIT_FAILURE);
}
source_code = (char*) malloc (MAX_SOURCE_SIZE * sizeof(char));
source_size = fread(source_code, sizeof(char), MAX_SOURCE_SIZE, fh);
fclose(fh);

program = clCreateProgramWithSource(context, 1, (const char**) &source_code, (const size_t*) &source_size, &errcode);

//Building kernel
errcode = clBuildProgram(program, 1, &deviceId, NULL, NULL, NULL);

//Creation of the kernel program
cl_kernel kernel = clCreateKernel(program, "mykernel", &errcode);
unsigned int *op1 = (unsigned int*) malloc (NCOLS * NROWS * sizeof(unsigned int));

cl_mem op1buff = clCreateBuffer(context, CL_MEM_WRITE_ONLY, NCOLS * NROWS * sizeof(unsigned int), NULL, &errcode);

clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*) &op1buff);

global_work_size[0] = NCOLS;
global_work_size[1] = NROWS;
local_work_size[0] = NCOLS;
local_work_size[1] = 1;

clEnqueueNDRangeKernel(commandQueue, kernel, 2, NULL, global_work_size, local_work_size, 0, NULL, NULL);

errcode = clEnqueueReadBuffer(commandQueue, op1buff, CL_TRUE, 0, NCOLS * NROWS * sizeof(unsigned int), (void*)op1, 0, NULL, NULL);

for(int i = 0; i < NROWS; i++){
for(int j = 0; j < NCOLS; j++)
printf("[index:%d - %u] ", i*NCOLS+j, op1[i*NCOLS+j]);
printf("\n");
}

return EXIT_SUCCESS;
}

内核源代码放在名为 mykernel.cl 的文件中,如下所示:

/*
 * Stores each work-item's own row-major linear index into op1buf.
 *
 * op1buf: output buffer; it is indexed up to
 *         get_global_size(0) * get_global_size(1) - 1, so it must hold at
 *         least that many elements.
 */
__kernel void mykernel(__global unsigned int* op1buf){
    // Global IDs are used instead of group/local IDs so the index does not
    // depend on how the host splits the range into work-groups. When one
    // work-group spans a full row this equals
    // get_group_id(1) * get_global_size(0) + get_local_id(0).
    unsigned int index = get_global_id(1) * get_global_size(0) + get_global_id(0);
    op1buf[index] = index;
}

当我使用大型数组时,执行此程序会返回从数组中读取的意外值。例如:

[index:0 - 16777215] [index:1 - 16777215] [index:2 - 16777215] [index:3 - 16777215] ...
[index:1000 - 3438339071] [index:1001 - 3941660159] [index:1002 - 1650092117] [index:1003 - 2529976771] ...
[index:1000 - 3438339071] [index:1001 - 3941660159] [index:1002 - 1650092117] [index:1003 - 2529976771] ...
[index:3000 - 16777215] [index:3001 - 16777215] [index:3002 - 16777215] [index:3003 - 16777215] ...
[index:4000 - 3438339071] [index:4001 - 3941660159] [index:4002 - 1650092117] [index:4003 - 2529976771] ...
....

我的代码可能出了什么问题,或者我没有考虑到 GPU 使用方面的问题?

提前致谢。

最佳答案

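工作组大小 1000(即 local_work_size 的第一维)显然超出了您的设备所支持的上限。在这种情况下 clEnqueueNDRangeKernel 会返回错误(例如 CL_INVALID_WORK_GROUP_SIZE),内核根本不会执行,因此读回的缓冲区内容是未初始化的数据。请将 clGetDeviceInfo 与 CL_DEVICE_MAX_WORK_GROUP_SIZE 结合使用,以确定您可以使用的最大工作组大小,并检查每个 OpenCL 调用的返回值。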

关于c - 使用 OpenCL 处理的大型阵列的错误信息,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/10775621/

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com