gpt4 book ai didi

c++ - OpenCL 程序未执行

转载 作者:行者123 更新时间:2023-11-28 00:33:19 25 4
gpt4 key购买 nike

我对这个程序稍作修改,但现在它无法运行了。这是我的代码:

#include <iostream>
#include <vector>
#include "CL/cl.h"
#include <math.h>

using namespace std;
#define MYSIZE 1000

// Select double precision when one of the fp64 extension macros is visible to
// the host compiler; otherwise DOUBLE_SUPPORT_AVAILABLE stays undefined and
// the float variant below is used.
// NOTE(review): `#pragma OPENCL EXTENSION` is an OpenCL C directive; here it is
// seen by the host C++ compiler rather than the kernel compiler, so it
// presumably has no effect on the kernel source below - confirm.
#if defined(cl_khr_fp64) //Khronos extension available
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#define DOUBLE_SUPPORT_AVAILABLE
#elif defined(cl_amd_fp64) //AMD extension available
#pragma OPENCL EXTENSION cl_amd_fp64 : enable
#define DOUBLE_SUPPORT_AVAILABLE
#endif

// Expands to the OpenCL C text of the addArray kernel, with REAL (a string
// literal naming the element type) spliced in by literal concatenation.
// The kernel computes C[i] = A[i] + B[i] for every work-item with i < size.
#define ADD_ARRAY_KERNEL_SOURCE(REAL) \
    "\n" \
    "__kernel void addArray(__global " REAL " *A, __global " REAL " *B, __global " REAL " *C, const unsigned int size) \n" \
    "{ \n" \
    "int i = get_global_id(0); \n" \
    "if(i < size) \n" \
    " C[i] = A[i] + B[i]; \n" \
    "} \n" \
    "\n"

// myreal is the host-side element type; SOURCE is the matching kernel text.
#ifdef DOUBLE_SUPPORT_AVAILABLE
typedef double myreal;
const char *SOURCE = ADD_ARRAY_KERNEL_SOURCE("double");
#else
typedef float myreal;
const char *SOURCE = ADD_ARRAY_KERNEL_SOURCE("float");
#endif

#undef ADD_ARRAY_KERNEL_SOURCE

int main(int argc, char *argv[])
{
int devType = CL_DEVICE_TYPE_GPU;
unsigned int count = MYSIZE;
cl_int err;//err returned from API
size_t global;//global size
size_t local;//local size
cl_platform_id platform;
cl_device_id device;
cl_context context;
cl_command_queue commands;
cl_program program;
cl_kernel kernel;

//connect to a compute device
err = clGetPlatformIDs(1, &platform, NULL);
if(err != CL_SUCCESS)
{
cerr << "ERROR: Could not find a platform" << endl;
return -1;
}

//get a device of the appropriate type
err = clGetDeviceIDs(platform, devType, 1, &device, NULL);
if(err != CL_SUCCESS)
{
cerr << "ERROR: Could not find a device" << endl;
return -1;
}

//create a context
context = clCreateContext(0, 1, &device, NULL, NULL, &err);
if(!context || (err != CL_SUCCESS))
{
cerr << "ERROR: Could not create a context" << endl;
return -1;
}

//create a command queue
commands = clCreateCommandQueue(context, device, 0, &err);
if(!commands || (err != CL_SUCCESS))
{
cerr << "ERROR: Could not create a command queue" << endl;
return -1;
}

//create the compute program from source
program = clCreateProgramWithSource(context, 1, (const char **) &SOURCE, NULL, &err);
if(!program || (err != CL_SUCCESS))
{
cerr << "ERROR: Could not create a program from source" << endl;
return -1;
}

//build the program executable
err = clBuildProgram(program, NULL, NULL, NULL, NULL, NULL);
if(err != CL_SUCCESS)
{
size_t len;
char buffer[2048];
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);

cerr << "ERROR: Could not build the program executable" << endl;
cerr << buffer << endl;
return -1;
}

//create the kernel
kernel = clCreateKernel(program, "addArray", &err);
if(!kernel || (err != CL_SUCCESS))
{
cerr << "Could not create the kernel" << endl;
return -1;
}

myreal *A = new myreal[MYSIZE];
myreal *B = new myreal[MYSIZE];
myreal *C = new myreal[MYSIZE];
for(int i = 0; i < MYSIZE; i++)
{
A[i] = sqrt(i);
B[i] = -sqrt(i);
}
unsigned int correct = 0;//correct answers
cl_mem A_cl;
cl_mem B_cl;
cl_mem C_cl;

//create device memory buffer
A_cl = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(myreal) * count, NULL, NULL);
B_cl = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(myreal) * count, NULL, NULL);
C_cl = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(myreal) * count, NULL, NULL);
if(!A_cl || !B_cl || !C_cl)
{
cerr << "Could not create device memory buffer" << endl;
return -1;
}

//transfer data to device
err = clEnqueueWriteBuffer(commands, A_cl, CL_TRUE, 0, sizeof(myreal) * count, A, 0, NULL, NULL);
if(err != CL_SUCCESS)
{
cerr << "Could not transfer data to device" << endl;
return -1;
}

err = clEnqueueWriteBuffer(commands, B_cl, CL_TRUE, 0, sizeof(myreal) * count, B, 0, NULL, NULL);
if(err != CL_SUCCESS)
{
cerr << "Could not transfer data to device" << endl;
return -1;
}

//set the arguments to the compute kernel
err = 0;
err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &A_cl);
err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &B_cl);
err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &C_cl);
err |= clSetKernelArg(kernel, 3, sizeof(unsigned int), &count);
if(err != CL_SUCCESS)
{
cerr << "Could not set args for kernel" << endl;
return -1;
}

//get max work group size
err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, NULL);
if(err != CL_SUCCESS)
{
cerr << "Could not get the kernel work group size" << endl;
return -1;
}

//execute the kernel using max work group size
global = count;
err = clEnqueueNDRangeKernel(commands, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
if(err != CL_SUCCESS)
{
cerr << "Could not enqueue the kernel for execution: " << err << endl;
return -1;
}

//wait for all commands to finish
clFinish(commands);

//read back the results to C
err = clEnqueueReadBuffer(commands, C_cl, CL_TRUE, 0, sizeof(myreal) * count, C, 0, NULL, NULL);
if(err != CL_SUCCESS)
{
cerr << "Could not read data from C" << endl;
return -1;
}

//validate the results
correct = 0;
myreal check = 0;
for(int i = 0; i < MYSIZE; i++)
{
check = A[i] + B[i] - C[i];
if((check < 1e-14) && (check > -1e-14))
correct++;
}

cout << "Computed " << correct << " correct results with " << ((correct / MYSIZE) * 100) << "% success rate!" << endl;
delete[] A;
delete[] B;
delete[] C;

clReleaseMemObject(A_cl);
clReleaseMemObject(B_cl);
clReleaseMemObject(C_cl);
clReleaseProgram(program);
clReleaseKernel(kernel);
clReleaseCommandQueue(commands);
clReleaseContext(context);

return 0;

return 0;
}

我得到的输出是:

Could not enqueue the kernel for execution: -54

这个错误-54是什么意思?

我在 Windows7 64 位上使用 MinGW 4.4 32 位编译器。我的 GPU 是 ATI Radeon 7670m,它有 OpenCL 1.1 驱动程序。我正在使用适用于 64 位的 APP SDK 2.9。

最佳答案

在 Khronos 的参考头文件 cl.h 中,-54 表示 CL_INVALID_WORK_GROUP_SIZE,其含义不言自明:当指定了 local 大小而全局大小(这里是 1000)不能被它整除时,就会返回该错误。

提示:如果您对工作组大小没有限制,那么您可以传递 NULL 而不是 local 并让入队函数为您计算出来。

关于c++ - OpenCL 程序未执行,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/21968036/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com