gpt4 book ai didi

c++ - OpenCL内核编译错误

转载 作者:行者123 更新时间:2023-11-28 08:02:56 25 4
gpt4 key购买 nike

你好,这段代码在 Mac OS + OpenCL Framework 下工作正常,但是把操作系统换成 openSUSE 11.4(使用 AMD 的 OpenCL 实现)之后,编译内核时会抛出下面的错误。看起来是 typedef float clfft_complex[2]; 导致了这个错误。您对此有何看法?

错误:

Err: "/tmp/OCLRS2tPp.cl", line 4: error: kernel pointer arguments must point to
addrSpace global, local, or constant
__kernel void linear_interp(__global clfft_complex *input,
^

1 error detected in the compilation of "/tmp/OCLRS2tPp.cl".

Internal error: clc compiler invocation failed.

内核代码:

// A complex sample stored as an array of two floats.
// NOTE(review): per the question text, using this array typedef as the pointee
// of a __global kernel argument appears to be what the AMD compiler rejects
// ("kernel pointer arguments must point to addrSpace global, local, or constant").
typedef float clfft_complex[2];

// Each work-item writes the pair (1.5f, 5.5f) into input[get_global_id(0)].
// `output` is declared but not referenced in this body.
__kernel void linear_interp(__global clfft_complex *input,
__global clfft_complex *output)
{
// Index of this work-item along dimension 0.
int global_id = get_global_id(0);
input[global_id][0] = 1.5f;
input[global_id][1] = 5.5f;
}

主机代码:

//////////////////////////////////
/* Preparing OpenCL Environment */
//////////////////////////////////

// Number of platforms and the array that will receive their IDs.
cl_uint cl_platformsN = 0;
cl_platform_id *cl_platformIDs = NULL;

// First call only asks how many platforms are available.
clGetPlatformIDs (0, NULL, &cl_platformsN);

// Second call fills an array sized from the count obtained above.
// NOTE(review): neither clGetPlatformIDs return value nor the malloc result is
// checked, and cl_platformIDs[0] is used below even if the count is 0.
cl_platformIDs = (cl_platform_id*)malloc( cl_platformsN * sizeof(cl_platform_id));
clGetPlatformIDs(cl_platformsN, cl_platformIDs, NULL);

cl_int status = CL_SUCCESS;
cl_device_id device; // Compute device
cl_context context; // Compute context

// Request one device of type DEVICE_TYPE from the first platform, then create
// a context containing just that device.
// NOTE(review): the status written by clCreateContext is not inspected here.
CL_CHECK_ERROR(clGetDeviceIDs(cl_platformIDs[0], DEVICE_TYPE, 1, &device, NULL));
context = clCreateContext(NULL, 1, &device, NULL, NULL, &status);

////////////
/* Device */
////////////
// Multiplier applied to the compute-unit count to obtain the work-group count.
cl_uint wavefronts_per_SIMD = 7;
// CL_DEVICE_MAX_COMPUTE_UNITS is reported as a cl_uint, so the receiving
// variable uses that type and the query size is taken from the variable itself.
cl_uint device_max_cu = 0;

size_t wg_count;
size_t global_work_size;

// Work-group size: 64 work-items when DEVICE_TYPE selects a GPU, otherwise 1.
#if DEVICE_TYPE == CL_DEVICE_TYPE_GPU
size_t local_work_size = 64;
#else
size_t local_work_size = 1;
#endif

// Get info about the compute units on the device
CL_CHECK_ERROR(clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(device_max_cu), &device_max_cu, NULL));

// Do the arithmetic in size_t so the product is not truncated to 32 bits
// before being widened.
wg_count = (size_t)device_max_cu * wavefronts_per_SIMD;

// Total number of work-items = work-groups * work-items per group.
global_work_size = wg_count * local_work_size;

/////////////////////
/* Input Data Part */
/////////////////////

/* Input a slice properties */
// Passed by address to tiff_read_complex below (presumably output parameters
// filled in by the reader; confirm against its definition).
int bits_per_sample;
int samples_per_pixel;
int theta_size;
int slice_size;

/* Read the slice */
// NOTE(review): tiff_read_complex and tiff_input are defined outside this
// excerpt; ownership of the returned buffer and whether it can be NULL are
// not visible here.
clfft_complex *data_tiff = tiff_read_complex(tiff_input,
&bits_per_sample,
&samples_per_pixel,
&slice_size,
&theta_size);


////////////////////////
/* OpenCL - DFI Part */
////////////////////////

/* Sync events */
// NOTE(review): the array length is a const int, which is a constant
// expression in C++ but makes this a variable-length array in C.
const int events_num = 5;
cl_event event_list[events_num];

/* Command Queue */
// Queue on the single device of the context, created with properties = 0.
// NOTE(review): status is not checked after this call.
cl_command_queue command_queue = clCreateCommandQueue(context, device, 0, &status);

/* Program */
// Load the kernel source text from KERNELS_FILE_PATH; load_program_source is
// defined outside this excerpt. A NULL result is reported as a missing file.
const char* programSource = load_program_source(KERNELS_FILE_PATH);
if(programSource == NULL) {
fprintf(stderr, "Programm '%s' can not be created. File was not found.", KERNELS_FILE_PATH);
return;
}

// Create the program object from one source string; the lengths argument is
// NULL, so the string is taken to be NUL-terminated.
// NOTE(review): status is not checked after this call.
cl_program program = clCreateProgramWithSource(context, 1,
(const char**)&programSource, NULL,
&status);

/* Build the program for every device associated with it. */
status = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);

/* Retrieve and print the build log.
 * The result of the log query is kept in its own variable: storing it in
 * `status` would overwrite the build result, so a failed build would pass
 * the check below whenever the log query itself succeeds. */
size_t paramValueSize = 1024 * 1024, param_value_size_ret = 0;
char *paramValue = (char*)calloc(paramValueSize, sizeof(char));
if(paramValue != NULL) {
    cl_int log_status = clGetProgramBuildInfo(program,
                                              device,
                                              CL_PROGRAM_BUILD_LOG,
                                              paramValueSize,
                                              paramValue,
                                              &param_value_size_ret);
    if(log_status == CL_SUCCESS) {
        printf("Err: %s", paramValue);
    }
    /* The log is only needed for the message above. */
    free(paramValue);
}

/* `status` still holds the clBuildProgram result here. */
if(status != CL_SUCCESS) {
    fprintf(stderr, "Programm '%s' can not be build. (%s)", KERNELS_FILE_PATH, opencl_map_error(status));
    return;
}

/* Kernels */
/* Look up the kernel entry point by name in the built program. */
cl_kernel kernel_linear_interp = clCreateKernel(program, "linear_interp", &status);

最佳答案

首先,我不清楚这段代码为什么能够工作。不过,假设你的输入是一个内核指针参数(cl_mem),它在全局内存空间中占有一块特定的内存,那么我认为你不能通过把参数写成 __global *input[2] 的形式,强行让它多出一个大小为 2 的数组维度,因为参数类型在调用内核之前就已经设置好了。(顺便问一下,你的 clSetKernelArg() 在哪里?)

第二,你为什么要对你的输入这样做?

input[global_id][0] = 1.5f;
input[global_id][1] = 5.5f;

因为输入内存空间通常应该是只读的..或者那个内核可能只是你内核的一部分?

无论如何,我不确定你在用那个内核做什么,所以:

  1. 如果你的意思是只需要一个适用于所有输入的常量 float[2] 变量,那么可以这样声明:

    __constant float var[2] = {1.5f, 5.5f};

  2. 如果你所说的 input 实际上是你的 output,并且你想在一个工作项中写入两个 float,那么可以把类型改为 float2,或者使用如下写法:

    vstore2((float2)(1.5f,5.5f), 0, input[global_id]);

    但不要忘记将本地工作项除以 2..

关于c++ - OpenCL内核编译错误,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/10916093/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com