gpt4 book ai didi

java - JOCL Char 不返回所有字符

转载 作者:行者123 更新时间:2023-12-01 20:21:00 25 4
gpt4 key购买 nike

package parallelencode;

import org.jocl.*;
import static org.jocl.CL.*;

public class ParallelEncode {
/**
* The source code of the OpenCL program to execute
*/
private static String programSource =
"__kernel void "+
"sampleKernel(__global const float *a,"+
" __global const float *b,"+
" __global uchar16 *c,"+
" __global char *d)"+
"{"+
" int gid = get_global_id(0);"+
" c[gid] = 'q';"+
" "+
" d[gid] = 'm';"+
"}";


/**
* The entry point of this sample
*
* @param args Not used
*/
public static void main(String args[])
{
// Create input- and output data
int n = 17;
float srcArrayA[] = new float[n];
float srcArrayB[] = new float[n];
char dstArray[] = new char[n];
char charArray[] = new char[n];
for (int i=0; i<n; i++)
{
srcArrayA[i] = i;
srcArrayB[i] = i;
}
Pointer srcA = Pointer.to(srcArrayA);
Pointer srcB = Pointer.to(srcArrayB);
Pointer dst = Pointer.to(dstArray);
Pointer cArr = Pointer.to(charArray);

// The platform, device type and device number
// that will be used
final int platformIndex = 0;
final long deviceType = CL_DEVICE_TYPE_ALL;
final int deviceIndex = 0;

// Enable exceptions and subsequently omit error checks in this sample
CL.setExceptionsEnabled(true);

// Obtain the number of platforms
int numPlatformsArray[] = new int[1];
clGetPlatformIDs(0, null, numPlatformsArray);
int numPlatforms = numPlatformsArray[0];

// Obtain a platform ID
cl_platform_id platforms[] = new cl_platform_id[numPlatforms];
clGetPlatformIDs(platforms.length, platforms, null);
cl_platform_id platform = platforms[platformIndex];

// Initialize the context properties
cl_context_properties contextProperties = new cl_context_properties();
contextProperties.addProperty(CL_CONTEXT_PLATFORM, platform);

// Obtain the number of devices for the platform
int numDevicesArray[] = new int[1];
clGetDeviceIDs(platform, deviceType, 0, null, numDevicesArray);
int numDevices = numDevicesArray[0];

// Obtain a device ID
cl_device_id devices[] = new cl_device_id[numDevices];
clGetDeviceIDs(platform, deviceType, numDevices, devices, null);
cl_device_id device = devices[deviceIndex];

// Create a context for the selected device
cl_context context = clCreateContext(
contextProperties, 1, new cl_device_id[]{device},
null, null, null);

// Create a command-queue for the selected device
cl_command_queue commandQueue =
clCreateCommandQueue(context, device, 0, null);

// Allocate the memory objects for the input- and output data
cl_mem memObjects[] = new cl_mem[4];
memObjects[0] = clCreateBuffer(context,
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
Sizeof.cl_float * n, srcA, null);
memObjects[1] = clCreateBuffer(context,
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
Sizeof.cl_float * n, srcB, null);
memObjects[2] = clCreateBuffer(context,
CL_MEM_READ_WRITE,
Sizeof.cl_char * n, null, null);
memObjects[3] = clCreateBuffer(context, CL_MEM_READ_WRITE, Sizeof.cl_char * n, null, null);

//char *h_rp = (char*)malloc(length);
//cl_mem d_rp = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, length, h_rp, &err);
//err = clSetKernelArg(ckKernel, 0, sizeof(cl_mem), &d_rp)

// Create the program from the source code
cl_program program = clCreateProgramWithSource(context,
1, new String[]{ programSource }, null, null);

// Build the program
clBuildProgram(program, 0, null, null, null, null);

// Create the kernel
cl_kernel kernel = clCreateKernel(program, "sampleKernel", null);

// Set the arguments for the kernel
clSetKernelArg(kernel, 0, Sizeof.cl_mem, Pointer.to(memObjects[0]));
clSetKernelArg(kernel, 1, Sizeof.cl_mem, Pointer.to(memObjects[1]));
clSetKernelArg(kernel, 2, Sizeof.cl_mem, Pointer.to(memObjects[2]));
clSetKernelArg(kernel, 3, Sizeof.cl_mem, Pointer.to(memObjects[3]));

// Set the work-item dimensions
long global_work_size[] = new long[]{n};
long local_work_size[] = new long[]{1};

// Execute the kernel
clEnqueueNDRangeKernel(commandQueue, kernel, 1, null,
global_work_size, local_work_size, 0, null, null);

// Read the output data
clEnqueueReadBuffer(commandQueue, memObjects[2], CL_TRUE, 0,
n * Sizeof.cl_char, dst, 0, null, null);

clEnqueueReadBuffer(commandQueue, memObjects[3], CL_TRUE, 0,
n * Sizeof.cl_char, cArr, 0, null, null);

// Release kernel, program, and memory objects
clReleaseMemObject(memObjects[0]);
clReleaseMemObject(memObjects[1]);
clReleaseMemObject(memObjects[2]);
clReleaseMemObject(memObjects[3]);
clReleaseKernel(kernel);
clReleaseProgram(program);
clReleaseCommandQueue(commandQueue);
clReleaseContext(context);

System.out.println(java.util.Arrays.toString(dstArray));
System.out.println(java.util.Arrays.toString(charArray));
}
}

结果:

[?, ?, ?, ?, ?, ?, ?, ?, q,  ,  ,  ,  ,  ,  ,  ,  ]
[?, ?, ?, ?, ?, ?, ?, ?, m, , , , , , , , ]

为什么它不为数组中的每一个都生成一个 q,问号是什么?我尝试更改一些内容,例如将 int gid = get_global_id(0); 更改为 int gid = get_global_id(1);,最终结果类似于 [ q, , , ...][m, , , ...]。有人可以解释一下这一点,以及如何将多个 char 作为输入传递给 OpenCL 内核吗?

最佳答案

 int n = 17;

除了缓冲区副本之外,这都可以。

clEnqueueReadBuffer(commandQueue, memObjects[3], CL_TRUE, 0,
n * Sizeof.cl_char, cArr, 0, null, null);

这会读取 8 个半 char 值或 17 个字节。 java char(2 个字节)和设备端 char(1 个字节)之间不匹配。

这就是为什么您在第 17 个字节或第 9 个元素处看到正确的 q

java bool 数组也会出现类似的错误。

uchar16 表示 16 个字节。

    clEnqueueReadBuffer(commandQueue, memObjects[2], CL_TRUE, 0,
n * Sizeof.cl_char, dst, 0, null, null);

这需要乘以 16,除非每个元素都适用于所有 16 个元素。如果您的意思是 17 个元素,每个元素 16 字节,那么 n*16 应该在那里,并且主机端(java)应该给出字节数组。

关于java - JOCL Char 不返回所有字符,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/44711718/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com