gpt4 book ai didi

c++ - 使用 OpenCL 的欧氏距离

转载 作者:太空宇宙 更新时间:2023-11-04 03:38:30 25 4
gpt4 key购买 nike

我正在尝试计算一组 5D 点(像素)到 5D 单点(中心)的欧氏距离并存储在另一个结果 vector 中,我想使用 vector 索引将所有信息存储在单个 vector 中,所以对于第 i 个像素,5 个维度是 (5i) , (5i+1) , ...我是 OpenCL 的新手,我只是出于自己的目的在互联网上编辑了示例代码。理论是正确的,但代码没有显示正确的答案!这是内核:

//d_kernel.cl

__kernel void distance_kernel(__global double *pixelInfo,
__global double *clusterCentres,
__global double *distanceFromClusterCentre)
{
int index = get_global_id(0);

int d, dl, da, db, dx, dy;

dl = pixelInfo[5 * index] - clusterCentres[0];
dl = dl * dl;

da = pixelInfo[5 * index + 1] - clusterCentres[1];
da = da * da;

db = pixelInfo[5 * index + 2] - clusterCentres[2];
db = db * db;

dx = pixelInfo[5 * index + 3] - clusterCentres[3];
dx = dx * dx;

dy = pixelInfo[5 * index + 4] - clusterCentres[4];
dy = dy * dy;

distanceFromClusterCentre[index] = dx + dy + dl + da + db;

}

这是主机代码:

#include <iostream>
#include <CL/cl.h>
#include <vector>
using namespace std;

#define MAX_SOURCE_SIZE (0x100000)
int main(int argc, char **argv)
{

// Create the two input vectors
int i;
const int pixelsNumber = 1024;
const int clustersNumber = 1;

std::vector<double> pixelInfo;
pixelInfo.resize(5 * pixelsNumber);
std::fill(pixelInfo.begin(), pixelInfo.end(), 500);

std::vector<double> clusterCentres;
clusterCentres.resize(5 * clustersNumber);
std::fill(clusterCentres.begin(), clusterCentres.end(), 200);

std::vector<double> distanceFromClusterCentre;
distanceFromClusterCentre.resize(pixelsNumber);
std::fill(distanceFromClusterCentre.begin(), distanceFromClusterCentre.end(), 0);

// Load the kernel source code into the array source_str
FILE *fp;
char *source_str;
size_t source_size;

fp = fopen("d_kernel.cl", "r");
if (!fp) {
fprintf(stderr, "Failed to load kernel.\n");
exit(1);
}
source_str = (char*)malloc(MAX_SOURCE_SIZE);
source_size = fread(source_str, 1, MAX_SOURCE_SIZE, fp);
fclose(fp);

// Get platform and device information
cl_platform_id platform_id = NULL;
cl_device_id device_id = NULL;
cl_uint ret_num_devices;
cl_uint ret_num_platforms;
cl_int ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
ret = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1,
&device_id, &ret_num_devices);

// Create an OpenCL context
cl_context context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret);

// Create a command queue
cl_command_queue command_queue = clCreateCommandQueue(context, device_id, 0, &ret);

// Create memory buffers on the device for each vector
cl_mem pixelInfo_mem = clCreateBuffer(context, CL_MEM_READ_ONLY,
5 * pixelsNumber * sizeof(int), NULL, &ret);
cl_mem clusterCentres_mem = clCreateBuffer(context, CL_MEM_READ_ONLY,
5 * clustersNumber * sizeof(int), NULL, &ret);
cl_mem distanceFromClusterCentre_mem = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
pixelsNumber * sizeof(int), NULL, &ret);

// Copy the vectors to their respective memory buffers
ret = clEnqueueWriteBuffer(command_queue, pixelInfo_mem, CL_TRUE, 0,
5 * pixelsNumber * sizeof(int), pixelInfo.data(), 0, NULL, NULL);
ret = clEnqueueWriteBuffer(command_queue, clusterCentres_mem, CL_TRUE, 0,
5 * clustersNumber * sizeof(int), clusterCentres.data(), 0, NULL, NULL);

// Create a program from the kernel source
cl_program program = clCreateProgramWithSource(context, 1,
(const char **)&source_str, (const size_t *)&source_size, &ret);

// Build the program
ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);

// Create the OpenCL kernel
cl_kernel kernel = clCreateKernel(program, "vector_add", &ret);

// Set the arguments of the kernel
ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&pixelInfo_mem);
ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&clusterCentres_mem);
ret = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&distanceFromClusterCentre_mem);

// Execute the OpenCL kernel on the list
size_t global_item_size = pixelsNumber; // Process the entire lists
size_t local_item_size = 64; // Divide work items into groups of 64
ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL,
&global_item_size, &local_item_size, 0, NULL, NULL);

// Read the memory buffer result on the device to the local vector result
ret = clEnqueueReadBuffer(command_queue, distanceFromClusterCentre_mem, CL_TRUE, 0,
pixelsNumber * sizeof(int), distanceFromClusterCentre.data(), 0, NULL, NULL);

// Display the result to the screen
for (i = 0; i < pixelsNumber; i++)
{
cout << "Pixel " << i << ": " << distanceFromClusterCentre[i] << endl;
//system("PAUSE");
}

// Clean up
ret = clFlush(command_queue);
ret = clFinish(command_queue);
ret = clReleaseKernel(kernel);
ret = clReleaseProgram(program);
ret = clReleaseMemObject(pixelInfo_mem);
ret = clReleaseMemObject(clusterCentres_mem);
ret = clReleaseMemObject(distanceFromClusterCentre_mem);
ret = clReleaseCommandQueue(command_queue);
ret = clReleaseContext(context);
free(pixelInfo.data());
free(clusterCentres.data());
free(distanceFromClusterCentre.data());

system("PAUSE");
return 0;
}

RESULT 的一部分是:

.
.
.
Pixel 501: -1.11874e+306
Pixel 502: -1.16263e+306
Pixel 503: -1.07485e+306
Pixel 504: -1.03079e+306
Pixel 505: -9.42843e+305
Pixel 506: -9.86903e+305
Pixel 507: -8.98954e+305
Pixel 508: -9.86903e+305
Pixel 509: -8.98954e+305
Pixel 510: -9.43014e+305
Press any key to continue . . .
Pixel 511: -8.55065e+305
Pixel 512: 0
Pixel 513: 0
Pixel 514: 0
Pixel 515: 0
Pixel 516: 0
Pixel 517: 0
Pixel 518: 0
Pixel 519: 0
Pixel 520: 0
.
.
.

在索引 511 之后, vector 的其余部分为零!

最佳答案

您创建了 double vector ,然后将它们视为存在整数(为整数创建缓冲区,将数据写入整数缓冲区并在存在整数时读回结果)。为避免此类错误,您可以这样编写代码:

cl_mem pixelInfo_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, pixelInfo.size() * sizeof(pixelInfo[0]), NULL, &ret);
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

关于c++ - 使用 OpenCL 的欧氏距离,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/30396520/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com