gpt4 book ai didi

c++ - OpenCL的部分封装导致段错误,包括代码示例

转载 作者:行者123 更新时间:2023-11-30 04:12:36 27 4
gpt4 key购买 nike

这让我很困惑。下面两段代码在逻辑上应该是等价的,但其中一段只在 GPU 上崩溃,而两段代码在 CPU 上都能正常运行。这是测试代码:

#include <cstdint>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

#include <CL/cl.hpp>

class Device
{
public:
cl::Platform platform_;
cl::Device device_;
cl::Context context_;
cl::CommandQueue queue_;

Device( void ) : platform_()
, device_()
, context_()
, queue_() {}

Device(int32_t platform, int32_t device) : platform_()
, device_()
, context_()
, queue_()
{
std::vector<cl::Platform> platforms;
cl::Platform::get(&platforms);
platform_ = platforms[platform];

std::vector<cl::Device> devices;
platform_.getDevices(CL_DEVICE_TYPE_GPU, &devices);
device_ = devices[device];

cl_context_properties properties[3] = {
CL_CONTEXT_PLATFORM,
(cl_context_properties)(platform_)(),
0
};

cl_int clErr = CL_SUCCESS;
context_ = cl::Context(device_, properties, NULL, NULL, &clErr);
queue_ = cl::CommandQueue(context_,device_,0,&clErr);
}
};

int main()
{
Device device(0,0);

cl::Program::Sources source;
std::string src =
"__kernel void Pointless(uint total, __global uint *data)"\
"{"\
" uint perStream=total/get_global_size(0);"\
" __global uint *dest=data+get_global_id(0)*perStream;"\
" for(uint i=0;i<perStream;i++)"\
" dest[i] = 1;"\
"}";

source.push_back({src.c_str(),src.length()});

cl_int clErr = CL_SUCCESS;
cl::Program program = cl::Program(device.context_,source,&clErr);
if (clErr != CL_SUCCESS)
{
std::cerr << "Failed to create program: " << clErr << std::endl;
return 1;
}

clErr = program.build({device.device_});
if(clErr != CL_SUCCESS)
{
std::cerr << "Failed to build program: " << clErr << std::endl;
std::cerr << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device.device_) << std::endl;
return 1;
}

uint32_t samples = 16*256;
cl::make_kernel<cl_uint,cl::Buffer> Pointless(cl::Kernel(program,"Pointless"));
cl::Buffer device_samples(device.context_,CL_MEM_READ_WRITE,sizeof(cl_uint)*samples);
Pointless(cl::EnqueueArgs(device.queue_, cl::NDRange(16)), samples, device_samples).wait();

std::vector<cl_uint> host_samples(samples);
device.queue_.enqueueReadBuffer(device_samples,CL_TRUE,0,sizeof(cl_uint)*samples,host_samples.data());

for (auto x: host_samples)
std::cout << x;
std::cout << std::endl;

return 0;
}

上面的代码会失败:我在调用 enqueueReadBuffer 时遇到了段错误。更有趣的是,它只在 GPU(Intel P4000)上失败;在 CPU(i3 3xxx)上运行没有问题(将 CL_DEVICE_TYPE_GPU 改为 CL_DEVICE_TYPE_CPU 即可在 CPU 上测试)。

现在下面的代码适用于两种设备类型。

#include <iostream>
#include <CL/cl.hpp>

int main()
{
std::vector<cl::Platform> platforms;
cl::Platform::get(&platforms);
cl::Platform platform = platforms[0];

std::vector<cl::Device> devices;
platform.getDevices(CL_DEVICE_TYPE_GPU, &devices);
cl::Device device = devices[0];

cl_context_properties properties[3] = {
CL_CONTEXT_PLATFORM,
(cl_context_properties)(platform)(),
0
};

cl_int clErr = CL_SUCCESS;
cl::Context context(device, properties, NULL, NULL, &clErr);

cl::CommandQueue queue(context,device,0,&clErr);

cl::Program::Sources source;
std::string src =
"__kernel void Pointless(uint total, __global uint *data)"\
"{"\
" uint perStream=total/get_global_size(0);"\
" __global uint *dest=data+get_global_id(0)*perStream;"\
" for(uint i=0;i<perStream;i++)"\
" dest[i] = 1;"\
"}";

source.push_back({src.c_str(),src.length()});

cl::Program program = cl::Program(context,source,&clErr);

clErr = program.build({device});
if(clErr != CL_SUCCESS)
{
std::cerr << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device) << std::endl;
}

uint32_t samples = 16*256;
cl::make_kernel<cl_uint,cl::Buffer> Pointless(cl::Kernel(program,"Pointless"));
cl::Buffer device_samples(context,CL_MEM_READ_WRITE,sizeof(cl_uint)*samples);
Pointless(cl::EnqueueArgs(queue, cl::NDRange(16)), samples, device_samples).wait();

std::vector<cl_uint> host_samples(samples);
queue.enqueueReadBuffer(device_samples,CL_TRUE,0,sizeof(cl_uint)*samples,host_samples.data());

for (auto x: host_samples)
std::cout << x;
std::cout << std::endl;

return 0;
}

显然我在这里遗漏了一些非常基本的东西。两段代码使用的都是 Intel ICD(我在这个系统上没有 AMD 设备)。

最佳答案

(刚开始发帖所以还不能评论)

我使用 Nvidia 的 OpenCL 实现(通过 Intel ICD 加载)测试了您的代码,C++ 编译器是 G++ 4.7.3。您的两个示例在 GPU 以及可用的 Intel CPU 上都运行良好。

因此几乎可以肯定该问题仅限于 Intel GPU 实现。

关于c++ - OpenCL的部分封装导致段错误,包括代码示例,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/19773943/

27 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com