gpt4 book ai didi

c++ - CUDA,Memcpy 中的 "illegal memory access was encountered"

转载 作者:搜寻专家 更新时间:2023-10-31 01:00:53 24 4
gpt4 key购买 nike

我有这个 cuda 文件:

#include "cuda.h"
#include "../../HandleError.h"
#include "Sphere.hpp"
#include <stdlib.h>
#include <CImg.h>

#define WIDTH 1280
#define HEIGHT 720
#define rnd(x) (x*rand()/RAND_MAX)
#define SPHERES_COUNT 5

using namespace cimg_library;

/*
 * Renders one pixel per thread by testing the pixel against every sphere
 * and keeping the colour of the hit with the largest returned depth value.
 *
 * Launch layout: 2D grid of 2D blocks. The image extent is taken from the
 * launch configuration itself (blockDim * gridDim), so the grid must cover
 * the image exactly -- there is no bounds check.
 *
 * bitmap: device-addressable buffer receiving 3 bytes per pixel (written at
 *         offset*3, offset*3+1, offset*3+2), i.e. at least
 *         3 * (blockDim.x*gridDim.x) * (blockDim.y*gridDim.y) bytes.
 * s:      array of SPHERES_COUNT spheres; must be a pointer the device can
 *         dereference.
 *
 * NOTE(review): INF and Sphere::hit() are declared outside this block
 * (presumably in Sphere.hpp); hit() appears to return a depth and to store
 * a shading scale in its third argument -- confirm against that header.
 */
__global__
void kernel(unsigned char* bitmap, Sphere* s)
{
    // blockDim/gridDim are unsigned. Convert the image extent to signed ints
    // once, so that the centre-relative coordinates below can go negative
    // instead of wrapping around in unsigned arithmetic.
    const int width  = (int)(blockDim.x * gridDim.x);
    const int height = (int)(blockDim.y * gridDim.y);

    // Map threadIdx/blockIdx to pixel position
    int x = threadIdx.x + blockIdx.x * blockDim.x;
    int y = threadIdx.y + blockIdx.y * blockDim.y;
    int offset = x + y * width;

    // Pixel position relative to the centre of the image
    float ox = x - width / 2;
    float oy = y - height / 2;

    // Background colour, used when no sphere beats the initial depth
    float r = 0.2f, g = 0.2f, b = 0.5f;
    float maxz = -INF;
    for (int i = 0; i < SPHERES_COUNT; i++) {
        float n, t = s[i].hit(ox, oy, &n);
        if (t > maxz) {
            // Scale the sphere's colour by the factor reported by hit()
            float fscale = n;
            r = s[i].r * fscale;
            g = s[i].g * fscale;
            b = s[i].b * fscale;
            maxz = t;
        }
    }

    // Store the colour as three consecutive bytes for this pixel
    bitmap[offset*3] = (int)(r * 255);
    bitmap[offset*3 + 1] = (int)(g * 255);
    bitmap[offset*3 + 2] = (int)(b * 255);
}

// Sphere table declared in device constant memory. main() fills it from the
// host with cudaMemcpyToSymbol.
__constant__ Sphere s[SPHERES_COUNT];

// Program entry point: builds SPHERES_COUNT random spheres, uploads them to
// the constant-memory symbol `s`, launches `kernel` over a WIDTH x HEIGHT
// grid of pixels, copies the result back and saves it as "render.bmp".
int main ()
{
// Create two CUDA events and record `start` on stream 0.
// NOTE(review): within this function `stop` is never recorded, no elapsed
// time is read, and neither event is destroyed.
cudaEvent_t start, stop;
HANDLE_ERROR(cudaEventCreate(&start));
HANDLE_ERROR(cudaEventCreate(&stop));
HANDLE_ERROR(cudaEventRecord(start, 0));

// Create the host image: WIDTH x HEIGHT, depth 1, 3 channels.
// NOTE(review): the axis permutation presumably reorders the buffer so it
// matches the 3-bytes-per-pixel layout the kernel writes -- confirm against
// the CImg documentation.
CImg<unsigned char> image(WIDTH, HEIGHT, 1, 3);
image.permute_axes("cxyz");

// Allocate a device buffer with the same number of bytes as the host image
unsigned char* dev_bitmap;
HANDLE_ERROR(cudaMalloc((void**)&dev_bitmap, image.size()*sizeof(unsigned char)));

// Generate the spheres in a temporary host array: random colour components
// scaled by 1.0, position components in roughly [-500, 500], and a radius in
// roughly [20, 120] (see the rnd macro).
Sphere* temp_s = (Sphere*)malloc(SPHERES_COUNT*sizeof(Sphere));
for (int i=0; i <SPHERES_COUNT; i++) {
temp_s[i].r = rnd(1.0f);
temp_s[i].g = rnd(1.0f);
temp_s[i].b = rnd(1.0f);
temp_s[i].x = rnd(1000.0f) - 500;
temp_s[i].y = rnd(1000.0f) - 500;
temp_s[i].z = rnd(1000.0f) - 500;
temp_s[i].radius = rnd(100.0f) + 20;
}

// Upload the whole array to the constant-memory symbol in a single copy,
// then release the host staging buffer
HANDLE_ERROR(cudaMemcpyToSymbol(s, temp_s, sizeof(Sphere)*SPHERES_COUNT));
free(temp_s);

// Generate a bitmap from the sphere data: 16x16 threads per block, one
// thread per pixel (WIDTH and HEIGHT are both multiples of 16).
// NOTE(review): `s` is the __constant__ symbol. In host code it is not a
// valid device pointer, so passing it as a kernel argument is the cause of
// the illegal memory access that surfaces at the cudaMemcpy below. The launch
// itself is not followed by any error check.
dim3 grids(WIDTH/16, HEIGHT/16);
dim3 threads(16, 16);
kernel<<<grids, threads>>>(dev_bitmap, s);

// Copy the bitmap back from the GPU for display. Errors raised while the
// kernel was running are reported by this call.
HANDLE_ERROR(cudaMemcpy(image.data(), dev_bitmap,
image.size()*sizeof(unsigned char),
cudaMemcpyDeviceToHost));

// Release the device buffer (return value not checked)
cudaFree(dev_bitmap);

// Permute the axes again before writing the image to disk
image.permute_axes("yzcx");
image.save("render.bmp");
}

它可以正常编译,但是在执行时我得到了这个错误:

an illegal memory access was encountered in main.cu at line 82

也就是这里:

    //Copy the bitmap back from the GPU for display
HANDLE_ERROR(cudaMemcpy(image.data(), dev_bitmap,
image.size()*sizeof(unsigned char),
cudaMemcpyDeviceToHost));

我不明白为什么...我知道如果删除这个:

  bitmap[offset*3] = (int)(r * 255);
bitmap[offset*3 + 1] = (int)(g * 255);
bitmap[offset*3 + 2] = (int)(b * 255);

错误就不会被报告,所以我认为这可能是一个索引越界错误,只是到后面才被报告出来。但是我有这个程序的另一个版本,它没有使用常量内存,使用完全相同的内核函数却能正常工作……

最佳答案

这里有两个问题。第一个是这样的:

__constant__ Sphere s[SPHERES_COUNT];

int main ()
{
......

kernel<<<grids, threads>>>(dev_bitmap, s);

......

在主机代码中,s 是一个主机内存变量,它为 CUDA 运行时提供了一个句柄来连接设备常量内存符号。它不包含有效的设备指针,不能传递给内核调用。结果是一个无效的内存访问错误。

你可以这样做:

__constant__ Sphere s[SPHERES_COUNT];

int main ()
{
......

Sphere *d_s;
cudaGetSymbolAddress((void **)&d_s, s);
kernel<<<grids, threads>>>(dev_bitmap, d_s);

......

这样会通过符号查找取得 s 的设备地址,把这个地址传递给内核是合法的。然而,这就引出了第二个问题:GPU 依赖于编译器发出特定指令,才能通过常量缓存访问内存。设备编译器只有在检测到内核中直接访问 __constant__ 变量时才会发出这些指令,而通过指针访问时它无法做到这一点。您可以在这个 Stack Overflow 问答中看到更多关于编译器如何为常量变量访问生成代码的信息。

关于c++ - CUDA,Memcpy 中的 "illegal memory access was encountered",我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/30187889/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com