gpt4 book ai didi

java - 访问 CUDA 内核中的变量时出现 CUDA_ERROR_ILLEGAL_ADDRESS

转载 作者:行者123 更新时间:2023-12-01 21:30:40 26 4
gpt4 key购买 nike

我在尝试运行用于计算 Buddhabrot 分形轨道的内核时遇到 CUDA_ERROR_ILLEGAL_ADDRESS 异常。

// Kernel "exec": each thread reads one complex point c = (cR, cI) from
// inputR/inputI, iterates z -> z^2 + c starting from z = (0, 0) for at most
// `iterations` steps while recording every visited (x, y), and, as soon as
// the next point satisfies |z|^2 > 4, increments one `output` cell per
// recorded point and returns.
//
// Parameters:
//   iterations  upper bound on the number of loop steps per thread
//   size        scale factor and row stride used to turn (x, y) into an index
//   inputR      real parts of c, read at the global thread index
//   inputI      imaginary parts of c, read at the global thread index
//   output      int counters, addressed as curX + size * curY
extern "C"

__global__ void exec(int iterations, int size,
float* inputR, float* inputI, // Real/Imaginary input
int* output // Output image in one dimension
) {

// Global 1-D thread index. NOTE(review): it is not checked against the length
// of inputR/inputI -- confirm the launch never creates more threads than inputs.
int i = blockIdx.x * blockDim.x + threadIdx.x;

float cR = inputR[i];
float cI = inputI[i];

// Current point of the orbit, starting at the origin.
float x = 0;
float y = 0;

// Orbit history, fixed at 1000 entries per axis.
// NOTE(review): the loop below writes outX[j]/outY[j] for every j < iterations,
// so any `iterations` value above 1000 can write past the end of these arrays --
// confirm the host never passes more than 1000.
float outX[1000];
float outY[1000];

for (int j = 0; j < iterations; j++) {
// Record the current point before advancing.
outX[j] = x;
outY[j] = y;

// Real and imaginary parts of z^2 + c.
float xNew = (x * x) - (y * y) + cR;
float yNew = (2 * x * y) + cI;

// Escape test on the next point: |z_new|^2 > 4.
if (xNew * xNew + yNew * yNew > 4) {
// Replay recorded entries 1 .. j-1 (entry 0, the origin, and entry j are skipped).
for (int k = 1; k < j; k++) {
// (v + 2) * size / 4 sends v = -2 to 0 and v = 2 to size; the int
// conversion truncates the fractional part.
int curX = (outX[k] + 2 ) * size / 4;
int curY = (outY[k] + 2 ) * size / 4;

// Flatten (curX, curY) using `size` as the row stride.
int idx = curX + size * curY;

// NOTE(review): idx is not range-checked against the output length, and this
// is a plain read-modify-write while other threads may hit the same idx --
// confirm whether a bounds check and atomicAdd are intended.
output[idx]++; // <- exception here
}
// The orbit has escaped; this thread is finished.
return;
}

x = xNew;
y = yNew;
}
// Falling out of the loop means the point never escaped within `iterations`
// steps; nothing is written to output in that case.
}

我现在已经尝试了多种方法,但错误似乎并不是源于数组,这与我最初的想法相反。例如,

output[0] = 0;

这样可以正常工作。然而,当我尝试调试 idx 时(请记住,我最初认为错误与数组有关),我发现我既不能像这样把 idx 赋值给输出

output[0] = idx;

也不能在 printf 语句中使用它

if (i == 0) {
printf("%d\n", idx);
}

我对 curX 和 curY 也做了同样的尝试,它们同样无法工作,但例如 cR 就可以正常使用而不会出现任何错误。在最内层循环中赋值的变量似乎有问题(k 也同样无法使用),因此我尝试在函数开头、所有循环之外声明 idx,但无济于事,还是同样的错误。

堆栈跟踪:

Exception in thread "main" jcuda.CudaException: CUDA_ERROR_ILLEGAL_ADDRESS
at jcuda.driver.JCudaDriver.checkResult(JCudaDriver.java:330)
at jcuda.driver.JCudaDriver.cuCtxSynchronize(JCudaDriver.java:1938)
at fractal.Buddhabrot.<init>(Buddhabrot.java:96)
at controller.Controller.<init>(Controller.java:10)
at Main.main(Main.java:8)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:497)
at com.intellij.rt.execution.application.AppMain.main(AppMain.java:144)

常量:

block size            512*1*1
grid size 64 *1*1
iterations 1000
size 256
inputR, inputI length 64*512
output length 256*256

MCVE:

import jcuda.Pointer;
import jcuda.Sizeof;
import jcuda.driver.*;

import java.io.File;
import java.util.Random;

import static jcuda.driver.JCudaDriver.*;

/**
 * Minimal reproduction driver: loads the compiled kernel from "Buddha.ptx",
 * repeatedly uploads batches of random complex points, launches the "exec"
 * kernel on each batch, and finally copies the accumulated output back to
 * the host.
 */
public class Stackoverflow {
// Passed to the kernel as `size`; the output buffer holds SIZE * SIZE ints.
public static final int SIZE = 256;
// Upper bound of the outer batch loop, which advances by SIM_THREADS per batch.
public static final long NUM_POINTS = 128 * 128 * 128;
// Passed to the kernel as `iterations`.
// NOTE(review): the kernel shown with this code keeps its orbit in
// 1000-element arrays; this value is 10000 -- confirm the two agree.
public static final int ITERATIONS = 10000;

// Threads per block used for the launch.
public static final int BLOCK_SIZE = 512;
// Number of points (and therefore threads) processed per launch.
public static final int SIM_THREADS = BLOCK_SIZE * 64;

public static final Random random = new Random();

/**
 * Sets up the driver API, allocates device buffers, and runs the batch loop.
 *
 * @param args unused
 */
public static void main(String[] args) {
File ptxFile = new File("Buddha.ptx");

// Make JCuda throw CudaException on any non-success result code.
setExceptionsEnabled(true);
cuInit(0);

CUdevice device = new CUdevice();
cuDeviceGet(device, 0);

CUcontext context = new CUcontext();
cuCtxCreate(context, 0, device);

CUmodule module = new CUmodule();
cuModuleLoad(module, ptxFile.getAbsolutePath());

// Look up the kernel entry point by its extern "C" name.
CUfunction function = new CUfunction();
cuModuleGetFunction(function, module, "exec");

// Size of the buffer that backs device-side printf.
cuCtxSetLimit(CUlimit.CU_LIMIT_PRINTF_FIFO_SIZE, 4096);

// Host-side staging arrays: one real and one imaginary part per thread.
float[] inR = new float[SIM_THREADS];
float[] inI = new float[SIM_THREADS];

int[] out = new int[SIZE * SIZE];

CUdeviceptr deviceInputR = new CUdeviceptr();
cuMemAlloc(deviceInputR, inR.length * Sizeof.FLOAT);
CUdeviceptr deviceInputI = new CUdeviceptr();
cuMemAlloc(deviceInputI, inI.length * Sizeof.FLOAT);

// NOTE(review): nothing in this method initializes deviceOutput before the
// kernel increments it, and none of the three allocations is freed --
// confirm whether a memset and cuMemFree calls are needed.
CUdeviceptr deviceOutput = new CUdeviceptr();
cuMemAlloc(deviceOutput, out.length * Sizeof.INT);

// One kernel launch per batch of SIM_THREADS points.
for (long i = 0; i < NUM_POINTS; i += SIM_THREADS) {
// Fill the batch with random coordinates in [-2, 2).
for (int j = 0; j < SIM_THREADS; j++) {
inR[j] = random.nextFloat() * 4f - 2f;
inI[j] = random.nextFloat() * 4f - 2f;
}

System.out.println("GPU START");

cuMemcpyHtoD(deviceInputR, Pointer.to(inR), inR.length * Sizeof.FLOAT);
cuMemcpyHtoD(deviceInputI, Pointer.to(inI), inI.length * Sizeof.FLOAT);

// Kernel arguments, in the order of the kernel's parameter list:
// iterations, size, inputR, inputI, output.
Pointer kernelParameters = Pointer.to(
Pointer.to(new int[]{ITERATIONS}),
Pointer.to(new int[]{SIZE}),
Pointer.to(deviceInputR),
Pointer.to(deviceInputI),
Pointer.to(deviceOutput)
);

// Enough blocks to cover SIM_THREADS threads (64 with the constants above).
int gridSize = (int) Math.ceil(((double) SIM_THREADS) / BLOCK_SIZE);

// 1-D grid and 1-D blocks, no dynamic shared memory, default stream.
cuLaunchKernel(function,
gridSize, 1, 1,
BLOCK_SIZE, 1, 1,
0, null,
kernelParameters, null
);

// Wait for the kernel to finish; per the stack trace in the question, this
// is the call that reports CUDA_ERROR_ILLEGAL_ADDRESS.
cuCtxSynchronize();

System.out.println("GPU END");
}

// Copy the accumulated counters back to the host after all batches.
cuMemcpyDtoH(Pointer.to(out), deviceOutput, out.length * Sizeof.INT);
}
}

最佳答案

在“常量”部分中,您已经指出了这一点:

iterations            1000

但是在你的java代码中(在你提供MCVE之后)你有这个:

public static final int ITERATIONS = 10000;

这显然会导致内核代码的这一部分崩溃:

float outX[1000];
float outY[1000];

for (int j = 0; j < iterations; j++) {
outX[j] = x;
outY[j] = y;

因为当 iterations 为 10000 时,数组索引会超出范围。(此循环的实际迭代次数取决于数据,但按照目前的写法,对于某些输入数据,循环会超过 1000 次。)

当我改变这个:

public static final int ITERATIONS = 10000;

改为这样:

public static final int ITERATIONS = 1000;

你的代码对我来说运行正确:

$ cuda-memcheck java -cp ".:jcuda-0.7.5b.jar" so1
========= CUDA-MEMCHECK
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
GPU START
GPU END
========= ERROR SUMMARY: 0 errors
$

关于java - 访问 CUDA 内核中的变量时出现 CUDA_ERROR_ILLEGAL_ADDRESS,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/37489091/

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com