gpt4 book ai didi

在调用函数 "bad"后,CUDA 无法再将数据从设备复制到主机

转载 作者:太空宇宙 更新时间:2023-11-04 08:46:06 25 4
gpt4 key购买 nike

我正在测试一段代码,其中的内核用于对两个指针所指向的两个值做简单的求和。

调用内核 add 之后,我无法再把指针指向的数据从主机复制到设备、再从设备复制回主机,即使内核里没有对这些指针做任何操作也是如此。但是当我注释掉调用该内核的语句时,就能得到正确的结果。代码如下:

#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>

// Kernel: loads the int behind `a` and the int behind `b`, then stores
// (a-value minus b-value) through `c`. Note that, despite its name, the
// kernel computes a difference rather than a sum. No thread indexing is
// used, so every launched thread performs the same single write.
__global__ void add(int *a, int *b, int *c)
{
    const int lhs = *a;
    const int rhs = *b;
    *c = lhs - rhs;
}

// Demonstration program for the problem under discussion: it launches `add`
// with pointers obtained from malloc, then attempts an unrelated
// host -> device -> host round trip and prints what came back.
// None of the CUDA return codes are checked until the very end.
int main(void)
{
int result, x_val, y_val; // Host variables that receive values copied back from the device.
int *x_host, *y_host; // Host buffers (allocated with malloc below).
int *tempGPU, *x_dev, *y_dev; // Device buffers (allocated with cudaMalloc below).

x_host = (int *)malloc(sizeof(int));
y_host = (int *)malloc(sizeof(int));

*x_host = 8;
*y_host = 4;

// Presumably sentinel values, so that a copy-back which never happens is
// visible in the final printout.
x_val = -5;
y_val = -10;

printf("\n x = %d, y = %d\n", *x_host, *y_host);

cudaMalloc( (void **)&tempGPU, sizeof(int) );

// The faulty statement: x_host and y_host come from malloc (host memory),
// yet the kernel dereferences them on the device. Only tempGPU is a device pointer.
add<<<1,1>>> (x_host, y_host, tempGPU);

// Return value ignored, so a failure here goes unnoticed and `result`
// may be printed without ever having been written.
cudaMemcpy(&result, tempGPU, sizeof(int), cudaMemcpyDeviceToHost);

printf("\n x_host - y_host = %d\n", result);

// Second experiment, independent of the kernel's arguments: allocate two
// device ints and round-trip fresh host values through them.
cudaMalloc( (void **)&x_dev, sizeof(int) );
cudaMalloc( (void **)&y_dev, sizeof(int) );

*x_host = 6;
*y_host = 20;

cudaMemcpy(x_dev, x_host, sizeof(int), cudaMemcpyHostToDevice);
cudaMemcpy(y_dev, y_host, sizeof(int), cudaMemcpyHostToDevice);

cudaMemcpy(&x_val, x_dev, sizeof(int), cudaMemcpyDeviceToHost);
cudaMemcpy(&y_val, y_dev, sizeof(int), cudaMemcpyDeviceToHost);

// If the round trip worked, x_val/y_val match *x_host/*y_host (6 and 20);
// otherwise they still hold the values assigned above (-5 and -10).
printf("\n x_host = %d, y_host = %d\n", *x_host, *y_host);
printf("\n x_val = %d, y_val = %d\n", x_val, y_val);

// Only tempGPU is released; x_dev, y_dev, x_host and y_host are not freed
// anywhere in this function.
cudaFree( tempGPU );

// The single place where an error status is inspected.
printf( "\nCUDA: %s\n", cudaGetErrorString(cudaGetLastError()) );

return 0;

}

我知道该函数需要的是在设备上分配的指针,但为什么这样的错误会导致之后无法正常使用 cudaMemcpy?为什么当我注释掉下面这一行时:

add<<<1,1>>> (x_host, y_host, tempGPU);

我得到了正确的结果。谢谢。

最佳答案

您的问题在于 x_host 和 y_host 是指向主机内存空间的指针,而 __global__ add 函数需要的是指向设备内存空间的指针。按照您代码目前的写法,add 会错误地把 x_host 和 y_host 当作设备内存指针来解引用。

正如 Farzad 所指出的,如果按照 "What is the canonical way to check for errors using the CUDA runtime API?" 中介绍的方式做了适当的 CUDA 错误检查,您本可以自己发现这个错误。

以下是通过适当的 CUDA 错误检查修复的代码。

#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>

// Wraps a CUDA runtime call (or any expression yielding cudaError_t) and
// reports the call site on failure. The do/while(0) form makes the macro act
// as a single statement, so it is safe inside an unbraced if/else.
#define gpuErrchk(ans) do { gpuAssert((ans), __FILE__, __LINE__); } while (0)

// Checks a CUDA status code.
//   code  - status returned by a CUDA runtime call
//   file  - source file of the call site (normally __FILE__)
//   line  - source line of the call site (normally __LINE__)
//   abort - when true (the default) the process exits with `code` as its
//           exit status after printing the message; when false the error is
//           only reported and execution continues
// Does nothing when code == cudaSuccess.
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
    if (code != cudaSuccess)
    {
        fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
        // exit() never returns, so nothing may usefully follow it here.
        if (abort) exit(code);
    }
}

// Kernel: writes the difference of the ints referenced by `a` and `b`
// (first minus second) into the int referenced by `c`. The name says "add",
// but the operation performed is a subtraction. All three arguments are
// dereferenced on the device.
__global__ void add(int *a, int *b, int *c)
{
    const int minuend = a[0];
    const int subtrahend = b[0];
    c[0] = minuend - subtrahend;
}

// Corrected driver: copies two host ints into device buffers, runs `add` on
// the device pointers, copies the result back and prints it. Every CUDA call
// is wrapped in gpuErrchk so a failure is reported at the call that caused it.
int main(void)
{
    int *x_host = (int *)malloc(sizeof(int));
    int *y_host = (int *)malloc(sizeof(int));

    // malloc may return NULL; bail out before dereferencing.
    if (x_host == NULL || y_host == NULL)
    {
        fprintf(stderr, "host allocation failed\n");
        free(x_host);   // free(NULL) is a no-op
        free(y_host);
        return EXIT_FAILURE;
    }

    *x_host = 8;
    *y_host = 4;

    // Device-side storage for the result and both operands.
    int *tempGPU = NULL;
    int *x_dev = NULL;
    int *y_dev = NULL;
    gpuErrchk(cudaMalloc((void**)&tempGPU, sizeof(int)));
    gpuErrchk(cudaMalloc((void**)&x_dev, sizeof(int)));
    gpuErrchk(cudaMalloc((void**)&y_dev, sizeof(int)));

    // Operands must live in device memory before the kernel reads them.
    gpuErrchk(cudaMemcpy(x_dev, x_host, sizeof(int), cudaMemcpyHostToDevice));
    gpuErrchk(cudaMemcpy(y_dev, y_host, sizeof(int), cudaMemcpyHostToDevice));

    int result;

    add<<<1,1>>> (x_dev, y_dev, tempGPU);
    gpuErrchk(cudaPeekAtLastError());     // launch-time errors
    gpuErrchk(cudaDeviceSynchronize());   // errors raised while the kernel ran

    gpuErrchk(cudaMemcpy(&result, tempGPU, sizeof(int), cudaMemcpyDeviceToHost));

    printf("\n x_host - y_host = %d\n", result);

    gpuErrchk(cudaFree(x_dev));
    gpuErrchk(cudaFree(y_dev));
    gpuErrchk(cudaFree(tempGPU));

    // Release the host buffers as well.
    free(x_host);
    free(y_host);

    // Blocks until a character is read from stdin before the program exits.
    getchar();

    return 0;
}

关于在调用函数 "bad"后,CUDA 无法再将数据从设备复制到主机,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/21643368/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com