gpt4 book ai didi

c++ - 尝试将结构复制到设备内存时 CUDA 参数无效 (cudaMemcpy)

转载 作者:塔克拉玛干 更新时间:2023-11-03 06:43:51 26 4
gpt4 key购买 nike

我试图弄清楚我应该如何创建一个结构/类来发送到设备,但我一直收到这个“无效参数”CUDA 错误。我做了一个显示错误的小例子:

#include <iostream>
#include <cstdio>
using namespace std;

#define CUDA_WARN(XXX) /* prints a message to cerr when XXX is not cudaSuccess; XXX is expanded twice below */ \
do { if (XXX != cudaSuccess) cerr << "CUDA Error: " << \
cudaGetErrorString(XXX) << ", at line " << __LINE__ \
<< endl; cudaDeviceSynchronize(); } while (0) // cudaDeviceSynchronize() is outside the brace-less if, so it runs on every use

struct P { // pair of doubles that main() tries to copy between host and device
double x,y;
__host__ __device__ void init(const double &a, const double &b) { // compiled for both host and device callers
x = a; y = b; } // assigns x from a first, then y from b
};

int main(int argc, char **argv) // failing reproduction: every cudaMemcpy below reports "invalid argument"
{
P hP, hQ, dP; // dP is declared as a plain P object on the host, not as a P* pointer
cout << "Size of P: " << sizeof(P) << endl;
CUDA_WARN(cudaMalloc((void**) &dP, sizeof(P))); // the cast makes cudaMalloc treat the storage of the host object dP as a pointer slot
printf("dP: %p\n", &dP); // prints the host address of the object dP (not a device address)
hP.init(1.2,-2.1);
hQ.init(0.,0.);
CUDA_WARN(cudaMemcpy(&dP, &hP, sizeof(P), cudaMemcpyHostToDevice)); // &dP is a host address passed as the device destination
CUDA_WARN(cudaMemcpy(&hQ, &dP, sizeof(P), cudaMemcpyDeviceToHost)); // &dP is a host address passed as the device source
cout << "Copy back: " << hQ.x << "\t" << hQ.y << endl;
dP.init(3.,3.); // ordinary host-side call on the host object dP; no kernel is launched
CUDA_WARN(cudaMemcpy(&hP, &dP, sizeof(P), cudaMemcpyDeviceToHost)); // same host-address-as-device-source problem as above
cout << "Copy new: " << hP.x << "\t" << hP.y << endl;
return 0;
}

我使用以下命令进行编译(我的显卡是 Tesla C2050):

nvcc -arch sm_20 -o exec file.cu

我得到的结果是:

Size of P: 16
dP: 0x7fff82d4b7b0
CUDA Error: invalid argument, at line 24
CUDA Error: invalid argument, at line 25
Copy back: 0 0
CUDA Error: invalid argument, at line 28
Copy new: 1.2 -2.1


------------------
(program exited with code: 0)
Press return to continue

如果你能帮助我,谢谢你们!

====== 在@talonmies、@JackOLantern、@Robert Crovella 的评论之后 =======

谢谢大家!你们真的帮了大忙!根据大家的评论,我修正了代码,现在它可以正常工作了。在此记录一下最终的解决方案:

#include <iostream>
#include <cstdio>
using namespace std;

/*
 * CUDA_WARN(XXX): evaluates the CUDA runtime expression XXX exactly once and,
 * when the result is not cudaSuccess, prints the error string and the source
 * line to cerr. Execution continues afterwards (this is a warning, not an
 * abort).
 *
 * The status is captured in a local so that a failing call is not executed a
 * second time just to obtain its error string, and so that calls which clear
 * state when read (e.g. cudaGetLastError()) report the right error.
 *
 * cudaDeviceSynchronize() is deliberately kept outside the `if`: it runs after
 * every checked call, as in the original macro.
 */
#define CUDA_WARN(XXX) \
    do { \
        const cudaError_t cuda_warn_status_ = (XXX); \
        if (cuda_warn_status_ != cudaSuccess) \
            cerr << "CUDA Error: " << cudaGetErrorString(cuda_warn_status_) \
                 << ", at line " << __LINE__ << endl; \
        cudaDeviceSynchronize(); \
    } while (0)

// Pair of doubles that is copied between host and device memory.
struct P {
    double x, y;

    // Stores a into x and then b into y; compiled for host and device callers.
    __host__ __device__ void init(const double &a, const double &b)
    {
        this->x = a;
        this->y = b;
    }
};

/*
 * Kernel: calls P::init(a, b) on the object that p points to.
 * main() launches it as <<<1, 1>>> and passes the pointer obtained from
 * cudaMalloc. Every launched thread performs the same write.
 */
__global__ void dev_P_init(P *p, double a, double b)
{
    P &target = *p;
    target.init(a, b);
}

/*
 * Copies a P to device memory and back, then re-initialises the device copy
 * with the dev_P_init kernel and reads it back again.
 *
 * Returns 0 normally, or 1 when the device allocation fails (nothing else in
 * the program can work without it).
 */
int main(int argc, char **argv)
{
    P hP, hQ;       // host-side objects
    P *dP = NULL;   // receives the device address from cudaMalloc

    cout << "Size of P: " << sizeof(P) << endl;

    // Capture each status in a local before handing it to CUDA_WARN, so the
    // CUDA call itself is executed exactly once.
    cudaError_t allocStatus = cudaMalloc((void**) &dP, sizeof(P));
    CUDA_WARN(allocStatus);
    if (allocStatus != cudaSuccess)
        return 1;   // dP was never set; do not copy to it or launch on it

    // &dP is the host address of the pointer variable itself; the device
    // address is the value stored in dP.
    printf("dP: %p\n", (void*) &dP);

    hP.init(1.2,-2.1);
    hQ.init(0.,0.);
    CUDA_WARN(cudaMemcpy(dP, &hP, sizeof(P), cudaMemcpyHostToDevice));
    CUDA_WARN(cudaMemcpy(&hQ, dP, sizeof(P), cudaMemcpyDeviceToHost));
    cout << "Copy back: " << hQ.x << "\t" << hQ.y << endl;

    dev_P_init<<< 1, 1 >>>(dP, 3., 3.);
    // A kernel launch returns no status; launch errors must be fetched
    // explicitly with cudaGetLastError().
    cudaError_t launchStatus = cudaGetLastError();
    CUDA_WARN(launchStatus);

    // cudaMemcpy is blocking, so the kernel has finished before hP is read.
    CUDA_WARN(cudaMemcpy(&hP, dP, sizeof(P), cudaMemcpyDeviceToHost));
    cout << "Copy new: " << hP.x << "\t" << hP.y << endl;

    // Release the device allocation instead of leaking it until process exit.
    cudaError_t freeStatus = cudaFree(dP);
    CUDA_WARN(freeStatus);
    return 0;
}

并更正后的输出:

Size of P: 16
dP: 0x7fff6fa2e498
Copy back: 1.2 -2.1
Copy new: 3 3


------------------
(program exited with code: 0)
Press return to continue

最佳答案

正如 @talonmies 已经指出的,&dP 不是有效的设备指针。事实上,在您的代码中 dP 是一个驻留在主机上的变量(类型为 P 的对象),因此它的地址 &dP 指向主机内存空间。相反,当 dP 被声明为指针(P *dP)时,cudaMalloc 接收的是该指针变量的地址(&dP),并把分配到的设备内存地址写入其中;此后 dP 的值指向设备内存空间,应当把 dP 本身(而不是 &dP)传给 cudaMemcpy。

这是您的代码的正确版本:

#include <iostream>
#include <cstdio>
using namespace std;

/*
 * CUDA_WARN(XXX): evaluates the CUDA runtime expression XXX exactly once and,
 * when the result is not cudaSuccess, prints the error string and the source
 * line to cerr. Execution continues afterwards (this is a warning, not an
 * abort).
 *
 * The status is captured in a local so that a failing call is not executed a
 * second time just to obtain its error string.
 *
 * cudaDeviceSynchronize() is deliberately kept outside the `if`: it runs after
 * every checked call, as in the original macro.
 */
#define CUDA_WARN(XXX) \
    do { \
        const cudaError_t cuda_warn_status_ = (XXX); \
        if (cuda_warn_status_ != cudaSuccess) \
            cerr << "CUDA Error: " << cudaGetErrorString(cuda_warn_status_) \
                 << ", at line " << __LINE__ << endl; \
        cudaDeviceSynchronize(); \
    } while (0)

// Pair of doubles that is copied between host and device memory.
struct P {
    double x, y;

    // Stores a into x and then b into y; compiled for host and device callers.
    __host__ __device__ void init(const double &a, const double &b)
    {
        this->x = a;
        this->y = b;
    }
};

/*
 * Minimal corrected example: allocates one P on the device through a P*
 * pointer, copies a host P to it, and copies it back into two host objects.
 *
 * Returns 0 normally, or 1 when the device allocation fails.
 */
int main(int argc, char **argv)
{
    P *dP = NULL;   // receives the device address from cudaMalloc
    P hP, hQ;

    // Give the host objects defined contents before they are copied, so the
    // round trip does not move indeterminate bytes.
    hP.init(1.2, -2.1);
    hQ.init(0., 0.);

    // Capture the status in a local before handing it to CUDA_WARN, so the
    // allocation is executed exactly once.
    cudaError_t allocStatus = cudaMalloc((void**) &dP, sizeof(P));
    CUDA_WARN(allocStatus);
    if (allocStatus != cudaSuccess)
        return 1;   // dP was never set; the copies below cannot succeed

    // dP (the pointer value), not &dP, is the device-side address.
    CUDA_WARN(cudaMemcpy(dP, &hP, sizeof(P), cudaMemcpyHostToDevice));
    CUDA_WARN(cudaMemcpy(&hQ, dP, sizeof(P), cudaMemcpyDeviceToHost));
    CUDA_WARN(cudaMemcpy(&hP, dP, sizeof(P), cudaMemcpyDeviceToHost));

    // Release the device allocation instead of leaking it until process exit.
    cudaError_t freeStatus = cudaFree(dP);
    CUDA_WARN(freeStatus);

    return 0;
}

关于c++ - 尝试将结构复制到设备内存时 CUDA 参数无效 (cudaMemcpy),我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/24460507/

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com