gpt4 book ai didi

c++ - 简单 CUDA 代码中的数值错误

转载 作者:太空狗 更新时间:2023-10-29 23:20:08 25 4
gpt4 key购买 nike

我刚开始用下面的代码试验 CUDA

#include "macro.hpp"
#include <algorithm>
#include <iostream>
#include <cstdlib>

//#define double float
//#define double int

// Generator callback for std::generate: returns the next rand() value reduced
// modulo 1000. The value is routed through a cast to `double` (the element
// type of the arrays, which the commented-out #defines above can switch)
// before being returned as int.
int RandomNumber()
{
const int bounded = rand() % 1000;
return static_cast<double>(bounded);
}

// Kernel: element-wise sum of three arrays, result[i] = a[i] + b[i] + c[i].
//
//   a, b, c : input arrays, read at index i.
//   result  : output array, written at index i.
//   n       : pointer to the element count; it is dereferenced inside the
//             kernel, so it has to point to memory the device can read.
//
// Each thread works on the index i = blockIdx.x; threadIdx is not used, which
// matches the one-thread-per-block launch in main().
__global__ void sum3(double const* a,
double const* b,
double const* c,
double * result,
unsigned const* n)
{
unsigned i = blockIdx.x;
// NOTE(review): i is never modified inside this loop, so whenever the
// condition is true on entry the loop cannot terminate. A single bounds check
// (if) or an increment of i by the grid size is presumably what was intended.
while(i < (*n))
{
result[i] = (a[i] + b[i] + c[i]);
}
};


// Host driver: fills three host arrays with values from RandomNumber, copies
// them to the device, launches sum3, copies the result back and (when PRINT
// is defined) prints the inputs next to the computed sums.
int main()
{

// Number of elements in every array (1e2 == 100).
static unsigned size = 1e2;
// Fixed seed so the generated input is the same on every run.
srand(0);
// Host-side buffers.
double* a = new double[size];
double* b = new double[size];
double* c = new double[size];
double* result = new double[size];

std::generate(a, a+size, RandomNumber);
std::generate(b, b+size, RandomNumber);
std::generate(c, c+size, RandomNumber);

// Device-side counterparts of a, b, c and result.
double* ad, *bd,* cd;
double* resultd;

// Device-side storage for the element count read by the kernel through *n.
unsigned * sized;
// The cudaMalloc return codes are only printed (0 is cudaSuccess); a failure
// does not stop the program.
std::cout << cudaMalloc((void**) &ad, size*sizeof(double)) << std::endl;
std::cout << cudaMalloc((void**) &bd, size*sizeof(double)) << std::endl;
std::cout << cudaMalloc((void**) &cd, size*sizeof(double)) << std::endl;
std::cout << cudaMalloc((void**) &resultd, size*sizeof(double)) << std::endl;
std::cout << cudaMalloc((void**) &sized, sizeof(unsigned)) << std::endl;

// NOTE(review): the destination argument is &sized, i.e. the address of the
// host pointer variable, not the device address stored in it (sized). As
// written, the device buffer allocated above is not the target of this copy,
// so the kernel's *n is never set to size. The return status is also ignored.
cudaMemcpy((void**) &sized, &size, sizeof(unsigned), cudaMemcpyHostToDevice);

// print_array(a, size);
// The same upload / launch / download sequence is repeated 1000 times; the
// inputs do not change between iterations.
for(int i = 0; i < 1000; ++i)
{
cudaMemcpy(ad, a, size*sizeof(double), cudaMemcpyHostToDevice);
cudaMemcpy(bd, b, size*sizeof(double), cudaMemcpyHostToDevice);
cudaMemcpy(cd, c, size*sizeof(double), cudaMemcpyHostToDevice);
// Launch with `size` blocks of one thread each, so blockIdx.x ranges over the
// element indices. NOTE(review): neither the launch nor the surrounding
// cudaMemcpy calls have their status checked.
sum3<<<size, 1>>>(ad, bd, cd, resultd, sized);
cudaMemcpy(result, resultd, size*sizeof(double), cudaMemcpyDeviceToHost);
}

#ifdef PRINT
// One line per element: a[i], b[i], c[i] and the sum copied back from the device.
for( int i = 0; i < size; ++i)
{
std::cout << a[i] << ", "<< b[i] <<"," << c[i] << "," << result[i]<< std::endl;
}
#endif

// Release device buffers. NOTE(review): sized is allocated above but is not
// freed here.
cudaFree(ad);
cudaFree(bd);
cudaFree(cd);
cudaFree(resultd);

// Release host buffers.
delete[] a;
delete[] b;
delete[] c;
delete[] result;

return 0;
}

在 MacBook Pro 上编译没有任何问题。但是,当我尝试运行它时,得到了如下输出:

930, 22,538,899
691, 832,205,23
415, 655,148,120
872, 876,481,985
761, 909,583,619
841, 104,466,917
610, 635,911,52
//more useless numbers

我已经将我的样本与 Cuda By Example 中的样本进行了比较,除了类型之外,我没有发现任何主要差异。感谢任何有关此问题的指示。

最佳答案

while(i < (*n))
{
result[i] = (a[i] + b[i] + c[i]);
}

这是错的(无限循环):循环体内从未修改 i,所以一旦条件成立,循环就永远不会结束。

这是错误的

cudaMemcpy((void**) &sized, &size, sizeof(unsigned), cudaMemcpyHostToDevice);

&sized 是主机端指针变量本身的地址,而不是该指针所保存的设备地址;这里应当传入 sized。

单个数值可以直接作为内核参数按值传递给设备,无需为它单独分配设备内存,所以把参数声明为

unsigned size

检查您的 cuda 函数的返回状态,http://www.drdobbs.com/high-performance-computing/207603131

关于c++ - 简单 CUDA 代码中的数值错误,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/3644271/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com