gpt4 book ai didi

cuda - 无效的设备符号 cudaMemcpyFromSymbol CUDA

转载 作者:行者123 更新时间:2023-12-02 09:21:38 24 4
gpt4 key购买 nike

我想在 CUDA 中计算数组所有元素的总和,于是写出了下面这段代码。它可以顺利编译,没有任何错误,但结果始终为零,而且 cudaMemcpyFromSymbol 返回了 "invalid device symbol"(无效的设备符号)错误。我不能使用 Thrust 或 cuBLAS 等任何库。

// Compile-time configuration for the snippet.
// TRIALS_PER_THREAD is not referenced anywhere in the code shown here.
#define TRIALS_PER_THREAD 4096
// Grid and block dimensions used by the ArraySum launch in main();
// their product is also the element count passed to cudaMalloc there.
#define NUM_BLOCKS 256
#define NUM_THREADS 256
// Pointer that main() fills in with cudaMalloc and hands to ArraySum.
double *dev;
// Device-side accumulator that ArraySum adds into; starts at 0.
// NOTE(review): volatile keeps accesses from being cached in registers, but it
// does not make the read-modify-write in ArraySum atomic.
__device__ volatile double pi_gpu = 0;

// Kernel: each thread adds one element of `array` into the global accumulator
// pi_gpu.
//   array - buffer indexed by the flat global thread index; no length is
//           passed and no bounds check is done, so it must hold at least
//           gridDim.x * blockDim.x elements.
// NOTE(review): the update below is a plain load/add/store on a single
// __device__ variable performed by every launched thread, with no atomic or
// reduction, so concurrent threads can overwrite each other's contributions.
__global__ void ArraySum(double *array)

{
// Flat 1D global index of this thread.
unsigned int tid = threadIdx.x + blockDim.x * blockIdx.x;
pi_gpu = pi_gpu + array[tid];
// Block-level barrier; no code follows it, and it does not order threads
// belonging to different blocks.
__syncthreads();
}

// Host entry point: allocates the device buffer, launches ArraySum over it,
// copies the accumulator pi_gpu back to the host and returns it. On a copy
// error it prints the CUDA error string to stderr and exits with -1.
int main (int argc, char *argv[]) {
// Allocates NUM_BLOCKS * NUM_THREADS doubles. The return code is not checked,
// and nothing in this snippet writes to the buffer before the kernel reads it.
cudaMalloc((void **) &dev, NUM_BLOCKS * NUM_THREADS * sizeof(double));
// Host-side destination for the accumulated value.
double pi_gpu_h;

// One thread per allocated element.
ArraySum<<<NUM_BLOCKS, NUM_THREADS>>>(dev);
// Wait for the kernel to finish before reading the symbol back.
cudaDeviceSynchronize();
// NOTE(review): cudaMemcpyFromSymbol takes (dst, symbol, count, offset, kind).
// This call passes the address-of expression &pi_gpu as the symbol argument,
// and cudaMemcpyDeviceToHost sits in the fourth (offset) position rather than
// the fifth (kind) position.
cudaError err = cudaMemcpyFromSymbol(&pi_gpu_h, &pi_gpu, sizeof(double), cudaMemcpyDeviceToHost);
if( cudaSuccess != err )
{
fprintf( stderr, "cudaMemcpyFromSymbolfailed : %s\n", cudaGetErrorString( err ) );
exit( -1 );
}
// pi_gpu_h is a double but main returns int, so the value is converted to int
// here. The device buffer is not freed before returning.
return pi_gpu_h; // this is always zero!!!
}

最佳答案

cudaMemcpyFromSymbol 调用中的符号参数不正确:应直接传入符号 pi_gpu 本身,而不是它的地址 &pi_gpu;另外,第四个参数是偏移量(offset),拷贝方向应放在第五个参数。正确的调用如下:

// Pass the symbol itself (not its address); the 4th argument is the byte offset, the 5th the copy kind.
cudaMemcpyFromSymbol(&pi_gpu_h, pi_gpu, sizeof(double), 0, cudaMemcpyDeviceToHost)

关于cuda - 无效的设备符号 cudaMemcpyFromSymbol CUDA,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/42152619/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com