gpt4 book ai didi

CudaMemcpy 不会将值从主机复制到设备

转载 作者:太空宇宙 更新时间:2023-11-04 04:37:00 26 4
gpt4 key购买 nike

我根据本教程创建了一个简单但完整的程序:http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#device-memory

#include <cuda.h>
#include <stdio.h>
#include <stdlib.h>

// gpuErrchk(call): evaluates a CUDA runtime call and passes its cudaError_t
// result to gpuAssert together with the call site's file name and line number.
// The do { } while (0) wrapper makes the macro behave as a single statement,
// so it can be used safely in an unbraced if/else branch.
#define gpuErrchk(ans) do { gpuAssert((ans), __FILE__, __LINE__); } while (0)

// Reports a failed CUDA runtime call.
//
// Parameters:
//   code  - status returned by the CUDA call being checked
//   file  - source file name of the call site (printed in the diagnostic)
//   line  - source line of the call site (printed in the diagnostic)
//   abort - when true (the default), terminates the process with `code`
//           as the exit status after printing the diagnostic
//
// Does nothing when code == cudaSuccess.
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
    if (code == cudaSuccess)
        return;

    fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
    if (abort)
        exit(code);
}

// Element-wise vector addition kernel: C[i] = A[i] + B[i] for 0 <= i < N.
//
// Launch layout: 1D grid of 1D blocks. Each thread starts at its flat global
// index and advances by the total number of launched threads, so every
// element is processed for any grid size (including a grid with fewer than N
// threads). Uses no shared memory.
//
// Parameters:
//   A, B - input arrays of at least N floats (read only)
//   C    - output array of at least N floats
//   N    - number of elements to add; nothing is written when N <= 0
__global__ void VecAdd(const float* A, const float* B, float* C, int N)
{
    if (N <= 0)
        return;

    // Index arithmetic is done in size_t so blockIdx.x * blockDim.x and the
    // stride increment cannot wrap around for large launches.
    const size_t count  = (size_t)N;
    const size_t stride = (size_t)gridDim.x * blockDim.x;

    for (size_t i = (size_t)blockIdx.x * blockDim.x + threadIdx.x; i < count; i += stride)
        C[i] = A[i] + B[i];
}

// Reads exactly `count` whitespace-separated floats from the text file `path`
// into `dst`. Returns true on success; on failure prints a diagnostic to
// stderr and returns false.
static bool readVector(const char *path, float *dst, int count)
{
    FILE *in = fopen(path, "r");
    if (in == NULL)
    {
        perror(path);
        return false;
    }

    bool ok = true;
    for (int k = 0; k < count; k++)
    {
        if (fscanf(in, "%f", &dst[k]) != 1)
        {
            fprintf(stderr, "%s: expected %d values, read failed at index %d\n", path, count, k);
            ok = false;
            break;
        }
    }
    fclose(in);
    return ok;
}

// Host code: reads two N-element vectors from A.txt and B.txt, adds them with
// the VecAdd kernel, and writes the result to C.txt.
// Returns 0 on success and EXIT_FAILURE on host-side errors; any failing CUDA
// call is reported by gpuErrchk, which terminates the process.
int main()
{
    const int N = 1000;
    const size_t size = N * sizeof(float);

    // Allocate input vectors h_A and h_B and output vector h_C in host memory
    float *h_A = (float*)malloc(size);
    float *h_B = (float*)malloc(size);
    float *h_C = (float*)malloc(size);
    if (h_A == NULL || h_B == NULL || h_C == NULL)
    {
        fprintf(stderr, "Host allocation of %lu bytes failed\n", (unsigned long)size);
        free(h_A);
        free(h_B);
        free(h_C);
        return EXIT_FAILURE;
    }

    // Initialize input vectors from the text files
    if (!readVector("A.txt", h_A, N) || !readVector("B.txt", h_B, N))
    {
        free(h_A);
        free(h_B);
        free(h_C);
        return EXIT_FAILURE;
    }

    // Allocate vectors in device memory; every CUDA call is checked so that a
    // failure is reported instead of silently producing zeros
    float *d_A = NULL;
    float *d_B = NULL;
    float *d_C = NULL;
    gpuErrchk(cudaMalloc(&d_A, size));
    gpuErrchk(cudaMalloc(&d_B, size));
    gpuErrchk(cudaMalloc(&d_C, size));

    // Copy inputs from host to device
    gpuErrchk(cudaMemcpy(d_A, h_A, size, cudaMemcpyHostToDevice));
    gpuErrchk(cudaMemcpy(d_B, h_B, size, cudaMemcpyHostToDevice));

    // Invoke kernel with enough blocks to cover all N elements (ceil-div)
    const int threadsPerBlock = 256;
    const int blocksPerGrid = (N + threadsPerBlock - 1) / threadsPerBlock;

    VecAdd<<<blocksPerGrid, threadsPerBlock>>>(d_A, d_B, d_C, N);
    // A kernel launch returns no status; launch-configuration errors are
    // only visible through cudaGetLastError()
    gpuErrchk(cudaGetLastError());

    // Blocking copy of the result back to the host; also surfaces any error
    // raised while the kernel was executing
    gpuErrchk(cudaMemcpy(h_C, d_C, size, cudaMemcpyDeviceToHost));

    // Quick sanity print of the first result element (the original indexed
    // one past the end of h_C here)
    printf("%f \n", h_C[0]);

    int status = 0;
    FILE *out = fopen("C.txt", "w");
    if (out == NULL)
    {
        perror("C.txt");
        status = EXIT_FAILURE;
    }
    else
    {
        for (int i = 0; i < N; i++)
            fprintf(out, "%f ", h_C[i]);
        fclose(out);
        printf("Zakonczono obliczenia\n");
    }

    // Free device memory
    gpuErrchk(cudaFree(d_A));
    gpuErrchk(cudaFree(d_B));
    gpuErrchk(cudaFree(d_C));

    // Free host memory
    free(h_A);
    free(h_B);
    free(h_C);
    return status;
}

它应该从文件中读取两个向量,在设备(GPU)上将它们相加,然后把结果写入 “C.txt” 文件。但实际上,它输出的是一千个零。

经过一些调试后,我找到了罪魁祸首——cudaMalloc 函数。

(cuda-gdb) n
42      cudaMemcpy(d_A, h_A, size, cudaMemcpyHostToDevice);
(cuda-gdb) n
43      cudaMemcpy(d_B, h_B, size, cudaMemcpyHostToDevice);
(cuda-gdb) print d_A[0]
$1 = 0
(cuda-gdb) print h_A[0]
$2 = 3.66192293

我想知道为什么它不起作用,这部分代码是从教程中原始复制的。

最佳答案

看来安装 CUDA 之后必须重新启动电脑,仅仅注销再重新登录是不够的。重启之后我的程序可以正常运行了,但我无法再调试它,cuda-gdb 会输出如下信息:

[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".
fatal: All CUDA devices are used for display and cannot be used while debugging. (error code = 24).

(上述错误在调试器中执行 “run” 命令之后立即出现。)

关于CudaMemcpy 不会将值从主机复制到设备,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/29836275/

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com