gpt4 book ai didi

linux - CUDA编程: Compilation Error

转载 作者:太空宇宙 更新时间:2023-11-04 05:02:57 25 4
gpt4 key购买 nike

我正在制作一个 CUDA 程序,该程序实现对 N 个数字进行数据并行前缀和计算。我的代码还应该使用随机数生成器在主机上生成数字。但是,在尝试编译时,我似乎总是在 int main 的结束括号上遇到“无法识别的 token ”和“预期声明”错误。我在 Linux 上运行代码。

#include <stdio.h>
#include <cuda.h>
#include <stdlib.h>
#include <math.h>


__global__ void gpu_cal(int *a,int i, int n) {
int tid = blockIdx.x * blockDim.x + threadIdx.x;
if(tid>=i && tid < n) {
a[tid] = a[tid]+a[tid-i];
}
}


int main(void)
{
int key;
int *dev_a;
int N=10;//size of 1D array
int B=1;//blocks in the grid
int T=10;//threads in a block

do{

printf ("Some limitations:\n");
printf (" Maximum number of threads per block = 1024\n");
printf (" Maximum sizes of x-dimension of thread block = 1024\n");
printf (" Maximum size of each dimension of grid of thread blocks = 65535\n");
printf (" N<=B*T\n");

do{
printf("Enter size of array in one dimension, currently %d\n",N);
scanf("%d",&N);
printf("Enter size of blocks in the grid, currently %d\n",B);
scanf("%d",&B);
printf("Enter size of threads in a block, currently %d\n",T);
scanf("%d",&T);

if(N>B*T)
printf("N>B*T, this will result in an incorrect result generated by GPU, please try again\n");
if(T>1024)
printf("T>1024, this will result in an incorrect result generated by GPU, please try again\n");
}while((N>B*T)||(T>1024));

cudaEvent_t start, stop; // using cuda events to measure time
float elapsed_time_ms1, elapsed_time_ms3;

int a[N],gpu_result[N];//for result generated by GPU
int cpu_result[N];//CPU result


cudaMalloc((void**)&dev_a,N * sizeof(int));//allocate memory on GPU
int i,j;

srand(1); //initialize random number generator
for (i=0; i < N; i++) // load array with some numbers
a[i] = (int)rand() ;

cudaMemcpy(dev_a, a , N*sizeof(int),cudaMemcpyHostToDevice);//load data from host to device

cudaEventCreate(&start); // instrument code to measure start time
cudaEventCreate(&stop);

cudaEventRecord(start, 0);

//GPU computation
for(j=0;j<log(N)/log(2);j++){

gpu_cal<<<B,T>>>(dev_a,pow(2,j),N);

cudaThreadSynchronize();

}

cudaMemcpy(gpu_result,dev_a,N*sizeof(int),cudaMemcpyDeviceToHost);

cudaEventRecord(stop, 0); // instrument code to measue end time
cudaEventSynchronize(stop);
cudaEventElapsedTime(&elapsed_time_ms1, start, stop );

printf("\n\n\nTime to calculate results on GPU: %f ms.\n", elapsed_time_ms1); // print out execution time

//CPU computation
cudaEventRecord(start, 0);

for(i=0;i<N;i++)
{
cpu_result[i]=0;
for(j=0;j<=i;j++)
{
cpu_result[i]=cpu_result[i]+a[j];
}
}

cudaEventRecord(stop, 0); // instrument code to measue end time
cudaEventSynchronize(stop);
cudaEventElapsedTime(&elapsed_time_ms3, start, stop );

printf("Time to calculate results on CPU: %f ms.\n\n", elapsed_time_ms3); // print out execution time


//Error check
for(i=0;i < N;i++) {
if (gpu_result[i] != cpu_result[i] ) {
printf("ERROR!!! CPU and GPU create different answers\n");
break;
}
}
//Calculate speedup

printf("Speedup on GPU compared to CPU= %f\n", (float) elapsed_time_ms3 / (float) elapsed_time_ms1);

printf("\nN=%d",N);
printf("\nB=%d",B);
printf("\nT=%d",T);

printf("\n\n\nEnter '1' to repeat, or other integer to terminate\n");
scanf("%d",&key);

}while(key == 1);

cudaFree(dev_a);//deallocation

return 0;
}​

最佳答案

代码中的最后一个 } 是 Unicode 字符。如果删除整行,然后重新输入 },错误就会消失。

关于linux - CUDA编程: Compilation Error,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/27053974/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com