gpt4 book ai didi

c++ - CUDA __device__ 未解析的外部函数

转载 作者:可可西里 更新时间:2023-11-01 15:17:38 24 4
gpt4 key购买 nike

<分区>

我试图了解如何在单独的头文件中解耦 CUDA __device__ 代码。

我有三个文件。

文件 1:int2.cuh

#ifndef INT2_H_
#define INT2_H_

#include "cuda.h"
#include "cuda_runtime.h"
#include "device_launch_parameters.h"

// Kernel defined in int2.cu; every thread prints k2(threadIdx.x).
__global__ void kernel();
// Device-side helper defined in int2.cu; returns the square of its argument.
// NOTE: only this declaration is visible to other translation units. Calling
// k2() from a kernel in a different .cu file requires relocatable device code
// (nvcc -rdc=true / -dc plus a device link step); when building with
// --relocatable-device-code=false, ptxas fails with
// "Unresolved extern function '_Z2k2i'".
__device__ int k2(int k);

// Host-side wrapper defined in int2.cu: launches kernel() with a single block
// of `dim` threads, resets the device, and returns 0 on success.
int launchKernel(int dim);

#endif /* INT2_H_ */

文件2:int2.cu

#include "int2.cuh"
#include "cstdio"

// Prints, one value per line, the square of each thread's x-index within its
// block. The square is obtained from the device helper k2().
__global__ void kernel() {
    const int lane = threadIdx.x;
    const int squared = k2(lane);
    printf("%d\n", squared);
}

// Device-only helper: returns i multiplied by itself.
__device__ int k2(int i) {
    const int squared = i * i;
    return squared;
}

// Launches kernel() with one block of `dim` threads, waits for it to finish,
// and resets the device.
//
// Parameters:
//   dim - number of threads in the single block.
//
// Returns:
//   0 when the launch and the kernel execution both succeeded, 1 otherwise
//   (the CUDA error string is written to stderr).
int launchKernel(int dim) {
    kernel<<<1, dim>>>();

    // A kernel launch returns nothing itself: configuration errors (for
    // example an invalid `dim`) are only visible through cudaGetLastError().
    cudaError_t status = cudaGetLastError();
    if (status == cudaSuccess) {
        // Faults raised while the kernel runs surface at the next
        // synchronizing call; synchronizing also flushes device printf output.
        status = cudaDeviceSynchronize();
    }
    if (status != cudaSuccess) {
        fprintf(stderr, "launchKernel(%d) failed: %s\n", dim,
                cudaGetErrorString(status));
    }

    // Keep the original teardown: the device is reset on every path.
    cudaDeviceReset();
    return status == cudaSuccess ? 0 : 1;
}

文件 3:CUDASample.cu

#include <stdio.h>
#include <stdlib.h>
#include "int2.cuh"
#include "iostream"

using namespace std;

// Not referenced anywhere in the visible code; nvcc reports it as
// "declared but never referenced" in the build log.
static const int WORK_SIZE = 256;

// Prints, one value per line, the square of each thread's x-index within its
// block.
//
// The square is computed inline rather than through k2(): k2() is only
// declared in int2.cuh and defined in int2.cu, so `printf("%d\n", k2(tid));`
// here fails at build time with "Unresolved extern function '_Z2k2i'" when
// relocatable device code is disabled.
__global__ void sampleCuda() {
    const int lane = threadIdx.x;
    const int squared = lane * lane;
    printf("%d\n", squared);
}

// Checks the most recent kernel launch and waits for it to complete.
// Returns true on success; on failure prints the CUDA error, prefixed with
// `label`, to stderr and returns false.
static bool launchSucceeded(const char *label) {
    // Launch-configuration errors are reported by cudaGetLastError();
    // execution errors surface at the synchronizing call.
    cudaError_t status = cudaGetLastError();
    if (status == cudaSuccess) {
        status = cudaDeviceSynchronize();
    }
    if (status != cudaSuccess) {
        fprintf(stderr, "%s failed: %s\n", label, cudaGetErrorString(status));
        return false;
    }
    return true;
}

// Runs the three demonstrations in order: the host wrapper launchKernel(),
// a direct launch of kernel() (defined in int2.cu), and a launch of the
// local sampleCuda() kernel. The device is reset after each direct launch.
//
// Returns EXIT_SUCCESS when every step succeeded, EXIT_FAILURE otherwise.
int main(void) {
    int exitCode = EXIT_SUCCESS;

    // Use the wrapper's status instead of storing it in an unused variable.
    if (launchKernel(16) != 0) {
        fprintf(stderr, "launchKernel(16) reported a failure\n");
        exitCode = EXIT_FAILURE;
    }

    kernel<<<1, 16>>>();
    if (!launchSucceeded("kernel")) {
        exitCode = EXIT_FAILURE;
    }
    cudaDeviceReset();

    sampleCuda<<<1, 16>>>();
    if (!launchSucceeded("sampleCuda")) {
        exitCode = EXIT_FAILURE;
    }
    cudaDeviceReset();

    return exitCode;
}

代码运行良好。我可以调用 sampleCuda() 内核(在同一文件中),可以调用 C 函数 launchKernel()(在其他文件中),也可以直接调用 kernel() 内核(在其他文件中)。

但是,从 sampleCuda() 内核中调用 __device__ 函数 k2() 时会出现以下错误,而同一个函数在 kernel() 中却可以正常调用。

10:58:11 **** Incremental Build of configuration Debug for project CUDASample ****
make all
Building file: ../src/CUDASample.cu
Invoking: NVCC Compiler
/Developer/NVIDIA/CUDA-6.5/bin/nvcc -G -g -O0 -gencode arch=compute_20,code=sm_20 -odir "src" -M -o "src/CUDASample.d" "../src/CUDASample.cu"
/Developer/NVIDIA/CUDA-6.5/bin/nvcc -G -g -O0 --compile --relocatable-device-code=false -gencode arch=compute_20,code=compute_20 -gencode arch=compute_20,code=sm_20 -x cu -o "src/CUDASample.o" "../src/CUDASample.cu"
../src/CUDASample.cu(18): warning: variable "var" was set but never used

../src/CUDASample.cu(8): warning: variable "WORK_SIZE" was declared but never referenced

../src/CUDASample.cu(18): warning: variable "var" was set but never used

../src/CUDASample.cu(8): warning: variable "WORK_SIZE" was declared but never referenced

ptxas fatal : Unresolved extern function '_Z2k2i'
make: *** [src/CUDASample.o] Error 255

10:58:14 Build Finished (took 2s.388ms)

如何从 sampleCuda() 内核中调用 __device__ 函数?

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com