gpt4 book ai didi

c++ - CUDA 中设备类的类型限定符

转载 作者:塔克拉玛干 更新时间:2023-11-03 01:11:53 25 4
gpt4 key购买 nike

我目前正在尝试编写一段 CUDA 代码,其中包含一个仅在设备端使用的类(即主机不需要知道它的存在)。但是我无法为该类确定正确的限定符(下面的 deviceclass):

// Device-only helper: returns x[0] + x[1]; assumes x points at >= 2 floats.
// Being __device__-qualified, it can only be called from device code -- which
// is exactly why the unqualified class below fails to compile.
__device__ float devicefunction (float *x) {return x[0]+x[1];}

// NOTE(review): this is the question's intentionally broken code. None of the
// members carry an execution-space qualifier, so nvcc treats them as __host__
// functions; the constructor's call to the __device__-only devicefunction()
// is therefore a compile error (the error the question asks about).
class deviceclass {
private:
// Cached sum computed at construction.
float _a;

public:
// Takes a pointer to (at least) two floats. The kernel below instead passes
// two scalar arguments -- a second, separate error in the posted code.
deviceclass(float *x) {_a = devicefunction(x);}

float getvalue () {return _a;}
};

// Device code
// NOTE(review): broken example from the question, kept as posted:
//  - `new deviceclass(1.0, 2.0)` does not match the float* constructor above;
//  - the object allocated with `new` is never deleted (per-thread heap leak).
__global__ void VecInit(float* A, int N)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;   // flat 1D global index
if (i < N) {   // bounds guard: the grid may overshoot N
deviceclass *test;

test = new deviceclass(1.0, 2.0);

A[i] = test->getvalue();
}
}

// Standard CUDA guff below: Variables
// Host buffer (h_A, malloc'd) and device buffer (d_A, cudaMalloc'd).
float *h_A, *d_A;

// Host code
// NOTE(review): truncated example from the question ("//...etc"); none of the
// CUDA API return codes are checked here.
int main(int argc, char** argv)
{
printf("Vector initialization...\n");
int N = 10000;
size_t size = N * sizeof(float);

// Allocate
h_A = (float*)malloc(size);
cudaMalloc(&d_A, size);   // NOTE(review): return value unchecked

printf("Computing...\n");
// Invoke kernel
// Ceil-division so every one of the N elements gets a thread.
int threadsPerBlock = 256;
int blocksPerGrid = (N + threadsPerBlock - 1) / threadsPerBlock;
VecInit<<<blocksPerGrid, threadsPerBlock>>>(d_A, N);

// Copy result from device memory to host memory
// (cudaMemcpy is blocking, so it also waits for the kernel to finish.)
cudaMemcpy(h_A, d_A, size, cudaMemcpyDeviceToHost);

//...etc
}

将 deviceclass 单独设置为 __device__ 会引发错误,因为它是从 __global__ 函数中调用的;但是将其设置为 __device__ __host__ 或 __global__ 似乎又没有必要。有人可以指出我正确的方向吗?

最佳答案

事实证明,限定符必须加在类的成员函数上,而不是类本身上。下面是一个完整的工作版本:

#include <iostream>
#include <stdio.h>
#include <stdlib.h>

using namespace std;

void Cleanup(void);


// Functions to be pointed to
// Device-side helper: yields the sum of its two operands.
__device__ float Plus (float a, float b)
{
    float sum = a + b;
    return sum;
}

// Device-only value holder. Every member that executes on the GPU carries the
// __device__ qualifier -- this placement (on the member functions, not the
// class) is the answer to the question above.
class deviceclass {

private:
// Sum computed once at construction.
float test;

public:
// Computes Plus(a, b) and caches the result; callable from device code only.
__device__ deviceclass(float a, float b) : test(Plus(a, b)) {}

// Read-only accessor for the cached sum (const: does not modify the object).
__device__ float getvalue() const {return test;}
};

// Device code
// Kernel: one thread per element of A. Each in-bounds thread constructs a
// deviceclass on its stack (no per-thread heap allocation) and stores the
// resulting sum into A[i]. Expects a 1D launch; tail threads past N are
// masked by the bounds guard.
__global__ void VecInit(float* A, int N)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;   // flat 1D global index
if (i < N) {   // guard: grid size is rounded up, so i may exceed N-1
// Float literals (1.0f, 2.0f) avoid silent promotion to double arithmetic.
deviceclass test(1.0f, 2.0f);

A[i] = test.getvalue();
}
}

// Standard CUDA guff below: Variables
// Host buffer (h_A, malloc'd) and device buffer (d_A, cudaMalloc'd);
// both are released in Cleanup().
float *h_A, *d_A;

// Host code
// Allocates N floats on host and device, launches VecInit, copies the result
// back, prints it, and releases everything via Cleanup(). Every CUDA call's
// return code is checked; on failure we report and fall through to Cleanup().
int main(int argc, char** argv)
{
printf("Vector initialization...\n");
int N = 10000;
size_t size = N * sizeof(float);

// Allocate
h_A = (float*)malloc(size);
if (h_A == NULL) {
fprintf(stderr, "Host allocation of %zu bytes failed\n", size);
return 1;
}
cudaError_t err = cudaMalloc(&d_A, size);
if (err != cudaSuccess) {
fprintf(stderr, "cudaMalloc failed: %s\n", cudaGetErrorString(err));
Cleanup();
}

printf("Computing...\n");
// Invoke kernel
// Ceil-division so every one of the N elements is covered by a thread.
int threadsPerBlock = 256;
int blocksPerGrid = (N + threadsPerBlock - 1) / threadsPerBlock;
VecInit<<<blocksPerGrid, threadsPerBlock>>>(d_A, N);
// Launch-configuration errors only surface via cudaGetLastError().
err = cudaGetLastError();
if (err != cudaSuccess) {
fprintf(stderr, "Kernel launch failed: %s\n", cudaGetErrorString(err));
Cleanup();
}

// Copy result from device memory to host memory.
// cudaMemcpy is blocking, so it also synchronizes with the kernel and
// surfaces any asynchronous execution error.
err = cudaMemcpy(h_A, d_A, size, cudaMemcpyDeviceToHost);
if (err != cudaSuccess) {
fprintf(stderr, "cudaMemcpy failed: %s\n", cudaGetErrorString(err));
Cleanup();
}

// Verify result
int i;
for (i = 0; i < N; ++i) {
cout << endl << h_A[i];
}

cout << endl;

Cleanup();
}

// Releases the device and host buffers, tears down the CUDA context, and
// terminates the process. Safe to call with either pointer still NULL.
void Cleanup(void)
{
// Free device memory
if (d_A)
cudaFree(d_A);

// Free host memory
if (h_A)
free(h_A);

// cudaThreadExit() is deprecated; cudaDeviceReset() is the current API for
// destroying the context and flushing profiling data before process exit.
cudaDeviceReset();

exit(0);
}

关于c++ - CUDA 中设备类的类型限定符,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/5078434/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com