gpt4 book ai didi

cuda - 如何在设备端实现 CUDA 虚函数?

转载 作者:行者123 更新时间:2023-12-01 12:39:00 25 4
gpt4 key购买 nike

我看到 CUDA 不允许将具有虚函数的类传递到内核函数中。此限制是否有任何解决方法?

我真的很希望能够在内核函数中使用多态。

谢谢!

最佳答案

Robert Crovella 评论中最重要的部分是:

The objects simply need to be created on the device.



记住这一点之后,说说我的情况:我有一个抽象类 Function,以及它的若干个实现类,每个实现类封装了一个不同的函数及其求值逻辑。下面是我实现多态的代码的简化版本——我并不是说它不能写得更好……但希望它能帮助你理解:
// Abstract interface for a function that is evaluated in device code.
//
// Every member is __device__-only, so instances can only be constructed,
// used and destroyed from device code. Concrete functions derive from this
// class and provide Evaluate().
class Function
{
public:
    __device__ Function() {}

    // Virtual so that a derived object can be destroyed through a
    // Function pointer.
    __device__ virtual ~Function() {}

    // Evaluates the function.
    //
    // positions      - read-only input buffer.
    // fitnesses      - output buffer (presumably one value per particle;
    //                  TODO confirm against the implementations).
    // particlesCount - number of particles to process.
    __device__ virtual void Evaluate(
        const real* __restrict__ positions,
        real* fitnesses,
        const SIZE_TYPE particlesCount) const = 0;
};

// Concrete Function that reads its parameters from a config data buffer.
//
// The object is a NON-owning view of that buffer: it stores pointers into
// it but never frees it. The buffer reaches the constructor as a kernel
// argument supplied by the host, and memory obtained through the CUDA
// runtime API must not be released with device-side delete/free (and vice
// versa). Whoever allocated the buffer must keep it alive for the lifetime
// of this object and release it with the matching API afterwards.
class FunctionRsj : public Function
{
private:
    SIZE_TYPE m_DimensionsCount;
    SIZE_TYPE m_PointsCount;
    real* m_Y; // Start of the config data buffer.
    real* m_X; // pointsCount elements past m_Y in the same buffer.

public:
    // dimensionsCount - stored as-is for use by Evaluate().
    // pointsCount     - stored as-is; also the offset of m_X within the
    //                   config data buffer.
    // configFileData  - device-accessible buffer; it must hold more than
    //                   pointsCount elements for m_X to be dereferenceable
    //                   (NOTE(review): exact layout is not visible here).
    __device__ FunctionRsj(const SIZE_TYPE dimensionsCount, const SIZE_TYPE pointsCount, real* configFileData)
        : m_DimensionsCount(dimensionsCount),
          m_PointsCount(pointsCount),
          m_Y(configFileData),
          m_X(configFileData + pointsCount)
    {
    }

    __device__ virtual ~FunctionRsj()
    {
        // Deliberately empty: the config data buffer is owned by the
        // caller. Calling delete[] on it here would be invalid for a
        // buffer that was not allocated with device-side new[].
    }

    __device__ virtual void Evaluate(const real* __restrict__ positions, real* fitnesses, const SIZE_TYPE particlesCount) const
    {
        // Implement evaluation of FunctionRsj here.
    }
};

// Proxy kernel: kernels cannot be member functions, so this one simply
// forwards its arguments to the virtual Evaluate() of the object stored in
// the `function` slot.
//
// Every launched thread performs the call; how the work is divided between
// threads is up to the Evaluate() implementation. The slot must already
// hold a valid object pointer when this kernel runs.
__global__ void evaluate_fitnesses(
    const real* __restrict__ positions,
    real* fitnesses,
    Function const* const* __restrict__ function,
    const SIZE_TYPE particlesCount)
{
    // Read the object pointer out of the slot, then dispatch virtually.
    const Function* const target = function[0];
    target->Evaluate(positions, fitnesses, particlesCount);
}

// Constructs a FunctionRsj in device code and stores its address in
// *function.
//
// The object has to be created on the GPU for virtual calls to work there:
// its virtual function table must refer to device code, which is not the
// case for an object constructed on the host.
//
// Only the thread with threadIdx.x == 0 in the block with blockIdx.x == 0
// performs the construction; every other thread returns immediately.
__global__ void create_function(
    Function** function,
    SIZE_TYPE dimensionsCount,
    SIZE_TYPE pointsCount,
    real* configFileData)
{
    const bool isFirstThread = (blockIdx.x == 0) && (threadIdx.x == 0);
    if (!isFirstThread)
    {
        return;
    }

    function[0] = new FunctionRsj(dimensionsCount, pointsCount, configFileData);
}

// Destroys the object stored in *function and clears the slot.
//
// Only the thread with threadIdx.x == 0 in the block with blockIdx.x == 0
// performs the deletion, so the object is destroyed exactly once regardless
// of the launch configuration. The slot is reset to a null pointer
// afterwards so that it does not keep a dangling address; deleting a null
// slot is a no-op.
__global__ void delete_function(Function** function)
{
    if (threadIdx.x == 0 && blockIdx.x == 0)
    {
        delete *function;
        *function = 0;
    }
}

// Host driver: creates the polymorphic function object on the GPU,
// evaluates it through the proxy kernel, and destroys it again.
//
// Returns 0 on success and 1 if any CUDA call or kernel launch failed.
int main()
{
    // Lets just assume d_FunctionConfigData, d_Positions,
    // d_Fitnesses are arrays allocated on GPU already ...

    // Device-side slot that will hold the pointer to the function object.
    // The slot stores a single Function*, hence sizeof(Function*).
    Function** d_Function = 0;
    if (cudaMalloc(&d_Function, sizeof(Function*)) != cudaSuccess)
    {
        return 1;
    }

    // Create function.
    create_function<<<1, 1>>>(d_Function, 10, 10, d_FunctionConfigData);
    cudaError_t status = cudaGetLastError();

    // Only touch the slot if the creation kernel was actually launched;
    // otherwise it still holds uninitialized memory.
    if (status == cudaSuccess)
    {
        // Evaluate using proxy kernel.
        evaluate_fitnesses<<<
            m_Configuration.GetEvaluationGridSize(),
            m_Configuration.GetEvaluationBlockSize(),
            m_Configuration.GetEvaluationSharedMemorySize()>>>(
            d_Positions,
            d_Fitnesses,
            d_Function,
            m_Configuration.GetParticlesCount());
        status = cudaGetLastError();

        // Delete function object on GPU, even if the evaluation launch
        // failed, so the object is not leaked.
        delete_function<<<1, 1>>>(d_Function);
        const cudaError_t deleteStatus = cudaGetLastError();
        if (status == cudaSuccess)
        {
            status = deleteStatus;
        }
    }

    // Kernel launches are asynchronous: wait for them to finish so that
    // errors raised during execution are reported here.
    const cudaError_t syncStatus = cudaDeviceSynchronize();
    if (status == cudaSuccess)
    {
        status = syncStatus;
    }

    // Release the slot itself; it was allocated with cudaMalloc above.
    const cudaError_t freeStatus = cudaFree(d_Function);
    if (status == cudaSuccess)
    {
        status = freeStatus;
    }

    return status == cudaSuccess ? 0 : 1;
}

关于“cuda - 如何在设备端实现 CUDA 虚函数?”,我们在 Stack Overflow 上找到一个类似的问题: https://stackoverflow.com/questions/26812913/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com