gpt4 book ai didi

c++ - transform_reduce 中的推力异常 bulk_kernel_by_value

转载 作者:行者123 更新时间:2023-11-30 05:42:47 24 4
gpt4 key购买 nike

我正在研究一个优化问题,其中包含各种类似形式的数学函数,因此我将它们扭曲到 FunctionObj

template <typename T>
struct FunctionObj
{
T a;
FunctionObj(): a(1)
{
}
};

并定义了一个FuncEval来求值

template <typename T>
__host__ __device__ inline T FuncEval(const FunctionObj<T> &f_obj, T x)
{
return f_obj.a+x;
}

我真正想做的是 sum {func(x)},所以我定义了一个 FuncEvalF 仿函数来利用 thrust::tranform_reduce

template <typename T>
struct FuncEvalF
{
const FunctionObj<T>& f_obj;
__host__ __device__ inline FuncEvalF(const FunctionObj<T>& in_f_obj) :f_obj(in_f_obj)
{

}
__host__ __device__ inline T operator()(T x)
{
return FuncEval(f_obj, x);
}
};

template <typename T>
__host__ __device__ inline T BatchFuncEval(const FunctionObj<T> &f_obj, int size, const T *x_in);
template<>
inline float BatchFuncEval< float>(const FunctionObj<float> &f_obj, int size, const float *x_in)
{
return thrust::transform_reduce(thrust::device, thrust::device_pointer_cast(x_in), thrust::device_pointer_cast(x_in + size), FuncEvalF<float>(f_obj), static_cast<float>(0), thrust::plus<float>());
}

最后在 main.cu 中调用 transform_reduce

auto func = FuncEvalF<float>(FunctionObj<float>());
float result = 0;
try
{
result = thrust::transform_reduce(thrust::device, thrust::device_pointer_cast(dev_a), thrust::device_pointer_cast(dev_a + 10000), func, static_cast<float>(0), thrust::plus<float>());

}
catch (std::exception e)
{
printf("%s in thurst \n ", e.what());
}

这里出现异常:bulk_kernel_by_value,即使我将 10000 更改为 10。当我将 FuncEval 的定义更改为

时,事情只会变得更好
return x;

程序会输出正确但无意义的答案。我不禁要问我的代码有什么问题?感谢您的关注。完整代码如下,cuda 7.0 sm_20

#include <cuda_runtime.h>
#include <device_launch_parameters.h>

#include <thrust/device_vector.h>
#include <thrust/functional.h>
#include <thrust/inner_product.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/reduce.h>
#include <thrust/execution_policy.h>
#include <thrust/transform_reduce.h>
#include <thrust/transform.h>

#include <stdio.h>

template <typename T>
struct FunctionObj
{
T a;
FunctionObj(): a(1)
{
}

};

template <typename T>
__host__ __device__ inline T FuncEval(const FunctionObj<T> &f_obj, T x)
{
return f_obj.a+x;
}


template <typename T>
struct FuncEvalF
{
const FunctionObj<T>& f_obj;
__host__ __device__ inline FuncEvalF(const FunctionObj<T>& in_f_obj) :f_obj(in_f_obj)
{

}
__host__ __device__ inline T operator()(T x)
{
return FuncEval(f_obj, x);
}
};
template <typename T>
__host__ __device__ inline T BatchFuncEval(const FunctionObj<T> &f_obj, int size, const T *x_in);
template<>
inline float BatchFuncEval< float>(const FunctionObj<float> &f_obj, int size, const float *x_in)
{
return thrust::transform_reduce(thrust::device, thrust::device_pointer_cast(x_in), thrust::device_pointer_cast(x_in + size), FuncEvalF<float>(f_obj), static_cast<float>(0), thrust::plus<float>());
}
int main()
{
cudaError_t cudaE;
float a[10000] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
float* dev_a;
cudaE = cudaMalloc((void**)(&dev_a), sizeof(float) * 10000);
cudaE = cudaMemcpy(dev_a, a, sizeof(float) * 10000, cudaMemcpyHostToDevice);
auto func = FuncEvalF<float>(FunctionObj<float>());
float result = 0;
try
{
result = thrust::transform_reduce(thrust::device, thrust::device_pointer_cast(dev_a), thrust::device_pointer_cast(dev_a + 10000), func, static_cast<float>(0), thrust::plus<float>());

}
catch (std::exception e)
{
printf("%s in thurst \n ", e.what());
}
printf("the gpu float result is %f\n", result);
cudaFree(dev_a);
}

最佳答案

问题是 f_objstruct FuncEvalF 内是 const FunctionObj<T>& .

它在主机上被实例化为一个临时的 FunctionObj<float>() ,但以后对它的引用不再有效。

解决这个问题的一种方法是创建它的拷贝而不是持有对它的引用:

template <typename T>
struct FuncEvalF
{
FunctionObj<T> f_obj;
....
}

关于c++ - transform_reduce 中的推力异常 bulk_kernel_by_value,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/30523558/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com