gpt4 book ai didi

使用 double2 数组进行 CUDA Thrust 归约(reduce)

转载 作者:行者123 更新时间:2023-12-01 18:39:53 25 4
gpt4 key购买 nike

我有以下(可编译、可执行的)代码,它使用 CUDA Thrust 对 float2 数组执行归约(reduction),并且工作正常:

using namespace std;

// includes, system
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <conio.h>

#include <typeinfo>
#include <iostream>

// includes CUDA
#include <cuda.h>
#include <cuda_runtime.h>

// includes Thrust
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <thrust/reduce.h>

// Binary functor that adds two float2 values component-wise.
// It is passed to thrust::reduce as the reduction operator.
struct add_float2 {
    // Marked __host__ __device__ (instead of __device__ only) so the same
    // functor can be invoked from host code as well as from device code.
    //
    // a, b: the two operands.
    // Returns a float2 whose x is a.x + b.x and whose y is a.y + b.y.
    __host__ __device__ float2 operator()(const float2& a, const float2& b) const {
        float2 r;
        r.x = a.x + b.x;
        r.y = a.y + b.y;
        return r;
    }
};

// Binary functor that adds two double2 values component-wise.
// It is passed to thrust::reduce as the reduction operator.
struct add_double2 {
    // Marked __host__ __device__ (instead of __device__ only) so the same
    // functor can be invoked from host code as well as from device code.
    //
    // a, b: the two operands.
    // Returns a double2 whose x is a.x + b.x and whose y is a.y + b.y.
    __host__ __device__ double2 operator()(const double2& a, const double2& b) const {
        double2 r;
        r.x = a.x + b.x;
        r.y = a.y + b.y;
        return r;
    }
};

// Fills a host array of N float2 values with (1, 2), copies it to the device,
// sums it with thrust::reduce using add_float2, and prints both components.
//
// Returns EXIT_SUCCESS on success, or EXIT_FAILURE if a host or device
// allocation or the host-to-device copy fails.
int main(int argc, char** argv)
{
    (void)argc;  // command-line arguments are not used
    (void)argv;

    const int N = 20;
    const size_t bytes = N * sizeof(float2);

    // --- Host
    float2* ha = (float2*) malloc(bytes);
    if (ha == NULL) {
        fprintf(stderr, "malloc of %lu bytes failed\n", (unsigned long)bytes);
        return EXIT_FAILURE;
    }
    for (int i = 0; i < N; ++i) {
        ha[i].x = 1.0f;
        ha[i].y = 2.0f;
    }

    // --- Device
    float2* da = NULL;
    cudaError_t err = cudaMalloc((void**)&da, bytes);
    if (err != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed: %s\n", cudaGetErrorString(err));
        free(ha);
        return EXIT_FAILURE;
    }

    err = cudaMemcpy(da, ha, bytes, cudaMemcpyHostToDevice);
    if (err != cudaSuccess) {
        fprintf(stderr, "cudaMemcpy failed: %s\n", cudaGetErrorString(err));
        cudaFree(da);
        free(ha);
        return EXIT_FAILURE;
    }

    // Wrap the raw device pointer so Thrust treats [da, da + N) as a device range.
    thrust::device_ptr<float2> dev_ptr_1(da);
    thrust::device_ptr<float2> dev_ptr_2(da + N);

    float2 init;
    init.x = init.y = 0.0f;

    float2 sum = thrust::reduce(dev_ptr_1, dev_ptr_2, init, add_float2());

    std::cout << " Real part = " << sum.x << "; Imaginary part = " << sum.y << std::endl;

    // Release the buffers; the original never freed them.
    cudaFree(da);
    free(ha);

    getch();  // keep the console window open until a key is pressed

    return EXIT_SUCCESS;
}

但是,当我在 main 函数中将 float2 改为 double2 时,即:

// Fills a host array of N double2 values with (1, 2), copies it to the device,
// sums it with thrust::reduce using add_double2, and prints both components.
//
// NOTE(review): this page reports that this double2 variant raises an
// exception at the thrust::reduce call when built with MSVC + nvcc, and that
// the workaround is to use a user-defined two-double struct instead of the
// built-in double2. Confirm against the toolchain in use.
//
// Returns EXIT_SUCCESS on success, or EXIT_FAILURE if a host or device
// allocation or the host-to-device copy fails.
int main(int argc, char** argv)
{
    (void)argc;  // command-line arguments are not used
    (void)argv;

    const int N = 20;
    const size_t bytes = N * sizeof(double2);

    // --- Host
    double2* ha = (double2*) malloc(bytes);
    if (ha == NULL) {
        fprintf(stderr, "malloc of %lu bytes failed\n", (unsigned long)bytes);
        return EXIT_FAILURE;
    }
    for (int i = 0; i < N; ++i) {
        ha[i].x = 1.0;
        ha[i].y = 2.0;
    }

    // --- Device
    double2* da = NULL;
    cudaError_t err = cudaMalloc((void**)&da, bytes);
    if (err != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed: %s\n", cudaGetErrorString(err));
        free(ha);
        return EXIT_FAILURE;
    }

    err = cudaMemcpy(da, ha, bytes, cudaMemcpyHostToDevice);
    if (err != cudaSuccess) {
        fprintf(stderr, "cudaMemcpy failed: %s\n", cudaGetErrorString(err));
        cudaFree(da);
        free(ha);
        return EXIT_FAILURE;
    }

    // Wrap the raw device pointer so Thrust treats [da, da + N) as a device range.
    thrust::device_ptr<double2> dev_ptr_1(da);
    thrust::device_ptr<double2> dev_ptr_2(da + N);

    double2 init;
    init.x = init.y = 0.0;

    double2 sum = thrust::reduce(dev_ptr_1, dev_ptr_2, init, add_double2());

    std::cout << " Real part = " << sum.x << "; Imaginary part = " << sum.y << std::endl;

    // Release the buffers; the original never freed them.
    cudaFree(da);
    free(ha);

    getch();  // keep the console window open until a key is pressed

    return EXIT_SUCCESS;
}

我在调用 reduce 的那一行收到了一个异常。如何使用 CUDA Thrust 对 double2 数组进行归约?我做错了什么吗?提前致谢。

根据 TALONMIES 的回答得出的可行解决方案

using namespace std;

// includes, system
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <conio.h>

#include <typeinfo>
#include <iostream>

// includes CUDA
#include <cuda.h>
#include <cuda_runtime.h>

// includes Thrust
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <thrust/reduce.h>

// User-defined pair of doubles used in place of the built-in double2
// (the workaround this page gives for the MSVC/nvcc incompatibility).
struct my_double2 {
    double x;  // first component (printed as the real part)
    double y;  // second component (printed as the imaginary part)
};

// Binary functor that adds two my_double2 values component-wise.
// It is passed to thrust::reduce as the reduction operator.
struct add_my_double2 {
    // Marked __host__ __device__ (instead of __device__ only) so the same
    // functor can be invoked from host code as well as from device code.
    //
    // a, b: the two operands.
    // Returns a my_double2 whose x is a.x + b.x and whose y is a.y + b.y.
    __host__ __device__ my_double2 operator()(const my_double2& a, const my_double2& b) const {
        my_double2 r;
        r.x = a.x + b.x;
        r.y = a.y + b.y;
        return r;
    }
};

// Fills a host array of N my_double2 values with (1, 2), copies it to the
// device, sums it with thrust::reduce using add_my_double2, and prints both
// components.
//
// Returns EXIT_SUCCESS on success, or EXIT_FAILURE if a host or device
// allocation or the host-to-device copy fails.
int main(int argc, char** argv)
{
    (void)argc;  // command-line arguments are not used
    (void)argv;

    const int N = 20;
    const size_t bytes = N * sizeof(my_double2);

    // --- Host
    my_double2* ha = (my_double2*) malloc(bytes);
    if (ha == NULL) {
        fprintf(stderr, "malloc of %lu bytes failed\n", (unsigned long)bytes);
        return EXIT_FAILURE;
    }
    for (int i = 0; i < N; ++i) {
        ha[i].x = 1.0;
        ha[i].y = 2.0;
    }

    // --- Device
    my_double2* da = NULL;
    cudaError_t err = cudaMalloc((void**)&da, bytes);
    if (err != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed: %s\n", cudaGetErrorString(err));
        free(ha);
        return EXIT_FAILURE;
    }

    err = cudaMemcpy(da, ha, bytes, cudaMemcpyHostToDevice);
    if (err != cudaSuccess) {
        fprintf(stderr, "cudaMemcpy failed: %s\n", cudaGetErrorString(err));
        cudaFree(da);
        free(ha);
        return EXIT_FAILURE;
    }

    // Wrap the raw device pointer so Thrust treats [da, da + N) as a device range.
    thrust::device_ptr<my_double2> dev_ptr_1(da);
    thrust::device_ptr<my_double2> dev_ptr_2(da + N);

    my_double2 init;
    init.x = init.y = 0.0;

    // Progress trace emitted just before the reduction (kept from the original).
    std::cout << "here3\n";
    my_double2 sum = thrust::reduce(dev_ptr_1, dev_ptr_2, init, add_my_double2());

    std::cout << " Real part = " << sum.x << "; Imaginary part = " << sum.y << std::endl;

    // Release the buffers; the original never freed them.
    cudaFree(da);
    free(ha);

    getch();  // keep the console window open until a key is pressed

    return EXIT_SUCCESS;
}

最佳答案

这是 MSVC 与 nvcc 之间一个已知的不兼容问题,例如可参阅此处(here)。解决方案是定义您自己的 double2 版本并使用它。

仅供引用,我可以在具有 CUDA 5.5 的 Linux 64 位机器上正确编译并运行您的代码。

关于使用 double2 数组进行 CUDA Thrust 归约,我们在 Stack Overflow 上找到一个类似的问题: https://stackoverflow.com/questions/18123407/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com