gpt4 book ai didi

c++ - 使用 CUDA Thrust 置换迭代器复制数组的特定元素

转载 作者:行者123 更新时间:2023-11-28 00:26:24 29 4
gpt4 key购买 nike

我有一个 glm::vec3 数组,共 count * 3 个元素,每 3 个连续元素构成一个“项”。另有一个 int 数组,存放要复制的项的索引。例如:

thrust::device_vector<glm::vec3> vals(9);
// vals contains 9 vec3, which represent 3 "items"
// vals[0], vals[1], vals[2] are the first "item",
// vals[3], vals[4], vals[5] are the second "item"...

int idcs[] = {0, 2};
// index 0 and 2 should be copied, i.e.
// vals[0..2] and vals[6..8]

我尝试使用置换迭代器,但我无法让它工作。我的做法是:

thrust::copy(
thrust::make_permutation_iterator(vals, idcs),
thrust::make_permutation_iterator(vals, idcs + 2),
target.begin()
);

当然,这只会复制 vals[0] 和 vals[2],而不是 vals[0]、vals[1]、vals[2]、vals[6]、vals[7]、vals[8]。

是否可以使用 Thrust 将所需的值从一个缓冲区复制到另一个缓冲区?

最佳答案

我认为,可以把跨步范围(strided range)的思路与你的置换迭代器(permutation iterator)结合起来,实现你想要的效果。

基本思路是:用置换迭代器选出要复制的“组”,再把 3 个跨步范围迭代器组合成一个 zip 迭代器,用来取出每组中的 3 个元素。输入和输出各需要一个这样的 zip 迭代器。下面是一个完整的示例,其中用 uint3 代替 glm::vec3:

$ cat t484.cu
#include <cstddef>
#include <iostream>
#include <vector_types.h>
#include <thrust/copy.h>
#include <thrust/device_vector.h>
#include <thrust/functional.h>
#include <thrust/host_vector.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/permutation_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/tuple.h>


#define DSIZE 18


// Views the sequence starting at `first` as a range that visits positions
// 0, stride, 2*stride, ... of the underlying iterator, stopping before `last`.
template <typename Iterator>
class strided_range
{
public:

    typedef typename thrust::iterator_difference<Iterator>::type difference_type;

    // Converts a position within the strided view into an offset from `first`.
    struct stride_functor : public thrust::unary_function<difference_type,difference_type>
    {
        difference_type stride;

        stride_functor(difference_type step)
            : stride(step) {}

        __host__ __device__
        difference_type operator()(const difference_type& position) const
        {
            return stride * position;
        }
    };

    // counting (0, 1, 2, ...) -> transform (0, stride, 2*stride, ...) -> permutation over `first`
    typedef typename thrust::counting_iterator<difference_type> CountingIterator;
    typedef typename thrust::transform_iterator<stride_functor, CountingIterator> TransformIterator;
    typedef typename thrust::permutation_iterator<Iterator,TransformIterator> PermutationIterator;

    // type of the strided_range iterator
    typedef PermutationIterator iterator;

    // construct strided_range for the range [first,last)
    strided_range(Iterator first, Iterator last, difference_type stride)
        : first(first), last(last), stride(stride) {}

    // Iterator to the first strided element (offset 0 from `first`).
    iterator begin() const
    {
        const CountingIterator positions(0);
        const TransformIterator offsets(positions, stride_functor(stride));
        return PermutationIterator(first, offsets);
    }

    // One-past-the-end iterator: begin() advanced by the number of strided elements.
    iterator end() const
    {
        const difference_type length = last - first;
        // round up so a trailing partial stride still contributes its first element
        const difference_type count = (length + (stride - 1)) / stride;
        return begin() + count;
    }

protected:
    Iterator first;
    Iterator last;
    difference_type stride;
};

typedef thrust::device_vector<uint3>::iterator Iter;

int main(){
// set up test data
int idcs[] = {0, 2, 5};
unsigned num_idcs = sizeof(idcs)/sizeof(int);
thrust::host_vector<uint3> h_vals(DSIZE);
for (int i = 0; i < DSIZE; i ++) {
h_vals[i].x = i;
h_vals[i].y = 100+i;
h_vals[i].z = 1000+i;}
thrust::device_vector<uint3> d_target(num_idcs*3);
thrust::host_vector<int> h_idcs(idcs, idcs + num_idcs);
thrust::device_vector<int> d_idcs = h_idcs;
thrust::device_vector<uint3> d_vals = h_vals;
// set up strided ranges for input, output
strided_range<Iter> item_1(d_vals.begin() , d_vals.end(), 3);
strided_range<Iter> item_2(d_vals.begin()+1, d_vals.end(), 3);
strided_range<Iter> item_3(d_vals.begin()+2, d_vals.end(), 3);
// set up strided ranges for output
strided_range<Iter> out_1(d_target.begin() , d_target.end(), 3);
strided_range<Iter> out_2(d_target.begin()+1, d_target.end(), 3);
strided_range<Iter> out_3(d_target.begin()+2, d_target.end(), 3);
// copy from input to output
thrust::copy(thrust::make_permutation_iterator(thrust::make_zip_iterator(thrust::make_tuple(item_1.begin(), item_2.begin(), item_3.begin())), d_idcs.begin()), thrust::make_permutation_iterator(thrust::make_zip_iterator(thrust::make_tuple(item_1.begin(), item_2.begin(), item_3.begin())), d_idcs.end()), thrust::make_zip_iterator(thrust::make_tuple(out_1.begin(), out_2.begin(), out_3.begin())));
// print out results
thrust::host_vector<uint3> h_target = d_target;
for (int i = 0; i < h_target.size(); i++)
std::cout << "index: " << i << " x: " << h_target[i].x << " y: " << h_target[i].y << " z: " << h_target[i].z << std::endl;
return 0;
}
$ nvcc -arch=sm_20 -o t484 t484.cu
$ ./t484
index: 0 x: 0 y: 100 z: 1000
index: 1 x: 1 y: 101 z: 1001
index: 2 x: 2 y: 102 z: 1002
index: 3 x: 6 y: 106 z: 1006
index: 4 x: 7 y: 107 z: 1007
index: 5 x: 8 y: 108 z: 1008
index: 6 x: 15 y: 115 z: 1015
index: 7 x: 16 y: 116 z: 1016
index: 8 x: 17 y: 117 z: 1017
$

关于c++ - 使用 CUDA Thrust 置换迭代器复制数组的特定元素,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/24793558/

29 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com