c++ - Thrust CUDA 将 char * 分配给对象的 device_vector

c++ - Thrust CUDA 将 char * 分配给对象的 device_vector

转载作者：搜寻专家更新时间：2023-10-31 01:04:20

我在将 host_vector 深度复制到 device_vector 时遇到问题。我认为我在修改存储在 device_vector 中的元素的值时遇到问题。您可以在底部找到一个可编译版本，但有问题的代码如下(我在触发段错误的行上加了星号):

// Excerpt of the failing device path. Lines prefixed with **** are the ones
// the author marked as triggering the segmentation fault.
// Build host-side handles: each CharArr records a host string pointer and its strlen().
thrust::host_vector<CharArr> hostToSort(size);
            thrust::host_vector<long long> hostToSortRow(size);
            for(int i =0; i < size; i++){
                CharArr sortRow;
                sortRow.value = arrayToSort[i];
                sortRow.length = strlen(arrayToSort[i]);
                hostToSort[i] = sortRow;
                hostToSortRow[i] = arrayToSortRow[i];
            }
            // Copies the handles to the device; the copied `value` members are still host pointers.
            thrust::device_vector<CharArr> deviceArrayToSort =hostToSort;
            //thrust::copy(hostToSort.begin(),hostToSort.end(),deviceArrayToSort.begin());
//           = ;// (arrayToSort,arrayToSort + size);
            thrust::device_vector<long long> deviceArrayToSortRow = hostToSortRow;//(arrayToSortRow,arrayToSortRow + size);

           // thrust::sort(deviceArrayToSort.begin(),deviceArrayToSort.end());
            for(int i = 0; i < size; i++){

                char * hostString = hostToSort[i].value;
                int sizeString = strlen(hostString);
                char * deviceString = 0;

                // Raw device address of element i, stored in an ordinary host pointer variable.
                CharArr * deviceCharArr = (&deviceArrayToSort[i]).get();
                // NOTE(review): passes the (null) value of deviceString cast to void**,
                // not &deviceString, so deviceString is never set by this call.
                cudaMalloc((void **) deviceString,sizeString);
                cudaMemcpy(deviceString,hostString,sizeString,cudaMemcpyHostToDevice);
                // NOTE(review): both marked lines dereference a device address from host code.
            ****    deviceCharArr->length = sizeString;
            ****    deviceCharArr->value = deviceString;
            }

实际发生的情况是：当执行到真正的赋值语句(例如 deviceCharArr->value = deviceString)时，程序会触发段错误。我是 CUDA 的新手，如果答案很明显，我深表歉意，但我找不到很多在设备上分配 char * 的例子。

完整的可编译版本在这里

#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/reduce.h>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/sort.h>

#include <thrust/reduce.h>


// Lightweight, non-owning handle to a character string.
struct CharArr {
    char * value;  // first character of the referenced string
    int length;    // number of characters the handle covers
};


// "Less than" functor for CharArr handles. Strings are ordered character by
// character (plain `char` comparison); when one string is a prefix of the
// other, the shorter one sorts first. Equal strings compare as not-less.
struct CharArrayCmp{
    // Returns true when the string referenced by o1 sorts before that of o2.
    // const-qualified so the functor can also be invoked through a const
    // object or a const reference.
    __host__ __device__
      bool operator()(const CharArr & o1, const CharArr & o2) const {
          return compare(o1.value,o1.length,o2.value,o2.length);
      }

    // Compares the first lenSrc characters at src with the first lenDest
    // characters at dst. Only the lengths passed in bound the reads; no
    // terminating NUL is required.
    __host__ __device__ bool compare (const char * src, int lenSrc, const char * dst, int lenDest) const
    {
        // Only the common prefix can be compared position by position.
        const int common = (lenSrc < lenDest) ? lenSrc : lenDest;
        for(int i = 0; i < common; i++){
            if(src[i] != dst[i]){
                return src[i] < dst[i];
            }
        }
        // Identical prefix: the shorter string comes first, equal lengths are not "less".
        return lenSrc < lenDest;
    }
};


// Sorts `size` NUL-terminated strings on the host with thrust::sort_by_key.
//
// arrayToSort     in/out: string pointers; on return they are rearranged into
//                 the order defined by CharArrayCmp (the characters themselves
//                 are not moved or copied).
// arrayToSortRow  in/out: one value per string, permuted together with it.
// size            number of entries in both arrays; nothing is sorted when it
//                 is zero or negative.
void sortCharArrayHost(char ** arrayToSort, long long * arrayToSortRow,long long size){
    std::cout <<"about to start LongIndex" <<std::endl;

    // Nothing to do, and a negative size must not reach the vector constructors.
    if(size <= 0){
        return;
    }

    thrust::host_vector<CharArr> hostToSort(size);
    thrust::host_vector<long long> hostToSortRow(size);

    // The index is a long long because `size` is one; an int counter could
    // overflow before reaching `size`.
    for(long long i = 0; i < size; i++){
        CharArr sortRow;
        sortRow.value = arrayToSort[i];
        // CharArr stores the length as int, so the size_t result is narrowed explicitly.
        sortRow.length = static_cast<int>(strlen(arrayToSort[i]));
        hostToSort[i] = sortRow;
        hostToSortRow[i] = arrayToSortRow[i];
    }

    // Keys are the string handles, values are the row numbers.
    thrust::sort_by_key(hostToSort.begin(),hostToSort.end(),hostToSortRow.begin(),CharArrayCmp());

    // Copy the sorted order back into the caller's arrays.
    for(long long i = 0; i < size; i++){
        arrayToSort[i] = hostToSort[i].value;
    }
    thrust::copy(hostToSortRow.begin(),hostToSortRow.end(),arrayToSortRow);
}
// Attempted device-side variant of the string sort: builds CharArr handles on
// the host, copies them into a device_vector, tries to give every handle its
// own device copy of the string, sorts with thrust::sort_by_key and copies the
// results back into arrayToSort / arrayToSortRow.
//
// NOTE(review): this is the version that crashes. The per-string loop and the
// copy-back loop both dereference device addresses from host code, and the
// cudaMalloc call does not receive the address of deviceString.
void sortCharArrayDevice(char ** arrayToSort, long long * arrayToSortRow,long long size){
    std::cout <<"about to start LongIndex" <<std::endl;

            // Host-side handles: pointer to each host string plus its strlen().
            thrust::host_vector<CharArr> hostToSort(size);
            thrust::host_vector<long long> hostToSortRow(size);
            for(int i =0; i < size; i++){
                CharArr sortRow;
                sortRow.value = arrayToSort[i];
                sortRow.length = strlen(arrayToSort[i]);
                hostToSort[i] = sortRow;
                hostToSortRow[i] = arrayToSortRow[i];
            }
            // Element-wise copy to the device; each copied `value` is still a host pointer.
            thrust::device_vector<CharArr> deviceArrayToSort =hostToSort;
            //thrust::copy(hostToSort.begin(),hostToSort.end(),deviceArrayToSort.begin());
//           = ;// (arrayToSort,arrayToSort + size);
            thrust::device_vector<long long> deviceArrayToSortRow = hostToSortRow;//(arrayToSortRow,arrayToSortRow + size);

           // thrust::sort(deviceArrayToSort.begin(),deviceArrayToSort.end());
            for(int i = 0; i < size; i++){

                char * hostString = hostToSort[i].value;
                // strlen() excludes the terminating NUL, so the NUL is not part of the copy below.
                int sizeString = strlen(hostString);
                char * deviceString = 0;

                // Raw device address of element i held in a plain host pointer.
                CharArr * deviceCharArr = (&deviceArrayToSort[i]).get();
                // NOTE(review): the argument is the null value of deviceString cast to
                // void**, not &deviceString, so deviceString stays 0. The return codes of
                // cudaMalloc and cudaMemcpy are not checked.
                cudaMalloc((void **) deviceString,sizeString);
                cudaMemcpy(deviceString,hostString,sizeString,cudaMemcpyHostToDevice);
                // NOTE(review): writes through a device address from host code; this is
                // where the reported segmentation fault occurs.
                deviceCharArr->length = sizeString;
                deviceCharArr->value = deviceString;
            }

            thrust::sort_by_key(deviceArrayToSort.begin(),deviceArrayToSort.end(),deviceArrayToSortRow.begin(),CharArrayCmp());
        //copy the contents back into our original array to sort now sorted
            for(int i =0; i < size; i++){
                // NOTE(review): reads through a device address from host code as well.
                arrayToSort[i] = (&deviceArrayToSort[i]).get()->value;
            }
            thrust::copy(deviceArrayToSortRow.begin(),deviceArrayToSortRow.end(),arrayToSortRow);



}
// Capacity of one test string buffer, including the terminating NUL.
static const int kTestStringCapacity = 8;

// Restores the unsorted test input: ptrs[i] points at storage[i] and rows[i] = i.
static void resetSortInput(char (*storage)[kTestStringCapacity], char ** ptrs, long long * rows, int count){
    for(int i = 0; i < count; i++){
        ptrs[i] = storage[i];
        rows[i] = i;
    }
}

// Prints every row number next to the string currently at that position.
static void printSortResult(char * const * ptrs, const long long * rows, int count){
    for(int i = 0; i < count; i++){
        std::cout<<"Row is "<<rows[i]<<" String is "<<ptrs[i]<<std::endl;
    }
}

// Test driver: sorts the same ten strings first with the host path, then with
// the device path, printing the resulting order after each run.
int main()
{
    const int count = 10;

    // Writable character buffers: the sort functions take char**, and binding a
    // string literal to a non-const char* is not valid C++ since C++11.
    char storage[count][kTestStringCapacity] = {
        "zyxw", "abcd", "defg", "werd", "aasd",
        "zwedew", "asde", "rurt", "ntddwe", "erbfde"
    };
    // Automatic storage instead of new[]: nothing to free, nothing leaks.
    char * charArr[count];
    long long rows[count];

    resetSortInput(storage, charArr, rows, count);
    sortCharArrayHost(charArr,rows,count);
    printSortResult(charArr, rows, count);

    // The first sort permuted both arrays, so start again from the original order.
    resetSortInput(storage, charArr, rows, count);
    sortCharArrayDevice(charArr,rows,count);
    printSortResult(charArr, rows, count);

    return 0;
}

最佳答案

正如 JackOLantern 已经指出的那样，下面这种做法是不可接受的:

// this creates an allocation on the device
thrust::device_vector<CharArr> deviceArrayToSort =hostToSort;
// this takes the (device) address of an element and assigns it to a pointer variable
CharArr * deviceCharArr = (&deviceArrayToSort[i]).get();
// this then dereferences that device pointer in host code, which is illegal
deviceCharArr->length = sizeString;

在 CUDA 中，您不允许在主机代码中取消引用设备指针，反之亦然。

您似乎有以下数据集:

1. 待排序的字符串
2. 由 CharArr 对象组成的字符串“句柄”数组，每个对象都包含指向字符串开头的指针和字符串的长度
3. 字符串索引数组(即 0、1、2、...)

你想根据上面的第 1 项对第 2 项和第 3 项进行排序。如果可能的话，Thrust 更“喜欢”把所有待排序的内容放在一个或两个 vector 中。让我们尝试以下操作:

1. 将所有字符串连接成一个 char vector。
2. 在另一个 int vector 中标记每个字符串的起始索引。相邻起始索引之差就是每个字符串的长度。我们将使用 zip_iterator 把每个字符串的起始索引和长度组合成一个 thrust::tuple，供比较器使用。
3. 使用所需的比较仿函数对“元组数组”进行排序(即同时对索引和长度进行排序)。其他数据的任何必要重新排列都可以使用重新排序后的索引 vector 来完成。
如果您还需要重新排序的字符串索引(即 0、1、2 等)，您可以很容易地创建该 vector 并将其作为第三个元素添加到要排序的元组中。

请注意，上述方法完全避免了指针的使用；正如您所见，在同一数据的主机拷贝和设备拷贝之间管理指针可能很麻烦。

这是一个完整的例子:

$ cat t439.cu
#include <iostream>

#include <thrust/copy.h>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/sort.h>
#include <thrust/tuple.h>

#define NUM_STRINGS 10



// "Less than" functor for tuples whose element 0 is a string's start offset
// and whose element 1 is its length, both relative to one concatenated
// character buffer. Strings are ordered character by character; when one is a
// prefix of the other, the shorter one sorts first.
struct stringCmp{

  // Start of the concatenated character buffer. It is dereferenced inside
  // operator(), so it must be valid wherever the comparison executes.
  const char * strings;

  // Takes pointer-to-const: the comparator only reads the buffer. A char*
  // argument still converts implicitly, so existing callers are unaffected.
  stringCmp(const char * _strings) : strings(_strings) {}

  // Returns true when the string described by o1 sorts before that of o2.
  // const-qualified so the functor can be invoked through a const object.
  template<typename myTuple>
    __host__ __device__
      bool operator()(const myTuple & o1, const myTuple & o2) const {
        const int idxSrc = thrust::get<0>(o1);
        const int lenSrc = thrust::get<1>(o1);
        const int idxDst = thrust::get<0>(o2);
        const int lenDst = thrust::get<1>(o2);

        // Only the common prefix can be compared position by position.
        const int common = (lenSrc < lenDst) ? lenSrc : lenDst;
        for(int i = 0; i < common; i++){
            const char a = strings[idxSrc+i];
            const char b = strings[idxDst+i];
            if(a != b){
                return a < b;
            }
        }
        // Identical prefix: the shorter string comes first, equal lengths are not "less".
        return lenSrc < lenDst;
    }
};

void sortCharArrayDevice(char ** arr, int *rows, int num_str){

    thrust::host_vector<char> h_strings;
    thrust::host_vector<int>  h_st_idx(num_str);
    thrust::host_vector<int>  h_len(num_str);
    thrust::host_vector<int>  h_rows(num_str);
    // concatenate strings
    // assume no zero length strings
    h_st_idx[0] = 0;
    for (int i = 0; i < num_str; i++){
      int sidx = 0;
      while (arr[i][sidx] != '\0'){
        h_strings.push_back(arr[i][sidx]);
        sidx++;}
      h_len[i] = sidx;
      if (i < num_str-1) h_st_idx[i+1] = h_st_idx[i] + sidx;
      h_rows[i] = rows[i];
      }
    // copy data to device
    thrust::device_vector<char> d_strings = h_strings;
    thrust::device_vector<int>  d_st_idx = h_st_idx;
    thrust::device_vector<int>  d_len = h_len;
    thrust::device_vector<int>  d_rows = h_rows;
    // sort on device
    thrust::sort(thrust::make_zip_iterator(thrust::make_tuple(d_st_idx.begin(), d_len.begin(), d_rows.begin())), thrust::make_zip_iterator(thrust::make_tuple(d_st_idx.end(), d_len.end(), d_rows.end())), stringCmp(thrust::raw_pointer_cast(d_strings.data())));
    thrust::copy(d_rows.begin(), d_rows.end(), rows);
}


int main()
{
    char ** charArr = new char*[NUM_STRINGS];

    charArr[0] = "zyxw";
    charArr[1] = "abcd";
    charArr[2] = "defg";
    charArr[3] = "werd";
    charArr[4] = "aasd";
    charArr[5] = "zwedew";
    charArr[6] = "asde";
    charArr[7] = "rurt";
    charArr[8] = "ntddwe";
    charArr[9] = "erbfde";

    int * rows = new int[NUM_STRINGS];
    for(int i = 0; i < NUM_STRINGS;i++ ){
        rows[i] = i;
    }

    sortCharArrayDevice(charArr,rows,NUM_STRINGS);

        for(int i = 0; i < NUM_STRINGS; i++){
            std::cout<<"Row is "<<rows[i]<<" String is "<<charArr[rows[i]]<<std::endl;

        }

}
$ nvcc -arch=sm_20 -o t439 t439.cu
$ ./t439
Row is 4 String is aasd
Row is 1 String is abcd
Row is 6 String is asde
Row is 2 String is defg
Row is 9 String is erbfde
Row is 8 String is ntddwe
Row is 7 String is rurt
Row is 3 String is werd
Row is 5 String is zwedew
Row is 0 String is zyxw
$

关于c++ - Thrust CUDA 将 char * 分配给对象的 device_vector，我们在Stack Overflow上找到一个类似的问题： https://stackoverflow.com/questions/24126610/

文章推荐： c++ - 为什么指针的内容会改变？

文章推荐： c++ - Boost Spirit - 将列表提取为单个字符串

文章推荐： C++ Iterator generalize - 不同的容器，相同的类型

文章推荐： c++ - c2143, c4430 错误不知道为什么

cuda - Thrust::min_element 在 Thrust::device_vector 上发生崩溃(CUDA Thrust)
以下 CUDA Thrust 程序崩溃: #include #include int main(void) { thrust::device_vector vec; for (int i(
c++ - thrust::device_vector 使用 thrust::replace 或 thrust::transform 自定义仿函数/谓词
我使用 cuda 内核对推力 vector 执行 S 形激活: thrust::device_vector output = input; float * output_ptr = thrust::r
c++ - thrust::complex with thrust reduce 无法编译
我一直在尝试实现一些需要在 thrust::complexes 上调用 reduce 的代码，编译器向我发出错误消息: cannot pass an argument with a user-prov
c++ - cuda thrust::for_each with thrust::counting_iterator
我是 CUDA 的新手，而且很吃力。当提供 counting_iterator 时，我似乎无法让 thrust::for_each 算法工作。这是我的简单仿函数: struct print_Funct
c++ - thrust::device_vector of thrust::complex 编译错误，可能是由于错误的实现
我实际上正在学习CUDA和thrust，我正在尝试用.cpp做一个项目，。 hpp 文件和 .cu, .cuh 文件。因此，我做了第一个小实现(见下面的代码)，但是我有一个编译错误。这是 output
c++ - 如何使用 CUDA Thrust 执行策略覆盖 Thrust 的低级设备内存分配器
我想覆盖低级 CUDA 设备内存分配器(实现为 thrust::system::cuda::detail::malloc())，以便它使用自定义分配器而不是直接调用 cudaMalloc()在主机 (
c++ - 如何将二维 thrust::device_vector> 转换为原始指针
当我在main函数中使用thrust::device_vector时，可以正确的传递给内核函数，代码如下: thrust::device_vector device_a(2); thrust::h
c++ - Thrust device vector of thrust device vector 推力装置 vector
我在 CUDA 中使用这种 vector 方法的 vector 方法，因为我仍然习惯于 Matlab 和 Python 风格的编程环境。我能够从设备 vector 中的主机端提取数据，但现在我不确定如
c++ - 命名空间 thrust::system::cuda::thrust 中无法解释的错误，特别是在 "system_error"和 "cuda_category"
我正在尝试使用 thrust::raw_pointer_cast 转换原始指针以捕获仿函数中的输出。我尝试了多种方法来将指针传递给 float ，但不断出现内存冲突和两个智能感知错误 thrust::
thrust 学习笔记
gather与scatter正好相反： scatter是顺序输入根据map确定撒点输出位置。 #include #include #include ... // mark even indice
cuda - Thrust 是同步还是异步？
我是 Thrust 的新手，有件事我不明白。 Thrust 是异步还是同步？如果我编写以下代码，所花费的时间不是0。但在其他标签中，其他用户报告的结果为0。真相是什么？ clock_t start,
thrust - 编译器不支持#pragma Once
我的编译器 (PGI) 不支持 #pragma once 但是我想包含的库(推力)使用它们。这个问题有解决办法吗？最佳答案您可以使用guardonce将 #pragma Once 语句转换为标准
cuda - Thrust::remove_if的返回值类型
我的设备上有两个整数数组 dmap 和 dflag相同的长度我用推力设备指针 dmapt 和dflagt dmap 数组中有一些值为 -1 的元素。我想要删除这些 -1 和相应的值dflag 数组。
cuda - Thrust 如何知道如何自动配置它启动的内核？
Thrust 能够对编码器隐藏各种细节，并且声称 Thrust 会根据系统规范在一定程度上设置参数。 Thrust 如何选择最佳参数化，以及如何处理不同机器上的各种代码？ Thrust 实现这种通用库
cuda - Thrust 设备管理和内核
我在当前项目中使用了 Thrust，所以我不必写 device_vector自己抽象或(分段)扫描内核。到目前为止，我已经使用推力抽象完成了我的所有工作，但是对于简单的内核或不容易转换为 for_e
c++ - Thrust 中的虚方法调用
我想做这样的事情: BaseFunctor* f = new MyFunctor(); thrust::transform(it1,it2,MyFunctor); 目标是让用户能够传递不同的仿函数(具
c++ - Thrust 对主机上运行的自定义仿函数的结果不正确
当我尝试实现任何仿函数时，我得到了不好的结果。例如，我尝试了一个类似于 thrust::negate 的否定仿函数下面是一个示例代码，它使用内置的否定仿函数产生了良好的结果: int data[10]
在 thrust 中调用用户定义的函数
我正在使用 OpenCV 加载一个 .png 文件，我想使用 thrust 库提取它的蓝色强度值。我的代码是这样的: 使用 OpenCV IplImage 指针加载图像将图像数据复制到thrust
c++ - Thrust+boost代码编译错误
我有一个奇怪的问题，我无法解决。它与 boost +推力代码相关联。代码: #include #include #include #include #include #include #
cuda - 使用 Thrust 的向量数组
是否可以使用 Thrust 创建一个 device_vectors 数组？我知道我不能创建一个 device_vector 的 device_vector，但是我将如何创建一个 device_vect

搜寻专家

个人简介

我是一名优秀的程序员,十分优秀！

作者热门文章

滴滴打车优惠券免费领取

全站热门文章

首页

博学

6Ren·AI

商城

c++ - Thrust CUDA 将 char * 分配给对象的 device_vector