gpt4 book ai didi

c++ - Thrust CUDA 将 char * 分配给对象的 device_vector

转载 作者:搜寻专家 更新时间:2023-10-31 01:04:20 25 4
gpt4 key购买 nike

我在将 host_vector 深度复制到 device_vector 时遇到问题。我认为我在修改存储在 device_vector 中的元素的值时遇到问题。您可以在底部找到一个可编译版本,但有问题的代码如下(我在触发段错误的行上加了星号):

// Excerpt from the question: builds host-side CharArr handles for each input
// string, copies them to the device, then tries to give every device element
// its own device-side copy of the string.
// NOTE(review): this is the failing code as posted; it is kept unchanged.
thrust::host_vector<CharArr> hostToSort(size);
thrust::host_vector<long long> hostToSortRow(size);
// Wrap each C string in a CharArr (pointer + strlen) and mirror the row ids.
for(int i =0; i < size; i++){
CharArr sortRow;
sortRow.value = arrayToSort[i];
sortRow.length = strlen(arrayToSort[i]);
hostToSort[i] = sortRow;
hostToSortRow[i] = arrayToSortRow[i];
}
// Copies the CharArr structs themselves; each .value still holds the host
// address taken from arrayToSort[i].
thrust::device_vector<CharArr> deviceArrayToSort =hostToSort;
//thrust::copy(hostToSort.begin(),hostToSort.end(),deviceArrayToSort.begin());
// = ;// (arrayToSort,arrayToSort + size);
thrust::device_vector<long long> deviceArrayToSortRow = hostToSortRow;//(arrayToSortRow,arrayToSortRow + size);

// thrust::sort(deviceArrayToSort.begin(),deviceArrayToSort.end());
for(int i = 0; i < size; i++){

char * hostString = hostToSort[i].value;
int sizeString = strlen(hostString);
char * deviceString = 0;

// Raw address of element i inside the device_vector's storage.
CharArr * deviceCharArr = (&deviceArrayToSort[i]).get();
// NOTE(review): this passes the *value* of deviceString (0) cast to void**,
// not &deviceString, so deviceString is still 0 after the call. The return
// codes of both CUDA calls are ignored.
cudaMalloc((void **) deviceString,sizeString);
// Copies strlen bytes only, i.e. without the terminating NUL.
cudaMemcpy(deviceString,hostString,sizeString,cudaMemcpyHostToDevice);
// The leading "****" on the next two lines is the author's marker for the
// statements that crash; it is not part of the code. Both statements
// dereference deviceCharArr (an address obtained from the device_vector)
// in host code.
**** deviceCharArr->length = sizeString;
**** deviceCharArr->value = deviceString;
}

实际发生的情况是:当程序执行到真正的赋值语句
deviceCharArr->value = deviceString 时,就会触发段错误。我是 CUDA 的新手,如果答案很明显,我深表歉意,但我找不到多少在设备上为 char * 分配内存的例子。

完整的可编译版本在这里

#include <cstring>
#include <iostream>

#include <thrust/copy.h>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/reduce.h>
#include <thrust/sort.h>


// Handle for one string: a pointer to its first character plus the number of
// characters that belong to it. The struct does not own the characters.
struct CharArr {
    char * value;   // first character of the string
    int length;     // number of characters readable through `value`
};


// "Less than" functor that orders CharArr handles lexicographically by their
// bytes; when one string is a prefix of the other, the shorter one sorts
// first. Equal strings compare as not-less in both directions.
//
// Both members are const-qualified so the functor can be invoked through a
// const object or const reference.
//
// The pointers are dereferenced wherever the functor runs, so they must be
// valid in that memory space (host pointers for a host sort, device pointers
// for a device sort).
struct CharArrayCmp{
    // Returns true when the string described by o1 orders before o2.
    __host__ __device__
    bool operator()(const CharArr & o1, const CharArr & o2) const {
        return compare(o1.value, o1.length, o2.value, o2.length);
    }

    // Compares src[0, lenSrc) with dst[0, lenDest).
    // Returns true when src orders strictly before dst.
    __host__ __device__
    bool compare(const char * src, int lenSrc, const char * dst, int lenDest) const
    {
        const int common = (lenSrc < lenDest) ? lenSrc : lenDest;
        for (int i = 0; i < common; i++) {
            // Compare as unsigned bytes (the strcmp/memcmp convention) so the
            // order of bytes >= 0x80 does not depend on whether plain `char`
            // is signed on the target platform.
            const unsigned char a = static_cast<unsigned char>(src[i]);
            const unsigned char b = static_cast<unsigned char>(dst[i]);
            if (a != b) {
                return a < b;
            }
        }
        // Common prefix is identical: the shorter string sorts first.
        return lenSrc < lenDest;
    }
};


// Sorts `size` NUL-terminated strings on the host with thrust::sort_by_key.
//
//   arrayToSort    - in/out: array of `size` string pointers; on return the
//                    pointers are permuted into CharArrayCmp order. The
//                    characters themselves are not moved or copied.
//   arrayToSortRow - in/out: `size` row ids, permuted together with the
//                    strings so each id stays paired with its string.
//   size           - number of entries; values <= 0 leave both arrays
//                    untouched.
void sortCharArrayHost(char ** arrayToSort, long long * arrayToSortRow,long long size){
    std::cout <<"about to start LongIndex" <<std::endl;

    if (size <= 0) {
        return;
    }

    // Keys: one (pointer, length) handle per string. Values: the row ids.
    thrust::host_vector<CharArr> hostToSort(size);
    thrust::host_vector<long long> hostToSortRow(size);
    // The index is long long to match `size`; an int counter would overflow
    // before reaching a size above INT_MAX.
    for (long long i = 0; i < size; i++) {
        CharArr sortRow;
        sortRow.value = arrayToSort[i];
        sortRow.length = static_cast<int>(strlen(arrayToSort[i]));
        hostToSort[i] = sortRow;
        hostToSortRow[i] = arrayToSortRow[i];
    }

    // host_vector iterators make Thrust run this sort on the host, so the
    // comparator may dereference the host string pointers directly.
    thrust::sort_by_key(hostToSort.begin(), hostToSort.end(),
                        hostToSortRow.begin(), CharArrayCmp());

    // Write the permuted pointers and row ids back to the caller's arrays.
    for (long long i = 0; i < size; i++) {
        arrayToSort[i] = hostToSort[i].value;
    }
    thrust::copy(hostToSortRow.begin(), hostToSortRow.end(), arrayToSortRow);
}
// Sorts `size` NUL-terminated strings on the device with thrust::sort_by_key.
//
//   arrayToSort    - in/out: array of `size` host string pointers; on return
//                    the same host pointers, permuted into CharArrayCmp order.
//   arrayToSortRow - in/out: `size` row ids, permuted together with the
//                    strings so each id stays paired with its string.
//   size           - number of entries; values <= 0 leave both arrays
//                    untouched.
//
// How it works: all characters are packed into one device buffer, and each
// sort key is a CharArr whose `value` points into that buffer. The CharArr
// structs are filled in on the host (the device addresses are only stored,
// never dereferenced there) and uploaded with a single device_vector copy.
// The sorted payload is the original position of each string, which is then
// used on the host to permute the caller's arrays.
//
// Thrust containers own every allocation, so nothing is leaked, and Thrust
// reports CUDA failures by throwing instead of returning an ignored status.
void sortCharArrayDevice(char ** arrayToSort, long long * arrayToSortRow,long long size){
    std::cout <<"about to start LongIndex" <<std::endl;

    if (size <= 0) {
        return;
    }

    // Pass 1 (host): measure every string and assign it a slot in the
    // concatenated character buffer.
    thrust::host_vector<long long> hostOffsets(size);
    thrust::host_vector<int> hostLengths(size);
    long long totalChars = 0;
    for (long long i = 0; i < size; i++) {
        hostOffsets[i] = totalChars;
        hostLengths[i] = static_cast<int>(strlen(arrayToSort[i]));
        totalChars += hostLengths[i];
    }

    // Pass 2 (host): pack the characters back to back. Terminating NULs are
    // not copied because the comparator only reads `length` characters.
    thrust::host_vector<char> hostChars(totalChars);
    for (long long i = 0; i < size; i++) {
        thrust::copy(arrayToSort[i], arrayToSort[i] + hostLengths[i],
                     hostChars.begin() + hostOffsets[i]);
    }

    // One transfer for all characters instead of one cudaMalloc/cudaMemcpy
    // per string.
    thrust::device_vector<char> deviceChars = hostChars;
    char * deviceBase = thrust::raw_pointer_cast(deviceChars.data());

    // Build the keys on the host. `value` holds a device address here; it is
    // computed with pointer arithmetic only and is first dereferenced inside
    // the comparator running on the device.
    thrust::host_vector<CharArr> hostKeys(size);
    thrust::host_vector<long long> hostOrder(size);
    for (long long i = 0; i < size; i++) {
        CharArr key;
        key.value = deviceBase + hostOffsets[i];
        key.length = hostLengths[i];
        hostKeys[i] = key;
        hostOrder[i] = i;   // payload: where this string came from
    }

    thrust::device_vector<CharArr> deviceKeys = hostKeys;
    thrust::device_vector<long long> deviceOrder = hostOrder;

    thrust::sort_by_key(deviceKeys.begin(), deviceKeys.end(),
                        deviceOrder.begin(), CharArrayCmp());

    // Bring back only the permutation; the keys hold device addresses that
    // mean nothing to the caller.
    hostOrder = deviceOrder;

    // Permute into temporaries first because the source and destination
    // arrays are the same.
    thrust::host_vector<char *> sortedStrings(size);
    thrust::host_vector<long long> sortedRows(size);
    for (long long i = 0; i < size; i++) {
        const long long from = hostOrder[i];
        sortedStrings[i] = arrayToSort[from];
        sortedRows[i] = arrayToSortRow[from];
    }
    thrust::copy(sortedStrings.begin(), sortedStrings.end(), arrayToSort);
    thrust::copy(sortedRows.begin(), sortedRows.end(), arrayToSortRow);
}
// Number of demo strings sorted by main().
static const int kDemoStringCount = 10;

// Restores the unsorted demo state: strings[i] points at storage[i] again and
// rows[i] == i. The sort routines only permute pointers, so the characters in
// `storage` never change.
static void resetDemoInput(char (*storage)[8], char ** strings, long long * rows){
    for(int i = 0; i < kDemoStringCount; i++){
        strings[i] = storage[i];
        rows[i] = i;
    }
}

// Prints one "Row is <id> String is <text>" line per entry.
static void printDemoResult(char ** strings, const long long * rows){
    for(int i = 0; i < kDemoStringCount; i++){
        std::cout<<"Row is "<<rows[i]<<" String is "<<strings[i]<<std::endl;
    }
}

// Demo driver: sorts the same ten strings with the host routine and then with
// the device routine, printing the result of each.
int main()
{
    // Writable character arrays instead of string literals: binding a string
    // literal to a non-const `char *` is not valid in standard C++11 and
    // later. The longest entry has 6 characters, so 8 bytes per row leaves
    // room for the NUL.
    char storage[kDemoStringCount][8] = {
        "zyxw", "abcd", "defg", "werd", "aasd",
        "zwedew", "asde", "rurt", "ntddwe", "erbfde"
    };
    // Automatic arrays replace the new[] allocations that were never freed.
    char * charArr[kDemoStringCount];
    long long rows[kDemoStringCount];

    resetDemoInput(storage, charArr, rows);
    sortCharArrayHost(charArr, rows, kDemoStringCount);
    printDemoResult(charArr, rows);

    resetDemoInput(storage, charArr, rows);
    sortCharArrayDevice(charArr, rows, kDemoStringCount);
    printDemoResult(charArr, rows);

    return 0;
}

最佳答案

正如 JackOLantern 已经指出的那样,下面这种写法是不可接受的:

// this creates an allocation on the device
thrust::device_vector<CharArr> deviceArrayToSort =hostToSort;
// this takes the (device) address of an element and assigns it to a pointer variable
CharArr * deviceCharArr = (&deviceArrayToSort[i]).get();
// this then dereferences a device pointer in host code, which is illegal
deviceCharArr->length = sizeString;

在 CUDA 中,不允许在主机代码中解引用设备指针,反过来也一样:不允许在设备代码中解引用主机指针。

您似乎有以下数据集:

  1. 待排序的字符串
  2. 由 CharArr 对象组成的字符串“句柄”数组,每个对象包含一个指向字符串开头的指针以及该字符串的长度
  3. 字符串索引数组(即 0、1、2、...)

你想根据 1 对上面的 2 和 3 进行排序。如果可能的话,Thrust 更“喜欢”把所有待排序的内容放在一个或两个 vector 中。让我们尝试以下操作:

  1. 将所有字符串连接成一个 char vector 。
  2. 在另一个 int vector 中标记每个字符串的起始索引。连续起始索引的差异将构成每个字符串的长度。我们将使用 zip_iterator 将每个字符串的开始和长度组合成一个 thrust::tuple 以供比较器使用
  3. 使用所需的比较仿函数对“元组数组”进行排序(即同时对索引和长度进行排序)。其他数据的任何必要重新排列都可以使用重新排序的索引 vector 来完成。
  4. 如果您还需要重新排序的字符串索引(即 0、1、2 等),您可以很容易地创建该 vector 并将其作为第三个元素添加到要排序的元组中。

请注意,上述方法完全避免了指针的使用;正如您所见,一旦使用指针,就必须同时管理同一份数据的主机端和设备端副本,这会非常麻烦。

这是一个完整的例子:

$ cat t439.cu
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/sort.h>
#include <thrust/copy.h>

#define NUM_STRINGS 10



// "Less than" functor for strings stored back to back in one character
// buffer. Each operand is a tuple whose element 0 is the string's start index
// in `strings` and whose element 1 is its length; any further tuple elements
// are ignored. Ordering is lexicographic by byte, and a string that is a
// prefix of another sorts first.
struct stringCmp{

    // Base of the concatenated character buffer. It is dereferenced wherever
    // the functor runs, so it must be valid in that memory space.
    const char * strings;

    // Takes a pointer-to-const (a plain `char *` still converts) because the
    // functor only ever reads the buffer.
    stringCmp(const char * _strings) : strings(_strings) {}

    // Returns true when the string described by o1 orders before o2.
    // const-qualified so the functor can be called through a const object.
    template<typename myTuple>
    __host__ __device__
    bool operator()(const myTuple & o1, const myTuple & o2) const {
        const int idxSrc = thrust::get<0>(o1);
        const int lenSrc = thrust::get<1>(o1);
        const int idxDst = thrust::get<0>(o2);
        const int lenDst = thrust::get<1>(o2);
        const int common = (lenSrc < lenDst) ? lenSrc : lenDst;
        for (int i = 0; i < common; i++) {
            // Compare as unsigned bytes so the order of bytes >= 0x80 does
            // not depend on whether plain `char` is signed.
            const unsigned char a = static_cast<unsigned char>(strings[idxSrc + i]);
            const unsigned char b = static_cast<unsigned char>(strings[idxDst + i]);
            if (a != b) {
                return a < b;
            }
        }
        // Common prefix is identical: the shorter string sorts first.
        return lenSrc < lenDst;
    }
};

// Computes, on the device, the order in which the strings in `arr` sort.
//
//   arr     - `num_str` NUL-terminated host strings; read only, never
//             reordered.
//   rows    - in/out: one id per string. On return the ids are permuted into
//             sorted-string order. The caller below prints
//             charArr[rows[i]], which relies on rows[i] == i on entry.
//   num_str - number of strings; values <= 0 leave `rows` untouched.
//
// No pointers are sent to the device: the characters are concatenated into
// one vector, and each string is described by a (start index, length, row id)
// tuple that is sorted with stringCmp.
void sortCharArrayDevice(char ** arr, int *rows, int num_str){

    // With no strings the index vectors are empty, so writing h_st_idx[0]
    // would be out of bounds.
    if (num_str <= 0) {
        return;
    }

    thrust::host_vector<char> h_strings;
    thrust::host_vector<int> h_st_idx(num_str);
    thrust::host_vector<int> h_len(num_str);
    thrust::host_vector<int> h_rows(num_str);

    // Concatenate the strings without their terminating NULs.
    // The original author's note says "assume no zero length strings".
    int next_start = 0;
    for (int i = 0; i < num_str; i++){
        int len = 0;
        while (arr[i][len] != '\0'){
            h_strings.push_back(arr[i][len]);
            len++;
        }
        h_st_idx[i] = next_start;
        h_len[i] = len;
        h_rows[i] = rows[i];
        next_start += len;
    }

    // Copy data to the device.
    thrust::device_vector<char> d_strings = h_strings;
    thrust::device_vector<int> d_st_idx = h_st_idx;
    thrust::device_vector<int> d_len = h_len;
    thrust::device_vector<int> d_rows = h_rows;

    // Sort the (start, length, row) tuples on the device. The comparator
    // reads the characters through the raw device pointer to d_strings.
    thrust::sort(
        thrust::make_zip_iterator(thrust::make_tuple(d_st_idx.begin(), d_len.begin(), d_rows.begin())),
        thrust::make_zip_iterator(thrust::make_tuple(d_st_idx.end(), d_len.end(), d_rows.end())),
        stringCmp(thrust::raw_pointer_cast(d_strings.data())));

    // Only the reordered row ids are returned to the caller.
    thrust::copy(d_rows.begin(), d_rows.end(), rows);
}


// Demo driver: sorts ten fixed strings on the device and prints them in
// sorted order by indexing the untouched string array with the sorted rows.
int main()
{
    // Writable character arrays instead of string literals: binding a string
    // literal to a non-const `char *` is not valid in standard C++11 and
    // later. The longest entry has 6 characters, so 8 bytes per row leaves
    // room for the NUL.
    char storage[NUM_STRINGS][8] = {
        "zyxw", "abcd", "defg", "werd", "aasd",
        "zwedew", "asde", "rurt", "ntddwe", "erbfde"
    };
    // Automatic arrays replace the new[] allocations that were never freed.
    char * charArr[NUM_STRINGS];
    int rows[NUM_STRINGS];
    for(int i = 0; i < NUM_STRINGS; i++){
        charArr[i] = storage[i];
        rows[i] = i;   // row id == index into charArr; the print loop relies on this
    }

    sortCharArrayDevice(charArr,rows,NUM_STRINGS);

    // charArr keeps its original order; rows now lists the indices in sorted
    // order.
    for(int i = 0; i < NUM_STRINGS; i++){
        std::cout<<"Row is "<<rows[i]<<" String is "<<charArr[rows[i]]<<std::endl;
    }

    return 0;
}
$ nvcc -arch=sm_20 -o t439 t439.cu
$ ./t439
Row is 4 String is aasd
Row is 1 String is abcd
Row is 6 String is asde
Row is 2 String is defg
Row is 9 String is erbfde
Row is 8 String is ntddwe
Row is 7 String is rurt
Row is 3 String is werd
Row is 5 String is zwedew
Row is 0 String is zyxw
$

关于c++ - Thrust CUDA 将 char * 分配给对象的 device_vector,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/24126610/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com