gpt4 book ai didi

c++ - Cuda如何将char**从内核复制到主机

转载 作者:行者123 更新时间:2023-11-30 19:04:09 25 4
gpt4 key购买 nike

#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#include <string.h>
#include <malloc.h>
#include <fstream>
#include <algorithm>
#include <time.h>

using namespace std;

// Copies pointer values from desc into merge_char, one table entry per thread.
//
// desc:       table of char* readable from the device (source).
// merge_char: table of char* writable from the device (destination).
//
// Launch layout: indexing is 1-D in x. A thread whose global x index is
// 10000 or more does nothing, so the number of entries written is
// min(total threads launched in x, 10000). Both tables must hold at least
// that many entries. Only the pointer values are copied; the character data
// they point to is neither read nor written here.
__global__ void kern_2D(char **desc, char** merge_char) {
    const int kMaxLines = 10000;  // upper bound on table entries handled
    const int idx = threadIdx.x + blockDim.x * blockIdx.x;

    if (idx >= kMaxLines)
        return;

    merge_char[idx] = desc[idx];
    //printf("From key = %s\n", merge_char[idx]);
}


// Reads lines from desc.txt, copies each one into its own device buffer,
// builds a device-side table of those buffer pointers (d_desc), launches
// kern_2D to copy the table into managed memory (merge_char), and finally
// tries to print every string from the host.
// NOTE(review): the final print loop dereferences device memory on the host;
// see the comments next to h_dev below.
int main() {
cudaError_t err = cudaSuccess;
size_t max_line_len = 255;
char line[255]; // scratch buffer reused for every line read from the file
size_t line_len;
size_t max_lines_desc = 10000; // same value as the bound hard-coded in kern_2D
//---------------------------------------------------------------------------------//

// Device-side table: one char* slot per line.
char **d_desc;
cudaMalloc(&d_desc, max_lines_desc * sizeof(char *));

// Host-side tables: m_desc holds host pointers, d_temp_desc holds the device
// pointer returned by cudaMalloc for each line.
char **m_desc = NULL;
m_desc = (char**)malloc(max_lines_desc * sizeof(char**));
char **d_temp_desc = NULL;
d_temp_desc = (char **)malloc(max_lines_desc * sizeof(char **));

FILE *f_desc = fopen("desc.txt", "r");
if (!f_desc)
{
fprintf(stderr, "Error opening file!\n");
// NOTE(review): execution continues, so fgets below receives a null FILE*.
}
int idesc = 0; // number of lines uploaded so far

do
{
if (!fgets(line, max_line_len, f_desc))
{
if (ferror(f_desc) && !feof(f_desc))
{
fprintf(stderr, "Error reading from file!\n");
// NOTE(review): the file is closed again after the loop (double fclose).
fclose(f_desc);
}
break;
}

// Strip the trailing newline, if any.
line_len = strlen(line);
if ((line_len > 0) && (line[line_len - 1] == '\n'))
{
line[line_len - 1] = '\0';
--line_len;
}
// Every m_desc entry aliases the same buffer `line`; the text is only
// preserved because it is copied to the device immediately below.
m_desc[idesc] = line;
// Deep copy, step 1: allocate a 255-byte device buffer and upload the line.
cudaMalloc(&(d_temp_desc[idesc]), sizeof(line) * sizeof(char));
cudaMemcpy(d_temp_desc[idesc], m_desc[idesc], sizeof(line) * sizeof(char), cudaMemcpyHostToDevice);
// Deep copy, step 2: store that device pointer in slot idesc of d_desc.
cudaMemcpy(d_desc + idesc, &(d_temp_desc[idesc]), sizeof(char *), cudaMemcpyHostToDevice);

++idesc;
} while (idesc < max_lines_desc);
fclose(f_desc);

//---------------------------------------------------------------------------------//


// Managed table that receives the pointers copied by the kernel.
char **merge_char;
cudaMallocManaged(&merge_char, max_lines_desc * sizeof(char *));


// One block of 1000 threads: kern_2D writes entries 0..999 only, although
// its guard would allow indices up to 9999.
kern_2D << < 1, 1000 >> > (d_desc , merge_char);

err = cudaDeviceSynchronize();
if (err != cudaSuccess) {
fprintf(stderr, "cudaDeviceSynchronize returned error code %s after launching addKernel!\n", cudaGetErrorString(err));
}


//---------------------------------------------------------------------------------//

char** h_dev;

// NOTE(review): h_dev is allocated with cudaMalloc, so it is device memory
// even though the copy below is labelled cudaMemcpyDeviceToHost.
cudaMalloc((void**)(&h_dev), max_lines_desc * sizeof(char*));
err = cudaMemcpy(h_dev, merge_char, max_lines_desc * sizeof(char*), cudaMemcpyDeviceToHost);
if (err == cudaSuccess) printf("2: Okay \n");


// NOTE(review): h_dev[i] reads device memory from host code, and the values
// stored there are device pointers to the strings, so printf cannot read
// them. The string bytes are never copied back to the host.
for (int i = 0; i < max_lines_desc; i++)
{
printf("%s\n", h_dev[i]);
}


return 0;


}
//nvcc -arch=sm_30 -o kernel kernel.cu
// cuda-memcheck ./kernel

我对我的错误感到抱歉。我已经更新了我的代码。完成了。

对于 desc.txt,该文件有 10000 行,如下所示。从设备复制到主机后我检查了状态,但我错了。我无法打印 char** h_dev。

motorcycle ckd new apsonic ckd 2017 ckd 2018 motorcycle apsoni new motorcycle apsonic no 125 motorcycle apsonic ap125 new motorcycle apsonic ap125

最佳答案

我不得不说，我真的不明白你的意图是什么，因为你的内核所做的唯一事情就是交换指针。如果这就是你打算做的全部事情，那么到处使用双重指针肯定会给自己带来麻烦。改为管理索引会简单得多。

但是为了解决你的问题,据我所知,你的“复制回主机”确实是不正确的。您实际上正在对数据从主机到设备进行深度复制,因此您也需要在另一个方向上进行深度复制(两阶段复制)。

为了实现这一点，我们不能对主机端的副本使用 cudaMalloc：cudaMalloc 分配的是设备内存。如果你想把某些东西复制到主机，你的复制目标就必须是主机内存。因此，我们需要一组 cudaMemcpy 操作，以主机缓冲区作为目标，将数据深度复制回主机。

以下代码代表了我可以对您所展示的内容进行的最简单的修改,以实现此目的,并且它似乎适用于我的简单测试用例:

$ cat desc.txt
1motorcycle ckd new apsonic ckd 2017 ckd 2018 motorcycle apsoni new motorcycle apsonic no 125 motorcycle apsonic ap125 new motorcycle apsonic ap125
2motorcycle ckd new apsonic ckd 2017 ckd 2018 motorcycle apsoni new motorcycle apsonic no 125 motorcycle apsonic ap125 new motorcycle apsonic ap125
3motorcycle ckd new apsonic ckd 2017 ckd 2018 motorcycle apsoni new motorcycle apsonic no 125 motorcycle apsonic ap125 new motorcycle apsonic ap125
4motorcycle ckd new apsonic ckd 2017 ckd 2018 motorcycle apsoni new motorcycle apsonic no 125 motorcycle apsonic ap125 new motorcycle apsonic ap125
5motorcycle ckd new apsonic ckd 2017 ckd 2018 motorcycle apsoni new motorcycle apsonic no 125 motorcycle apsonic ap125 new motorcycle apsonic ap125
6motorcycle ckd new apsonic ckd 2017 ckd 2018 motorcycle apsoni new motorcycle apsonic no 125 motorcycle apsonic ap125 new motorcycle apsonic ap1
$ cat t301.cu
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#include <string.h>
#include <malloc.h>
#include <fstream>
#include <algorithm>
#include <time.h>

using namespace std;

__global__ void kern_2D(char **desc, char** merge_char) {
// Copies the pointer stored in desc[idx] to merge_char[idx], one entry per thread.
int idx = threadIdx.x + blockDim.x*blockIdx.x; // global thread index along x
int idy = threadIdx.y + blockDim.y*blockIdx.y; // computed but never used below
// Threads with idx >= 10000 do nothing; only pointer values are copied.
if (idx < 10000)
{
char* s1 = desc[idx];
merge_char[idx] = s1;
//printf("From key = %s\n", merge_char[idx]);
}

}


// Reports a failed CUDA runtime call on stderr.
// what: short label for the call, used in the message.
// Returns true when err is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t err, const char *what) {
    if (err == cudaSuccess)
        return true;
    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(err));
    return false;
}

// Round-trips the lines of desc.txt through the GPU:
//   1. deep-copies every line to the device (one buffer per line plus a
//      device table of pointers to those buffers),
//   2. runs kern_2D to copy the pointer table into managed memory,
//   3. deep-copies the data back: first the pointer table into a host
//      buffer, then each string through its device pointer,
//   4. prints every line that was read.
// Lines longer than 254 characters are split by fgets into several entries.
// Returns 0 on success and 1 if the file cannot be opened or any allocation
// or CUDA call fails.
int main() {
    const size_t max_lines_desc = 10000;  // equals the bound hard-coded in kern_2D
    char line[255] = "";                  // zero-filled: whole buffer is uploaded per line

    FILE *f_desc = fopen("desc.txt", "r");
    if (!f_desc) {
        fprintf(stderr, "Error opening file!\n");
        return 1;  // nothing acquired yet, and fgets must not see a null FILE*
    }

    // Host-side record of the per-line device buffers (needed for the table
    // upload and for freeing them at the end).
    char **d_temp_desc = (char **)malloc(max_lines_desc * sizeof(char *));
    if (!d_temp_desc) {
        fprintf(stderr, "Out of host memory!\n");
        fclose(f_desc);
        return 1;
    }

    // Device-side pointer table. It is zeroed because kern_2D reads all
    // 10000 slots even when the file has fewer lines.
    char **d_desc = NULL;
    bool ok = cudaOk(cudaMalloc(&d_desc, max_lines_desc * sizeof(char *)), "cudaMalloc(d_desc)") &&
              cudaOk(cudaMemset(d_desc, 0, max_lines_desc * sizeof(char *)), "cudaMemset(d_desc)");

    //---------------------------------------------------------------------------------//
    // Host -> device deep copy, stage 1: one device buffer per line.

    size_t idesc = 0;  // number of device line buffers allocated so far
    while (ok && idesc < max_lines_desc && fgets(line, (int)sizeof(line), f_desc)) {
        size_t line_len = strlen(line);
        if (line_len > 0 && line[line_len - 1] == '\n')
            line[line_len - 1] = '\0';

        char *d_line = NULL;
        ok = cudaOk(cudaMalloc(&d_line, sizeof(line)), "cudaMalloc(line)");
        if (!ok)
            break;
        d_temp_desc[idesc++] = d_line;  // record at once so cleanup frees it
        ok = cudaOk(cudaMemcpy(d_line, line, sizeof(line), cudaMemcpyHostToDevice),
                    "cudaMemcpy(line to device)");
    }
    if (ferror(f_desc))
        fprintf(stderr, "Error reading from file!\n");  // best effort: keep lines read so far
    fclose(f_desc);                                     // closed exactly once

    // Stage 2: upload the pointer table in a single transfer.
    if (ok && idesc > 0)
        ok = cudaOk(cudaMemcpy(d_desc, d_temp_desc, idesc * sizeof(char *), cudaMemcpyHostToDevice),
                    "cudaMemcpy(pointer table to device)");

    //---------------------------------------------------------------------------------//

    char **merge_char = NULL;
    ok = ok && cudaOk(cudaMallocManaged(&merge_char, max_lines_desc * sizeof(char *)),
                      "cudaMallocManaged(merge_char)");

    if (ok) {
        // Launch enough threads to cover every table slot; kern_2D's own
        // guard discards the surplus threads of the last block.
        const unsigned int threads = 256;
        const unsigned int blocks = (unsigned int)((max_lines_desc + threads - 1) / threads);
        kern_2D<<<blocks, threads>>>(d_desc, merge_char);
        ok = cudaOk(cudaGetLastError(), "kern_2D launch") &&
             cudaOk(cudaDeviceSynchronize(), "kern_2D execution");
    }

    //---------------------------------------------------------------------------------//
    // Device -> host deep copy. The destination is ordinary host memory.

    char **h_dev = NULL;
    if (ok) {
        h_dev = (char **)malloc(max_lines_desc * sizeof(char *));
        if (!h_dev) {
            fprintf(stderr, "Out of host memory!\n");
            ok = false;
        }
    }
    if (ok) {
        ok = cudaOk(cudaMemcpy(h_dev, merge_char, max_lines_desc * sizeof(char *), cudaMemcpyDeviceToHost),
                    "cudaMemcpy(pointer table to host)");
        if (ok)
            printf("2: Okay \n");
    }

    // h_dev[i] holds a device address, so each string is fetched with its
    // own cudaMemcpy instead of being dereferenced on the host.
    for (size_t i = 0; ok && i < idesc; i++) {
        ok = cudaOk(cudaMemcpy(line, h_dev[i], sizeof(line), cudaMemcpyDeviceToHost),
                    "cudaMemcpy(line to host)");
        if (ok)
            printf("%s\n", line);
    }

    // Cleanup runs on success and on failure; null pointers are accepted
    // by both free and cudaFree.
    free(h_dev);
    cudaFree(merge_char);
    for (size_t i = 0; i < idesc; i++)
        cudaFree(d_temp_desc[i]);
    free(d_temp_desc);
    cudaFree(d_desc);

    return ok ? 0 : 1;
}
$ nvcc -o t301 t301.cu
t301.cu(15): warning: variable "idy" was declared but never referenced

$ cuda-memcheck ./t301
========= CUDA-MEMCHECK
2: Okay
1motorcycle ckd new apsonic ckd 2017 ckd 2018 motorcycle apsoni new motorcycle apsonic no 125 motorcycle apsonic ap125 new motorcycle apsonic ap125
2motorcycle ckd new apsonic ckd 2017 ckd 2018 motorcycle apsoni new motorcycle apsonic no 125 motorcycle apsonic ap125 new motorcycle apsonic ap125
3motorcycle ckd new apsonic ckd 2017 ckd 2018 motorcycle apsoni new motorcycle apsonic no 125 motorcycle apsonic ap125 new motorcycle apsonic ap125
4motorcycle ckd new apsonic ckd 2017 ckd 2018 motorcycle apsoni new motorcycle apsonic no 125 motorcycle apsonic ap125 new motorcycle apsonic ap125
5motorcycle ckd new apsonic ckd 2017 ckd 2018 motorcycle apsoni new motorcycle apsonic no 125 motorcycle apsonic ap125 new motorcycle apsonic ap125
6motorcycle ckd new apsonic ckd 2017 ckd 2018 motorcycle apsoni new motorcycle apsonic no 125 motorcycle apsonic ap125 new motorcycle apsonic ap1
========= ERROR SUMMARY: 0 errors
$

关于c++ - Cuda如何将char**从内核复制到主机,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/52706663/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com