- html - 出于某种原因,IE8 对我的 Sass 文件中继承的 html5 CSS 不友好?
- JMeter 在响应断言中使用 span 标签的问题
- html - 在 :hover and :active? 上具有不同效果的 CSS 动画
- html - 相对于居中的 html 内容固定的 CSS 重复背景?
我正在编写 CUDA 程序,为 BMP 文件添加模糊效果。我编写了在 CPU 上执行此操作的工作程序,现在我尝试将代码转换为 CUDA。这是我想在 CUDA 上使用的函数:
void blur(bitmap_header* hp, unsigned char *data)
{
int xx,yy,x,y, avgB, avgG, avgR, ile;
int blurSize = 5;
for(xx = 0; xx < hp->width; xx++)
{
for(yy = 0; yy < hp->height; yy++)
{
avgB = avgG = avgR = 0;
ile = 0;
for(x = xx; x < hp->width && x < xx + blurSize; x++)
{
for(y = yy; y < hp->height && y < yy + blurSize; y++)
{
avgB += data[x*3 + y*hp->width*3 + 0];
avgG += data[x*3 + y*hp->width*3 + 1];
avgR += data[x*3 + y*hp->width*3 + 2];
ile++;
}
}
avgB = avgB / ile;
avgG = avgG / ile;
avgR = avgR / ile;
data[xx*3 + yy*hp->width*3 + 0] = avgB;
data[xx*3 + yy*hp->width*3 + 1] = avgG;
data[xx*3 + yy*hp->width*3 + 2] = avgR;
}
}
}
如何将此函数转换为在 CUDA 设备上运行?每个教程仅涵盖一个 for 循环并使用
int i = threadIdx.x
我之前关于这个程序的问题: Blur effect on bitmap using C
编辑
带有 CUDA 编辑的完整代码:
#include <stdio.h>
#include <stdlib.h>
#include <Windows.h>
#pragma pack(push,1)
/* Windows 3.x bitmap file header */
typedef struct {
char filetype[2]; /* magic - always 'B' 'M' */
unsigned int filesize;
short reserved1;
short reserved2;
unsigned int dataoffset; /* offset in bytes to actual bitmap data */
} file_header;
/* Windows 3.x bitmap full header, including file header */
typedef struct {
file_header fileheader;
unsigned int headersize;
int width;
int height;
short planes;
short bitsperpixel; /* we only support the value 24 here */
unsigned int compression; /* we do not support compression */
unsigned int bitmapsize;
int horizontalres;
int verticalres;
unsigned int numcolors;
unsigned int importantcolors;
} bitmap_header;
#pragma pack(pop)
__global__ void blur(bitmap_header* hp, unsigned char *data)
{
int xx,yy,x,y, avgB, avgG, avgR, ile;
int blurSize = 5;
xx = blockIdy.y * blockDim.y + threadIdx.y;
yy = blockIdx.x * blockDim.x + threadIdx.x;
if(xx >= hp->width || yy >= hp->height)
return;
avgB = avgG = avgR = 0;
ile = 0;
for(x = xx; x < hp->width && x < xx + blurSize; x++)
{
for(y = yy; y < hp->height && y < yy + blurSize; y++)
{
avgB += data[x*3 + y*hp->width*3 + 0];
avgG += data[x*3 + y*hp->width*3 + 1];
avgR += data[x*3 + y*hp->width*3 + 2];
ile++;
}
}
avgB = avgB / ile;
avgG = avgG / ile;
avgR = avgR / ile;
data[xx*3 + yy*hp->width*3 + 0] = avgB;
data[xx*3 + yy*hp->width*3 + 1] = avgG;
data[xx*3 + yy*hp->width*3 + 2] = avgR;
}
int filter(char* input, char *output)
{
//variable dec:
FILE *fp,*out;
bitmap_header* hp;
bitmap_header* d_hp;
unsigned char *data;
unsigned char *d_data;
//Open input file:
fp = fopen(input, "r");
if(fp==NULL)
return 1;
//Read the input file headers:
hp=(bitmap_header*)malloc(sizeof(bitmap_header));
cudaMalloc( &d_hp, (sizeof(bitmap_header));
if(hp==NULL)
return 1;
fread(hp, sizeof(bitmap_header), 1, fp);
cudaMemcpy(d_hp, &hp, (sizeof(bitmap_header), cudaMemcpyHostToDevice);
//Read the data of the image:
data = (unsigned char*)malloc(sizeof(char)*hp->bitmapsize);
cudaMalloc( &d_data, (sizeof(char)*hp->bitmapsize));
fseek(fp,sizeof(char)*hp->fileheader.dataoffset,SEEK_SET);
fread(data,sizeof(char),hp->bitmapsize, fp);
cudaMemcpy(d_data, &data, (sizeof(char)*hp->bitmapsize), cudaMemcpyHostToDevice);
dim3 block(16,16);
dim3 grid ( (hp->height + 15)/16, (hp->width + 15)/16 );
blur<<<grid,block>>>(d_hp, d_data);
cudaMemcpy(data, d_data, (sizeof(char)*hp->bitmapsize), cudaMemcpyDeviceToHost);
//Open output file:
out = fopen(output, "wb");
if(out==NULL)
{
fclose(fp);
free(hp);
free(data);
cudaFree(d_data);
cudaFree(d_hp);
return 1;
}
fwrite(hp,sizeof(char),sizeof(bitmap_header),out);
fseek(out,sizeof(char)*hp->fileheader.dataoffset,SEEK_SET);
fwrite(data,sizeof(char),hp->bitmapsize,out);
fclose(fp);
fclose(out);
free(hp);
free(data);
cudaFree(d_data);
cudaFree(d_hp);
return 0;
}
int main(int argc, char* argv[])
{
int frames;
int frame = 1;
char path[100000];
system("rd /s/q temp");
system("mkdir temp");
system("cls");
printf("Zapis wszystkich klatek do folderu temp.\n");
system("ffmpeg.exe -i test.mp4 -r 29.970 -vcodec bmp temp/%d.bmp");
printf("Ile jest klatek w folderze temp?\n");
scanf("%d", &frames);
for(frame = 1;frame<=frames;frame++)
{
sprintf(path,"temp\\%d.bmp",frame);
printf("Nakladam filtr na ");
printf(path);
printf("\n");
filter(path,path);
}
system("cls");
printf("Wszystkie klatki do filmu mp4.\n");
system("ffmpeg -r 29.970 -i temp/%d.bmp -c:v libx264 -preset slow -crf 21 temp/out.mp4");
system("cls");
printf("Wyciecie dzwieku z filmu do mp3\n");
system("ffmpeg -i test.mp4 -vn -ar 44100 -ac 2 -ab 192 -f wav temp/sound.wav");
system("cls");
printf("Polaczenie mp3 z mp4.\n");
system("ffmpeg -i temp/sound.wav -i temp/out.mp4 final.mp4");
system("cls");
printf("Delete ");
system("rd /s temp");
system("pause");
return 0;
}
完整错误列表:
Warning 1 warning : The 'compute_10' and 'sm_10' architectures are deprecated, and may be removed in a future release. C:\Users\Karpińscy\documents\visual studio 2012\Projects\blur\blur\nvcc blur
Error 2 error : identifier "blockIdy" is undefined C:\Users\Karpi˝scy\documents\visual studio 2012\Projects\blur\blur\kernel.cu blur
Error 3 error : expected a ")" C:\Users\Karpi˝scy\documents\visual studio 2012\Projects\blur\blur\kernel.cu blur
Warning 4 warning : expression has no effect C:\Users\Karpi˝scy\documents\visual studio 2012\Projects\blur\blur\kernel.cu blur
Error 5 error : expected a ")" C:\Users\Karpi˝scy\documents\visual studio 2012\Projects\blur\blur\kernel.cu blur
Error 6 error : too few arguments in function call C:\Users\Karpi˝scy\documents\visual studio 2012\Projects\blur\blur\kernel.cu blur
Error 7 error MSB3721: The command ""C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.0\bin\nvcc.exe" -gencode=arch=compute_10,code=\"sm_10,compute_10\" --use-local-env --cl-version 2012 -ccbin "C:\Program Files (x86)\Microsoft Visual Studio 11.0\VC\bin" -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.0\include" -I"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.0\include" -G -maxrregcount=0 --machine 32 --compile -cudart static -I -g -DWIN32 -D_DEBUG -D_CONSOLE -D_MBCS -Xcompiler "/EHsc /W3 /nologo /Od /Zi /RTC1 /MDd " -o Debug\kernel.cu.obj "C:\Users\Karpińscy\documents\visual studio 2012\Projects\blur\blur\kernel.cu"" exited with code 2. C:\Program Files (x86)\MSBuild\Microsoft.Cpp\v4.0\V110\BuildCustomizations\CUDA 6.0.targets 597 9 blur
8 IntelliSense: this declaration has no storage class or type specifier c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 33 1 blur
9 IntelliSense: expected a ';' c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 33 12 blur
10 IntelliSense: this declaration has no storage class or type specifier c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 56 4 blur
11 IntelliSense: expected a ';' c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 56 9 blur
12 IntelliSense: this declaration has no storage class or type specifier c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 57 4 blur
13 IntelliSense: expected a ';' c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 57 7 blur
14 IntelliSense: expected a declaration c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 58 3 blur
15 IntelliSense: this declaration has no storage class or type specifier c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 62 2 blur
16 IntelliSense: this declaration has no storage class or type specifier c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 63 2 blur
17 IntelliSense: identifier "xx" is undefined c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 65 7 blur
18 IntelliSense: identifier "yy" is undefined c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 65 14 blur
19 IntelliSense: identifier "avgB" is undefined c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 65 36 blur
20 IntelliSense: identifier "xx" is undefined c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 66 7 blur
21 IntelliSense: identifier "yy" is undefined c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 66 14 blur
22 IntelliSense: identifier "xx" is undefined c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 67 7 blur
23 IntelliSense: identifier "yy" is undefined c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 67 14 blur
24 IntelliSense: expected a declaration c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 68 1 blur
25 IntelliSense: this declaration has no storage class or type specifier c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 94 5 blur
26 IntelliSense: this declaration has no storage class or type specifier c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 95 2 blur
27 IntelliSense: identifier "d_data" is undefined c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 95 15 blur
28 IntelliSense: expected a ')' c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 95 21 blur
29 IntelliSense: this declaration has no storage class or type specifier c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 97 5 blur
30 IntelliSense: identifier "fp" is undefined c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 97 11 blur
31 IntelliSense: expected a ')' c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 97 13 blur
32 IntelliSense: this declaration has no storage class or type specifier c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 98 5 blur
33 IntelliSense: expected a ')' c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 98 15 blur
34 IntelliSense: this declaration has no storage class or type specifier c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 99 2 blur
35 IntelliSense: identifier "d_data" is undefined c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 99 13 blur
36 IntelliSense: expected a ')' c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 99 19 blur
37 IntelliSense: identifier "dim3" is undefined c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 102 2 blur
38 IntelliSense: expected a ')' c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 102 15 blur
39 IntelliSense: identifier "dim3" is undefined c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 103 2 blur
40 IntelliSense: expected a ')' c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 103 34 blur
41 IntelliSense: this declaration has no storage class or type specifier c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 105 2 blur
42 IntelliSense: expected a ';' c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 105 6 blur
43 IntelliSense: this declaration has no storage class or type specifier c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 106 2 blur
44 IntelliSense: expected a ')' c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 106 17 blur
45 IntelliSense: this declaration has no storage class or type specifier c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 109 2 blur
46 IntelliSense: identifier "output" is undefined c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 109 14 blur
47 IntelliSense: expected a declaration c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 110 2 blur
48 IntelliSense: this declaration has no storage class or type specifier c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 122 5 blur
49 IntelliSense: expected a ')' c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 122 14 blur
50 IntelliSense: this declaration has no storage class or type specifier c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 123 5 blur
51 IntelliSense: expected a ')' c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 123 16 blur
52 IntelliSense: this declaration has no storage class or type specifier c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 125 5 blur
53 IntelliSense: identifier "fp" is undefined c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 125 12 blur
54 IntelliSense: this declaration has no storage class or type specifier c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 126 5 blur
55 IntelliSense: this declaration has no storage class or type specifier c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 127 5 blur
56 IntelliSense: this declaration has no storage class or type specifier c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 128 5 blur
57 IntelliSense: this declaration has no storage class or type specifier c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 130 2 blur
58 IntelliSense: identifier "d_data" is undefined c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 130 11 blur
59 IntelliSense: this declaration has no storage class or type specifier c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 131 2 blur
60 IntelliSense: identifier "d_hp" is undefined c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 131 11 blur
61 IntelliSense: expected a declaration c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 132 5 blur
62 IntelliSense: expected a declaration c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 133 1 blur
63 IntelliSense: expected a declaration c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 158 2 blur
64 IntelliSense: this declaration has no storage class or type specifier c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 161 2 blur
65 IntelliSense: this declaration has no storage class or type specifier c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 162 2 blur
66 IntelliSense: this declaration has no storage class or type specifier c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 164 2 blur
67 IntelliSense: this declaration has no storage class or type specifier c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 165 2 blur
68 IntelliSense: this declaration has no storage class or type specifier c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 166 2 blur
69 IntelliSense: this declaration has no storage class or type specifier c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 168 2 blur
70 IntelliSense: this declaration has no storage class or type specifier c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 169 2 blur
71 IntelliSense: this declaration has no storage class or type specifier c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 170 2 blur
72 IntelliSense: this declaration has no storage class or type specifier c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 172 2 blur
73 IntelliSense: this declaration has no storage class or type specifier c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 173 2 blur
74 IntelliSense: this declaration has no storage class or type specifier c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 174 2 blur
75 IntelliSense: this declaration has no storage class or type specifier c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 175 2 blur
76 IntelliSense: expected a declaration c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 177 5 blur
77 IntelliSense: expected a declaration c:\Users\Karpińscy\Documents\Visual Studio 2012\Projects\blur\blur\kernel.cu 178 1 blur
最佳答案
您的代码中有不少语法错误。事实上,它们与 CUDA 没有任何关系。也许您应该提高基本的 C 编码技能并了解如何解释编译器错误。
这一行:
cudaMalloc( &d_hp, (sizeof(bitmap_header));
缺少右括号。您看不到这一点,也无法找出表明这一点的编译器错误?
Error 3 error : expected a ")" C:\Users\Karpi˝scy\documents\visual studio 2012\Projects\blur\blur\kernel.cu blur
在cuda中,没有内置变量blockIdy
。也许您的意思是blockIdx.y
?
Error 2 error : identifier "blockIdy" is undefined C:\Users\Karpi˝scy\documents\visual studio 2012\Projects\blur\blur\kernel.cu blur
此行出现另一个编译错误:
cudaMemcpy(d_hp, &hp, (sizeof(bitmap_header), cudaMemcpyHostToDevice);
应该是这样的:
cudaMemcpy(d_hp, &hp, sizeof(bitmap_header), cudaMemcpyHostToDevice);
如果您在修复这些编译错误后仍然需要帮助,请发布一个新问题,其中包含完整但简单的代码,该代码不依赖于从文件中读取图像。只需创建一个简单的测试用例并使用它来验证内核即可。添加我提到的错误检查并使用 cuda-memcheck
运行您的代码。如果你拒绝做这些事情,我就帮不了你。
关于c - 将四个嵌套循环转换为 CUDA 内核,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/23712019/
这是我关于 Stack Overflow 的第一个问题,这是一个很长的问题。 tl;dr 版本是:我如何使用 thrust::device_vector如果我希望它存储不同类型的对象 DerivedC
我已使用 cudaMalloc 在设备上分配内存并将其传递给内核函数。是否可以在内核完成执行之前从主机访问该内存? 最佳答案 我能想到的在内核仍在执行时启动 memcpy 的唯一方法是在与内核不同的流
是否可以在同一节点上没有支持 CUDA 的设备的情况下编译 CUDA 程序,仅使用 NVIDIA CUDA Toolkit...? 最佳答案 你的问题的答案是肯定的。 nvcc编译器驱动程序与设备的物
我不知道 cuda 不支持引用参数。我的程序中有这两个函数: __global__ void ExtractDisparityKernel ( ExtractDisparity& es)
我正在使用 CUDA 5.0。我注意到编译器将允许我在内核中使用主机声明的 int 常量。但是,它拒绝编译任何使用主机声明的 float 常量的内核。有谁知道这种看似差异的原因? 例如,下面的代码可以
自从 CUDA 9 发布以来,显然可以将不同的线程和 block 分组到同一组中,以便您可以一起管理它们。这对我来说非常有用,因为我需要启动一个包含多个 block 的内核并等待所有 block 都同
我需要在 CUDA 中执行三线性插值。这是问题定义: 给定三个点向量:x[nx]、y[ny]、z[nz] 和一个函数值矩阵func[nx][ny][nz],我想在 x、y 范围之间的一些随机点处找到函
我认为由于 CUDA 可以执行 64 位 128 位加载/存储,因此它可能具有一些用于加/减/等的内在函数。像 float3 这样的向量类型,在像 SSE 这样更少的指令中。 CUDA 有这样的功能吗
我有一个问题,每个线程 block (一维)必须对共享内存内的一个数组进行扫描,并执行几个其他任务。 (该数组最多有 1024 个元素。) 有没有支持这种操作的好库? 我检查了 Thrust 和 Cu
我对线程的形成和执行方式有很多疑惑。 首先,文档将 GPU 线程描述为轻量级线程。假设我希望将两个 100*100 矩阵相乘。如果每个元素都由不同的线程计算,则这将需要 100*100 个线程。但是,
我正在尝试自己解决这个问题,但我不能。 所以我想听听你的建议。 我正在编写这样的内核代码。 VGA 是 GTX 580。 xxxx >> (... threadNum ...) (note. Shar
查看 CUDA Thrust 代码中的内核启动,似乎它们总是使用默认流。我可以让 Thrust 使用我选择的流吗?我在 API 中遗漏了什么吗? 最佳答案 我想在 Thrust 1.8 发布后更新 t
我想知道 CUDA 应用程序的扭曲调度顺序是否是确定性的。 具体来说,我想知道在同一设备上使用相同输入数据多次运行同一内核时,warp 执行的顺序是否会保持不变。如果没有,是否有任何东西可以强制对扭曲
一个 GPU 中可以有多少个 CUDA 网格? 两个网格可以同时存在于 GPU 中吗?还是一台 GPU 设备只有一个网格? Kernel1>(dst1, param1); Kernel1>(dst2,
如果我编译一个计算能力较低的 CUDA 程序,例如 1.3(nvcc 标志 sm_13),并在具有 Compute Capability 2.1 的设备上运行它,它是否会利用 Compute 2.1
固定内存应该可以提高从主机到设备的传输速率(api 引用)。但是我发现我不需要为内核调用 cuMemcpyHtoD 来访问这些值,也不需要为主机调用 cuMemcpyDtoA 来读取值。我不认为这会奏
我希望对 CUDA C 中负载平衡的最佳实践有一些一般性的建议和说明,特别是: 如果经纱中的 1 个线程比其他 31 个线程花费的时间长,它会阻止其他 31 个线程完成吗? 如果是这样,多余的处理能力
CUDA 中是否有像 opencl 一样的内置交叉和点积,所以 cuda 内核可以使用它? 到目前为止,我在规范中找不到任何内容。 最佳答案 您可以在 SDK 的 cutil_math.h 中找到这些
有一些与我要问的问题类似的问题,但我觉得它们都没有触及我真正要寻找的核心。我现在拥有的是一种 CUDA 方法,它需要将两个数组定义到共享内存中。现在,数组的大小由在执行开始后读入程序的变量给出。因此,
经线是 32 根线。 32 个线程是否在多处理器中并行执行? 如果 32 个线程没有并行执行,则扭曲中没有竞争条件。 在经历了一些例子后,我有了这个疑问。 最佳答案 在 CUDA 编程模型中,warp
我是一名优秀的程序员,十分优秀!