gpt4 book ai didi

cuda - CUDA纹理的不同寻址模式

转载 作者:行者123 更新时间:2023-12-04 01:39:53 25 4
gpt4 key购买 nike

我在边界寻址模式(cudaAddressModeBorder)下使用CUDA纹理,并通过tex2D<float>()按纹理坐标读取纹理。当纹理坐标超出纹理范围时,tex2D<float>()返回0。

如何将返回的边界值从0更改为其他内容?我可以手动检查纹理坐标并自行设置边界值。我想知道是否有CUDA API可以设置这样的边界值。

最佳答案

如sgarizvi所述,CUDA仅支持四种不可自定义的寻址模式,即钳位(clamp)、边界(border)、环绕(wrap)和镜像(mirror),详见《CUDA编程指南》第3.2.11.1节。

前两种模式在非归一化坐标和归一化坐标下均可使用,而后两种模式仅在归一化坐标下可用。

为了描述前两种模式,为简单起见,我们考虑非归一化坐标的情况,并以一维信号为例。在这种情况下,输入序列为c[k],k = 0,...,M-1。

cudaAddressModeClamp

信号c[k]在k = 0,...,M-1之外被延拓,使得当k < 0时c[k] = c[0],当k >= M时c[k] = c[M-1]。

cudaAddressModeBorder

信号c[k]在k = 0,...,M-1之外被延拓,使得当k < 0或k >= M时c[k] = 0。

现在,为了描述后两种寻址模式,我们必须考虑归一化坐标,因此将一维输入信号的样本记为c[k / M],k = 0,...,M-1。

cudaAddressModeWrap

信号c[k / M]在k = 0,...,M-1之外被延拓,使其成为周期等于M的周期信号。换句话说,对任意(正、负或零)整数p,都有c[(k + p * M) / M] = c[k / M]。

cudaAddressModeMirror

信号c[k / M]在k = 0,...,M-1之外被延拓,使其成为周期等于2 * M - 2的周期信号。换句话说,对任意满足(l + k) mod (2 * M - 2) = 0的l和k,都有c[l / M] = c[k / M]。(注:下文给出的mirror输出实际以2 * M为周期重复,与此处所述的2 * M - 2不一致,请以实际输出为准。)

以下代码说明了所有四种可用的地址模式

#include <stdio.h>
#include <stdlib.h>

// File-scope legacy texture references, one per address mode demonstrated by
// the kernels in this file. Each is a 1D texture of float elements read with
// cudaReadModeElementType.
// NOTE(review): the texture reference API is a legacy interface (deprecated in
// CUDA 11 and, as far as I know, removed in CUDA 12) — confirm the toolkit
// version this sample is expected to build with.
texture<float, 1, cudaReadModeElementType> texture_clamp;
texture<float, 1, cudaReadModeElementType> texture_border;
texture<float, 1, cudaReadModeElementType> texture_wrap;
texture<float, 1, cudaReadModeElementType> texture_mirror;

/********************/
/* CUDA ERROR CHECK */
/********************/
// Evaluates a CUDA runtime call and reports a failure together with the file
// and line of the call site. Wrapped in do/while(0) so that `gpuErrchk(x);`
// behaves as a single statement, including inside an unbraced if/else.
#define gpuErrchk(ans) do { gpuAssert((ans), __FILE__, __LINE__); } while (0)

// Prints a diagnostic to stderr when `code` is not cudaSuccess.
//   code  - status returned by a CUDA runtime call
//   file  - source file of the call site (receives __FILE__, hence const)
//   line  - source line of the call site
//   abort - when true (the default) the process exits with `code` as status
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort = true)
{
    if (code == cudaSuccess) return;

    fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
    if (abort) exit(code);
}

/******************************/
/* CUDA ADDRESS MODE CLAMPING */
/******************************/
// Prints the values fetched from texture_clamp at the coordinates
// -threadIdx.x and M + threadIdx.x.
//
// Launch layout: only threadIdx.x is used; every launched thread prints two
// lines. Device printf is slow and serialized, so this is a demonstration
// kernel only.
//   M - offset added to the thread index for the upper-side probe (the host
//       code in this file passes the number of texels in the texture)
__global__ void Test_texture_clamping(const int M) {

    // threadIdx.x is unsigned; convert once so the negation is a genuine
    // negative int and the %i conversions receive int arguments.
    const int t = static_cast<int>(threadIdx.x);

    printf("Texture clamping - i = %i; value = %f\n", -t, tex1D(texture_clamp, -(float)t));
    printf("Texture clamping - i = %i; value = %f\n", M + t, tex1D(texture_clamp, (float)(M + t)));

}

/****************************/
/* CUDA ADDRESS MODE BORDER */
/****************************/
// Prints the values fetched from texture_border at the coordinates
// -threadIdx.x and M + threadIdx.x.
//
// Launch layout: only threadIdx.x is used; every launched thread prints two
// lines. Device printf is slow and serialized, so this is a demonstration
// kernel only.
//   M - offset added to the thread index for the upper-side probe (the host
//       code in this file passes the number of texels in the texture)
__global__ void Test_texture_border(const int M) {

    // threadIdx.x is unsigned; convert once so the negation is a genuine
    // negative int and the %i conversions receive int arguments.
    const int t = static_cast<int>(threadIdx.x);

    printf("Texture border - i = %i; value = %f\n", -t, tex1D(texture_border, -(float)t));
    printf("Texture border - i = %i; value = %f\n", M + t, tex1D(texture_border, (float)(M + t)));

}

/**************************/
/* CUDA ADDRESS MODE WRAP */
/**************************/
// Prints the values fetched from texture_wrap at the coordinates
// -threadIdx.x / M and (M + threadIdx.x) / M, i.e. the probe indices are
// divided by M before being passed to tex1D.
//
// Launch layout: only threadIdx.x is used; every launched thread prints two
// lines. Device printf is slow and serialized, so this is a demonstration
// kernel only.
//   M - divisor for the coordinates and offset for the upper-side probe (the
//       host code in this file passes the number of texels in the texture)
__global__ void Test_texture_wrap(const int M) {

    // threadIdx.x is unsigned; convert once so the negation is a genuine
    // negative int and the %i conversions receive int arguments.
    const int t = static_cast<int>(threadIdx.x);

    printf("Texture wrap - i = %i; value = %f\n", -t, tex1D(texture_wrap, -(float)t / (float)M));
    printf("Texture wrap - i = %i; value = %f\n", M + t, tex1D(texture_wrap, (float)(M + t) / (float)M));

}

/****************************/
/* CUDA ADDRESS MODE MIRROR */
/****************************/
// Prints the values fetched from texture_mirror at the coordinates
// -threadIdx.x / M and (M + threadIdx.x) / M, i.e. the probe indices are
// divided by M before being passed to tex1D.
//
// Launch layout: only threadIdx.x is used; every launched thread prints two
// lines. Device printf is slow and serialized, so this is a demonstration
// kernel only.
//   M - divisor for the coordinates and offset for the upper-side probe (the
//       host code in this file passes the number of texels in the texture)
__global__ void Test_texture_mirror(const int M) {

    // threadIdx.x is unsigned; convert once so the negation is a genuine
    // negative int and the %i conversions receive int arguments.
    const int t = static_cast<int>(threadIdx.x);

    printf("Texture mirror - i = %i; value = %f\n", -t, tex1D(texture_mirror, -(float)t / (float)M));
    printf("Texture mirror - i = %i; value = %f\n", M + t, tex1D(texture_mirror, (float)(M + t) / (float)M));

}

/********/
/* MAIN */
/********/
// Allocates a CUDA array holding M floats, uploads h_data into it, configures
// the given texture reference and binds it to the array.
//   tex        - file-scope texture reference to configure and bind
//   h_data     - host buffer with M floats
//   M          - number of elements
//   normalized - whether the texture is addressed with normalized coordinates
//   mode       - address mode applied to the first (only) dimension
// Returns the allocated array; the caller releases it with cudaFreeArray.
static cudaArray* setupTexture(texture<float, 1, cudaReadModeElementType>& tex,
                               const float* h_data, int M,
                               bool normalized, cudaTextureAddressMode mode)
{
    cudaArray* d_array = NULL;
    gpuErrchk(cudaMallocArray(&d_array, &tex.channelDesc, M, 1));
    gpuErrchk(cudaMemcpyToArray(d_array, 0, 0, h_data, M * sizeof(float), cudaMemcpyHostToDevice));

    // Set the attributes before binding, matching the order used in the CUDA
    // programming guide's texture reference examples.
    tex.normalized     = normalized;
    tex.addressMode[0] = mode;
    gpuErrchk(cudaBindTextureToArray(tex, d_array));

    return d_array;
}

// Demonstrates the four texture address modes on a 1D texture containing
// 0, 1, ..., M-1 by launching one probing kernel per mode.
// Returns 0 on success; any CUDA failure terminates through gpuErrchk.
int main(){

    const int M = 4;

    // --- Host side memory allocation and initialization
    float *h_data = (float*)malloc(M * sizeof(float));
    if (h_data == NULL) {
        fprintf(stderr, "Host allocation of %d floats failed\n", M);
        return 1;
    }

    for (int i = 0; i < M; i++) h_data[i] = (float)i;

    // 2 * M threads per launch: each thread probes one coordinate below 0 and
    // one at or above M.
    dim3 dimBlock(2 * M, 1); dim3 dimGrid(1, 1);

    // --- Texture clamping
    cudaArray* d_data_clamping = setupTexture(texture_clamp, h_data, M, false, cudaAddressModeClamp);

    Test_texture_clamping<<<dimGrid,dimBlock>>>(M);
    gpuErrchk(cudaPeekAtLastError());
    // Synchronize so the device-side printf output is flushed before the
    // host prints the separator below.
    gpuErrchk(cudaDeviceSynchronize());

    printf("\n\n\n");

    // --- Texture border
    cudaArray* d_data_border = setupTexture(texture_border, h_data, M, false, cudaAddressModeBorder);

    Test_texture_border<<<dimGrid,dimBlock>>>(M);
    gpuErrchk(cudaPeekAtLastError());
    gpuErrchk(cudaDeviceSynchronize());

    printf("\n\n\n");

    // --- Texture wrap
    cudaArray* d_data_wrap = setupTexture(texture_wrap, h_data, M, true, cudaAddressModeWrap);

    Test_texture_wrap<<<dimGrid,dimBlock>>>(M);
    gpuErrchk(cudaPeekAtLastError());
    gpuErrchk(cudaDeviceSynchronize());

    printf("\n\n\n");

    // --- Texture mirror
    cudaArray* d_data_mirror = setupTexture(texture_mirror, h_data, M, true, cudaAddressModeMirror);

    Test_texture_mirror<<<dimGrid,dimBlock>>>(M);
    gpuErrchk(cudaPeekAtLastError());
    gpuErrchk(cudaDeviceSynchronize());

    printf("\n\n\n");

    // --- Release texture bindings, device arrays and host memory
    gpuErrchk(cudaUnbindTexture(texture_clamp));
    gpuErrchk(cudaUnbindTexture(texture_border));
    gpuErrchk(cudaUnbindTexture(texture_wrap));
    gpuErrchk(cudaUnbindTexture(texture_mirror));

    gpuErrchk(cudaFreeArray(d_data_clamping));
    gpuErrchk(cudaFreeArray(d_data_border));
    gpuErrchk(cudaFreeArray(d_data_wrap));
    gpuErrchk(cudaFreeArray(d_data_mirror));

    free(h_data);

    return 0;
}

这些是输出
index                  -7  -6  -5  -4  -3  -2  -1  0  1  2  3  4  5  6  7  8  9  10  11
clamp 0 0 0 0 0 0 0 0 1 2 3 3 3 3 3 3 3 3 3
border 0 0 0 0 0 0 0 0 1 2 3 0 0 0 0 0 0 0 0
wrap 1 2 3 0 1 2 3 0 1 2 3 0 1 2 3 0 1 2 3
mirror 1 2 3 3 2 1 0 0 1 2 3 3 2 1 0 0 1 2 3

关于cuda - CUDA纹理的不同寻址模式,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/19020963/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com