gpt4 book ai didi

arrays - 如何读回 CUDA 纹理进行测试?

转载 作者:行者123 更新时间:2023-12-04 11:32:00 29 4
gpt4 key购买 nike

好的,到目前为止,我可以在主机上创建一个数组(float类型),并将其复制到gpu,然后将其作为另一个数组带回主机(通过与原始比较来测试复制是否成功)。

然后我从 GPU 上的数组创建一个 CUDA 数组。然后我将该数组绑定(bind)到 CUDA 纹理。

我现在想读回该纹理并与原始数组进行比较(再次测试它是否被正确复制)。我看到了一些使用 readTexel() 函数的示例代码,如下所示。但它对我似乎不起作用...(除了 bindToTexture(float* deviceArray) 函数中从 readTexels(SIZE, testArrayDevice) 这一行开始的部分之外,其余基本上一切正常)。

有什么不同方法的建议吗?还是我在代码中遗漏了一些明显的问题?

谢谢你们的帮助!

#include <stdio.h>
#include <assert.h>
#include <cuda.h>

//Number of float elements in the test array (and texels in the texture row).
//No trailing semicolon: the macro is expanded inside expressions such as
//sizeof(float)*SIZE and loop conditions, where a stray ';' breaks the code.
#define SIZE 20

//Create a channel description to use.
//One 32-bit floating-point channel (x); the y, z and w channels have 0 bits.
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);

//Create a texture to use.
//2D texture reference of floats, fetched as raw element values.
texture<float, 2, cudaReadModeElementType> cudaTexture;
//cudaTexture.filterMode = cudaFilterModeLinear;
//cudaTexture.normalized = false;

//Copies the first `amount` texels of row 0 of cudaTexture into Array.
//
//Launch layout: 1D grid of 1D blocks, one thread per texel. Threads whose
//global index is >= amount do nothing, so the grid may overshoot.
//
//amount: number of texels to read.
//Array:  device-accessible buffer with room for at least `amount` floats.
__global__ void readTexels(int amount, float *Array)
{
    int index = blockIdx.x * blockDim.x + threadIdx.x;

    if (index < amount)
    {
        //cudaTexture is declared as a 2D texture, so it has to be fetched
        //with tex2D (tex1D does not accept it). Adding 0.5 addresses the
        //centre of texel (index, 0) in the single row of the array.
        Array[index] = tex2D(cudaTexture, index + 0.5f, 0.5f);
    }
}

//Copies `size` floats from hostArray into a newly allocated device buffer and
//verifies the transfer by copying the data back and comparing it element by
//element with the original.
//
//hostArray: host buffer holding at least `size` floats.
//size:      number of floats to copy; must be positive.
//
//Returns the device pointer on success; the caller owns it and must release
//it with cudaFree. Returns NULL (after printing a message to stderr) if the
//arguments are invalid or any allocation/copy fails.
float* copyToGPU(float* hostArray, int size)
{
    //Reject inputs for which there is nothing valid to copy before touching
    //the CUDA runtime.
    if (hostArray == NULL || size <= 0)
    {
        fprintf(stderr, "copyToGPU: invalid arguments (hostArray=%p, size=%d).\n", (void *)hostArray, size);
        return NULL;
    }

    const size_t bytes = sizeof(float) * (size_t)size;

    //Host-side scratch buffer used only to verify the round trip.
    float* testArray = (float *)malloc(bytes);
    if (testArray == NULL)
    {
        fprintf(stderr, "copyToGPU: out of host memory.\n");
        return NULL;
    }

    //Allocate the device buffer, copy host -> device, then device -> host.
    //Each step only runs if the previous one succeeded.
    float* deviceArray = NULL;
    cudaError_t status = cudaMalloc((void **)&deviceArray, bytes);
    if (status == cudaSuccess)
    {
        status = cudaMemcpy(deviceArray, hostArray, bytes, cudaMemcpyHostToDevice);
    }
    if (status == cudaSuccess)
    {
        status = cudaMemcpy(testArray, deviceArray, bytes, cudaMemcpyDeviceToHost);
    }

    if (status != cudaSuccess)
    {
        fprintf(stderr, "copyToGPU: CUDA error: %s\n", cudaGetErrorString(status));
        cudaFree(deviceArray); //Still NULL if the allocation itself failed.
        free(testArray);
        return NULL;
    }

    //Make sure contents of testArray match the original contents in hostArray.
    //Exact comparison is intended: the data was only copied, never computed on.
    for (int i = 0; i < size; i++)
    {
        if (hostArray[i] != testArray[i])
        {
            printf("Location [%d] does not match in hostArray and testArray.\n", i);
        }
    }

    free(testArray);

    return deviceArray;
}

//Copies SIZE floats from deviceArray into a new CUDA array, binds that array
//to cudaTexture, and then reads the texture back through the readTexels
//kernel and prints every value so the contents can be checked.
//
//deviceArray: device buffer holding at least SIZE floats. This function
//             takes ownership of it and releases it with cudaFree.
//
//Returns the CUDA array that is bound to cudaTexture, or NULL if creating or
//binding it failed. The caller must eventually call
//cudaUnbindTexture(cudaTexture) and cudaFreeArray() on the returned array.
//A failure in the read-back test is reported on stderr but does not
//invalidate the returned array.
cudaArray* bindToTexture(float* deviceArray)
{
    //Take SIZE into a typed local once so the rest of the function works
    //with a plain int value.
    const int count = SIZE;
    const size_t bytes = sizeof(float) * count;

    if (deviceArray == NULL)
    {
        fprintf(stderr, "bindToTexture: deviceArray is NULL.\n");
        return NULL;
    }

    //Allocate a count x 1 CUDA array using the texture's own channel format.
    cudaArray* cuArray = NULL;
    cudaError_t status = cudaMallocArray(&cuArray, &cudaTexture.channelDesc, count, 1);

    //Copy the deviceArray into the CUDA array. The source lives in device
    //memory, so the copy kind is device-to-device, not host-to-device.
    if (status == cudaSuccess)
    {
        status = cudaMemcpyToArray(cuArray, 0, 0, deviceArray, bytes, cudaMemcpyDeviceToDevice);
    }

    //Release the deviceArray; its contents now live in the CUDA array.
    cudaFree(deviceArray);

    //Bind the CUDA array to the texture.
    if (status == cudaSuccess)
    {
        status = cudaBindTextureToArray(cudaTexture, cuArray);
    }

    if (status != cudaSuccess)
    {
        fprintf(stderr, "bindToTexture: CUDA error: %s\n", cudaGetErrorString(status));
        cudaFreeArray(cuArray);
        return NULL;
    }

    //Make a test array on the device and on the host to verify that the
    //texture has been saved correctly.
    float* testArrayDevice = NULL;
    float* testArrayHost = (float *)malloc(bytes);
    if (testArrayHost == NULL)
    {
        fprintf(stderr, "bindToTexture: out of host memory, skipping read-back test.\n");
        return cuArray;
    }

    status = cudaMalloc((void **)&testArrayDevice, bytes);

    //Read the texels of the texture to the test array in the device.
    //A kernel must be launched with an execution configuration; use enough
    //blocks to cover every texel (the kernel bounds-checks the tail).
    if (status == cudaSuccess)
    {
        const int threadsPerBlock = 64;
        const int blocks = (count + threadsPerBlock - 1) / threadsPerBlock;
        readTexels<<<blocks, threadsPerBlock>>>(count, testArrayDevice);
        status = cudaGetLastError(); //Launch errors are not returned by the launch itself.
    }

    //Copy the device test array to the host test array. cudaMemcpy blocks
    //until the kernel has finished, so no extra synchronisation is needed.
    if (status == cudaSuccess)
    {
        status = cudaMemcpy(testArrayHost, testArrayDevice, bytes, cudaMemcpyDeviceToHost);
    }

    if (status == cudaSuccess)
    {
        //Print contents of the array out.
        for (int i = 0; i < count; i++)
        {
            printf("%f\n", testArrayHost[i]);
        }
    }
    else
    {
        fprintf(stderr, "bindToTexture: read-back test failed: %s\n", cudaGetErrorString(status));
    }

    //Free the memory for the test arrays.
    free(testArrayHost);
    cudaFree(testArrayDevice);

    return cuArray;
}


//Test driver: fills a host array with 10, 11, 12, ..., copies it to the GPU,
//turns it into a texture, reads the texture back, and releases everything.
//Returns 0 on success and 1 if a buffer could not be obtained.
int main(void)
{
    //Take SIZE into a typed local once so it is used as a plain int value.
    const int count = SIZE;

    float* hostArray = (float *)malloc(sizeof(float) * count);
    if (hostArray == NULL)
    {
        fprintf(stderr, "main: out of host memory.\n");
        return 1;
    }

    for (int i = 0; i < count; i++)
    {
        hostArray[i] = 10.f + i;
    }

    float* deviceAddy = copyToGPU(hostArray, count);

    //The host copy is no longer needed once the data is on the device.
    free(hostArray);

    if (deviceAddy == NULL)
    {
        return 1;
    }

    //bindToTexture releases deviceAddy itself, so it must not be freed here.
    cudaArray* textureArray = bindToTexture(deviceAddy);

    //Unbind the texture before destroying the array that backs it.
    cudaUnbindTexture(cudaTexture);
    if (textureArray != NULL)
    {
        cudaFreeArray(textureArray);
    }

    return 0;
}

最佳答案

简要地:

------------- 在你的 main.cu 中 -------------------------------- -------------------------------------------------- -----

-1。将纹理定义为全局变量


texture<unsigned int, 2, cudaReadModeElementType> refTex; // global variable !
// meaning: address the texture with (x,y) (2D) and get an unsigned int

在主函数中:

-2。使用结合纹理的数组
    cudaArray* myArray; // declar.
// ask for memory
cudaMallocArray ( &myArray,

&refTex.channelDesc, /* with this you don't need to fill a channel descriptor */
width,

height);

-3。将数据从 CPU 复制到 GPU(到数组)
 cudaMemcpyToArray ( arrayCudaEntrada, // destination: the array

0, 0, // offsets
sourceData, // pointer uint*
width*height*sizeof(uint), // total amount of bytes to be copied
cudaMemcpyHostToDevice);

-4。绑定(bind)纹理和数组
    cudaBindTextureToArray( refTex,arrayCudaEntrada)

-5。更改纹理中的一些参数


refTextura_In.normalized = false; // don't automatically convert fetched data to [0,1[
refTextura_In.addressMode[0] = cudaAddressModeClamp; // if my indexing is out of bounds: automatically use a valid indexing (0 if negative index, last if too great index)
refTextura_In.addressMode[1] = cudaAddressModeClamp;



---------- in the kernel --------------------------------------------------------

    // find out indexes (f,c) to process by this thread
uint f = (blockIdx.x * blockDim.x) + threadIdx.x;
uint c = (blockIdx.y * blockDim.y) + threadIdx.y;



  // this is curious and necessary: indexes for reading from a texture
// are floats! Even if you are certain to access (4,5) you have to
// match the "center", that is (4.5, 5.5)
uint read = tex2D( refTex, c+0.5f, f+0.5f); // refTex is a global variable

现在您可以处理读取到的值,并将结果写入设备全局内存的其他区域,
而不是写回纹理本身!

关于arrays - 如何读回 CUDA 纹理进行测试?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/2353622/

29 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com