gpt4 book ai didi

opengl - cudaFree - 无效的设备指针错误

转载 作者:行者123 更新时间:2023-12-05 01:02:54 26 4
gpt4 key购买 nike

我正在尝试释放我在 CUDA + OpenGL 互操作代码中分配的设备内存 dev_inp。加入错误检查后,我收到了 Invalid Device Pointer 错误,程序在 renderScene() 函数末尾调用的 cudaFree(dev_inp); 处停止执行。其他一切都正常,但我担心内存泄漏。

问题:

a. 为什么我无法释放已分配的本地设备内存?我已经将 cuda_resource 从像素缓冲区对象取消映射,并取消注册了该资源。

从 CUDA C 编程指南中的 B.17 节:
Memory allocated via malloc() cannot be freed using the runtime (i.e. by calling any of the free memory functions from Sections 3.2.2).
所以,这让我想到了另外两个问题:

b. 我没有在内核中用 malloc 分配内存,因为我根本没有内核。因此,在这里使用 cudaFree 函数(技术上?)应该可行,对吗?释放分配给局部定义指针的内存是程序员的责任,还是 nvcc 编译器会在程序退出或超出局部作用域时负责释放?我不希望代码中出现内存泄漏,因此亲自释放之前分配的内存让我感觉更安全。

c. 在 renderScene() 函数末尾调用 cudaDeviceReset(),以便销毁主 CUDA 上下文(根据 CUDA C 编程指南,连同其变量和指针一起销毁),这样做是否明智?我看到 NVidia Visual Profiler 文档也提到了 cudaDeviceReset()。
但当我调用它时,渲染似乎比平时慢。如果我能在这里简单地用 cudaFree 释放内存就好了,但我似乎无法让它工作。

完整代码:

// Resolves an OpenGL entry point by name through wglGetProcAddress.
#define GET_PROC_ADDRESS( str ) wglGetProcAddress( str )

// OpenGL object names shared across the file: the texture that is drawn and the
// pixel buffer object (PBO) whose storage is registered with CUDA.
GLuint tex;
GLuint pbo;
// CUDA graphics-interop handle for the registered PBO.
struct cudaGraphicsResource *cuda_resource;

// Buffer-object entry points; NULL until they are looked up via GET_PROC_ADDRESS
// (done in renderScene()).
PFNGLBINDBUFFERARBPROC glBindBuffer = NULL;
PFNGLDELETEBUFFERSARBPROC glDeleteBuffers = NULL;
PFNGLGENBUFFERSARBPROC glGenBuffers = NULL;
PFNGLBUFFERDATAARBPROC glBufferData = NULL;

// ==========================================================================================
// CUDA ERROR CHECKING CODE
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, char *file, int line, bool abort=true)
{
if (code != cudaSuccess)
{
fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
if (abort) getchar();
}
}

// ==========================================================================================

// Picks the device id reported by cutGetMaxGflopsDeviceId() and hands it to
// cudaGLSetGLDevice(); any CUDA error is reported through gpuErrchk.
void initCUDADevice() {
    const int fastestDeviceId = cutGetMaxGflopsDeviceId();
    gpuErrchk(cudaGLSetGLDevice(fastestDeviceId));
}

// ==========================================================================================

// GLUT reshape callback: clears the buffers, resets the current matrix and
// makes the viewport cover the whole window.
//
//   w - new window width in pixels
//   h - new window height in pixels (0 is treated as 1)
void changeSize(int w, int h) {

    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
    glLoadIdentity();

    // Never hand a zero height to glViewport; clamp it to one pixel.
    // (The previously computed aspect ratio was never used and has been removed.)
    if (h == 0)
        h = 1;

    // Switch to the projection matrix; it is intentionally left unchanged.
    glMatrixMode(GL_PROJECTION);

    // Set the viewport to be the entire window
    glViewport(0, 0, w, h);

    // Get back to the modelview matrix
    glMatrixMode(GL_MODELVIEW);
}

// ==========================================================================================

void renderScene(void) {

// Clear Color and Depth Buffers
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
// Reset transformations
glLoadIdentity();

// ====================================================================================
// initiate GPU by setting it correctly
//initCUDADevice();

// ====================================================================================
// read the image that needs to be textured

Mat image, flipped;
image = imread("K:/Ultrasound experiment images/PA_175.png", CV_LOAD_IMAGE_GRAYSCALE); // Read the file from disk

if(!image.data) // Check for invalid input
{
cout << "Could not open or find the image" << std::endl ;


}

cv::flip(image, flipped, 0);

imshow("OpenCV - image", image); // displays output

// ====================================================================================
// allocate the PBO, texture, and CUDA resource

glBindBuffer = (PFNGLBINDBUFFERARBPROC)GET_PROC_ADDRESS("glBindBuffer");
glDeleteBuffers = (PFNGLDELETEBUFFERSARBPROC)GET_PROC_ADDRESS("glDeleteBuffers");
glGenBuffers = (PFNGLGENBUFFERSARBPROC)GET_PROC_ADDRESS("glGenBuffers");
glBufferData = (PFNGLBUFFERDATAARBPROC)GET_PROC_ADDRESS("glBufferData");

// ====================================================================================
// generate the pixel buffer object (PBO)

// Generate a buffer ID called a PBO (Pixel Buffer Object)
glGenBuffers(1, &pbo);

// Make this the current UNPACK buffer (OpenGL is state-based)
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo);

// Allocate data for the buffer. 4-channel 8-bit image
glBufferData(GL_PIXEL_UNPACK_BUFFER, sizeof(unsigned char) * flipped.rows * flipped.cols, NULL, GL_STREAM_DRAW);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);

gpuErrchk(cudaGraphicsGLRegisterBuffer(&cuda_resource, pbo, cudaGraphicsMapFlagsNone));

// ====================================================================================
// create the texture object

// enable 2D texturing
glEnable(GL_TEXTURE_2D);

// generate and bind the texture
glGenTextures(1, &tex);
glBindTexture(GL_TEXTURE_2D, tex);

glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);

// put flipped.data at the end for cpu rendering
glTexImage2D(GL_TEXTURE_2D, 0, GL_LUMINANCE, image.cols, image.rows, 0, GL_LUMINANCE, GL_UNSIGNED_BYTE, 0 );

// put tex at the end for cpu rendering
glBindTexture(GL_TEXTURE_2D, 0);

// ====================================================================================
// copy OpenCV flipped image data into the device pointer

glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);

unsigned char *dev_inp;

gpuErrchk( cudaMalloc((void**)&dev_inp, sizeof(unsigned char)*flipped.rows*flipped.cols) );

gpuErrchk( cudaGraphicsMapResources(1, &cuda_resource, 0) );

size_t size;
gpuErrchk( cudaGraphicsResourceGetMappedPointer((void **)&dev_inp, &size, cuda_resource) );

gpuErrchk( cudaMemcpy(dev_inp, flipped.data, sizeof(unsigned char)*flipped.rows*flipped.cols, cudaMemcpyHostToDevice) );

gpuErrchk( cudaGraphicsUnmapResources(1, &cuda_resource, 0) );

// ====================================================================================
// bind pbo and texture to render data now

glBindBuffer( GL_PIXEL_UNPACK_BUFFER, pbo);
//
glBindTexture(GL_TEXTURE_2D, tex);

glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, flipped.cols, flipped.rows, GL_LUMINANCE, GL_UNSIGNED_BYTE, NULL);

gpuErrchk( cudaGraphicsUnregisterResource(cuda_resource));
gpuErrchk( cudaThreadSynchronize());

//gpuErrchk(cudaFree(dev_inp));

// ====================================================================================
// map the texture coords to the vertex coords

glBegin(GL_QUADS);
// Front Face
glTexCoord2f(0.0f, 0.0f); glVertex3f(-1.0f, -1.0f, 1.0f); // Bottom Left Of The Texture and Quad
glTexCoord2f(1.0f, 0.0f); glVertex3f( 1.0f, -1.0f, 1.0f); // Bottom Right Of The Texture and Quad
glTexCoord2f(1.0f, 1.0f); glVertex3f( 1.0f, 1.0f, 1.0f); // Top Right Of The Texture and Quad
glTexCoord2f(0.0f, 1.0f); glVertex3f(-1.0f, 1.0f, 1.0f); // Top Left Of The Texture and Quad

glEnd();

glFlush(); // force rendering

glDisable(GL_TEXTURE_2D);

//glutSwapBuffers();
gpuErrchk(cudaFree(dev_inp)); // <--- Error here
//cudaGraphicsUnregisterResource(cuda_resource);

}


// ==========================================================================================


// Program entry point: creates the GLUT window, registers the display and
// reshape callbacks, and hands control to the GLUT event loop.
int main(int argc, char **argv) {

    // init GLUT and create window
    glutInit(&argc, argv);
    glutInitDisplayMode(GLUT_DEPTH | GLUT_RGB );
    glutInitWindowPosition(100,100);
    glutInitWindowSize(1024,256);
    glutCreateWindow("CUDA + OpenGL interop");

    // register callbacks
    glutDisplayFunc(renderScene);
    glutReshapeFunc(changeSize);
    //glutIdleFunc(renderScene);

    // enter GLUT event processing cycle
    glutMainLoop();

    // Reaching this point is the normal path, so report success (0), not 1.
    return 0;
}

最佳答案

此行不是必需的,应从您的代码中删除:

gpuErrchk( cudaMalloc((void**)&dev_inp, sizeof(unsigned char)*flipped.rows*flipped.cols) );

此行创建一个设备内存分配,并将指向该分配的指针赋给 dev_inp。

问题出现在这里:
gpuErrchk( cudaGraphicsResourceGetMappedPointer((void **)&dev_inp, &size, cuda_resource) );

此行从 cuda_resource 对象获取一个新指针,它指向另一个不同的分配,并将该指针放入 dev_inp,覆盖了您之前(通过 cudaMalloc)分配的指针。这一行获取的新指针已经有对应的底层设备分配,此时您不需要再为它单独/额外分配内存。

此时,如果您尝试释放 dev_inp :
gpuErrchk(cudaFree(dev_inp));        // <--- Error here

您正在尝试释放并非由程序(通过 cudaMalloc)显式分配的数据,而这些数据是(此时仍然存在的)cuda_resource 对象的必要组成部分。您不应该这样做。不幸的是,最初放在 dev_inp 中的指针现在已经丢失(被覆盖),因此程序无法再“释放”它;只要程序还在运行,就会存在内存泄漏。

解决方案是不执行额外的、不需要的分配:
gpuErrchk( cudaMalloc((void**)&dev_inp, sizeof(unsigned char)*flipped.rows*flipped.cols) );

这意味着相应的 cudaFree操作也应该被消除:
gpuErrchk(cudaFree(dev_inp));        // <--- Error here

在程序真正退出之前,我不会在 CUDA 代码(尤其是 CUDA/OpenGL 代码)的任何位置使用 cudaDeviceReset。在其他一些非常特殊的情况下,您可能需要在真正打算退出程序之前使用 cudaDeviceReset,但这些情况不适用于此处。

关于opengl - cudaFree - 无效的设备指针错误,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/25559600/

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com