gpt4 book ai didi

使用 CUDA 的负片(反)图像(图像无法显示)

转载 作者:太空宇宙 更新时间:2023-11-03 22:14:29 24 4
gpt4 key购买 nike

我正在尝试用 CUDA 生成图像的负片(反色图像),所用的算法与 CPU 版本的计算相同。

这是主函数(main)。

// Program entry point: loads "test.jpg", passes its pixel buffer and a
// same-sized output buffer to negatif_parallel, then shows both images.
// NOTE(review): width, height, widthStep and channels are assigned below but
// are not declared anywhere in this snippet — presumably they are declared
// elsewhere; confirm before compiling.
// NOTE(review): the result of cvLoadImage is used without a NULL check.
int main(int argc, char** argv)
{

// Input image, plus an output image created with the input's size and
// channel count.
IplImage* image_input = cvLoadImage("test.jpg", CV_LOAD_IMAGE_UNCHANGED);
IplImage* image_output = cvCreateImage(cvGetSize(image_input),
IPL_DEPTH_8U,image_input->nChannels);

// Raw byte views of the two images' pixel data.
unsigned char *h_out = (unsigned char*)image_output->imageData;
unsigned char *h_in = (unsigned char*)image_input->imageData;

// Geometry of the input image, forwarded unchanged to negatif_parallel.
width = image_input->width;
height = image_input->height;
widthStep = image_input->widthStep;
channels = image_input->nChannels;

// Reads h_in and fills h_out.
negatif_parallel(h_in, h_out, width, height, widthStep, channels);

// The window titled "CPU" displays the buffer written by negatif_parallel.
cvShowImage("Original", image_input);
cvShowImage("CPU", image_output);

// Wait for a key press before releasing the images.
waitKey(0);
cvReleaseImage(&image_input);
cvReleaseImage(&image_output);

}

这是 CUDA 部分的代码:

// Negative-image kernel. Each thread handles the pixel at (column, row)
// derived from its 2D block/thread indices and writes 255 - value for every
// channel byte of that pixel. Rows are addressed widthStep bytes apart and
// pixels within a row are `channels` bytes apart.
__global__ void kernel ( unsigned char *d_in ,unsigned char* d_out, int width , int height, int widthStep, int channels) {
    const int col = blockIdx.x * blockDim.x + threadIdx.x;
    const int row = blockIdx.y * blockDim.y + threadIdx.y;

    // Threads that land outside the image have no pixel to process.
    if (col >= width || row >= height) {
        return;
    }

    // Offset of this pixel's first channel byte.
    const int first = row * widthStep + col * channels;
    for (int c = 0; c < channels; ++c) {
        d_out[first + c] = 255 - d_in[first + c];
    }
}

// Host wrapper around `kernel`: allocates two device buffers, copies h_in to
// the device, launches the kernel on a 2D grid of 16x16-thread blocks, copies
// the result into h_out and frees the device buffers.
// NOTE(review): none of the CUDA return codes are checked in this function.
// NOTE(review): the closing brace of this function does not appear in the
// visible text.
extern "C" void negatif_parallel( unsigned char* h_in, unsigned char* h_out, int width, int height, int widthStep,int channels){

unsigned char* d_in;
unsigned char* d_out;
// NOTE(review): both allocations are width*height bytes; `channels` and
// `widthStep` are not part of the size, although the kernel launched below
// indexes the buffers with i*widthStep + j*channels + k.
cudaMalloc((void**) &d_in, width*height);
cudaMalloc((void**) &d_out, width*height);

// Upload: again width*height bytes only.
cudaMemcpy(d_in, h_in, width*height*sizeof( unsigned char), cudaMemcpyHostToDevice);
dim3 block (16,16);
// Integer division: width/16 and height/16 truncate, so any remainder of
// width or height modulo 16 is not covered by the grid.
dim3 grid (width/16, height/16);
kernel<<<grid,block>>>(d_in, d_out, width, height, widthStep, channels);

// Download: width*height bytes only, same size as the upload.
cudaMemcpy(h_out, d_out, width*height*sizeof( unsigned char), cudaMemcpyDeviceToHost);
cudaFree(d_in);
cudaFree(d_out);

用 CPU 计算时,负片可以正确生成;但改用 CUDA 后,反色失败,只显示一幅空白的白色图像。我的代码出了什么问题? T_T

最佳答案

你已经很接近了。只需要在内存分配和数据传输的字节数中把通道数(channels)也算进去即可。下面是你的代码的可运行版本,我还添加了一些错误检查;关于错误检查的更多信息,请参见 this question。请注意,在这种情况下,你不必在 GPU 上使用两个缓冲区:可以只用一个缓冲区并就地完成转换。

#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>

using namespace cv;
using namespace std;

// Host-side entry point for the GPU negative, defined further down in this
// file. h_in is the source pixel buffer and h_out receives the result; width,
// height, widthStep and channels are forwarded to the kernel, which addresses
// bytes as row*widthStep + column*channels + channel.
void negatif_parallel( unsigned char* h_in, unsigned char* h_out, int width, int height, int widthStep,int channels);

// gpuErrchk(call): evaluates a CUDA runtime call once and passes its
// cudaError_t result to gpuAssert together with the call site's file and line.
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }

// Reports a failed CUDA call on stderr and, unless abort is false, terminates
// the process using the error code as exit status.
//   code  - result of the CUDA runtime call
//   file  - source file of the call site (normally __FILE__)
//   line  - source line of the call site (normally __LINE__)
//   abort - when true (default), exit(code) after printing
// `file` is const char* because __FILE__ is a string literal; binding a
// literal to a non-const char* is not valid C++11.
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
    if (code != cudaSuccess)
    {
        fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
        if (abort) exit(code);
    }
}

// Program entry point: loads "test.jpg", computes its negative on the GPU via
// negatif_parallel and shows the original and the result in two windows.
// Returns 0 on success, 1 if the image cannot be loaded or is not 8-bit.
int main(int argc, char** argv)
{
    IplImage* image_input = cvLoadImage("test.jpg", CV_LOAD_IMAGE_UNCHANGED);
    // cvLoadImage yields NULL when the file is missing or cannot be decoded;
    // without this check the dereferences below would crash.
    if (image_input == NULL) {
        fprintf(stderr, "Could not load image: test.jpg\n");
        return 1;
    }

    // The output image is created as 8-bit and the pixel data is processed as
    // unsigned char, so any other input depth would give mismatched buffers.
    if (image_input->depth != IPL_DEPTH_8U) {
        fprintf(stderr, "Unsupported image depth: only 8-bit images are handled\n");
        cvReleaseImage(&image_input);
        return 1;
    }

    IplImage* image_output = cvCreateImage(cvGetSize(image_input), IPL_DEPTH_8U, image_input->nChannels);

    unsigned char *h_out = (unsigned char*)image_output->imageData;
    unsigned char *h_in = (unsigned char*)image_input->imageData;

    int width = image_input->width;
    int height = image_input->height;
    int widthStep = image_input->widthStep;   // bytes between consecutive rows
    int channels = image_input->nChannels;

    negatif_parallel(h_in, h_out, width, height, widthStep, channels);

    cvShowImage("Original", image_input);
    cvShowImage("CPU", image_output);

    // Keep the windows open until a key is pressed.
    waitKey(0);

    cvReleaseImage(&image_input);
    cvReleaseImage(&image_output);
    return 0;
}

// Negative-image kernel.
// Launch layout: 2D grid of 2D blocks; the thread at global index (x, y)
// processes pixel (x, y). Threads with x >= width or y >= height do nothing,
// so the grid may overshoot the image.
//   d_in      - source bytes (device memory), read only
//   d_out     - destination bytes (device memory)
//   width     - image width in pixels
//   height    - image height in pixels
//   widthStep - bytes between the starts of consecutive rows
//   channels  - bytes per pixel
// Each thread reads and writes only the bytes of its own pixel, at the same
// offset in d_in and d_out, so passing the same buffer for both is fine.
// Uses no shared memory.
__global__ void kernel (const unsigned char *d_in,unsigned char* d_out, int width, int height, int widthStep, int channels) {
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;

    if (x >= width || y >= height) {
        return;
    }

    // Compute the byte offset in size_t so y*widthStep cannot overflow int
    // on very large images.
    const size_t pixel = (size_t)y * widthStep + (size_t)x * channels;
    for (int k = 0; k < channels; k++) {
        d_out[pixel + k] = 255 - d_in[pixel + k];
    }
}

// Computes the negative of an 8-bit image on the GPU.
//   h_in      - host source buffer, rows widthStep bytes apart
//   h_out     - host destination buffer with the same row layout as h_in
//   width     - image width in pixels
//   height    - image height in pixels
//   widthStep - bytes between the starts of consecutive rows (may include
//               padding, i.e. be larger than width*channels)
//   channels  - bytes per pixel
// Both host buffers must hold at least height rows at a stride of widthStep
// bytes. Any CUDA failure is reported through gpuErrchk, which terminates the
// process. An image with no pixels is left untouched.
void negatif_parallel( unsigned char* h_in, unsigned char* h_out, int width, int height, int widthStep,int channels)
{
    // Nothing to process; this also avoids launching a grid with a zero dimension.
    if (width <= 0 || height <= 0 || channels <= 0) {
        return;
    }

    const size_t rowBytes = (size_t)width * channels;   // pixel bytes in one row
    const size_t pitch = (size_t)widthStep;             // row stride, host and device
    // The kernel addresses bytes as row*widthStep + ..., so the device buffers
    // must cover widthStep bytes per row, not just width*channels.
    const size_t bufferBytes = pitch * (size_t)height;

    unsigned char* d_in = NULL;
    unsigned char* d_out = NULL;
    gpuErrchk(cudaMalloc((void**) &d_in, bufferBytes));
    gpuErrchk(cudaMalloc((void**) &d_out, bufferBytes));

    // Row-wise copy: transfers only the pixel bytes of each row and keeps the
    // widthStep stride on the device. Fails if widthStep < width*channels.
    gpuErrchk(cudaMemcpy2D(d_in, pitch, h_in, pitch, rowBytes, height, cudaMemcpyHostToDevice));

    dim3 block (16,16);
    // Round up so pixels in a partial block at the right/bottom edge are
    // covered; the kernel's bounds check discards the surplus threads.
    dim3 grid ((width + block.x - 1) / block.x, (height + block.y - 1) / block.y);
    kernel<<<grid, block>>>(d_in, d_out, width, height, widthStep, channels);
    gpuErrchk( cudaPeekAtLastError() );
    gpuErrchk( cudaDeviceSynchronize() ); // Not strictly required because the next call is a blocking copy

    gpuErrchk(cudaMemcpy2D(h_out, pitch, d_out, pitch, rowBytes, height, cudaMemcpyDeviceToHost));
    gpuErrchk(cudaFree(d_in));
    gpuErrchk(cudaFree(d_out));
}

关于使用 CUDA 的负片(反)图像(图像无法显示),我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/19921536/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com