gpt4 book ai didi

c++ - 使用 OpenCV 改进文本二值化/OCR 预处理

转载 作者:塔克拉玛干 更新时间:2023-11-03 02:00:00 26 4
gpt4 key购买 nike

我正在为我的应用构建一个扫描仪功能,并使用 OpenCV 将文档的照片二值化:

    // convert to greyscale
cv::Mat converted, blurred, blackAndWhite;
converted = cv::Mat(inputMatrix.rows, inputMatrix.cols, CV_8UC1);
cv::cvtColor(inputMatrix, converted, CV_BGR2GRAY );

// remove noise
cv::GaussianBlur(converted, blurred, cvSize(3,3), 0);

// adaptive threshold
cv::adaptiveThreshold(blackAndWhite, blackAndWhite, 255, cv::ADAPTIVE_THRESH_GAUSSIAN_C, cv::THRESH_BINARY, 15 , 9);

结果还可以,但来自不同扫描仪应用程序的扫描效果要好得多。尤其是非常小的文本要好得多:用opencv处理 First: my result

使用 DropBox 扫描 Second: Scanned with dropbox

我能做些什么来改善我的结果?

最佳答案

可能是应用程序正在使用 anti-aliasing使他们的二值化输出看起来更好。为了获得类似的效果,我首先尝试对图像进行二值化处理,但由于所有锯齿状边缘,结果看起来不太好。然后我对结果应用金字塔上采样然后下采样,输出更好。

但是我没有使用自适应阈值。我分割了类似文本的区域并仅处理这些区域,然后粘贴它们以形成最终图像。它是一种使用 Otsu 方法或 k-means 的局部阈值(在代码中使用 thr_roi_otsuthr_roi_kmeansproc_parts 的组合) .以下是一些结果。

将 Otsu 阈值应用于所有文本区域,然后进行上采样,然后进行下采样:

一些文字:

otsu1txt

全图:

otsu1x

对输入图像进行上采样,将 Otsu 阈值应用于各个文本区域,对结果进行下采样:

一些文字:

otsuparts2xtext

全图:

otsuparts2x

/*
apply Otsu threshold to the region in mask
*/
Mat thr_roi_otsu(Mat& mask, Mat& im)
{
Mat bw = Mat::ones(im.size(), CV_8U) * 255;

vector<unsigned char> pixels(countNonZero(mask));
int index = 0;
for (int r = 0; r < mask.rows; r++)
{
for (int c = 0; c < mask.cols; c++)
{
if (mask.at<unsigned char>(r, c))
{
pixels[index++] = im.at<unsigned char>(r, c);
}
}
}
// threshold pixels
threshold(pixels, pixels, 0, 255, CV_THRESH_BINARY | CV_THRESH_OTSU);
// paste pixels
index = 0;
for (int r = 0; r < mask.rows; r++)
{
for (int c = 0; c < mask.cols; c++)
{
if (mask.at<unsigned char>(r, c))
{
bw.at<unsigned char>(r, c) = pixels[index++];
}
}
}

return bw;
}

/*
apply k-means to the region in mask
*/
Mat thr_roi_kmeans(Mat& mask, Mat& im)
{
Mat bw = Mat::ones(im.size(), CV_8U) * 255;

vector<float> pixels(countNonZero(mask));
int index = 0;
for (int r = 0; r < mask.rows; r++)
{
for (int c = 0; c < mask.cols; c++)
{
if (mask.at<unsigned char>(r, c))
{
pixels[index++] = (float)im.at<unsigned char>(r, c);
}
}
}
// cluster pixels by gray level
int k = 2;
Mat data(pixels.size(), 1, CV_32FC1, &pixels[0]);
vector<float> centers;
vector<int> labels(countNonZero(mask));
kmeans(data, k, labels, TermCriteria(CV_TERMCRIT_EPS+CV_TERMCRIT_ITER, 10, 1.0), k, KMEANS_PP_CENTERS, centers);
// examine cluster centers to see which pixels are dark
int label0 = centers[0] > centers[1] ? 1 : 0;
// paste pixels
index = 0;
for (int r = 0; r < mask.rows; r++)
{
for (int c = 0; c < mask.cols; c++)
{
if (mask.at<unsigned char>(r, c))
{
bw.at<unsigned char>(r, c) = labels[index++] != label0 ? 255 : 0;
}
}
}

return bw;
}

/*
apply procfn to each connected component in the mask,
then paste the results to form the final image
*/
Mat proc_parts(Mat& mask, Mat& im, Mat (procfn)(Mat&, Mat&))
{
Mat tmp = mask.clone();
vector<vector<Point>> contours;
vector<Vec4i> hierarchy;
findContours(tmp, contours, hierarchy, CV_RETR_CCOMP, CV_CHAIN_APPROX_SIMPLE, Point(0, 0));

Mat byparts = Mat::ones(im.size(), CV_8U) * 255;

for(int idx = 0; idx >= 0; idx = hierarchy[idx][0])
{
Rect rect = boundingRect(contours[idx]);
Mat msk = mask(rect);
Mat img = im(rect);
// process the rect
Mat roi = procfn(msk, img);
// paste it to the final image
roi.copyTo(byparts(rect));
}

return byparts;
}

int _tmain(int argc, _TCHAR* argv[])
{
Mat im = imread("1.jpg", 0);
// detect text regions
Mat morph;
Mat kernel = getStructuringElement(MORPH_ELLIPSE, Size(3, 3));
morphologyEx(im, morph, CV_MOP_GRADIENT, kernel, Point(-1, -1), 1);
// prepare a mask for text regions
Mat bw;
threshold(morph, bw, 0, 255, THRESH_BINARY | THRESH_OTSU);
morphologyEx(bw, bw, CV_MOP_DILATE, kernel, Point(-1, -1), 10);

Mat bw2x, im2x;
pyrUp(bw, bw2x);
pyrUp(im, im2x);

// apply Otsu threshold to all text regions, then upsample followed by downsample
Mat otsu1x = thr_roi_otsu(bw, im);
pyrUp(otsu1x, otsu1x);
pyrDown(otsu1x, otsu1x);

// apply k-means to all text regions, then upsample followed by downsample
Mat kmeans1x = thr_roi_kmeans(bw, im);
pyrUp(kmeans1x, kmeans1x);
pyrDown(kmeans1x, kmeans1x);

// upsample input image, apply Otsu threshold to all text regions, downsample the result
Mat otsu2x = thr_roi_otsu(bw2x, im2x);
pyrDown(otsu2x, otsu2x);

// upsample input image, apply k-means to all text regions, downsample the result
Mat kmeans2x = thr_roi_kmeans(bw2x, im2x);
pyrDown(kmeans2x, kmeans2x);

// apply Otsu threshold to individual text regions, then upsample followed by downsample
Mat otsuparts1x = proc_parts(bw, im, thr_roi_otsu);
pyrUp(otsuparts1x, otsuparts1x);
pyrDown(otsuparts1x, otsuparts1x);

// apply k-means to individual text regions, then upsample followed by downsample
Mat kmeansparts1x = proc_parts(bw, im, thr_roi_kmeans);
pyrUp(kmeansparts1x, kmeansparts1x);
pyrDown(kmeansparts1x, kmeansparts1x);

// upsample input image, apply Otsu threshold to individual text regions, downsample the result
Mat otsuparts2x = proc_parts(bw2x, im2x, thr_roi_otsu);
pyrDown(otsuparts2x, otsuparts2x);

// upsample input image, apply k-means to individual text regions, downsample the result
Mat kmeansparts2x = proc_parts(bw2x, im2x, thr_roi_kmeans);
pyrDown(kmeansparts2x, kmeansparts2x);

return 0;
}

关于c++ - 使用 OpenCV 改进文本二值化/OCR 预处理,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/43659275/

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com