gpt4 book ai didi

c++ - 使用opencv检测图像中的文本

转载 作者:太空狗 更新时间:2023-10-29 20:56:05 25 4
gpt4 key购买 nike

我需要检测图像中的文本。

下面这段代码在大多数情况下都有效，但并非在所有情况下都有效。请参阅附带的输入/输出图像。

代码

#include "string"
#include "fstream"
#include "/var/bin/opencv/include/opencv2/opencv.hpp"

using namespace std;
using namespace cv;

void detect_text(string input){
Mat large = imread(input);

Mat rgb;
// downsample and use it for processing
pyrDown(large, rgb);
Mat small;
cvtColor(rgb, small, CV_BGR2GRAY);
// morphological gradient
Mat grad;
Mat morphKernel = getStructuringElement(MORPH_ELLIPSE, Size(3, 3));
morphologyEx(small, grad, MORPH_GRADIENT, morphKernel);
// binarize
Mat bw;
threshold(grad, bw, 0.0, 255.0, THRESH_BINARY | THRESH_OTSU);
// connect horizontally oriented regions
Mat connected;
morphKernel = getStructuringElement(MORPH_RECT, Size(9, 1));
morphologyEx(bw, connected, MORPH_CLOSE, morphKernel);
// find contours
Mat mask = Mat::zeros(bw.size(), CV_8UC1);
vector<vector<Point> > contours;
vector<Vec4i> hierarchy;
findContours(connected, contours, hierarchy, CV_RETR_CCOMP, CV_CHAIN_APPROX_SIMPLE, Point(0, 0));
// filter contours
for(int idx = 0; idx >= 0; idx = hierarchy[idx][0]){
Rect rect = boundingRect(contours[idx]);
Mat maskROI(mask, rect);
maskROI = Scalar(0, 0, 0);
// fill the contour
drawContours(mask, contours, idx, Scalar(255, 255, 255), CV_FILLED);
// ratio of non-zero pixels in the filled region
double r = (double)countNonZero(maskROI) / (rect.width * rect.height);

// assume at least 45% of the area is filled if it contains text
if (r > 0.45 &&
(rect.height > 8 && rect.width > 8) // constraints on region size
// these two conditions alone are not very robust. better to use something
//like the number of significant peaks in a horizontal projection as a third condition
){
rectangle(rgb, rect, Scalar(0, 255, 0), 2);
}
}

imwrite(string("test_text_contours.jpg"), rgb);
}

// Entry point: runs the text detector on the fixed file "input.jpg".
// The command-line arguments are accepted but not used.
int main(int argc, char* argv[]){
    const string image_path("input.jpg");
    detect_text(image_path);
    return 0;
}

输入

enter image description here

输出

enter image description here

更新

/*
* Compile
* # g++ txtbin.cpp -o txtbin `pkg-config opencv --cflags --libs`
*
* Get opencv version
* # pkg-config --modversion opencv
*
* Run
* # ./txtbin input.jpg output.png
*/

#include "string"
#include "fstream"
#include "/var/bin/opencv/include/opencv2/opencv.hpp"
//#include "/usr/include/opencv2/opencv.hpp"
#include "/usr/include/boost/tuple/tuple.hpp"

using namespace std;
using namespace cv;
using namespace boost;

/**
 * Builds a smooth estimate of the image with its high-variance blocks
 * (presumably text) removed.
 *
 * The image is divided into a grid of square blocks. Blocks whose standard
 * deviation exceeds `contrast` are marked; the image is shrunk to one pixel
 * per block, the marked pixels are inpainted from their neighbours, and the
 * result is resized back to the size of `Img`.
 *
 * @param Img        source image; assumed single-channel with values in
 *                   [0, 1] (it is scaled by 255 before inpainting) - confirm
 *                   against the caller.
 * @param Res        output, CV_32FC1, same size as `Img`, scaled by 1/255
 *                   after inpainting.
 * @param blockSide  block edge length in pixels: set greater for larger fonts
 *                   in image and vice versa. The value is truncated to an
 *                   integer and clamped to at least 1.
 * @param contrast   standard-deviation threshold for marking a block: set
 *                   smaller for lower contrast image.
 */
void CalcBlockMeanVariance(Mat& Img, Mat& Res, float blockSide=21, float contrast=0.01){
    // Work with an integer edge length: all indices below are integers, and a
    // value below 1 would otherwise never advance the loops (or divide by 0).
    const int requested = static_cast<int>(blockSide);
    const int block = requested < 1 ? 1 : requested;

    Mat I;
    Img.convertTo(I, CV_32FC1);

    // Keep at least one grid cell so that images smaller than a single block
    // do not produce an empty grid, which resize() below cannot handle.
    const int gridRows = (Img.rows / block > 0) ? Img.rows / block : 1;
    const int gridCols = (Img.cols / block > 0) ? Img.cols / block : 1;
    Res = Mat::zeros(gridRows, gridCols, CV_32FC1);

    Scalar m, s;

    // Visit every complete block. The condition is "i + block <= rows" so the
    // last block is also evaluated when the size is an exact multiple of the
    // block edge.
    for(int i = 0; i + block <= Img.rows; i += block){
        // Each patch extends one pixel past the block, clamped to the image.
        const int rowEnd = (i + block + 1 < Img.rows) ? i + block + 1 : Img.rows;

        for(int j = 0; j + block <= Img.cols; j += block){
            const int colEnd = (j + block + 1 < Img.cols) ? j + block + 1 : Img.cols;

            const Mat patch = I(Range(i, rowEnd), Range(j, colEnd));
            meanStdDev(patch, m, s);

            // Store the block mean for high-variance blocks, 0 for flat ones.
            Res.at<float>(i / block, j / block) =
                (s[0] > contrast) ? static_cast<float>(m[0]) : 0.0f;
        }
    }

    // One pixel per block.
    Mat smallImg;
    resize(I, smallImg, Res.size());

    // Cells holding a mean above 0.02 become the inpainting mask.
    Mat inpaintmask;
    threshold(Res, inpaintmask, 0.02, 1.0, THRESH_BINARY);

    Mat inpainted;
    smallImg.convertTo(smallImg, CV_8UC1, 255);

    inpaintmask.convertTo(inpaintmask, CV_8UC1);
    inpaint(smallImg, inpaintmask, inpainted, 5, INPAINT_TELEA);

    // Back to the original resolution and 32-bit float values.
    resize(inpainted, Res, Img.size());
    Res.convertTo(Res, CV_32FC1, 1.0 / 255.0);
}

tuple<int, int, int, int> detect_text_box(string input, Mat& res, bool draw_contours=false){
Mat large = imread(input);

bool test_output = false;

int
top = large.rows,
bottom = 0,
left = large.cols,
right = 0;

int
rect_bottom,
rect_right;

Mat rgb;
// downsample and use it for processing
pyrDown(large, rgb);
pyrDown(rgb, rgb);
Mat small;
cvtColor(rgb, small, CV_BGR2GRAY);
// morphological gradient
Mat grad;
Mat morphKernel = getStructuringElement(MORPH_ELLIPSE, Size(3, 3));
morphologyEx(small, grad, MORPH_GRADIENT, morphKernel);
// binarize
Mat bw;
threshold(grad, bw, 0.0, 255.0, THRESH_BINARY | THRESH_OTSU);
// connect horizontally oriented regions
Mat connected;
morphKernel = getStructuringElement(MORPH_RECT, Size(9, 1));
morphologyEx(bw, connected, MORPH_CLOSE, morphKernel);
// find contours
Mat mask = Mat::zeros(bw.size(), CV_8UC1);
vector<vector<Point> > contours;
vector<Vec4i> hierarchy;
findContours(connected, contours, hierarchy, CV_RETR_CCOMP, CV_CHAIN_APPROX_SIMPLE, Point(0, 0));

Scalar color = Scalar(0, 255, 0);
Scalar color2 = Scalar(0, 0, 255);
int thickness = 2;

// filter contours
for(int idx = 0; idx >= 0; idx = hierarchy[idx][0]){
Rect rect = boundingRect(contours[idx]);
Mat maskROI(mask, rect);
maskROI = Scalar(0, 0, 0);
// fill the contour
drawContours(mask, contours, idx, Scalar(255, 255, 255), CV_FILLED);
// ratio of non-zero pixels in the filled region
double r = (double)countNonZero(maskROI) / (rect.width * rect.height);

// assume at least 25% of the area is filled if it contains text
if (r > 0.25 &&
(rect.height > 8 && rect.width > 8) // constraints on region size
// these two conditions alone are not very robust. better to use something
//like the number of significant peaks in a horizontal projection as a third condition
){
if(draw_contours){
rectangle(res, Rect(rect.x * 4, rect.y * 4, rect.width * 4, rect.height * 4), color, thickness);
}

if(test_output){
rectangle(rgb, rect, color, thickness);
}

if(rect.y < top){
top = rect.y;
}
rect_bottom = rect.y + rect.height;
if(rect_bottom > bottom){
bottom = rect_bottom;
}
if(rect.x < left){
left = rect.x;
}
rect_right = rect.x + rect.width;
if(rect_right > right){
right = rect_right;
}
}
}

if(draw_contours){
rectangle(res, Point(left * 4, top * 4), Point(right * 4, bottom * 4), color2, thickness);
}

if(test_output){
rectangle(rgb, Point(left, top), Point(right, bottom), color2, thickness);
imwrite(string("test_text_contours.jpg"), rgb);
}

return make_tuple(left * 4, top * 4, (right - left) * 4, (bottom - top) * 4);
}

int main(int argc, char* argv[]){
string input;
string output = "output.png";

int
width = 0,
height = 0,
blockside = 9;

bool
crop = false,
draw = false;

float margin = 0;

cout << "OpenCV version: " << CV_VERSION << endl;

// Return error if arguments are missing
if(argc < 3){
cerr << "\nUsage: txtbin input [options] output\n\n"
"Options:\n"
"\t-w <number> -- set max width (keeps aspect ratio)\n"
"\t-h <number> -- set max height (keeps aspect ratio)\n"
"\t-c -- crop text content contour\n"
"\t-m <number> -- add margins (number in %)\n"
"\t-b <number> -- set blockside\n"
"\t-d -- draw text content contours (debugging)\n" << endl;
return 1;
}

// Parse arguments
for(int i = 1; i < argc; i++){
if(i == 1){
input = string(argv[i]);

// Return error if input file is invalid
ifstream stream(input.c_str());
if(!stream.good()){
cerr << "Error: Input file is invalid!" << endl;
return 1;
}
}
else if(string(argv[i]) == "-w"){
width = atoi(argv[++i]);
}
else if(string(argv[i]) == "-h"){
height = atoi(argv[++i]);
}
else if(string(argv[i]) == "-c"){
crop = true;
}
else if(string(argv[i]) == "-m"){
margin = atoi(argv[++i]);
}
else if(string(argv[i]) == "-b"){
blockside = atoi(argv[++i]);
}
else if(string(argv[i]) == "-d"){
draw = true;
}
else if(i == argc - 1){
output = string(argv[i]);
}
}

Mat Img = imread(input, CV_LOAD_IMAGE_GRAYSCALE);
Mat res;
Img.convertTo(Img, CV_32FC1, 1.0 / 255.0);
CalcBlockMeanVariance(Img, res, blockside);
res = 1.0 - res;
res = Img + res;
threshold(res, res, 0.85, 1, THRESH_BINARY);

int
txt_x,
txt_y,
txt_width,
txt_height;

if(crop || draw){
tie(txt_x, txt_y, txt_width, txt_height) = detect_text_box(input, res, draw);
}

if(crop){
//res = res(Rect(txt_x, txt_y, txt_width, txt_height)).clone();
res = res(Rect(txt_x, txt_y, txt_width, txt_height));
}

if(margin){
int border = res.cols * margin / 100;
copyMakeBorder(res, res, border, border, border, border, BORDER_CONSTANT, Scalar(255, 255, 255));
}

float
width_input = res.cols,
height_input = res.rows;

bool resized = false;

// Downscale image
if(width > 0 && width_input > width){
float scale = width_input / width;
width_input /= scale;
height_input /= scale;
resized = true;
}
if(height > 0 && height_input > height){
float scale = height_input / height;
width_input /= scale;
height_input /= scale;
resized = true;
}
if(resized){
resize(res, res, Size(round(width_input), round(height_input)));
}

imwrite(output, res * 255);

return 0;
}

最佳答案

您的 detect_text 代码与我在文本检测帖子（here）中的代码非常相似。如果您用过那段代码，就会发现原帖中的输入图像是 1400 x 800，而您这篇帖子以及您 previous post 中的输入图像通常约为它的四倍大。因此，首先可以尝试对输入图像进行两次下采样。此外，您的文字看起来有点倾斜，所以可以尝试使用旋转矩形而不是直立矩形。然后再针对您的情况调整参数。正如我在代码中提到的，轮廓过滤条件并不是很可靠。对代码做了这些更改之后，我得到了如下所示的合理输出。请注意，我用绿色突出显示了检测到的文本区域的旋转矩形。

1 2 3 4

代码:

void detect_text(string input){
Mat large = imread(input);

Mat rgb;
// downsample and use it for processing
pyrDown(large, rgb);
pyrDown(rgb, rgb);
Mat small;
cvtColor(rgb, small, CV_BGR2GRAY);
// morphological gradient
Mat grad;
Mat morphKernel = getStructuringElement(MORPH_ELLIPSE, Size(3, 3));
morphologyEx(small, grad, MORPH_GRADIENT, morphKernel);
// binarize
Mat bw;
threshold(grad, bw, 0.0, 255.0, THRESH_BINARY | THRESH_OTSU);
// connect horizontally oriented regions
Mat connected;
morphKernel = getStructuringElement(MORPH_RECT, Size(9, 1));
morphologyEx(bw, connected, MORPH_CLOSE, morphKernel);
// find contours
Mat mask = Mat::zeros(bw.size(), CV_8UC1);
vector<vector<Point> > contours;
vector<Vec4i> hierarchy;
findContours(connected, contours, hierarchy, CV_RETR_CCOMP, CV_CHAIN_APPROX_SIMPLE, Point(0, 0));
// filter contours
for(int idx = 0; idx >= 0; idx = hierarchy[idx][0]){
Rect rect = boundingRect(contours[idx]);
Mat maskROI(mask, rect);
maskROI = Scalar(0, 0, 0);
// fill the contour
drawContours(mask, contours, idx, Scalar(255, 255, 255), CV_FILLED);

RotatedRect rrect = minAreaRect(contours[idx]);
double r = (double)countNonZero(maskROI) / (rrect.size.width * rrect.size.height);

Scalar color;
int thickness = 1;
// assume at least 25% of the area is filled if it contains text
if (r > 0.25 &&
(rrect.size.height > 8 && rrect.size.width > 8) // constraints on region size
// these two conditions alone are not very robust. better to use something
//like the number of significant peaks in a horizontal projection as a third condition
){
thickness = 2;
color = Scalar(0, 255, 0);
}
else
{
thickness = 1;
color = Scalar(0, 0, 255);
}

Point2f pts[4];
rrect.points(pts);
for (int i = 0; i < 4; i++)
{
line(rgb, Point((int)pts[i].x, (int)pts[i].y), Point((int)pts[(i+1)%4].x, (int)pts[(i+1)%4].y), color, thickness);
}
}

imwrite("cont.jpg", rgb);
}

关于c++ - 使用opencv检测图像中的文本,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/34415815/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com