
python - How to find, rotate and crop a section of text in OpenCV, Python


I am struggling with a project that takes an image of a very clear font from a label, reads the "text area", and outputs it as a string using Tesseract OCR.

I have made quite a bit of progress on this, since I added various global filters to get a very clean result, but I am struggling to find a way to isolate just the text, then rotate it to be as horizontal as possible, after which the simple part should be cropping it.

Is there a way to do this without training data and without over-complicating the system, given that I am only using a Raspberry Pi for the computation?

Thanks for your help. This is what I have come up with so far:

Original image (captured with the PiCamera):

https://i.imgur.com/vm5wb1Z.jpg

Adaptive threshold after shadow removal:

https://i.imgur.com/rqWoUsI.jpg

Global threshold after shadow removal:

/image/5KAhz.jpg

The code:

# import the necessary packages
from PIL import Image
import pytesseract
import argparse
import cv2
import os

import picamera
import time

import numpy as np
#preprocess = "thresh"

# Remaining text cropping and rotating:
import math
import json
from collections import defaultdict
from scipy.ndimage.filters import rank_filter

def dilate(ary, N, iterations):
    """Dilate using an NxN '+' sign shape. ary is np.uint8."""
    kernel = np.zeros((N, N), dtype=np.uint8)
    kernel[(N - 1) // 2, :] = 1  # horizontal bar of the '+' (integer division for Python 3)
    dilated_image = cv2.dilate(ary // 255, kernel, iterations=iterations)

    kernel = np.zeros((N, N), dtype=np.uint8)
    kernel[:, (N - 1) // 2] = 1  # vertical bar of the '+'
    dilated_image = cv2.dilate(dilated_image, kernel, iterations=iterations)
    return dilated_image


def props_for_contours(contours, ary):
    """Calculate bounding box & the number of set pixels for each contour."""
    c_info = []
    for c in contours:
        x, y, w, h = cv2.boundingRect(c)
        c_im = np.zeros(ary.shape)
        cv2.drawContours(c_im, [c], 0, 255, -1)
        c_info.append({
            'x1': x,
            'y1': y,
            'x2': x + w - 1,
            'y2': y + h - 1,
            'sum': np.sum(ary * (c_im > 0)) / 255
        })
    return c_info


def union_crops(crop1, crop2):
    """Union two (x1, y1, x2, y2) rects."""
    x11, y11, x21, y21 = crop1
    x12, y12, x22, y22 = crop2
    return min(x11, x12), min(y11, y12), max(x21, x22), max(y21, y22)


def intersect_crops(crop1, crop2):
    x11, y11, x21, y21 = crop1
    x12, y12, x22, y22 = crop2
    return max(x11, x12), max(y11, y12), min(x21, x22), min(y21, y22)


def crop_area(crop):
    x1, y1, x2, y2 = crop
    return max(0, x2 - x1) * max(0, y2 - y1)


def find_border_components(contours, ary):
    borders = []
    area = ary.shape[0] * ary.shape[1]
    for i, c in enumerate(contours):
        x, y, w, h = cv2.boundingRect(c)
        if w * h > 0.5 * area:
            borders.append((i, x, y, x + w - 1, y + h - 1))
    return borders


def angle_from_right(deg):
    return min(deg % 90, 90 - (deg % 90))


def remove_border(contour, ary):
    """Remove everything outside a border contour."""
    # Use a rotated rectangle (should be a good approximation of a border).
    # If it's far from a right angle, it's probably two sides of a border and
    # we should use the bounding box instead.
    c_im = np.zeros(ary.shape)
    r = cv2.minAreaRect(contour)
    degs = r[2]
    if angle_from_right(degs) <= 10.0:
        box = cv2.boxPoints(r)  # cv2.cv.BoxPoints(r) on OpenCV 2.x
        box = np.int0(box)
        cv2.drawContours(c_im, [box], 0, 255, -1)
        cv2.drawContours(c_im, [box], 0, 0, 4)
    else:
        # boundingRect returns (x, y, w, h), not two corner points.
        x, y, w, h = cv2.boundingRect(contour)
        x1, y1, x2, y2 = x, y, x + w - 1, y + h - 1
        cv2.rectangle(c_im, (x1, y1), (x2, y2), 255, -1)
        cv2.rectangle(c_im, (x1, y1), (x2, y2), 0, 4)

    return np.minimum(c_im, ary)


def find_components(edges, max_components=16):
    """Dilate the image until there are just a few connected components.

    Returns contours for these components."""
    # Perform increasingly aggressive dilation until there are just a few
    # connected components.
    count = max_components + 1
    n = 1
    while count > max_components:
        n += 1
        dilated_image = dilate(edges, N=3, iterations=n)
        # Note: OpenCV 3.x returns (image, contours, hierarchy) here.
        contours, hierarchy = cv2.findContours(dilated_image, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        count = len(contours)
        #Image.fromarray(edges).show()
        #Image.fromarray(255 * dilated_image).show()
    return contours


def find_optimal_components_subset(contours, edges):
    """Find a crop which strikes a good balance of coverage/compactness.

    Returns an (x1, y1, x2, y2) tuple.
    """
    c_info = props_for_contours(contours, edges)
    c_info.sort(key=lambda x: -x['sum'])
    total = np.sum(edges) / 255
    area = edges.shape[0] * edges.shape[1]

    c = c_info[0]
    del c_info[0]
    this_crop = c['x1'], c['y1'], c['x2'], c['y2']
    crop = this_crop
    covered_sum = c['sum']

    while covered_sum < total:
        changed = False
        recall = 1.0 * covered_sum / total
        prec = 1 - 1.0 * crop_area(crop) / area
        f1 = 2 * (prec * recall / (prec + recall))
        for i, c in enumerate(c_info):
            this_crop = c['x1'], c['y1'], c['x2'], c['y2']
            new_crop = union_crops(crop, this_crop)
            new_sum = covered_sum + c['sum']
            new_recall = 1.0 * new_sum / total
            new_prec = 1 - 1.0 * crop_area(new_crop) / area
            new_f1 = 2 * new_prec * new_recall / (new_prec + new_recall)

            # Add this crop if it improves f1 score,
            # _or_ it adds 25% of the remaining pixels for <15% crop expansion.
            # ^^^ very ad-hoc! make this smoother
            remaining_frac = c['sum'] / (total - covered_sum)
            new_area_frac = 1.0 * crop_area(new_crop) / crop_area(crop) - 1
            if new_f1 > f1 or (
                    remaining_frac > 0.25 and new_area_frac < 0.15):
                print('%d %s -> %s / %s (%s), %s -> %s / %s (%s), %s -> %s' % (
                    i, covered_sum, new_sum, total, remaining_frac,
                    crop_area(crop), crop_area(new_crop), area, new_area_frac,
                    f1, new_f1))
                crop = new_crop
                covered_sum = new_sum
                del c_info[i]
                changed = True
                break

        if not changed:
            break

    return crop
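
# Worked example of the coverage/compactness trade-off above, with
# hypothetical numbers (not taken from the images in this question):
# with total = 10000 ink pixels and area = 100000 px, a crop covering
# covered_sum = 8000 ink pixels using crop_area = 20000 px scores
#   recall = 8000 / 10000            = 0.8   (fraction of ink covered)
#   prec   = 1 - 20000 / 100000      = 0.8   (fraction of image excluded)
#   f1     = 2 * 0.8 * 0.8 / (0.8 + 0.8) = 0.8
# A candidate contour is only merged in if it raises this score, or if it
# passes the ad-hoc 25%-of-remaining-pixels / <15%-area-growth rule.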


def pad_crop(crop, contours, edges, border_contour, pad_px=15):
    """Slightly expand the crop to get full contours.

    This will expand to include any contours it currently intersects, but will
    not expand past a border.
    """
    bx1, by1, bx2, by2 = 0, 0, edges.shape[0], edges.shape[1]
    if border_contour is not None and len(border_contour) > 0:
        c = props_for_contours([border_contour], edges)[0]
        bx1, by1, bx2, by2 = c['x1'] + 5, c['y1'] + 5, c['x2'] - 5, c['y2'] - 5

    def crop_in_border(crop):
        x1, y1, x2, y2 = crop
        x1 = max(x1 - pad_px, bx1)
        y1 = max(y1 - pad_px, by1)
        x2 = min(x2 + pad_px, bx2)
        y2 = min(y2 + pad_px, by2)
        return x1, y1, x2, y2  # was `return crop`, which discarded the padding

    crop = crop_in_border(crop)

    c_info = props_for_contours(contours, edges)
    changed = False
    for c in c_info:
        this_crop = c['x1'], c['y1'], c['x2'], c['y2']
        this_area = crop_area(this_crop)
        int_area = crop_area(intersect_crops(crop, this_crop))
        new_crop = crop_in_border(union_crops(crop, this_crop))
        if 0 < int_area < this_area and crop != new_crop:
            print('%s -> %s' % (str(crop), str(new_crop)))
            changed = True
            crop = new_crop

    if changed:
        return pad_crop(crop, contours, edges, border_contour, pad_px)
    else:
        return crop


def downscale_image(im, max_dim=2048):
    """Shrink im until its longest dimension is <= max_dim.

    Returns scale (<= 1), new_image.
    """
    a, b = im.size
    if max(a, b) <= max_dim:
        return 1.0, im

    scale = 1.0 * max_dim / max(a, b)
    new_im = im.resize((int(a * scale), int(b * scale)), Image.ANTIALIAS)
    return scale, new_im


def process_image(inputImg):
    opnImg = Image.open(inputImg)
    scale, im = downscale_image(opnImg)

    edges = cv2.Canny(np.asarray(im), 100, 200)

    # TODO: dilate image _before_ finding a border. This is crazy sensitive!
    contours, hierarchy = cv2.findContours(edges, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    borders = find_border_components(contours, edges)
    # b is (i, x1, y1, x2, y2); sort by bounding-box area.
    borders.sort(key=lambda b: (b[3] - b[1]) * (b[4] - b[2]))

    border_contour = None
    if len(borders):
        border_contour = contours[borders[0][0]]
        edges = remove_border(border_contour, edges)

    edges = 255 * (edges > 0).astype(np.uint8)

    # Remove ~1px borders using a rank filter.
    maxed_rows = rank_filter(edges, -4, size=(1, 20))
    maxed_cols = rank_filter(edges, -4, size=(20, 1))
    debordered = np.minimum(np.minimum(edges, maxed_rows), maxed_cols)
    edges = debordered

    contours = find_components(edges)
    if len(contours) == 0:
        print('%s -> (no text!)' % inputImg)
        return

    crop = find_optimal_components_subset(contours, edges)
    crop = pad_crop(crop, contours, edges, border_contour)

    crop = [int(x / scale) for x in crop]  # upscale to the original image size.
    #draw = ImageDraw.Draw(im)
    #c_info = props_for_contours(contours, edges)
    #for c in c_info:
    #    this_crop = c['x1'], c['y1'], c['x2'], c['y2']
    #    draw.rectangle(this_crop, outline='blue')
    #draw.rectangle(crop, outline='red')
    #im.save(out_path)
    #draw.text((50, 50), path, fill='red')
    #orig_im.save(out_path)
    #im.show()
    text_im = opnImg.crop(crop)
    text_im.save('Cropped_and_rotated_image.jpg')
    return text_im
    '''
    text_im.save(out_path)
    print('%s -> %s' % (path, out_path))
    '''


# Camera capturing stuff:
myCamera = picamera.PiCamera()

myCamera.vflip = True
myCamera.hflip = True
'''
myCamera.start_preview()
time.sleep(6)
myCamera.stop_preview()
'''
myCamera.capture("Captured_Image.png")
# End capturing procedure


imgAddr = '/home/pi/My_examples/Mechanical_display_converter/Example1.jpg'
#imgAddr = "Captured_Image.png"



# construct the argument parse and parse the arguments
#ap = argparse.ArgumentParser()
'''
ap.add_argument("-i", "--image", required=True,
                help="path to input image to be OCR'd")

ap.add_argument("-p", "--preprocess", type=str, default="thresh",
                help="type of preprocessing to be done")
args = vars(ap.parse_args())
'''

# load the example image and convert it to grayscale
img = cv2.imread(imgAddr)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
cv2.imshow('Step1_gray_filter', gray)

'''
# check to see if we should apply thresholding to preprocess the
# image
if args["preprocess"] == "thresh":
    gray = cv2.threshold(gray, 0, 255,
                         cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]

# make a check to see if median blurring should be done to remove
# noise
elif args["preprocess"] == "blur":
    gray = cv2.medianBlur(gray, 3)


if preprocess == "thresh":
    gray = cv2.threshold(gray, 150, 255,
                         cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]

# make a check to see if median blurring should be done to remove
# noise
elif preprocess == "blur":
    gray = cv2.medianBlur(gray, 3)
'''

rgb_planes = cv2.split(img)

result_planes = []
result_norm_planes = []
for plane in rgb_planes:
    dilated_img = cv2.dilate(plane, np.ones((7, 7), np.uint8))
    bg_img = cv2.medianBlur(dilated_img, 21)
    diff_img = 255 - cv2.absdiff(plane, bg_img)
    # cv2.normalize needs an explicit dst argument (None lets OpenCV allocate it).
    norm_img = cv2.normalize(diff_img, None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8UC1)
    result_planes.append(diff_img)
    result_norm_planes.append(norm_img)

result = cv2.merge(result_planes)
result_norm = cv2.merge(result_norm_planes)

cv2.imshow('shadows_out.png', result)
cv2.imshow('shadows_out_norm.png', result_norm)

grayUnShadowedImg = cv2.cvtColor(result, cv2.COLOR_BGR2GRAY)
cv2.imshow('Shadow_Gray_CVT', grayUnShadowedImg)


ret, threshUnShadowedImg = cv2.threshold(grayUnShadowedImg, 200, 255, cv2.THRESH_BINARY)
cv2.imshow('unShadowed_Thresh_filtering', threshUnShadowedImg)
#cv2.imwrite('unShadowed_Thresh_filtering.jpg', threshUnShadowedImg)

#croppedUnShadowedImg = process_image('unShadowed_Thresh_filtering.jpg')

adptThreshUnShadowedImg = cv2.adaptiveThreshold(grayUnShadowedImg, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 115, 1)
cv2.imshow('unShadowed_Adaptive_Thresh_filtering', adptThreshUnShadowedImg)

'''
blurFImg = cv2.GaussianBlur(adptThreshUnShadowedImg,(25,25), 0)
ret, f3Img = cv2.threshold(blurFImg,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
cv2.imshow('f3Img', f3Img )
'''
#OCR Stage:
'''
# write the grayscale image to disk as a temporary file so we can
# apply OCR to it
filename = "{}.png".format(os.getpid())
cv2.imwrite(filename, threshImg)

# load the image as a PIL/Pillow image, apply OCR, and then delete
# the temporary file
text = pytesseract.image_to_string(Image.open(filename))
os.remove(filename)
print("\n" + text)
'''

cv2.waitKey(0)
cv2.destroyAllWindows()
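
For reference, the commented-out OCR stage and the (currently unused) process_image() routine could be wired together as follows. This is only a sketch that reuses the names already defined above, and it has not been tested on the original images:

# Sketch only: save the thresholded result, crop it, and OCR the crop.
cv2.imwrite('unShadowed_Thresh_filtering.jpg', threshUnShadowedImg)
croppedImg = process_image('unShadowed_Thresh_filtering.jpg')
if croppedImg is not None:
    # pytesseract accepts a PIL image directly.
    text = pytesseract.image_to_string(croppedImg)
    print("\n" + text)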

I also tried this source, but it doesn't seem to work, and it isn't very easy to follow:

https://www.danvk.org/2015/01/07/finding-blocks-of-text-in-an-image-using-python-opencv-and-numpy.html

Best Answer

I have put together an example that may give you an idea of how to proceed. I did it without the transformations you applied to the image, but you could do it with them if you wish.

What I did was first convert the image to binary with cv2.THRESH_BINARY. Next, I made a mask and drew the contours onto it, filtering them by size (cv2.contourArea()) and by the aspect ratio (obtained from cv2.boundingRect()) as thresholds. Then I used cv2.morphologyEx() with a large kernel (50x50) to connect all the contours that are close to each other.

[image]

Then I selected the biggest contour (the text) and drew a rotated rectangle around it with cv2.minAreaRect(), which gave me the rotation angle.

[image]

I could then rotate the image with cv2.getRotationMatrix2D() and cv2.warpAffine(), and get a slightly bigger bounding box using the highest and lowest X, Y values of the rotated rectangle, which I used to crop the image.

[image]

Then I searched for contours again and removed the noise (small contours) from the image, and the result is text with high contrast.

Final result:

[image]

This code is only meant to give an idea, or another point of view on the problem. It may not work on other images (if they differ too much from the original), or at the very least you would have to tune some of its parameters. Hope it helps. Cheers!

Code:

import cv2
import numpy as np

# Read image and search for contours.
img = cv2.imread('rotatec.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
_, threshold = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY)
# Note: on OpenCV 3.x, findContours returns (image, contours, hierarchy).
contours, hierarchy = cv2.findContours(threshold, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)

# Create first mask used for rotation.
mask = np.ones(img.shape, np.uint8) * 255

# Draw contours on the mask with size and ratio of borders for threshold.
for cnt in contours:
    size = cv2.contourArea(cnt)
    x, y, w, h = cv2.boundingRect(cnt)
    if 10000 > size > 500 and w * 2.5 > h:
        cv2.drawContours(mask, [cnt], -1, (0, 0, 0), -1)

# Connect neighbour contours and select the biggest one (text).
kernel = np.ones((50, 50), np.uint8)
opening = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)
gray_op = cv2.cvtColor(opening, cv2.COLOR_BGR2GRAY)
_, threshold_op = cv2.threshold(gray_op, 150, 255, cv2.THRESH_BINARY_INV)
contours_op, hierarchy_op = cv2.findContours(threshold_op, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
cnt = max(contours_op, key=cv2.contourArea)

# Create rotated rectangle to get the angle of rotation and the 4 points of the rectangle.
_, _, angle = rect = cv2.minAreaRect(cnt)
(h, w) = img.shape[:2]
center = (w // 2, h // 2)

# Rotate the image.
M = cv2.getRotationMatrix2D(center, angle, 1.0)
rotated = cv2.warpAffine(img, M, (int(w), int(h)), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_CONSTANT)

# Create bounding box for rotated text (use old points of rotated rectangle).
box = cv2.boxPoints(rect)
box = np.int0(box)
bound = np.array(box)  # the 4 corner points of the rotated rectangle
(x1, y1) = (bound[:, 0].min(), bound[:, 1].min())
(x2, y2) = (bound[:, 0].max(), bound[:, 1].max())
cv2.drawContours(img, [box], 0, (0, 0, 255), 2)

# Crop the image and create new mask for the final image.
rotated = rotated[y1:y2, x1:x2]
mask_final = np.ones(rotated.shape, np.uint8) * 255

# Remove noise from the final image.
gray_r = cv2.cvtColor(rotated, cv2.COLOR_BGR2GRAY)
_, threshold_r = cv2.threshold(gray_r, 150, 255, cv2.THRESH_BINARY_INV)
contours, hierarchy = cv2.findContours(threshold_r, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
for cnt in contours:
    size = cv2.contourArea(cnt)
    if size < 500:
        cv2.drawContours(threshold_r, [cnt], -1, (0, 0, 0), -1)

# Invert black and white.
final_image = cv2.bitwise_not(threshold_r)

# Display results.
cv2.imshow('final', final_image)
cv2.imshow('rotated', rotated)
cv2.waitKey(0)
cv2.destroyAllWindows()

EDIT:

For the text recognition, I suggest you take a look at this post from SO: Simple Digit Recognition OCR in OpenCV-Python.

The results of using the code from the mentioned post:

[image]

[image]

EDIT:

Here is my implementation, using slightly modified code from the mentioned post. All the steps are written in the comments. You should save the script and the training image in the same directory. This is my training image:

[image]

Code:

import cv2
import numpy as np

# Read image and search for contours.
img = cv2.imread('rotatec.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
_, threshold = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY)
contours, hierarchy = cv2.findContours(threshold, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)

# Create first mask used for rotation.
mask = np.ones(img.shape, np.uint8) * 255

# Draw contours on the mask with size and ratio of borders for threshold.
for cnt in contours:
    size = cv2.contourArea(cnt)
    x, y, w, h = cv2.boundingRect(cnt)
    if 10000 > size > 500 and w * 2.5 > h:
        cv2.drawContours(mask, [cnt], -1, (0, 0, 0), -1)

# Connect neighbour contours and select the biggest one (text).
kernel = np.ones((50, 50), np.uint8)
opening = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)
gray_op = cv2.cvtColor(opening, cv2.COLOR_BGR2GRAY)
_, threshold_op = cv2.threshold(gray_op, 150, 255, cv2.THRESH_BINARY_INV)
contours_op, hierarchy_op = cv2.findContours(threshold_op, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
cnt = max(contours_op, key=cv2.contourArea)

# Create rotated rectangle to get the angle of rotation and the 4 points of the rectangle.
_, _, angle = rect = cv2.minAreaRect(cnt)
(h, w) = img.shape[:2]
center = (w // 2, h // 2)

# Rotate the image.
M = cv2.getRotationMatrix2D(center, angle, 1.0)
rotated = cv2.warpAffine(img, M, (int(w), int(h)), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_CONSTANT)

# Create bounding box for rotated text (use old points of rotated rectangle).
box = cv2.boxPoints(rect)
box = np.int0(box)
bound = np.array(box)  # the 4 corner points of the rotated rectangle
(x1, y1) = (bound[:, 0].min(), bound[:, 1].min())
(x2, y2) = (bound[:, 0].max(), bound[:, 1].max())
cv2.drawContours(img, [box], 0, (0, 0, 255), 2)

# Crop the image and create new mask for the final image.
rotated = rotated[y1:y2, x1 - 10:x2]
mask_final = np.ones(rotated.shape, np.uint8) * 255

# Remove noise from the final image.
gray_r = cv2.cvtColor(rotated, cv2.COLOR_BGR2GRAY)
_, threshold_r = cv2.threshold(gray_r, 150, 255, cv2.THRESH_BINARY_INV)
contours, hierarchy = cv2.findContours(threshold_r, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
for cnt in contours:
    size = cv2.contourArea(cnt)
    if size < 500:
        cv2.drawContours(threshold_r, [cnt], -1, (0, 0, 0), -1)

# Invert black and white.
final_image = cv2.bitwise_not(threshold_r)

# Save the deskewed, denoised text image for the recognition stage.
cv2.imwrite('rotated12.png', final_image)

# Import module for finding path to database.
from pathlib import Path

# This code executes once and writes two files.
# If the file exists it skips this step, else it runs again.
file = Path("generalresponses.data")
if file.is_file() == False:

    # Reading the training image
    im = cv2.imread('pitrain1.png')
    im3 = im.copy()
    gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (5, 5), 0)
    thresh = cv2.adaptiveThreshold(blur, 255, 1, 1, 11, 2)

    # Finding contours (use `_, contours, hierarchy = ...` on OpenCV 3.x)
    contours, hierarchy = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Creates array and list for appending data
    samples = np.empty((0, 100))
    responses = []

    # Value serving to increment the "automatic" learning
    i = 0

    # Iterating through contours and appending the array and list with "learned" values
    for cnt in contours:
        i += 1
        [x, y, w, h] = cv2.boundingRect(cnt)
        cv2.rectangle(im, (x, y), (x + w, y + h), (0, 0, 255), 2)
        roi = thresh[y:y + h, x:x + w]  # Cropping ROI to bounding rectangle
        roismall = cv2.resize(roi, (10, 10))  # Resizing ROI to smaller image
        cv2.imshow('norm', im)

        # Appending values (ASCII codes for '-' and '0'-'9') based on the pitrain1.png image
        if i < 36:
            responses.append(int(45))
        elif 35 < i < 80:
            responses.append(int(48))
        elif 79 < i < 125:
            responses.append(int(57))
        elif 124 < i < 160:
            responses.append(int(56))
        elif 159 < i < 205:
            responses.append(int(55))
        elif 204 < i < 250:
            responses.append(int(54))
        elif 249 < i < 295:
            responses.append(int(53))
        elif 294 < i < 340:
            responses.append(int(52))
        elif 339 < i < 385:
            responses.append(int(51))
        elif 384 < i < 430:
            responses.append(int(50))
        elif 429 < i < 485:
            responses.append(int(49))
        else:
            break
        sample = roismall.reshape((1, 100))
        samples = np.append(samples, sample, 0)

    # Reshaping and saving database
    responses = np.array(responses)
    responses = responses.reshape((responses.size, 1))
    print('end')
    np.savetxt('generalsamples.data', samples)
    np.savetxt('generalresponses.data', responses, fmt='%s')

################### Recognition ########################

# Dictionary for numbers and characters (in this sample code the only
# character is " - ")
number = {
    48: "0",
    53: "5",
    52: "4",
    50: "2",
    45: "-",
    55: "7",
    51: "3",
    57: "9",
    56: "8",
    54: "6",
    49: "1"
}


####### training part ###############
samples = np.loadtxt('generalsamples.data', np.float32)
responses = np.loadtxt('generalresponses.data', np.float32)
responses = responses.reshape((responses.size, 1))

model = cv2.ml.KNearest_create()
model.train(samples, cv2.ml.ROW_SAMPLE, responses)

############################# testing part #########################

im = cv2.imread('rotated12.png')
out = np.zeros(im.shape, np.uint8)
gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
thresh = cv2.adaptiveThreshold(gray, 255, 1, 1, 11, 2)

contours, hierarchy = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

for cnt in contours:
    [x, y, w, h] = cv2.boundingRect(cnt)
    cv2.rectangle(im, (x, y), (x + w, y + h), (0, 255, 0), 2)
    roi = thresh[y:y + h, x:x + w]
    roismall = cv2.resize(roi, (10, 10))
    roismall = roismall.reshape((1, 100))
    roismall = np.float32(roismall)
    retval, results, neigh_resp, dists = model.findNearest(roismall, k=5)
    string = int(results[0][0])
    string2 = number.get(string)
    print(string2)
    cv2.putText(out, str(string2), (x, y + h), 0, 1, (0, 255, 0))

cv2.imshow('im', im)
cv2.imshow('out', out)
cv2.waitKey(0)
cv2.destroyAllWindows()

Result:

[image]

[image]

Regarding python - How to find, rotate and crop a section of text in OpenCV, Python, we found a similar question on Stack Overflow: https://stackoverflow.com/questions/51699779/
