gpt4 book ai didi

python - 从表格图像中提取单个字段并通过OCR导出到Excel

转载 作者:行者123 更新时间:2023-12-01 04:24:11 25 4
gpt4 key购买 nike

我已经扫描了具有该图像所示表格的图像:

scanned image with handwritten digits and printed information

我试图分别提取每个框并执行OCR,但是当我尝试检测水平和垂直线然后检测框时,它返回以下图像:

enter image description here

当我尝试执行其他转换以检测文本(侵蚀和膨胀)时,仍然有一些线条残留在文本中,如下所示:

dilated text and lines

我无法只检测出文本来执行OCR,并且没有生成正确的边界框,如下所示:

Image with detected boxes

我无法使用实线获得清晰分隔的框,我已经在用paint编辑的图像上尝试过此操作(如下所示)以添加数字,并且它可以正常工作。

enter image description here

我不知道我做错了哪一部分,但是如果有什么需要尝试的地方,或者可能要更改/添加我的问题,请告诉我。

#Loading all required libraries 
%pylab inline
import cv2
import numpy as np
import pandas as pd
import pytesseract
import matplotlib.pyplot as plt
import statistics
from time import sleep
import random

# Load the scan as a single-channel grayscale image (flag 0).
img = cv2.imread('images/scan1.jpg',0)

# cv2.imread reports failure by returning None rather than raising, so stop
# here with a clear message instead of failing later inside copyMakeBorder.
if img is None:
    raise FileNotFoundError("could not read image 'images/scan1.jpg'")

# Pad the image with a 50-pixel constant border of value 255 on every side.
img1 = cv2.copyMakeBorder(img, 50, 50, 50, 50, cv2.BORDER_CONSTANT, value=[255,255])

# Binarise the padded image. THRESH_OTSU is set, so the threshold actually
# used is computed by OpenCV and returned as `thresh`.
(thresh, th3) = cv2.threshold(img1, 255, 255, cv2.THRESH_BINARY|cv2.THRESH_OTSU)

# Invert the binary image (0 <-> 255).
th3 = 255-th3

# initialize kernels for table boundaries detections
# Pick the structuring elements used to find table borders. Images with
# fewer than 1000 rows get short kernels, taller images get longer ones.
# `ver` is a single column of ones, `hor` a single row of ones.
if th3.shape[0] < 1000:
    ver = np.ones((7, 1), dtype=int)
    hor = np.ones((1, 6), dtype=int)
else:
    ver = np.ones((19, 1), dtype=int)
    hor = np.ones((1, 15), dtype=int)




# Vertical table borders: erode, then dilate, with the column kernel.
vertical_eroded = cv2.erode(th3, ver, iterations=3)
vertical_lines = cv2.dilate(vertical_eroded, ver, iterations=3)

# Horizontal table borders: same idea with the row kernel (note the dilation
# runs one more iteration than the erosion).
horizontal_eroded = cv2.erode(th3, hor, iterations=3)
horizontal_lines = cv2.dilate(horizontal_eroded, hor, iterations=4)

# Merge both line images (saturating add) and invert the result.
hor_ver = cv2.bitwise_not(cv2.add(horizontal_lines, vertical_lines))

# Subtract the inverted line image from the binarised image, then invert.
line_diff = cv2.bitwise_not(cv2.subtract(th3, hor_ver))

# XOR against the padded grayscale image, then invert again; `iii` is the
# image later used for drawing boxes and `tt1` is a working copy of it.
xor_result = cv2.bitwise_xor(img1, line_diff)
iii = cv2.bitwise_not(xor_result)
tt1 = iii.copy()

# Second set of structuring elements: a 9x2 block and a 2x10 block of ones.
ver1 = np.ones((9, 2), dtype=int)
hor1 = np.ones((2, 10), dtype=int)

# Erode twice / dilate once with the tall kernel.
verticle_lines_img1 = cv2.dilate(
    cv2.erode(tt1, ver1, iterations=2), ver1, iterations=1
)

# Erode once / dilate once with the wide kernel.
hor_lines_img2 = cv2.dilate(
    cv2.erode(tt1, hor1, iterations=1), hor1, iterations=1
)

# Combine the two morphology results with a saturating add.
hor_ver = cv2.add(hor_lines_img2, verticle_lines_img1)

# cv2.resize takes sizes as (width, height): dim1 is the current size,
# dim is twice that in each direction.
rows, cols = hor_ver.shape[0], hor_ver.shape[1]
dim1 = (cols, rows)
dim = (cols*2, rows*2)

# Scale the image up to double size.
resized = cv2.resize(hor_ver, dim, interpolation=cv2.INTER_AREA)

# Invert pixel values before the dilation below.
want = cv2.bitwise_not(resized)

# Kernel choice depends on the height of the enlarged image. Only kernel1 is
# used below; kernel3 is defined only in the small-image branch.
if want.shape[0] < 1000:
    kernel1 = np.ones((1, 3), dtype=int)
    kernel2 = np.ones((2, 2), dtype=int)
    kernel3 = np.array([[1, 0, 1],
                        [0, 1, 0],
                        [1, 0, 1]])
else:
    kernel1 = np.ones((1, 6), dtype=int)
    kernel2 = np.ones((4, 5), dtype=int)

# Dilate horizontally (kernel1 is a single row of ones).
tt1 = cv2.dilate(want, kernel1, iterations=2)

# Scale back down to the original size.
resized1 = cv2.resize(tt1, dim1, interpolation=cv2.INTER_AREA)

# Find the contours of the processed image (full hierarchy, compressed
# segments).
# cv2.findContours returns (contours, hierarchy) in OpenCV 2.x/4.x but
# (image, contours, hierarchy) in 3.x. Taking the last two items works with
# every version instead of failing on unpacking under 3.x.
contours1, hierarchy1 = cv2.findContours(resized1, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2:]

# Sort contours by the position of their bounding boxes.
def sort_contours(cnts, method="left-to-right"):
    """Sort contours by the x or y coordinate of their bounding boxes.

    Args:
        cnts: Iterable of contours accepted by ``cv2.boundingRect``.
        method: One of ``"left-to-right"``, ``"right-to-left"``,
            ``"top-to-bottom"`` or ``"bottom-to-top"``. Any other value is
            treated like ``"left-to-right"``.

    Returns:
        A pair ``(cnts, boundingBoxes)`` of equally long tuples: the contours
        in sorted order and their ``(x, y, w, h)`` bounding rectangles.
        Both tuples are empty when no contours are given.
    """
    # Descending order for the two "reverse" directions.
    reverse = method in ("right-to-left", "bottom-to-top")

    # Bounding boxes are (x, y, w, h): index 0 sorts on x, index 1 on y.
    axis = 1 if method in ("top-to-bottom", "bottom-to-top") else 0

    bounding_boxes = [cv2.boundingRect(c) for c in cnts]

    # zip(*...) of an empty sequence cannot be unpacked into two names, so
    # return the empty result explicitly instead of raising ValueError.
    if not bounding_boxes:
        return (), ()

    # Sort on the chosen coordinate only; the key keeps the contour arrays
    # themselves from ever being compared.
    ordered = sorted(
        zip(cnts, bounding_boxes),
        key=lambda pair: pair[1][axis],
        reverse=reverse,
    )
    sorted_cnts, sorted_boxes = zip(*ordered)

    return (sorted_cnts, sorted_boxes)


# Sort the contours top-to-bottom with sort_contours.
(cnts, boundingBoxes) = sort_contours(contours1, method="top-to-bottom")

# Heights (fourth field of every bounding box) in ascending order.
heightlist = sorted(bbox[3] for bbox in boundingBoxes)

sportion = int(.5*len(heightlist))
eportion = int(0.05*len(heightlist))

# Reference height = mean of the heights lying between the 50% and 95% marks
# of the sorted list, which ignores small (noise) boxes and the very largest.
# NOTE: despite its name, `medianheight` holds a mean.
# The slices are tried in order of preference:
#   1. the 50%-95% window (empty when there are fewer than 20 boxes),
#   2. the upper half without the two tallest boxes (the original fallback,
#      empty when there are fewer than 6 boxes),
#   3. the whole upper half, or every height if that is empty too.
# With no boxes at all the reference height is 0, so nothing raises
# statistics.StatisticsError any more.
height_windows = (
    heightlist[-sportion:-eportion],
    heightlist[-sportion:-2],
    heightlist[-sportion:] if sportion else heightlist,
)
selected_heights = next((w for w in height_windows if w), None)
medianheight = statistics.mean(selected_heights) if selected_heights else 0

# Keep only boxes that are at least 70% of the reference height and whose
# width/height ratio is above 0.9; draw each kept box on a copy of `iii`.
box = []
imag = iii.copy()
for cnt in cnts:
    x, y, w, h = cv2.boundingRect(cnt)
    # Skip boxes that are too short or too narrow.
    if h < .7*medianheight or w/h <= 0.9:
        continue
    # The rectangle is drawn slightly inset horizontally and raised 2px.
    image = cv2.rectangle(imag, (x+4, y-2), (x+w-5, y+h), (0,255,0), 1)
    box.append([x, y, w, h])
# to show image

###Now we have badly detected boxes image as shown

最佳答案

您走在正确的轨道上。这是您的方法的继续,但进行了一些细微的修改。这个想法是:

  • 获取二进制图像。 加载图像,转换为灰度,以及Otsu的阈值。
  • 删除所有字符文本轮廓。 我们创建一个矩形内核,并执行开运算以仅保留水平/垂直线。这将有效地使文本变成很小的噪声,因此我们找到轮廓并使用轮廓面积进行过滤以将其删除。
  • 修复水平/垂直线并提取每个ROI。 我们执行形态学闭运算来修复断开的线条并平滑表格。从这里开始,我们使用imutils.sort_contours()并传入top-to-bottom参数对框字段轮廓进行排序。接下来,我们找到轮廓并使用轮廓面积进行过滤,然后提取每个ROI。


  • 这是每个框字段和提取的ROI的可视化

    enter image description here

    代码
    import cv2
    import numpy as np
    from imutils import contours

    # Load image, grayscale, Otsu's threshold
    image = cv2.imread('1.jpg')

    # cv2.imread returns None when the file cannot be read; fail here with a
    # clear message instead of an opaque error from image.copy()/cvtColor.
    if image is None:
        raise FileNotFoundError("could not read image '1.jpg'")

    # Untouched copy: ROIs are cropped from it while `image` gets drawn on.
    original = image.copy()
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Inverted binary image; with THRESH_OTSU the threshold is chosen
    # automatically, and [1] selects the thresholded image from the result.
    thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

    # Remove text: a 3x3 morphological opening, then paint every remaining
    # contour with area below 500 black.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))
    opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)

    # findContours returns 2 or 3 values depending on the OpenCV version; the
    # contour list is always the second-to-last item.
    cnts = cv2.findContours(opening, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    cnts = cnts[-2]
    for c in cnts:
        if cv2.contourArea(c) < 500:
            cv2.drawContours(opening, [c], -1, (0,0,0), -1)

    # Repair table lines with a morphological closing and invert the result.
    close = cv2.bitwise_not(
        cv2.morphologyEx(opening, cv2.MORPH_CLOSE, kernel, iterations=1)
    )

    # Contours of the inverted image, sorted top-to-bottom.
    cnts = cv2.findContours(close, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    cnts = cnts[-2]
    (cnts, _) = contours.sort_contours(cnts, method="top-to-bottom")

    for c in cnts:
        # Only contours with area below 25000 are treated as fields.
        if cv2.contourArea(c) >= 25000:
            continue

        x, y, w, h = cv2.boundingRect(c)
        # Fill the field on the display image; crop the ROI from the
        # untouched copy.
        cv2.rectangle(image, (x, y), (x + w, y + h), (36,255,12), -1)
        ROI = original[y:y+h, x:x+w]

        # Visualization: show progress and the current ROI for 20 ms.
        cv2.imshow('image', image)
        cv2.imshow('ROI', ROI)
        cv2.waitKey(20)

    # Show the two intermediate masks and the annotated image, then wait for
    # a key press.
    for window_name, frame in (('opening', opening), ('close', close), ('image', image)):
        cv2.imshow(window_name, frame)
    cv2.waitKey()

    关于python - 从表格图像中提取单个字段并通过OCR导出到Excel,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/59580304/

    25 4 0
    Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
    广告合作:1813099741@qq.com 6ren.com