gpt4 book ai didi

python - 如何使用 Python 增强图像中的文本检测

转载 作者:太空宇宙 更新时间:2023-11-03 21:07:42 24 4
gpt4 key购买 nike

我尝试使用 OpenCV Python 检测图像中的文本,特别是带有引号的图像。为此,我首先训练一些文本图像。我检测图像中文本的每个字符进行训练。对于具有正确文字样式的图像,可以正确检测字符。但是对于某些图像,无法正确检测到文本(字符)区域。我在下面附上了代码。如何修改代码以便正确检测字符

import sys
import numpy as np
import cv2
import os

MIN_CONTOUR_AREA = 100

RESIZED_IMAGE_WIDTH = 20
RESIZED_IMAGE_HEIGHT = 30

def main():
imgTrainingNumbers = cv2.imread("E:\God - Level 4 Research Project\Testings\Tharu\godd/jbpoetry.png")

if imgTrainingNumbers is None:
print ("error: image not read from file \n\n")
os.system("pause")
return

imgGray = cv2.cvtColor(imgTrainingNumbers, cv2.COLOR_BGR2GRAY)
imgBlurred = cv2.GaussianBlur(imgGray, (5,5), 0)

imgThresh = cv2.adaptiveThreshold(imgBlurred,
255,
cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
cv2.THRESH_BINARY_INV,
11,
2)

cv2.imshow("imgThresh", imgThresh)

imgThreshCopy = imgThresh.copy()

imgContours, npaContours, npaHierarchy = cv2.findContours(imgThreshCopy,
cv2.RETR_EXTERNAL,
cv2.CHAIN_APPROX_SIMPLE)


npaFlattenedImages = np.empty((0, RESIZED_IMAGE_WIDTH * RESIZED_IMAGE_HEIGHT))

intClassifications = []

intValidChars = [ord('0'), ord('1'), ord('2'), ord('3'), ord('4'), ord('5'), ord('6'), ord('7'), ord('8'), ord('9'),
ord('A'), ord('B'), ord('C'), ord('D'), ord('E'), ord('F'), ord('G'), ord('H'), ord('I'), ord('J'),
ord('K'), ord('L'), ord('M'), ord('N'), ord('O'), ord('P'), ord('Q'), ord('R'), ord('S'), ord('T'),
ord('U'), ord('V'), ord('W'), ord('X'), ord('Y'), ord('Z'),ord('a'),ord('b'),ord('c'),ord('d'),
ord('e'),ord('f'),ord('g'),ord('h'),ord('i'),ord('j'),ord('k'),ord('l'),ord('m'),ord('n'),ord('o'),
ord('p'),ord('q'),ord('r'),ord('s'),ord('t'),ord('u'),ord('v'),ord('w'),ord('x'),ord('y'),ord('z') ]


for npaContour in npaContours:
if cv2.contourArea(npaContour) > MIN_CONTOUR_AREA:
[intX, intY, intW, intH] = cv2.boundingRect(npaContour)


cv2.rectangle(imgTrainingNumbers,
(intX, intY),
(intX+intW,intY+intH),
(0, 0, 255),
2)

imgROI = imgThresh[intY:intY+intH, intX:intX+intW]
imgROIResized = cv2.resize(imgROI, (RESIZED_IMAGE_WIDTH, RESIZED_IMAGE_HEIGHT))

cv2.imshow("imgROI", imgROI)
cv2.imshow("imgROIResized", imgROIResized)
cv2.imshow("training_numbers.png", imgTrainingNumbers)

intChar = cv2.waitKey(0)

if intChar == 27:
sys.exit()
elif intChar in intValidChars:
print(intChar)
intClassifications.append(intChar)
print(intChar)
npaFlattenedImage = imgROIResized.reshape((1, RESIZED_IMAGE_WIDTH * RESIZED_IMAGE_HEIGHT))
npaFlattenedImages = np.append(npaFlattenedImages, npaFlattenedImage, 0)

fltClassifications = np.array(intClassifications, np.float32)

npaClassifications = fltClassifications.reshape((fltClassifications.size, 1))

print ("\n\ntraining complete !!\n")

np.savetxt("classificationsNEWG.txt", npaClassifications)
np.savetxt("flattened_imagesNEWG.txt", npaFlattenedImages)
cv2.destroyAllWindows()
return
if __name__ == "__main__":
main()

Inaccurate text detection

最佳答案

您尝试做的是一种非常幼稚的方法,仅应用阈值和检测轮廓在这里是行不通的。围绕这项任务发表了许多研究论文。您可以引用那些并尝试实现或可以使用著名的 tesseract OCRimage_to_boxes 功能。您可以从here下载。当您使用 python 时,您可以从 here 安装 pytesseract - tesseract 的 python 包装器并使用以下代码实现您的预​​期。

import pytesseract
import cv2

originalImg = cv2.imread('tp.png')
originalImg = cv2.resize(originalImg, None, fx=2.5, fy=2.5)
img = cv2.cvtColor(originalImg, cv2.COLOR_BGR2GRAY)
_,img = cv2.threshold(img,100,255,cv2.THRESH_BINARY)

h, w = img.shape

letters = pytesseract.image_to_boxes(img)
letters = letters.split('\n')
letters = [letter.split() for letter in letters]

for letter in letters:
cv2.rectangle(originalImg, (int(letter[1]), h - int(letter[2])), (int(letter[3]), h - int(letter[4])), (0,0,255), 1)

cv2.imshow('', originalImg)

生成的图像

enter image description here

请注意,有很多错误检测,您需要在训练过程中忽略它们。

关于python - 如何使用 Python 增强图像中的文本检测,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/49777351/

24 4 0
文章推荐: python - 如何使用animation.FuncAnimation仅绘制当前帧?
文章推荐: c# - Linq 代码无法正常工作
文章推荐: python - gridspec 在参数中不使用 ":"?
文章推荐: c# - List 到层次结构