gpt4 book ai didi

python - 使用 python open-cv 分割报纸文章

转载 作者:行者123 更新时间:2023-12-02 15:16:31 25 4
gpt4 key购买 nike

我正在使用下面的代码从报纸图像中分割文章。

def segmenter(image_received):
    """Mark separator lines and contours on a newspaper page and save the result.

    The function detects straight line segments with a probabilistic Hough
    transform, paints them (and a 5 px border) in one solid colour, then
    outlines the external contours of the Otsu-thresholded result. The
    annotated image is written to ``tmp/<uuid4>.jpg``.

    Args:
        image_received: BGR image array as accepted by ``cv2.cvtColor``.
            It is copied first, so the caller's array is not drawn on.

    Returns:
        None. The only output is the JPEG file written under ``tmp/``.
    """
    import os

    # Draw on a private copy; the original aliased the argument and painted
    # the Hough lines straight onto the caller's image.
    img = image_received.copy()
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # grayscale (not binary)
    edges = cv2.Canny(gray, 75, 150)  # edge map fed to the Hough transform

    # NOTE(review): the second argument is rho, the distance resolution in
    # pixels; 0.017 is roughly pi/180 and looks like a misplaced angle value.
    # maxLineGap=0.1 likewise allows practically no gap. Both are kept as-is
    # because changing them alters which lines are detected.
    lines = cv2.HoughLinesP(edges, 0.017, np.pi / 180, 60, minLineLength=100, maxLineGap=0.1)

    # HoughLinesP returns None (not an empty array) when nothing is found.
    if lines is not None:
        for line in lines:
            x1, y1, x2, y2 = (int(v) for v in line[0])
            # (0, 0, 128) is dark red in BGR order
            cv2.line(img, (x1, y1), (x2, y2), (0, 0, 128), 12)

    # Surround the page with a 5 px frame in the same colour as the lines.
    image = cv2.copyMakeBorder(
        img,  # image source
        5,  # top width
        5,  # bottom width
        5,  # left width
        5,  # right width
        cv2.BORDER_CONSTANT,
        value=(0, 0, 128),
    )

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

    # Two passes ("horizontal" then "vertical") as in the original. Both use
    # a 1x1 rectangular kernel, so they currently produce the same contours.
    # NOTE(review): elongated kernels such as (40, 1) / (1, 40) were probably
    # intended — confirm before changing.
    kernels = (
        cv2.getStructuringElement(cv2.MORPH_RECT, (1, 1)),  # horizontal pass
        cv2.getStructuringElement(cv2.MORPH_RECT, (1, 1)),  # vertical pass
    )
    for kernel in kernels:
        detected_lines = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=2)
        found = cv2.findContours(detected_lines, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
        # findContours returns 2 values in some OpenCV versions and 3 in others
        cnts = found[0] if len(found) == 2 else found[1]
        for c in cnts:
            # skip tiny contours (fewer than 10 boundary points)
            if len(c) >= 10:
                cv2.drawContours(image, [c], 0, (0, 17, 255), 1)

    # cv2.imwrite does not create missing directories, so make sure it exists.
    os.makedirs('tmp', exist_ok=True)
    cv2.imwrite(f'tmp/{uuid.uuid4()}.jpg', image)

例如
输入图像是
enter image description here
输出图像是:
enter image description here
存在三个问题:
  • 输出矩形并非在所有情况下都是完整的。
  • 文章中的图片也被当作文章的一部分一起分割出来了。但我需要的是只分割报纸的文字,并裁剪掉其他所有内容。类似这样:
    enter image description here
  • 考虑下图:
    enter image description here
    边框表示的文章不是矩形的,要复杂得多。如何使用 python open-cv 或其他图像处理库实现正确的边框?

  • (这个问题有一个针对 matlab 的答案,见 here。但我需要的是 python 代码。)

    最佳答案

    这是我的管道。
    我认为可以优化。
    初始化

    %matplotlib inline
    import numpy as np
    import cv2
    from matplotlib import pyplot as plt
    加载图片
    # Load the page to segment.
    image_file_name = 'paper.jpg'
    image = cv2.imread(image_file_name)
    # cv2.imread returns None instead of raising when the file is missing or
    # unreadable; fail here with a clear message rather than inside cvtColor.
    if image is None:
        raise FileNotFoundError(f'Could not read image: {image_file_name}')

    # grayscale conversion
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    第一件重要的事情是去除线条,所以我先检测这些线条。
    # Sobel derivatives along x and y, converted to 8-bit magnitudes.
    grad_x = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
    grad_y = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
    abs_grad_x = cv2.convertScaleAbs(grad_x)
    abs_grad_y = cv2.convertScaleAbs(grad_y)

    # Otsu binarisation of each gradient image
    thresh_x = cv2.threshold(abs_grad_x, 0, 255, cv2.THRESH_OTSU)[1]
    thresh_y = cv2.threshold(abs_grad_y, 0, 255, cv2.THRESH_OTSU)[1]

    # Light Gaussian smoothing before the Hough transform
    kernel_size = 3
    blur_thresh_x = cv2.GaussianBlur(thresh_x, (kernel_size, kernel_size), 0)
    blur_thresh_y = cv2.GaussianBlur(thresh_y, (kernel_size, kernel_size), 0)

    # Probabilistic Hough parameters
    rho = 1  # distance resolution in pixels of the Hough grid
    theta = np.pi / 180  # angular resolution in radians of the Hough grid
    threshold = 15  # minimum number of votes (intersections in Hough grid cell)
    min_line_length = 200  # minimum number of pixels making up a line
    max_line_gap = 1  # maximum gap in pixels between connectable line segments

    # Blank single-channel mask, same shape and dtype as `gray`, to draw lines on
    line_image = np.zeros_like(gray)

    # The x-gradient image yields the vertical candidates, the y-gradient
    # image the horizontal ones.
    vertical_lines = cv2.HoughLinesP(
        blur_thresh_x, rho, theta, threshold, np.array([]), min_line_length, max_line_gap
    )
    horizontal_lines = cv2.HoughLinesP(
        blur_thresh_y, rho, theta, threshold, np.array([]), min_line_length, max_line_gap
    )

    # Each entry: (segments, orientation filter on |dx|, |dy|). The filters
    # only reject segments that are almost perfectly of the other orientation.
    line_groups = (
        (vertical_lines, lambda dx, dy: dy > 0.1 * dx),
        (horizontal_lines, lambda dx, dy: dx > 0.1 * dy),
    )
    for segments, keep in line_groups:
        if segments is None:  # HoughLinesP found nothing
            continue
        for segment in segments:
            for x1, y1, x2, y2 in segment:
                if keep(np.abs(x1 - x2), np.abs(y1 - y2)):
                    cv2.line(line_image, (x1, y1), (x2, y2), 255, 5)
    line_image
    从阈值中删除线条后
    # Inverted Otsu binarisation of the grayscale page
    _otsu_level, thresh = cv2.threshold(
        gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
    )

    # Erase the detected ruling lines from the binary image
    clean_thresh = cv2.subtract(thresh, line_image)
    clean threshold
    然后我搜索短语
    # Merge neighbouring glyphs into phrase blobs with a wide, flat dilation.
    dilatation_type = cv2.MORPH_RECT
    horizontal_dilatation = 20  # This is the gap. 20 for the first image, 10 for the second image
    vertical_dilatation = 1
    element_size = (2 * horizontal_dilatation + 1, 2 * vertical_dilatation + 1)
    element_anchor = (horizontal_dilatation, vertical_dilatation)  # kernel centre
    element = cv2.getStructuringElement(dilatation_type, element_size, element_anchor)
    dilatation_thresh = cv2.dilate(clean_thresh, element)

    # Fill every external contour so each blob becomes a solid region.
    filled_tresh = dilatation_thresh.copy()
    contours, hierarchy = cv2.findContours(
        dilatation_thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE
    )
    for cnt in contours:
        cv2.drawContours(filled_tresh, [cnt], -1, 255, cv2.FILLED)
    filled threshold
    现在我检测到边界框
    # First pass: replace every blob by its filled bounding rectangle.
    bounding_box1 = filled_tresh.copy()
    contours, hierarchy = cv2.findContours(
        bounding_box1, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
    )
    for x, y, w, h in map(cv2.boundingRect, contours):
        top_left, bottom_right = (x, y), (x + w, y + h)
        cv2.rectangle(bounding_box1, top_left, bottom_right, 255, cv2.FILLED)
    bounding box 1
    # Second pass: fill the bounding rectangles again (boxes that merged in
    # the first pass get one larger box) and measure the mean box width.
    bounding_box2 = bounding_box1.copy()

    contours, hierarchy = cv2.findContours(bounding_box2, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    box_widths = []
    for cnt in contours:
        x, y, w, h = cv2.boundingRect(cnt)
        cv2.rectangle(bounding_box2, (x, y), (x + w, y + h), 255, cv2.FILLED)
        box_widths.append(w)

    num_cnt = len(box_widths)
    # mean bounding box width; 0 when no contour was found, which avoids the
    # ZeroDivisionError the unguarded division raised on an empty page
    mean_bb_width = sum(box_widths) / num_cnt if num_cnt else 0
    bounding box 2
    现在我将标题与文本分开
    # A box counts as a title when it is at least 1.5x the mean box width.
    min_title_width = 1.5 * mean_bb_width

    # Blank masks (same shape/dtype as `gray`) for the two classes
    raw_title = np.zeros_like(gray)
    raw_text = np.zeros_like(gray)

    # Route each box to the title mask or the text mask by its width
    contours, hierarchy = cv2.findContours(
        bounding_box2, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
    )
    for cnt in contours:
        x, y, w, h = cv2.boundingRect(cnt)
        target_mask = raw_title if w >= min_title_width else raw_text
        cv2.drawContours(target_mask, [cnt], -1, 255, cv2.FILLED)
    separation title text
    然后最后的处理
    # Final step: merge vertically adjacent boxes and outline the resulting
    # regions on a copy of the input image.
    image_out = image.copy()

    # Closing parameters: a 1 px wide, 20 px tall kernel bridges the gaps
    # between consecutive text lines
    horizontal_closing = 1
    vertical_closing = 20
    kernel = cv2.getStructuringElement(
        cv2.MORPH_RECT, (horizontal_closing, vertical_closing)
    )

    # Morphological closing of both masks
    closing_title = cv2.morphologyEx(raw_title, cv2.MORPH_CLOSE, kernel)
    closing_text = cv2.morphologyEx(raw_text, cv2.MORPH_CLOSE, kernel)
    bounding_title = closing_title.copy()
    bounding_text = closing_text.copy()

    # Titles are outlined first with (255, 0, 0), then text with (0, 255, 0);
    # colours are in the channel order of `image` (BGR when loaded by cv2).
    outline_jobs = (
        (closing_title, (255, 0, 0)),
        (closing_text, (0, 255, 0)),
    )
    for mask, box_color in outline_jobs:
        contours, hierarchy = cv2.findContours(
            mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
        )
        for x, y, w, h in map(cv2.boundingRect, contours):
            cv2.rectangle(image_out, (x, y), (x + w, y + h), box_color, 2)
    结果是
    image out
    将水平膨胀参数 horizontal_dilatation 从 20 改为 10 后,对第二张图像(我去掉了您添加的红色边框)得到以下结果:
    image out 2

    关于python - 使用 python open-cv 分割报纸文章,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/64241837/

    25 4 0
    Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
    广告合作:1813099741@qq.com 6ren.com