python - 使用 python open-cv 分割报纸文章-6ren

python - 使用 python open-cv 分割报纸文章

转载作者：行者123 更新时间：2023-12-02 15:16:31

我正在使用下面的代码从报纸图像中分割文章。

def segmenter(image_received):
    """Outline candidate article regions on a newspaper image and save the result.

    Steps, in order:
      1. Detect straight segments (Canny + probabilistic Hough) and paint them
         as thick lines directly onto ``image_received``.
      2. Add a 5-pixel constant border of the same colour around the image.
      3. Binarize (inverse Otsu), find external contours and outline every
         contour that has at least 10 points.
      4. Write the annotated image to ``tmp/<uuid4>.jpg``.

    Args:
        image_received: image array accepted by ``cv2.cvtColor`` with
            ``COLOR_BGR2GRAY``. NOTE: the Hough lines are drawn in place, so
            the caller's array is modified.

    Returns:
        The path passed to ``cv2.imwrite`` (the original returned nothing,
        which made the random file name unrecoverable).
    """
    import os  # local import so the block does not depend on file-level imports

    # Process 1: line detection
    img = image_received
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # grayscale version for edge detection
    edges = cv2.Canny(gray, 75, 150)  # binary edge map
    # NOTE(review): rho=0.017 px is unusually fine and is suspiciously close to
    # pi/180 (the theta value); confirm the two were not meant to be 1 and pi/180.
    lines = cv2.HoughLinesP(edges, 0.017, np.pi / 180, 60, minLineLength=100, maxLineGap=0.1)

    # HoughLinesP returns None (not an empty array) when nothing is detected.
    if lines is not None:
        for line in lines:
            x1, y1, x2, y2 = line[0]
            # (0, 0, 128) is dark red when the image is BGR.
            cv2.line(img, (x1, y1), (x2, y2), (0, 0, 128), 12)

    # Frame the image with a border in the same colour as the drawn lines.
    bold = cv2.copyMakeBorder(
        img,  # image source
        5,  # top width
        5,  # bottom width
        5,  # left width
        5,  # right width
        cv2.BORDER_CONSTANT,
        value=(0, 0, 128),
    )

    image = bold
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

    # The original ran a "horizontal" and a "vertical" pass, both with a 1x1
    # kernel; the two passes are kept so the output is unchanged.
    # NOTE(review): a 1x1 opening does not alter the mask, so both passes find
    # the same contours; presumably elongated kernels were intended -- confirm.
    for kernel_size in ((1, 1), (1, 1)):
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, kernel_size)
        detected_lines = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=2)
        cnts = cv2.findContours(detected_lines, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
        # findContours returns 2 values in some OpenCV versions and 3 in others.
        cnts = cnts[0] if len(cnts) == 2 else cnts[1]
        for c in cnts:
            if len(c) >= 10:  # skip tiny contours
                cv2.drawContours(image, [c], 0, (0, 17, 255), 1)

    # cv2.imwrite does not create missing directories.
    os.makedirs('tmp', exist_ok=True)
    out_path = f'tmp/{uuid.uuid4()}.jpg'
    cv2.imwrite(out_path, image)
    return out_path

例如
输入图像是

输出图像是:

存在三个问题:

输出矩形并非在所有情况下都是完整的。

文章中的图片也被当作文章的一部分分割了出来。但我需要的是只分割报纸的文本，并裁掉其他所有内容，类似这样:

考虑下图:

边框表示的文章不是矩形的，要复杂得多。如何使用 python open-cv 或其他图像处理库实现正确的边框？

(这个问题已有一个针对 MATLAB 的答案，见 here，但我需要的是 Python 代码。)

最佳答案

这是我的处理流程(pipeline)。
我认为它还可以进一步优化。
初始化

%matplotlib inline
import numpy as np
import cv2
from matplotlib import pyplot as plt

加载图片

image_file_name = 'paper.jpg'
image = cv2.imread(image_file_name)
# cv2.imread reports failure by returning None instead of raising, which would
# otherwise surface later as an obscure cvtColor error.
if image is None:
    raise FileNotFoundError(f"Could not read image file: {image_file_name!r}")

# grayscale conversion (cv2.imread loads colour images in BGR channel order)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

第一件重要的事情是去除线条，所以我先检测图像中的线条。

# First-order Sobel derivatives: d/dx responds to vertical edges,
# d/dy to horizontal edges.
grad_x = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
grad_y = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)

# Back to 8-bit magnitudes
abs_grad_x = cv2.convertScaleAbs(grad_x)
abs_grad_y = cv2.convertScaleAbs(grad_y)

# Otsu binarization of each gradient image
thresh_x = cv2.threshold(abs_grad_x, 0, 255, cv2.THRESH_OTSU)[1]
thresh_y = cv2.threshold(abs_grad_y, 0, 255, cv2.THRESH_OTSU)[1]

# Light Gaussian smoothing before the Hough transform
kernel_size = 3
blur_ksize = (kernel_size, kernel_size)
blur_thresh_x = cv2.GaussianBlur(thresh_x, blur_ksize, 0)
blur_thresh_y = cv2.GaussianBlur(thresh_y, blur_ksize, 0)

# Probabilistic Hough transform parameters
rho = 1  # distance resolution in pixels of the Hough grid
theta = np.pi / 180  # angular resolution in radians of the Hough grid
threshold = 15  # minimum number of votes (intersections in Hough grid cell)
min_line_length = 200  # minimum number of pixels making up a line
max_line_gap = 1  # maximum gap in pixels between connectable line segments

# Blank single-channel mask, same shape and dtype as gray, to draw lines on
line_image = np.zeros_like(gray)

# Vertical lines (searched in the x-gradient image)
vertical_lines = cv2.HoughLinesP(
    blur_thresh_x, rho, theta, threshold,
    minLineLength=min_line_length, maxLineGap=max_line_gap,
)
# HoughLinesP yields None when nothing is found; otherwise flatten to rows of 4
for x1, y1, x2, y2 in ([] if vertical_lines is None else vertical_lines.reshape(-1, 4)):
    # loose orientation filter: only near-horizontal segments are rejected
    if np.abs(y1 - y2) > 0.1 * np.abs(x1 - x2):
        cv2.line(line_image, (x1, y1), (x2, y2), 255, 5)

# Horizontal lines (searched in the y-gradient image)
horizontal_lines = cv2.HoughLinesP(
    blur_thresh_y, rho, theta, threshold,
    minLineLength=min_line_length, maxLineGap=max_line_gap,
)
for x1, y1, x2, y2 in ([] if horizontal_lines is None else horizontal_lines.reshape(-1, 4)):
    # loose orientation filter: only near-vertical segments are rejected
    if np.abs(x1 - x2) > 0.1 * np.abs(y1 - y2):
        cv2.line(line_image, (x1, y1), (x2, y2), 255, 5)

检测到线条之后，从阈值图像中把这些线条去除

# Inverse Otsu binarization: pixels at or below the Otsu level become 255,
# the rest become 0 (so dark content ends up as foreground).
otsu_inv = cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU
thresh = cv2.threshold(gray, 0, 255, otsu_inv)[1]

# Erase the detected lines: the saturating subtraction zeroes every pixel
# that is set to 255 in line_image.
clean_thresh = cv2.subtract(thresh, line_image)

然后我搜索短语

# Merge neighbouring foreground pixels into phrase-sized blobs using a wide,
# short rectangular dilation.
dilatation_type = cv2.MORPH_RECT
horizontal_dilatation = 20  # This is the gap. 20 for the first image, 10 for the second image
vertical_dilatation = 1
kernel_width = 2 * horizontal_dilatation + 1
kernel_height = 2 * vertical_dilatation + 1
element = cv2.getStructuringElement(
    dilatation_type,
    (kernel_width, kernel_height),
    (horizontal_dilatation, vertical_dilatation),  # anchor at the kernel centre
)
dilatation_thresh = cv2.dilate(clean_thresh, element)

# Fill: paint each external contour solid so holes inside a blob disappear
filled_tresh = dilatation_thresh.copy()
contours, hierarchy = cv2.findContours(
    dilatation_thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE
)
for cnt in contours:
    cv2.drawContours(filled_tresh, [cnt], -1, 255, thickness=cv2.FILLED)

现在我来检测边界框

# First pass: replace every blob by its filled bounding rectangle
bounding_box1 = filled_tresh.copy()
contours, hierarchy = cv2.findContours(bounding_box1, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

for cnt in contours:
    x, y, w, h = cv2.boundingRect(cnt)
    cv2.rectangle(bounding_box1, (x, y), (x + w, y + h), 255, cv2.FILLED)

# Second pass: rectangles that overlapped after the first pass are now one
# contour, so repeat the boxing and collect the widths for the mean text width.
bounding_box2 = bounding_box1.copy()
contours, hierarchy = cv2.findContours(bounding_box2, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

widths = []
for cnt in contours:
    x, y, w, h = cv2.boundingRect(cnt)
    cv2.rectangle(bounding_box2, (x, y), (x + w, y + h), 255, cv2.FILLED)
    widths.append(w)

num_cnt = len(widths)
# mean bounding box width; guard against a page with no detected boxes,
# where the original division raised ZeroDivisionError
mean_bb_width = sum(widths) / num_cnt if num_cnt else 0

现在我将标题与文本分开

# A box is treated as a title when it is at least 1.5x the mean box width
min_title_width = 1.5 * mean_bb_width

# Empty masks (same shape and dtype as gray) for the two classes
raw_title = np.zeros_like(gray)
raw_text = np.zeros_like(gray)

# Route every box to the title mask or the text mask according to its width
contours, hierarchy = cv2.findContours(bounding_box2, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

for cnt in contours:
    x, y, w, h = cv2.boundingRect(cnt)
    target = raw_title if w >= min_title_width else raw_text
    cv2.drawContours(target, [cnt], -1, 255, cv2.FILLED)

然后最后的处理

# Draw the result on a copy so the loaded image stays untouched
image_out = image.copy()

# Closing parameters: a 1-wide, 20-tall kernel bridges vertical gaps only
horizontal_closing = 1
vertical_closing = 20
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (horizontal_closing, vertical_closing))

# Morphological closing of both masks
closing_title = cv2.morphologyEx(raw_title, cv2.MORPH_CLOSE, kernel)
closing_text = cv2.morphologyEx(raw_text, cv2.MORPH_CLOSE, kernel)

# Copies kept under their original names (not used in the steps shown here)
bounding_title = closing_title.copy()
bounding_text = closing_text.copy()

# Titles are outlined with (255, 0, 0) and text blocks with (0, 255, 0);
# titles are processed first, exactly as before.
for mask, box_color in ((closing_title, (255, 0, 0)), (closing_text, (0, 255, 0))):
    contours, hierarchy = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    for cnt in contours:
        x, y, w, h = cv2.boundingRect(cnt)
        cv2.rectangle(image_out, (x, y), (x + w, y + h), box_color, 2)

结果是

将水平膨胀参数 horizontal_dilatation 从 20 改为 10 后，对第二张图像(我去掉了您添加的红色边框)得到了以下结果

关于python - 使用 python open-cv 分割报纸文章，我们在Stack Overflow上找到一个类似的问题： https://stackoverflow.com/questions/64241837/

文章推荐： vba - 在现有 If block 中添加新的 If 语句

文章推荐： sql - 将字符串转换为日期和时间

文章推荐： geocoding - 有人知道进行离线反向地理编码的好方法吗？

文章推荐： r - 在 R 的 data.table 中添加一个空列表作为值

c - OpenAL/OpenAL 软件
OpenAL.org && 创意开发网站已关闭。我选择替代版本 OpenAL Soft .我很担心，因为在 OpenAL Soft 的二进制安装中我找不到 alut.h header 。 alut.h
安卓工作室 : open file operation failed (The open file operation failed to connect to the open and save panel service. )
我使用 Android Studio 已经有一段时间了，但有一天应用程序突然出错了。当我尝试单击我的目录以查找要导入或打开的文件时，应用程序变得异常缓慢并且根本没有响应。当我最终成功切换到存储我的文件
javascript - Firefox 4 : Dynamically opened window opens new tabs in old window (i. e. window.opener)
自 Firefox 4 以来，这似乎是一个奇怪的功能变化。在使用 window.open() 打开一个窗口后，当用鼠标中键单击打开的窗口中的链接时(或右键单击并选择“在新窗口中打开”选项卡') 导致链
ruby - Open::URI.open 返回什么？
我无法从 Open::URI 的 rdoc 中得知当我这样做时返回的是什么: result = open(url) URL 返回 XML，但我如何查看/解析 XML？最佳答案 open 返回一个 I
rs.open sql,conn,1,1与rs.open sql,conn,1.3还有rs.open sql,conn,3,2区别
经常开发asp但对于细致的说法，真实不太清楚，这里简单的介绍下。一般情况下读取数据都是用rs.open sql,conn,1,1 修改数据:rs.open sql,conn,1,3 删除
python - open() 和 path.open() 的区别
关于 pathlib 标准库中的模块，是 path.open() 方法只是内置 open() 的“包装器”功能？最佳答案如果您阅读了 source code的 pathlib.Path.open你
open-liberty - 如何更改 Open Liberty 运行时语言？
我想将 Open Liberty 运行时的语言更改为 en_US从 Eclipse IDE 中，但我不知道如何。也尝试使用 JVM 参数的首选项来设置它，但它没有用。 -Duser.language
python - PyCharm 认为函数 "open"没有参数 "opener"
这是我所拥有的: 参数“opener”未在可能的函数调用参数中列出。这是 PyCharm 错误还是其他原因？ PyCharm 2018.3.5 社区版，Windows 7 上的 Python 3.6.
java - Java中使用Neo4jGraph.open()或GraphFactory.open()访问远程Neo4j数据库
我正在使用 Tinkerpop 的 GraphFactory.open(Configuration 配置) Java 命令来访问 Neo4j 数据库。一个最低限度的工作示例是: Configurat
Python， 'open' 和 'with open' 之间的区别
这个问题在这里已经有了答案: What is the python "with" statement designed for? (11 个答案) 关闭 7 年前。我没有使用过 with 语句，但
python - 内置 open() 函数中 opener 参数的用途是什么？
我正在玩 python 3.5 中的 open 函数。我不明白 opener 参数(最后一个参数)在 open 函数中的用法。根据 python 文档:可以通过将可调用对象作为打开器传递来使用自定义打
linux - |删除 : can't open | grep: can't open grep
关闭。此题需要details or clarity 。目前不接受答案。想要改进这个问题吗？通过 editing this post 添加详细信息并澄清问题. 已关闭 5 年前。 Improve th
python - OpenAL Python openal.audio 模块未找到
我试图用 Python 来做一些模拟 3D 声音的工作。我试图运行此代码(答案中提供):Python openAL 3D sound类似，两次都收到: ModuleNotFoundError: No
python - 错误或功能 : open and io. open 不可互换
我一直认为 open 和 io.open 可以互换。显然不是，如果我相信这个片段: import ctypes, io class POINT(ctypes.Structure): _fie
python - os.open vs open，使用什么
这个问题在这里已经有了答案: What's the difference between io.open() and os.open() on Python? (7 个答案) 关闭 9 年前。我是
c# - ChannelFactory.Open VS IClientChannel.Open
我正在尝试更好地了解 WCF 的一些内部工作原理。我已经做了相当多的环顾四周，但我无法找到关于 ChannelFactory.Open() 与 IClientChannel.Open() 相比的明确解
python - 使用 "open()"与 "with open()"读取文件
这个问题在这里已经有了答案: What is the python "with" statement designed for? (11 个答案) 关闭 7 年前。我知道有很多关于在 python
adodb.recordset.open(rs.open)方法参数详解
CFSDN坚持开源创造价值，我们致力于搭建一个资源共享平台，让每一个IT人在这里找到属于你的精彩世界. 这篇CFSDN的博客文章adodb.recordset.open(rs.open)方法参数详解由
javascript - 使用 window.open 但阻止使用 window.opener
不久前我遇到了一个interesting security hole Link 看起来足够无害，但有一个漏洞，因为默认情况下，正在打开的页面允许打开的页面通过 window.opener 回调到它。有
javascript - 折叠列表 : Keep open only the active one open
这在我的应用程序上运行良好，但由于某种原因我无法让它在这里正常工作。无论如何，我的问题是，当我单击列表标题时，我想关闭之前打开的列表标题并仅保留事件的列表标题打开。目前它会打开我点击的所有内容，但也会

行者123

个人简介

我是一名优秀的程序员,十分优秀！

作者热门文章

滴滴打车优惠券免费领取

全站热门文章

首页

博学

6Ren·AI

商城

python - 使用 python open-cv 分割报纸文章