python - 在 cv2.resize() 之后寻找新的坐标-6ren

python - 在 cv2.resize() 之后寻找新的坐标

转载作者：太空宇宙更新时间：2023-11-03 22:39:09

我正在关注 text-detction-ctpn .该项目的作者 resize() 对图像进行计算。如果不调整图像大小，内存就会爆炸。

我设法用这个函数将调整大小的盒子转换回原来的盒子

def transform_boxes(boxes: np.ndarray, h, w, rh, rw):
    """
    Transform back the original coordinate
    :param boxes:
    :param h: height of the original
    :param w: width of the original
    :param rh: re-sized height
    :param rw: re-sized height
    :return:
    """
    z = np.copy(boxes)
    z[:, 0] = z[:, 0] / rh
    z[:, 2] = z[:, 2] / rh
    z[:, 4] = z[:, 4] / rh
    z[:, 6] = z[:, 6] / rh

    z[:, 1] = z[:, 1] / rw
    z[:, 3] = z[:, 3] / rw
    z[:, 5] = z[:, 5] / rw
    z[:, 7] = z[:, 7] / rw

    return z

变换误差在大坐标数上显着

更新:
撞头后。我决定把我的代码放在这里。希望更多的眼睛能帮助我发现错误。

# coding=utf-8
import os
import shutil
import sys
import time

import cv2
import numpy as np
import tensorflow as tf

from my_utils import draw_squares

sys.path.append(os.getcwd())
from nets import model_train as model
from utils.rpn_msr.proposal_layer import proposal_layer
from utils.text_connector.detectors import TextDetector

tf.app.flags.DEFINE_string('test_data_path', 'data/demo/', '')
tf.app.flags.DEFINE_string('output_path', 'data/res/', '')
tf.app.flags.DEFINE_string('gpu', '0', '')
tf.app.flags.DEFINE_string('checkpoint_path', 'checkpoints_mlt/', '')
FLAGS = tf.app.flags.FLAGS
from pprint import pprint


def transform_boxes(boxes: np.ndarray, im):
    """
    Transform back the original coordinate
    :param boxes:
    :param im: The original image
    :return:
    """
    z = np.copy(boxes)
    (height, width, colors) = im.shape
    new_h, new_w, img_size = get_new_wh(im)
    z[:, 0::2] = height * z[:, 0::2] / new_h
    z[:, 1::2] = width * z[:, 1::2] / new_w

    return z


def get_images():
    files = []
    exts = ['jpg', 'png', 'jpeg', 'JPG']
    for parent, dirnames, filenames in os.walk(FLAGS.test_data_path):
        for filename in filenames:
            for ext in exts:
                if filename.endswith(ext):
                    files.append(os.path.join(parent, filename))
                    break
    print('Find {} images'.format(len(files)))
    return files


def get_new_wh(img):
    """
    Get only new width and new height
    :param img:
    :return:
    """
    img_size = img.shape
    im_size_min = np.min(img_size[0:2])
    im_size_max = np.max(img_size[0:2])

    im_scale = float(600) / float(im_size_min)
    if np.round(im_scale * im_size_max) > 1200:
        im_scale = float(1200) / float(im_size_max)
    new_h = int(img_size[0] * im_scale)
    new_w = int(img_size[1] * im_scale)

    new_h = new_h if new_h // 16 == 0 else (new_h // 16 + 1) * 16
    new_w = new_w if new_w // 16 == 0 else (new_w // 16 + 1) * 16

    return new_h, new_w, img_size


def resize_image(img):
    new_h, new_w, img_size = get_new_wh(img)
    re_im = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
    return re_im, (new_h / img_size[0], new_w / img_size[1])


def main(argv=None):
    if os.path.exists(FLAGS.output_path):
        shutil.rmtree(FLAGS.output_path)
    os.makedirs(FLAGS.output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='input_image')
        input_im_info = tf.placeholder(tf.float32, shape=[None, 3], name='input_im_info')

        global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        variable_averages = tf.train.ExponentialMovingAverage(0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(FLAGS.checkpoint_path, os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()
            for im_fn in im_fn_list:
                print('===============')
                print(im_fn)
                start = time.time()
                try:
                    im = cv2.imread(im_fn)[:, :, ::-1]
                except:
                    print("Error reading image {}!".format(im_fn))
                    continue

                img, (rh, rw) = resize_image(im)
                h, w, c = img.shape
                im_info = np.array([h, w, c]).reshape([1, 3])
                bbox_pred_val, cls_prob_val = sess.run([bbox_pred, cls_prob],
                                                       feed_dict={input_image: [img],
                                                                  input_im_info: im_info})

                textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val, im_info)
                scores = textsegs[:, 0]
                textsegs = textsegs[:, 1:5]

                textdetector = TextDetector(DETECT_MODE='H')
                boxes = textdetector.detect(textsegs, scores[:, np.newaxis], img.shape[:2])
                boxes = np.array(boxes, dtype=np.int)

                new_boxes = transform_boxes(boxes, im)

                cost_time = (time.time() - start)
                print("cost time: {:.2f}s".format(cost_time))

                # The original output from re-sized picture
                # draw_squares(new_boxes, im, rh, rw, im_fn, scores, resize=False)
                draw_squares(new_boxes, im, im.shape[0], im.shape[1], im_fn, scores, resize=False)


if __name__ == '__main__':
    tf.app.run()

我看不到 demo.py 的 boxes 输出。现在不是问题。我设法使用 playground.py 绘制红点。

import os

import cv2
import numpy as np
import tensorflow as tf

FLAGS = tf.app.flags.FLAGS


def draw_squares(boxes, img, rh, rw, im_fn, scores, resize=True):
    for i, box in enumerate(boxes):
        cv2.polylines(img, [box[:8].astype(np.int32).reshape((-1, 1, 2))], True, color=(0, 255, 0),
                      thickness=2)
    if resize:
        img = cv2.resize(img, None, None, fx=1.0 / rh, fy=1.0 / rw, interpolation=cv2.INTER_LINEAR)
    cv2.imwrite(os.path.join(FLAGS.output_path, os.path.basename(im_fn)), img[:, :, ::-1])

    with open(os.path.join(FLAGS.output_path, os.path.splitext(os.path.basename(im_fn))[0]) + ".txt",
              "w") as f:
        for i, box in enumerate(boxes):
            line = ",".join(str(box[k]) for k in range(8))
            line += "," + str(scores[i]) + "\r\n"
            f.writelines(line)

尽管我避免使用 rw 和 rh，因为它们被分开并且在这期间可能会丢失精度。我使用 get_new_wh() 获取 new_h 和 new_w 值。结果是一样的。

重现我的结果。
1.查看我的fork
2. 将文件放入data/demo/目录。原图如下
3. 将目录更改为根项目
4. pip install -r requirements.txt
5. python main/demo.py # 用Python3执行
6. python playground.py # 查看原始副本的输出
如果您想尝试，这是原始图片

图像处理步骤
1. 程序调整图片大小并使boxes坐标在较小的那个上。
2.在复制的图片上标记方框。
3. 编程 resize() 将结果恢复到接近原始大小。

问题:
较小图片上的输出 boxes 坐标无法使用朴素映射函数转换为原始图片的正确坐标。越大，错误越多。

问题:
如何在执行 cv2.resize() 后获得像素的正确坐标？

最佳答案

当你试图反转盒子上的调整大小操作时，你除以 rh 和 rw 但你永远不会乘以 h 和 w。

z[:, 0] = h * z[:, 0] / rh
z[:, 1] = w * z[:, 1] / rw

这解释了为什么您的错误随着图像的增大而变大。

作为旁注，您可以使用 numpy 索引来避免每行重复四次:

z[:, 0::2] = h * z[:, 0::2] / rh
z[:, 1::2] = w * z[:, 1::2] / rw

关于python - 在 cv2.resize() 之后寻找新的坐标，我们在Stack Overflow上找到一个类似的问题： https://stackoverflow.com/questions/55804948/

文章推荐： c# - 如果目标中存在 Blob，如何覆盖 Blob

文章推荐： node.js - 'npm install Bones' 安装旧版本的下划线

文章推荐： javascript - 使用 jQuery 在另一个 div 中克隆一个 div

文章推荐： c# - 无法使用 Miniprofiler 打印查询

python - Python 中的集群或合并集群以减少组数 (Python)
我正在处理一组标记为 160 个组的 173k 点。我想通过合并最接近的(到 9 或 10 个组)来减少组/集群的数量。我搜索过 sklearn 或类似的库，但没有成功。我猜它只是通过 knn 聚类
python - python 列表的子集基于同一列表的元素组，pythonically
我有一个扁平数字列表，这些数字逻辑上以 3 为一组，其中每个三元组是 (number, __ignored, flag[0 or 1])，例如: [7,56,1, 8,0,0, 2,0,0, 6,1,
python - 激活 Python 虚拟环境并在另一个 Python 脚本中调用 Python 脚本
我正在使用 pipenv 来管理我的包。我想编写一个 python 脚本来调用另一个使用不同虚拟环境(VE)的 python 脚本。如何运行使用 VE1 的 python 脚本 1 并调用另一个 p
python - 在焕然一新的 Python 环境中以编程方式从 Python 内部执行 Python 文件
假设我有一个文件 script.py 位于 path = "foo/bar/script.py"。我正在寻找一种在 Python 中通过函数 execute_script() 从我的主要 Python
python - 从 python 脚本但在 python 脚本之外运行 python 脚本
这听起来像是谜语或笑话，但实际上我还没有找到这个问题的答案。问题到底是什么？我想运行 2 个脚本。在第一个脚本中，我调用另一个脚本，但我希望它们继续并行，而不是在两个单独的线程中。主要是我不希望第
python - 使用不同的 python 从 python 运行 python 脚本
我有一个带有 python 2.5.5 的软件。我想发送一个命令，该命令将在 python 2.7.5 中启动一个脚本，然后继续执行该脚本。我试过用 #!python2.7.5 和http://re
python - 为什么从 Python 命令行调用 Python 时 Python 无法找到并运行我的脚本？
我在 python 命令行(使用 python 2.7)中，并尝试运行 Python 脚本。我的操作系统是 Windows 7。我已将我的目录设置为包含我所有脚本的文件夹，使用: os.chdir("
python - 使用动态版本的 Python 执行嵌入的 Python 代码时出现致命的 Python 错误
剧透:部分解决(见最后)。以下是使用 Python 嵌入的代码示例: #include int main(int argc, char** argv) { Py_SetPythonHome
python - python 中识别 python 数组或列表中最大累积差异的最快方法是什么？
假设我有以下列表，对应于及时的股票价格: prices = [1, 3, 7, 10, 9, 8, 5, 3, 6, 8, 12, 9, 6, 10, 13, 8, 4, 11] 我想确定以下总体上最
python - (Python) 通过单选按钮 python 更新背景
所以我试图在选择某个单选按钮时更改此框架的背景。我的框架位于一个类中，并且单选按钮的功能位于该类之外。 (这样我就可以在所有其他框架上调用它们。) 问题是每当我选择单选按钮时都会出现以下错误: co
python - python 中的字符串与正则表达式比较在 python 中失败
我正在尝试将字符串与 python 中的正则表达式进行比较，如下所示， #!/usr/bin/env python3 import re str1 = "Expecting property name
python - python 如何加载Boost.Python 库？
考虑以下原型(prototype) Boost.Python 模块，该模块从单独的 C++ 头文件中引入类“D”。 /* file: a/b.cpp */ BOOST_PYTHON_MODULE(c)
python - python 检查模块 python 的问题
如何编写一个程序来“识别函数调用的行号？” python 检查模块提供了定位行号的选项，但是， def di(): return inspect.currentframe().f_back.f_l
python - 系统 python 与用户 python
我已经使用 macports 安装了 Python 2.7，并且由于我的 $PATH 变量，这就是我输入 $ python 时得到的变量。然而，virtualenv 默认使用 Python 2.6，除
python - [Python] : Python re. 长字符串行的搜索速度优化
我只想问如何加快 python 上的 re.search 速度。我有一个很长的字符串行，长度为 176861(即带有一些符号的字母数字字符)，我使用此函数测试了该行以进行研究: def getExe
python - 编辑字符串 python 正则表达式 python
list1= [u'%app%%General%%Council%', u'%people%', u'%people%%Regional%%Council%%Mandate%', u'%ppp%%Ge
python - Python 映射中的副作用(Python "do" block )
这个问题在这里已经有了答案: Is it Pythonic to use list comprehensions for just side effects? (7 个答案) 关闭 4 个月前。告
python - 使用其值逻辑组合两个 python 列表 - Python
我想用 Python 将两个列表组合成一个列表，方法如下: a = [1,1,1,2,2,2,3,3,3,3] b= ["Sun", "is", "bright", "June","and" ,"Ju
python - Boost.Python python 链接错误
我正在运行带有最新 Boost 发行版 (1.55.0) 的 Mac OS X 10.8.4 (Darwin 12.4.0)。我正在按照说明 here构建包含在我的发行版中的教程 Boost-Pyth
python - 在 Python 中仅使用内置库制作一个基本的网络抓取工具 - Python
学习 Python，我正在尝试制作一个没有任何第 3 方库的网络抓取工具，这样过程对我来说并没有简化，而且我知道我在做什么。我浏览了一些在线资源，但所有这些都让我对某些事情感到困惑。 html 看起来

太空宇宙

个人简介

我是一名优秀的程序员,十分优秀！

作者热门文章

滴滴打车优惠券免费领取

全站热门文章

首页

博学

6Ren·AI

商城

python - 在 cv2.resize() 之后寻找新的坐标