c++ - 对于简单的 StereoBM 算法，为什么我的代码比 opencv 慢得多？-6ren

c++ - 对于简单的 StereoBM 算法，为什么我的代码比 opencv 慢得多？

转载作者：可可西里更新时间：2023-11-01 17:57:38

这是我的测试代码，用于实现一个简单的、不带预滤波的块匹配算法 testBM。但当窗口尺寸较大时，它需要大约 400 毫秒甚至更多，而 OpenCV 的 StereoBM(CPU 版本，而非 GPU 版本)只需要 20 毫秒。我已经阅读过 StereoBM 的源码，但很难理解。有谁知道为什么会有这么大的差距？

下面是我的代码。

void testBM(const Mat &left0, 
            const Mat &right0, 
            Mat &disparity, 
            int SAD, 
            int searchRange)
{
    int cols = left0.cols;
    int rows = left0.rows;
    int total = cols*rows;
    const uchar* data_left = left0.ptr<uchar>(0);
    const uchar* data_right = right0.ptr<uchar>(0);
    uchar* data_dm = new uchar[total];
    int dbNum = 2 * SAD + 1;
    int dNum = dbNum * dbNum;
    //x is col index in the dbNum * dbNum window
    //y is row index in this window
    //z is (x + y * cols).
    //I compute them in advance for avoid computing repeatedly.
    Point3i *dLocDif = new Point3i[dNum];
    for (int i = 0; i < dNum; i++)
    {
        dLocDif[i] = Point3i(i%dbNum - SAD, i / dbNum - SAD, 0);
        dLocDif[i].z = dLocDif[i].x + dLocDif[i].y * cols;
    }

    //I compute disparity difference for eache search range to avoid
    //computing repeatedly.
    uchar* dif_ = new uchar[total*searchRange];
    for (int _search = 0; _search < searchRange; _search++)
    {
        int th = _search * total;
        for (int i = 0; i < total; i++)
        {
            int c = i % cols - _search;
            if (c < 0) continue;
            dif_[i+th] = (uchar)std::abs(data_left[i] - data_right[i-_search]);
        }
    }
    for (int p = 0; p < total; p++)
    {
        int min = 50 * dNum;
        int dm = -256;
        int _col = p % cols;
        int _row = p / cols;
        int th = 0;

        //I search for the smallest difference between left and right image
        // using def_.
        for (int _search = 0; _search < searchRange; _search++, th += total)
        {
            if (_col + _search > cols) break;
            int temp = 0;
            for (int i = 0; i < dNum; i++)
            {
                int _c = _col + dLocDif[i].x;
                if (_c >= cols || _c < 0) continue;
                int _r = _row + dLocDif[i].y;
                if (_r >= rows || _r < 0) continue;
                temp += dif_[th + p + dLocDif[i].z];
                if (temp > min)
                {
                    break;
                }
            }
            if (temp < min)
            {
                dm = _search;
                min = temp;
            }
        }
        data_dm[p] = dm;
    }
    disparity = Mat(rows, cols, CV_8UC1, data_dm);
}

这里是opencv中StereoBM的基本源码。初始化后我有点困惑。谁能简单解释一下？

// Block-matching stereo correspondence using incrementally updated SAD sums.
//
// The image is swept column by column. For each column the per-row,
// per-disparity horizontal sums (hsad) are updated by adding the cost of the
// column entering the window and subtracting the cost of the column leaving
// it; the full window sums (sad) are then slid down the rows the same way.
// This avoids re-summing the whole window for every pixel and disparity.
//
// Parameters:
//   left, right - input images, read as bytes through .data with left.step
//                 as the row stride for both.
//   disp        - output disparity map, written as short; rejected pixels
//                 get the FILTERED value.
//   cost        - optional output of the winning SAD per pixel (written as
//                 int); when cost.data is null the value is discarded.
//   state       - matcher parameters (window size, disparity range,
//                 pre-filter cap, texture threshold, uniqueness ratio).
//   buf         - scratch memory carved into sad / hsad0 / htext / cbuf0
//                 below. NOTE(review): the required size is not visible
//                 here - confirm against the caller.
//   _dy0, _dy1  - number of rows above / below the image that are also
//                 processed (clamped to wsz2+1). Rows -dy0 .. height+dy1-1
//                 are dereferenced, so presumably the caller guarantees
//                 they are readable - verify.
static void
findStereoCorrespondenceBM( const Mat& left, const Mat& right,
                            Mat& disp, Mat& cost, const CvStereoBMState& state,
                            uchar* buf, int _dy0, int _dy1 )
{
    const int ALIGN = 16;
    int x, y, d;
    int wsz = state.SADWindowSize, wsz2 = wsz/2;
    int dy0 = MIN(_dy0, wsz2+1), dy1 = MIN(_dy1, wsz2+1);
    int ndisp = state.numberOfDisparities;
    int mindisp = state.minDisparity;
    // Column offsets applied to the left / right base pointers so that
    // rptr[d] for d in [0, ndisp) pairs with lptr[0].
    int lofs = MAX(ndisp - 1 + mindisp, 0);
    int rofs = -MIN(ndisp - 1 + mindisp, 0);
    int width = left.cols, height = left.rows;
    // Number of columns for which disparities are actually computed.
    int width1 = width - rofs - ndisp + 1;
    int ftzero = state.preFilterCap;
    int textureThreshold = state.textureThreshold;
    int uniquenessRatio = state.uniquenessRatio;
    // Value stored for pixels that are outside the valid range or rejected.
    short FILTERED = (short)((mindisp - 1) << DISPARITY_SHIFT);

    // sad      - window SAD per disparity for the current pixel
    // hsad0    - per-row horizontal SAD sums (row 0 origin), ndisp per row
    // htext    - per-row horizontal texture sums
    // cbuf0    - ring of per-column cost buffers (indexed modulo wsz+1 below)
    int *sad, *hsad0, *hsad, *hsad_sub, *htext;
    uchar *cbuf0, *cbuf;
    const uchar* lptr0 = left.data + lofs;
    const uchar* rptr0 = right.data + rofs;
    const uchar *lptr, *lptr_sub, *rptr;
    short* dptr = (short*)disp.data;
    int sstep = (int)left.step;
    int dstep = (int)(disp.step/sizeof(dptr[0]));
    // Size in bytes of one column's cost buffer: one uchar per row per disparity.
    int cstep = (height+dy0+dy1)*ndisp;
    // Dummy sink used when no cost output was requested (coststep is 0 then,
    // so every write lands on costbuf).
    int costbuf = 0;
    int coststep = cost.data ? (int)(cost.step/sizeof(costbuf)) : 0;
    const int TABSZ = 256;
    uchar tab[TABSZ];

    // Carve the scratch buffer into 16-byte aligned regions. sad is offset by
    // one element and followed by ndisp+1 slots because sad[-1] and sad[ndisp]
    // are written during sub-pixel refinement. hsad0 and htext are offset so
    // that negative row indices (down to -dy0 and -wsz2-1) are addressable.
    sad = (int*)alignPtr(buf + sizeof(sad[0]), ALIGN);
    hsad0 = (int*)alignPtr(sad + ndisp + 1 + dy0*ndisp, ALIGN);
    htext = (int*)alignPtr((int*)(hsad0 + (height+dy1)*ndisp) + wsz2 + 2, ALIGN);
    cbuf0 = (uchar*)alignPtr((uchar*)(htext + height + wsz2 + 2) + dy0*ndisp, ALIGN);

    // Texture look-up: distance of a pixel value from the pre-filter cap.
    for( x = 0; x < TABSZ; x++ )
        tab[x] = (uchar)std::abs(x - ftzero);

    // initialize buffers
    memset( hsad0 - dy0*ndisp, 0, (height + dy0 + dy1)*ndisp*sizeof(hsad0[0]) );
    memset( htext - wsz2 - 1, 0, (height + wsz + 1)*sizeof(htext[0]) );

    // Prime the horizontal sums with columns -wsz2-1 .. wsz2-1 (column indices
    // are clamped into the image), storing each column's costs in its own
    // cbuf slot so they can be subtracted again later.
    for( x = -wsz2-1; x < wsz2; x++ )
    {
        hsad = hsad0 - dy0*ndisp; cbuf = cbuf0 + (x + wsz2 + 1)*cstep - dy0*ndisp;
        lptr = lptr0 + std::min(std::max(x, -lofs), width-lofs-1) - dy0*sstep;
        rptr = rptr0 + std::min(std::max(x, -rofs), width-rofs-1) - dy0*sstep;

        for( y = -dy0; y < height + dy1; y++, hsad += ndisp, cbuf += ndisp, lptr += sstep, rptr += sstep )
        {
            int lval = lptr[0];
            for( d = 0; d < ndisp; d++ )
            {
                int diff = std::abs(lval - rptr[d]);
                cbuf[d] = (uchar)diff;
                hsad[d] = (int)(hsad[d] + diff);
            }
            htext[y] += tab[lval];
        }
    }

    // initialize the left and right borders of the disparity map
    for( y = 0; y < height; y++ )
    {
        for( x = 0; x < lofs; x++ )
            dptr[y*dstep + x] = FILTERED;
        for( x = lofs + width1; x < width; x++ )
            dptr[y*dstep + x] = FILTERED;
    }
    dptr += lofs;

    // Main sweep over the columns that get a disparity.
    for( x = 0; x < width1; x++, dptr++ )
    {
        int* costptr = cost.data ? (int*)cost.data + lofs + x : &costbuf;
        // x0: column leaving the window, x1: column entering it.
        int x0 = x - wsz2 - 1, x1 = x + wsz2;
        // Ring-buffer slots holding the costs of the leaving column (read)
        // and receiving the costs of the entering column (written).
        const uchar* cbuf_sub = cbuf0 + ((x0 + wsz2 + 1) % (wsz + 1))*cstep - dy0*ndisp;
        cbuf = cbuf0 + ((x1 + wsz2 + 1) % (wsz + 1))*cstep - dy0*ndisp;
        hsad = hsad0 - dy0*ndisp;
        lptr_sub = lptr0 + MIN(MAX(x0, -lofs), width-1-lofs) - dy0*sstep;
        lptr = lptr0 + MIN(MAX(x1, -lofs), width-1-lofs) - dy0*sstep;
        rptr = rptr0 + MIN(MAX(x1, -rofs), width-1-rofs) - dy0*sstep;

        // Slide every row's horizontal sums one column to the right:
        // add the entering column's cost, subtract the leaving column's.
        for( y = -dy0; y < height + dy1; y++, cbuf += ndisp, cbuf_sub += ndisp,
             hsad += ndisp, lptr += sstep, lptr_sub += sstep, rptr += sstep )
        {
            int lval = lptr[0];
            for( d = 0; d < ndisp; d++ )
            {
                int diff = std::abs(lval - rptr[d]);
                cbuf[d] = (uchar)diff;
                hsad[d] = hsad[d] + diff - cbuf_sub[d];
            }
            htext[y] += tab[lval] - tab[lptr_sub[0]];
        }

        // fill borders
        // (replicate the last / first computed texture rows into the rows
        // beyond them that the vertical window will read)
        for( y = dy1; y <= wsz2; y++ )
            htext[height+y] = htext[height+dy1-1];
        for( y = -wsz2-1; y < -dy0; y++ )
            htext[y] = htext[-dy0];

        // initialize sums
        // The topmost stored row (-dy0) is counted (wsz2 + 2 - dy0) times,
        // apparently standing in for the rows above it that are not stored;
        // rows 1-dy0 .. wsz2-1 are then added once each.
        for( d = 0; d < ndisp; d++ )
            sad[d] = (int)(hsad0[d-ndisp*dy0]*(wsz2 + 2 - dy0));

        hsad = hsad0 + (1 - dy0)*ndisp;
        for( y = 1 - dy0; y < wsz2; y++, hsad += ndisp )
            for( d = 0; d < ndisp; d++ )
                sad[d] = (int)(sad[d] + hsad[d]);
        // Texture sum over rows -wsz2-1 .. wsz2-1, slid down per row below.
        int tsum = 0;
        for( y = -wsz2-1; y < wsz2; y++ )
            tsum += htext[y];

        // finally, start the real processing
        for( y = 0; y < height; y++ )
        {
            int minsad = INT_MAX, mind = -1;
            // Row entering the vertical window (clamped to the last stored
            // row) and row leaving it (clamped to the first stored row).
            hsad = hsad0 + MIN(y + wsz2, height+dy1-1)*ndisp;
            hsad_sub = hsad0 + MAX(y - wsz2 - 1, -dy0)*ndisp;

            // Slide the window sums down one row and track the minimum.
            for( d = 0; d < ndisp; d++ )
            {
                int currsad = sad[d] + hsad[d] - hsad_sub[d];
                sad[d] = currsad;
                if( currsad < minsad )
                {
                    minsad = currsad;
                    mind = d;
                }
            }
            // Reject pixels whose window texture sum is below the threshold.
            tsum += htext[y + wsz2] - htext[y - wsz2 - 1];
            if( tsum < textureThreshold )
            {
                dptr[y*dstep] = FILTERED;
                continue;
            }

            // Uniqueness test: reject the match if any disparity other than
            // the best one and its two neighbours has a SAD within
            // uniquenessRatio percent of the minimum.
            if( uniquenessRatio > 0 )
            {
                int thresh = minsad + (minsad * uniquenessRatio/100);
                for( d = 0; d < ndisp; d++ )
                {
                    if( sad[d] <= thresh && (d < mind-1 || d > mind+1))
                        break;
                }
                if( d < ndisp )
                {
                    dptr[y*dstep] = FILTERED;
                    continue;
                }
            }

            {
            // Mirror the end values so that mind-1 / mind+1 are always
            // readable, then refine the disparity using the SADs of the two
            // neighbouring disparities. The index d is reversed with respect
            // to the stored disparity (ndisp - mind - 1 + mindisp). The value
            // is scaled by 256 and shifted right by 4, i.e. stored in 1/16
            // disparity units.
            sad[-1] = sad[1];
            sad[ndisp] = sad[ndisp-2];
            int p = sad[mind+1], n = sad[mind-1];
            d = p + n - 2*sad[mind] + std::abs(p - n);
            dptr[y*dstep] = (short)(((ndisp - mind - 1 + mindisp)*256 + (d != 0 ? (p-n)*256/d : 0) + 15) >> 4);
            costptr[y*coststep] = sad[mind];
            }
        }
    }
}

最佳答案

OpenCV 的许多算法都是并行执行的；parallel_for/parallel_do 是对 TBB、PPL 和 OpenMP 等后端的统一抽象。

原始图像被分割为多个子区域，并对每个子区域分别执行 findStereoCorrespondenceBM()。从上面的函数接口就可以看出这是可行的：cv::Mat 可以作为子图像的视图使用，而无需复制实际的像素数据。您可以在程序运行时观察各个处理器核心的占用情况(例如使用 Windows 上的 Process Explorer 或 Unix 上的 top)来验证这一点。

(最初由 Hauke Heibel 作为评论发表)

关于c++ - 对于简单的 StereoBM 算法，为什么我的代码比 opencv 慢得多？，我们在Stack Overflow上找到一个类似的问题： https://stackoverflow.com/questions/29878761/

文章推荐： c++ - STL 算法是否针对速度进行了优化？

文章推荐： c++ - C11 & C++11 扩展和通用字符转义

c - 我尝试理解 [c 代码 -> 汇编] 代码
我尝试理解[c代码 -> 汇编]代码 void node::Check( data & _data1, vector& _data2) { -> push ebp -> mov ebp,esp ->
c# - 在当前表单(代码)的上下文中从字符串动态运行 C# 代码
我需要在当前表单(代码)的上下文中运行文本文件中的代码。其中一项要求是让代码创建新控件并将其添加到当前窗体。例如，在Form1.cs中: using System.Windows.Forms; ..
c# - c++代码(malloc方法)到c#代码
我有此 C++ 代码并将其转换为 C# (.net Framework 4) 代码。有没有人给我一些关于 malloc、free 和 sprintf 方法的提示？ int monate = ee; d
C 代码，简单的 Web 服务器(代码 OK)
我的网络服务器代码有问题 #include #include #include #include #include #include #include int
html - 将特定列表元素置于斜体的 CSS 代码(不更改 html 代码)
给定以下 html 代码，将列表中的第三个元素(即“美丽”一词)以斜体显示的 CSS 代码是什么？当然，我可以给这个元素一个 id 或一个 class，但 html 代码必须保持不变。谢谢
javascript - 是否有一些库可用于 IQR 代码(不是 QR 代码)？
关闭。这个问题不符合Stack Overflow guidelines .它目前不接受答案。我们不允许提问寻求书籍、工具、软件库等的推荐。您可以编辑问题，以便用事实和引用来回答。关闭 7 年前。
macros - 在 Inno Setup [代码] 部分将宏扩展为 Pascal 代码
我试图制作一个宏来避免重复代码和注释。我试过这个: #define GrowOnPage(any Page, any Component) Component.Width := Page.Surfa
c# - 我正在尝试将我的旧 c++ 代码 "translate"转换为 c# 代码
我正在尝试将我的旧 C++ 代码“翻译”成头条新闻所暗示的 C# 代码。问题是我是 C# 中的新手，并不是所有的东西都像 C++ 中那样。在 C++ 中这些解决方案运行良好，但在 C# 中只是不能。我
r - 让 Visual Studio 代码(自动)格式化 R 代码
在 Windows 10 上工作，R 语言的格式化程序似乎没有在 Visual Studio Code 中完成它的工作。我试过R support for Visual Studio Code和 R-T
dynamic - 是否可以在 Python 脚本中生成和执行 Python 代码？ [动态 Python 代码]
我正在处理一些报告(计数)，我必须获取不同参数的计数。非常简单但乏味。一个参数的示例查询: qCountsEmployee = ( "select count(*) from %s wher
ios - 随机和偶然的网络错误(NSURLErrorDomain 代码=-1001 和 NSURLErrorDomain 代码=-1005)
最近几天我尝试从 d00m 调试网络错误。我开始用尽想法/线索，我希望其他 SO 用户拥有可能有用的宝贵经验。我希望能够提供所有相关信息，但我个人无法控制服务器环境。整个事情始于用户注意到我们应用程
javascript - visual studio 代码 intellisense 不适用于 dojo amd 代码
我有一个 app.js 文件，其中包含如下 dojo amd 模式代码: require(["dojo/dom", ..], function(dom){ dom.byId('someId').i
cuda - 'code=sm_X' 是否仅嵌入二进制(cubin)代码，或 PTX 代码，或两者？
我对“-gencode”语句中的“code=sm_X”选项有点困惑。一个例子:NVCC 编译器选项有什么作用 -gencode arch=compute_13,code=sm_13 嵌入库中？只有
javascript - 在 Javascript 下拉列表中添加 HTML 代码，Javascript 不评估 HTML 代码
我为我的表格使用 X-editable 框架。但是我有一些问题。 $(document).ready(function() { $('.access').editable({
python - 在 linux 中运行 flask/python 代码？基本 flask 代码
我一直在通过本教程学习 flask/python http://blog.miguelgrinberg.com/post/the-flask-mega-tutorial-part-i-hello-wo
vim - G 代码 M 代码 VI 和 EMACS 的 CNC 语法
我想将 Vim 和 EMACS 用于 CNC、G 代码和 M 代码。 Vim 或 EMACS 是否有任何语法或模式来处理这种类型的代码？最佳答案一些快速搜索使我找到了 this vim 和 thi
iphone - 寻找关于将 Pre-Storyboard 代码 (XCode4) 移动到 Storyboard 代码 (XCode5) 的教程
关闭。这个问题不符合Stack Overflow guidelines .它目前不接受答案。想改进这个问题？更新问题，使其成为 on-topic对于堆栈溢出。 7年前关闭。 Improve this
vim - 如何让 Vim 理解 *.md 文件包含 Markdown 代码，而不是 Modula-2 代码？
这个问题在这里已经有了答案: Enabling markdown highlighting in Vim (5 个回答) 6年前关闭。当我在 Vim 中编辑包含 Markdown 代码的 READM
ios - 错误域=AVFoundationErrorDomain 代码=-11800 "The operation could not be completed"{错误域=NSOSStatusErrorDomain 代码=-16976 "(null)"}
我正在 Swift3 iOS 中开发视频应用程序。基本上我必须将视频 Assets 和音频与淡入淡出效果合并为一个并将其保存到 iPhone 画廊。为此，我使用以下方法: private func d
jenkins - 无法通过 Jenkins 管道作业的 jenkinsfile 中的 Groovy 代码(或 java 代码)创建文件
pipeline { agent any stages { stage('Build') { steps { e

可可西里

个人简介

我是一名优秀的程序员,十分优秀！

作者热门文章

滴滴打车优惠券免费领取

全站热门文章

首页

博学

6Ren·AI

商城

c++ - 对于简单的 StereoBM 算法，为什么我的代码比 opencv 慢得多？