gpt4 book ai didi

c++ - 对于简单的 StereoBM 算法,为什么我的代码比 opencv 慢得多?

转载 作者:可可西里 更新时间:2023-11-01 17:57:38 27 4
gpt4 key购买 nike

这是我的测试代码,用于实现一个简单的 testBM 算法,没有预过滤。但当窗口尺寸较大时,它需要大约 400 毫秒甚至更多,而 opencv 的 StereoBM(CPU 而非 GPU)需要 20 毫秒。我已经检查了 StereoBM 的来源,但我很难理解它。有谁知道为什么?

下面是我的代码。

void testBM(const Mat &left0, 
const Mat &right0,
Mat &disparity,
int SAD,
int searchRange)
{
int cols = left0.cols;
int rows = left0.rows;
int total = cols*rows;
const uchar* data_left = left0.ptr<uchar>(0);
const uchar* data_right = right0.ptr<uchar>(0);
uchar* data_dm = new uchar[total];
int dbNum = 2 * SAD + 1;
int dNum = dbNum * dbNum;
//x is col index in the dbNum * dbNum window
//y is row index in this window
//z is (x + y * cols).
//I compute them in advance for avoid computing repeatedly.
Point3i *dLocDif = new Point3i[dNum];
for (int i = 0; i < dNum; i++)
{
dLocDif[i] = Point3i(i%dbNum - SAD, i / dbNum - SAD, 0);
dLocDif[i].z = dLocDif[i].x + dLocDif[i].y * cols;
}

//I compute disparity difference for eache search range to avoid
//computing repeatedly.
uchar* dif_ = new uchar[total*searchRange];
for (int _search = 0; _search < searchRange; _search++)
{
int th = _search * total;
for (int i = 0; i < total; i++)
{
int c = i % cols - _search;
if (c < 0) continue;
dif_[i+th] = (uchar)std::abs(data_left[i] - data_right[i-_search]);
}
}
for (int p = 0; p < total; p++)
{
int min = 50 * dNum;
int dm = -256;
int _col = p % cols;
int _row = p / cols;
int th = 0;

//I search for the smallest difference between left and right image
// using def_.
for (int _search = 0; _search < searchRange; _search++, th += total)
{
if (_col + _search > cols) break;
int temp = 0;
for (int i = 0; i < dNum; i++)
{
int _c = _col + dLocDif[i].x;
if (_c >= cols || _c < 0) continue;
int _r = _row + dLocDif[i].y;
if (_r >= rows || _r < 0) continue;
temp += dif_[th + p + dLocDif[i].z];
if (temp > min)
{
break;
}
}
if (temp < min)
{
dm = _search;
min = temp;
}
}
data_dm[p] = dm;
}
disparity = Mat(rows, cols, CV_8UC1, data_dm);
}

这里是opencv中StereoBM的基本源码。初始化后我有点困惑。谁能简单解释一下?

static void
findStereoCorrespondenceBM( const Mat& left, const Mat& right,
Mat& disp, Mat& cost, const CvStereoBMState& state,
uchar* buf, int _dy0, int _dy1 )
{
const int ALIGN = 16;
int x, y, d;
int wsz = state.SADWindowSize, wsz2 = wsz/2;
int dy0 = MIN(_dy0, wsz2+1), dy1 = MIN(_dy1, wsz2+1);
int ndisp = state.numberOfDisparities;
int mindisp = state.minDisparity;
int lofs = MAX(ndisp - 1 + mindisp, 0);
int rofs = -MIN(ndisp - 1 + mindisp, 0);
int width = left.cols, height = left.rows;
int width1 = width - rofs - ndisp + 1;
int ftzero = state.preFilterCap;
int textureThreshold = state.textureThreshold;
int uniquenessRatio = state.uniquenessRatio;
short FILTERED = (short)((mindisp - 1) << DISPARITY_SHIFT);

int *sad, *hsad0, *hsad, *hsad_sub, *htext;
uchar *cbuf0, *cbuf;
const uchar* lptr0 = left.data + lofs;
const uchar* rptr0 = right.data + rofs;
const uchar *lptr, *lptr_sub, *rptr;
short* dptr = (short*)disp.data;
int sstep = (int)left.step;
int dstep = (int)(disp.step/sizeof(dptr[0]));
int cstep = (height+dy0+dy1)*ndisp;
int costbuf = 0;
int coststep = cost.data ? (int)(cost.step/sizeof(costbuf)) : 0;
const int TABSZ = 256;
uchar tab[TABSZ];

sad = (int*)alignPtr(buf + sizeof(sad[0]), ALIGN);
hsad0 = (int*)alignPtr(sad + ndisp + 1 + dy0*ndisp, ALIGN);
htext = (int*)alignPtr((int*)(hsad0 + (height+dy1)*ndisp) + wsz2 + 2, ALIGN);
cbuf0 = (uchar*)alignPtr((uchar*)(htext + height + wsz2 + 2) + dy0*ndisp, ALIGN);

for( x = 0; x < TABSZ; x++ )
tab[x] = (uchar)std::abs(x - ftzero);

// initialize buffers
memset( hsad0 - dy0*ndisp, 0, (height + dy0 + dy1)*ndisp*sizeof(hsad0[0]) );
memset( htext - wsz2 - 1, 0, (height + wsz + 1)*sizeof(htext[0]) );

for( x = -wsz2-1; x < wsz2; x++ )
{
hsad = hsad0 - dy0*ndisp; cbuf = cbuf0 + (x + wsz2 + 1)*cstep - dy0*ndisp;
lptr = lptr0 + std::min(std::max(x, -lofs), width-lofs-1) - dy0*sstep;
rptr = rptr0 + std::min(std::max(x, -rofs), width-rofs-1) - dy0*sstep;

for( y = -dy0; y < height + dy1; y++, hsad += ndisp, cbuf += ndisp, lptr += sstep, rptr += sstep )
{
int lval = lptr[0];
for( d = 0; d < ndisp; d++ )
{
int diff = std::abs(lval - rptr[d]);
cbuf[d] = (uchar)diff;
hsad[d] = (int)(hsad[d] + diff);
}
htext[y] += tab[lval];
}
}

// initialize the left and right borders of the disparity map
for( y = 0; y < height; y++ )
{
for( x = 0; x < lofs; x++ )
dptr[y*dstep + x] = FILTERED;
for( x = lofs + width1; x < width; x++ )
dptr[y*dstep + x] = FILTERED;
}
dptr += lofs;

for( x = 0; x < width1; x++, dptr++ )
{
int* costptr = cost.data ? (int*)cost.data + lofs + x : &costbuf;
int x0 = x - wsz2 - 1, x1 = x + wsz2;
const uchar* cbuf_sub = cbuf0 + ((x0 + wsz2 + 1) % (wsz + 1))*cstep - dy0*ndisp;
cbuf = cbuf0 + ((x1 + wsz2 + 1) % (wsz + 1))*cstep - dy0*ndisp;
hsad = hsad0 - dy0*ndisp;
lptr_sub = lptr0 + MIN(MAX(x0, -lofs), width-1-lofs) - dy0*sstep;
lptr = lptr0 + MIN(MAX(x1, -lofs), width-1-lofs) - dy0*sstep;
rptr = rptr0 + MIN(MAX(x1, -rofs), width-1-rofs) - dy0*sstep;

for( y = -dy0; y < height + dy1; y++, cbuf += ndisp, cbuf_sub += ndisp,
hsad += ndisp, lptr += sstep, lptr_sub += sstep, rptr += sstep )
{
int lval = lptr[0];
for( d = 0; d < ndisp; d++ )
{
int diff = std::abs(lval - rptr[d]);
cbuf[d] = (uchar)diff;
hsad[d] = hsad[d] + diff - cbuf_sub[d];
}
htext[y] += tab[lval] - tab[lptr_sub[0]];
}

// fill borders
for( y = dy1; y <= wsz2; y++ )
htext[height+y] = htext[height+dy1-1];
for( y = -wsz2-1; y < -dy0; y++ )
htext[y] = htext[-dy0];

// initialize sums
for( d = 0; d < ndisp; d++ )
sad[d] = (int)(hsad0[d-ndisp*dy0]*(wsz2 + 2 - dy0));

hsad = hsad0 + (1 - dy0)*ndisp;
for( y = 1 - dy0; y < wsz2; y++, hsad += ndisp )
for( d = 0; d < ndisp; d++ )
sad[d] = (int)(sad[d] + hsad[d]);
int tsum = 0;
for( y = -wsz2-1; y < wsz2; y++ )
tsum += htext[y];

// finally, start the real processing
for( y = 0; y < height; y++ )
{
int minsad = INT_MAX, mind = -1;
hsad = hsad0 + MIN(y + wsz2, height+dy1-1)*ndisp;
hsad_sub = hsad0 + MAX(y - wsz2 - 1, -dy0)*ndisp;

for( d = 0; d < ndisp; d++ )
{
int currsad = sad[d] + hsad[d] - hsad_sub[d];
sad[d] = currsad;
if( currsad < minsad )
{
minsad = currsad;
mind = d;
}
}
tsum += htext[y + wsz2] - htext[y - wsz2 - 1];
if( tsum < textureThreshold )
{
dptr[y*dstep] = FILTERED;
continue;
}

if( uniquenessRatio > 0 )
{
int thresh = minsad + (minsad * uniquenessRatio/100);
for( d = 0; d < ndisp; d++ )
{
if( sad[d] <= thresh && (d < mind-1 || d > mind+1))
break;
}
if( d < ndisp )
{
dptr[y*dstep] = FILTERED;
continue;
}
}

{
sad[-1] = sad[1];
sad[ndisp] = sad[ndisp-2];
int p = sad[mind+1], n = sad[mind-1];
d = p + n - 2*sad[mind] + std::abs(p - n);
dptr[y*dstep] = (short)(((ndisp - mind - 1 + mindisp)*256 + (d != 0 ? (p-n)*256/d : 0) + 15) >> 4);
costptr[y*coststep] = sad[mind];
}
}
}
}

最佳答案

OpenCV 并行执行许多算法; parallel_for/do 抽象 TBB、PPL 和 OpenMP 后端。

原始图像被 segmentation 为多个子区域,并为每个子区域执行 findStereoCorrespondenceBM()。这可以通过我们看到的界面实现,因为 cv::Mat 可以用作子图像的 View ,而无需复制实际像素数据。您可以在程序执行期间通过查看正在使用的处理器(例如,使用 Windows 上的进程浏览器或 Unix 上的 top)来验证这一点。

(最初由 Hauke Heibel 作为评论发表)

关于c++ - 对于简单的 StereoBM 算法,为什么我的代码比 opencv 慢得多?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/29878761/

27 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com