c++ - 为什么我不是分支预测的受害者？-6ren

c++ - 为什么我不是分支预测的受害者？

转载作者：行者123 更新时间：2023-11-30 02:34:50

我正在编写一个函数来创建高斯滤波器(使用 Armadillo 库)，它可以是 2D 或 3D，具体取决于它接收到的输入的维数。这是代码:

// Builds a Gaussian filter with the same dimensions as `input`.
//
// `input` is copied into `filter`, and every element of `filter` is then
// overwritten; the expression assigned to each element is elided (`...`) in
// this excerpt.
//
// input: container whose n_rows, n_cols and n_elem determine the filter size.
// sigma: not referenced in the visible part of the body; presumably the
//        standard deviation used by the elided expressions — TODO confirm.
template <class ty>
ty gaussianFilter(const ty& input, double sigma)
{
    // Our filter will be initialized to the same size as our input.
    ty filter = ty(input); // Copy constructor.

    uword nRows = filter.n_rows;
    uword nCols = filter.n_cols;
    // NOTE(review): if nRows*nCols is 0 (empty input) this is an integer
    // division by zero — confirm callers never pass an empty container.
    uword nSlic = filter.n_elem / (nRows*nCols); // If 2D, nSlic == 1.

    // Offsets with respect to the middle.
    // The division by 2 is an integer division performed before the cast,
    // so odd sizes are rounded down.
    double rowOffset = static_cast<double>(nRows/2);
    double colOffset = static_cast<double>(nCols/2);
    double sliceOffset = static_cast<double>(nSlic/2);

    // Position of the current element relative to the middle, updated in the
    // loops below; presumably consumed by the elided expressions.
    double x = 0 , y = 0, z = 0;

for (uword rowIndex = 0; rowIndex < nRows; rowIndex++) {
      x = static_cast<double>(rowIndex) - rowOffset;
      for (uword colIndex = 0; colIndex < nCols; colIndex++) {
        y = static_cast<double>(colIndex) - colOffset;
        for (uword sliIndex = 0; sliIndex < nSlic; sliIndex++) {
          z = static_cast<double>(sliIndex) - sliceOffset;
          // nSlic never changes inside the loops, so this condition is
          // loop-invariant. The author relies on the compiler to hoist it,
          // but that is not guaranteed: testing nSlic once outside the loops
          // was measured to be faster.
          // NOTE(review): both branches compute a row-major-style linear
          // index; Armadillo's flat element access is documented as
          // column-major — confirm the intended element is written.
          if (nSlic == 1){ // If 2D, Gauss filter for 2D.
            filter(rowIndex*nCols + colIndex) = ...
          }
          else
          { // Gauss filter for 3D. 
            filter((rowIndex*nCols + colIndex)*nSlic + sliIndex) = ...
          }
       }    
     }
 }

如我们所见，最内层循环中有一个 if 语句，用来检查第三维的大小(nSlic)是否等于 1。nSlic 在函数开头计算出来之后就不会再改变，所以编译器应该足够聪明，能把这个条件分支优化掉，我不应该损失任何性能。

但是...如果我把 if 语句移到循环外面，性能反而会得到提升。

// Variant with the dimensionality test hoisted out of the loops: nSlic is
// checked once, and each branch runs its own copy of the triple loop.
// NOTE(review): in both branches an extra `{` is opened before the
// assignment; as excerpted the braces only balance if the elided `...`
// closes one of them.
if (nSlic == 1)
  { // Gauss filter for 2D.
    for (uword rowIndex = 0; rowIndex < nRows; rowIndex++) {
      x = static_cast<double>(rowIndex) - rowOffset;
      for (uword colIndex = 0; colIndex < nCols; colIndex++) {
        y = static_cast<double>(colIndex) - colOffset;
        // nSlic == 1 in this branch, so this loop body runs exactly once
        // per (row, col) pair with sliIndex == 0.
        for (uword sliIndex = 0; sliIndex < nSlic; sliIndex++) {
          z = static_cast<double>(sliIndex) - sliceOffset;
          {filter(rowIndex*nCols + colIndex) = ...
        }
      } 
    }
  }
else
  { // Gauss filter for 3D (nSlic != 1).
    for (uword rowIndex = 0; rowIndex < nRows; rowIndex++) {
      x = static_cast<double>(rowIndex) - rowOffset;
      for (uword colIndex = 0; colIndex < nCols; colIndex++) {
        y = static_cast<double>(colIndex) - colOffset;
        for (uword sliIndex = 0; sliIndex < nSlic; sliIndex++) {
          z = static_cast<double>(sliIndex) - sliceOffset;
          // Linear index with the slice index varying fastest.
          {filter((rowIndex*nCols + colIndex)*nSlic + sliIndex) = ...                                     
        }
      } 
    }
  }