gpt4 book ai didi

c# - 空间域卷积代码的性能

转载 作者:行者123 更新时间:2023-11-30 20:25:18 25 4
gpt4 key购买 nike

参见: Image convolution in spatial domain

下面的代码实现了空间域的线性卷积。

    /// <summary>
    /// Convolves an already padded image with a mask in the spatial domain.
    /// Every output sample is <c>Sum(...) / factor + offset</c>, clamped to the range [0, 1],
    /// where <c>factor</c> is the value returned by <c>GetFactor(mask)</c>.
    /// </summary>
    /// <param name="paddedImage">Padded input samples, indexed as [x, y].</param>
    /// <param name="mask">Convolution mask, indexed as [x, y].</param>
    /// <param name="offset">Value added to every normalized sample before clamping.</param>
    /// <returns>
    /// A new array whose dimensions are the padded image dimensions minus the mask dimensions.
    /// </returns>
    public static double[,] ConvolutionSpatial(double[,] paddedImage, double[,] mask, double offset)
    {
        // Output samples are clamped to this range.
        const double min = 0.0;
        const double max = 1.0;

        // Normalization divisor; computed once because it depends only on the mask.
        double factor = GetFactor(mask);

        int paddedImageWidth = paddedImage.GetLength(0);
        int paddedImageHeight = paddedImage.GetLength(1);

        int maskWidth = mask.GetLength(0);
        int maskHeight = mask.GetLength(1);

        int imageWidth = paddedImageWidth - maskWidth;
        int imageHeight = paddedImageHeight - maskHeight;

        double[,] convolve = new double[imageWidth, imageHeight];

        for (int y = 0; y < imageHeight; y++)
        {
            for (int x = 0; x < imageWidth; x++)
            {
                // (x, y) is the starting corner of the window handed to Sum.
                double sum = Sum(paddedImage, mask, x, y);

                convolve[x, y] = Math.Min(Math.Max((sum / factor) + offset, min), max);
            }
        }

        return convolve;
    }

/// <summary>
/// Accumulates the products of a window of <paramref name="paddedImage1"/> and the
/// mask read in reverse order along both axes (the mask is flipped).
/// </summary>
/// <param name="paddedImage1">Source samples, indexed as [x, y].</param>
/// <param name="mask1">Mask, indexed as [x, y]; its dimensions define the window size.</param>
/// <param name="startX">First x index of the window inside the image.</param>
/// <param name="startY">First y index of the window inside the image.</param>
/// <returns>The sum of image-sample times flipped-mask-sample over the window.</returns>
public static double Sum(double[,] paddedImage1, double[,] mask1, int startX, int startY)
{
    int kernelWidth = mask1.GetLength(0);
    int kernelHeight = mask1.GetLength(1);

    // Highest valid mask indices; the mask is walked backwards from these.
    int lastMaskX = kernelWidth - 1;
    int lastMaskY = kernelHeight - 1;

    double accumulator = 0;

    for (int dy = 0; dy < kernelHeight; dy++)
    {
        int imageY = startY + dy;
        int maskY = lastMaskY - dy;

        for (int dx = 0; dx < kernelWidth; dx++)
        {
            double sample = paddedImage1[startX + dx, imageY];
            double weight = mask1[lastMaskX - dx, maskY];
            accumulator += sample * weight;
        }
    }

    return accumulator;
}

/// <summary>
/// Adds up every element of <paramref name="kernel"/> to obtain a normalization divisor.
/// </summary>
/// <param name="kernel">Kernel values, indexed as [x, y].</param>
/// <returns>The element total, or 1 when that total equals zero.</returns>
public static double GetFactor(double[,] kernel)
{
    int extentX = kernel.GetLength(0);
    int extentY = kernel.GetLength(1);

    double total = 0.0;

    for (int j = 0; j < extentY; j++)
    {
        for (int i = 0; i < extentX; i++)
        {
            total += kernel[i, j];
        }
    }

    // A zero total is replaced by 1 so the result can be used as a divisor.
    if (total == 0)
    {
        return 1;
    }

    return total;
}

性能如下:

 image-size     kernel-size   time-elapsed  
------------------------------------------
100x100 3x3 13ms
512x512 3x3 291ms
1018x1280 3x3 1687ms
100x100 100x100 4983ms
512x512 512x512 35624394ms
1018x1280 1018x1280 [practically unusable]

我有两个问题:

  1. 这样的性能看起来还算不错吗?
  2. 如果不是,我怎样才能提高性能?

最佳答案

  1. 这取决于您的最终要求。
  2. 显而易见的事情要做:用锯齿状数组 [][] 替换多维数组 [,] 并反转嵌套循环的顺序:

    for (int x = 0; ...; x++)
    {
    for (int y = 0; ...; y++)
    {
    ...
    }
    }

    代替

    for (int y = 0; ...; y++)
    {
    for (int x = 0; ...; x++)
    {
    ...
    }
    }

在第一种情况下,CPU 缓存行的使用效率更高(因为左侧索引表示连续的行),而后一种情况在每次迭代时都会使缓存行无效。

因此,与原始实现相比,具有锯齿状数组和反向循环的代码基准显示 1018x1280 3x3 卷积的性能提高了 2 倍:

BenchmarkDotNet=v0.11.1, OS=Windows 10.0.17134.167 (1803/April2018Update/Redstone4)
Intel Core i7-7700 CPU 3.60GHz (Kaby Lake), 1 CPU, 8 logical and 4 physical cores
Frequency=3515624 Hz, Resolution=284.4445 ns, Timer=TSC
[Host] : .NET Framework 4.7.2 (CLR 4.0.30319.42000), 64bit RyuJIT-v4.7.3131.0
RyuJitX64 : .NET Framework 4.7.2 (CLR 4.0.30319.42000), 64bit RyuJIT-v4.7.3131.0

Job=RyuJitX64 Jit=RyuJit Platform=X64

Method | Mean | Error | StdDev |
------------- |---------:|----------:|----------:|
BenchmarkOld | 61.82 ms | 0.3979 ms | 0.3527 ms |
BenchmarkNew | 26.98 ms | 0.1050 ms | 0.0982 ms |

代码如下:

    /// <summary>
    /// Convolves an already padded image with a mask in the spatial domain, using jagged arrays.
    /// Every output sample is <c>Sum(...) / factor + offset</c>, clamped to the range [0, 1],
    /// where <c>factor</c> is the value returned by <c>GetFactor(mask)</c>.
    /// </summary>
    /// <param name="paddedImage">
    /// Padded input samples, indexed as [x][y]. The height is read from <c>paddedImage[0]</c>,
    /// so at least one inner array must be present.
    /// </param>
    /// <param name="mask">
    /// Convolution mask, indexed as [x][y]. The height is read from <c>mask[0]</c>,
    /// so at least one inner array must be present.
    /// </param>
    /// <param name="offset">Value added to every normalized sample before clamping.</param>
    /// <returns>
    /// A new jagged array whose dimensions are the padded image dimensions minus the mask dimensions.
    /// </returns>
    public static double[][] ConvolutionSpatial(double[][] paddedImage, double[][] mask, double offset)
    {
        // Output samples are clamped to this range.
        const double min = 0.0;
        const double max = 1.0;

        // Normalization divisor; computed once because it depends only on the mask.
        double factor = GetFactor(mask);

        int paddedImageWidth = paddedImage.Length;
        int paddedImageHeight = paddedImage[0].Length;

        int maskWidth = mask.Length;
        int maskHeight = mask[0].Length;

        int imageWidth = paddedImageWidth - maskWidth;
        int imageHeight = paddedImageHeight - maskHeight;

        double[][] convolve = new double[imageWidth][];

        for (int x = 0; x < imageWidth; x++)
        {
            // Keep a direct reference to the inner array being filled so the
            // inner loop does not index the outer array again for every sample.
            double[] line = new double[imageHeight];
            convolve[x] = line;

            for (int y = 0; y < imageHeight; y++)
            {
                // (x, y) is the starting corner of the window handed to Sum.
                double sum = Sum(paddedImage, mask, x, y);
                line[y] = Math.Min(Math.Max((sum / factor) + offset, min), max);
            }
        }

        return convolve;
    }

/// <summary>
/// Accumulates the products of a window of <paramref name="paddedImage1"/> and the
/// mask read in reverse order along both axes (the mask is flipped).
/// </summary>
/// <param name="paddedImage1">Source samples, indexed as [x][y].</param>
/// <param name="mask1">
/// Mask, indexed as [x][y]. The outer length gives the window width; each inner
/// array's own length gives the window height used for that x position.
/// </param>
/// <param name="startX">First x index of the window inside the image.</param>
/// <param name="startY">First y index of the window inside the image.</param>
/// <returns>The sum of image-sample times flipped-mask-sample over the window.</returns>
public static double Sum(double[][] paddedImage1, double[][] mask1, int startX, int startY)
{
    double sum = 0;

    int maskWidth = mask1.Length;

    for (int dx = 0; dx < maskWidth; dx++)
    {
        // The mask is traversed backwards along x; fetch its inner array once
        // per outer iteration instead of re-indexing it for every sample.
        double[] maskLine = mask1[maskWidth - 1 - dx];
        int maskHeight = maskLine.Length;

        if (maskHeight == 0)
        {
            // Nothing to accumulate for this x position, so the image is not touched.
            continue;
        }

        // Inner image array for this x position, also fetched once.
        double[] imageLine = paddedImage1[startX + dx];

        for (int dy = 0; dy < maskHeight; dy++)
        {
            // The mask is traversed backwards along y as well.
            sum += imageLine[startY + dy] * maskLine[maskHeight - 1 - dy];
        }
    }

    return sum;
}

/// <summary>
/// Adds up every element of the jagged <paramref name="kernel"/> to obtain a normalization divisor.
/// </summary>
/// <param name="kernel">Kernel values; inner arrays may have different lengths.</param>
/// <returns>The element total, or 1 when that total equals zero.</returns>
public static double GetFactor(double[][] kernel)
{
    double total = 0.0;

    foreach (double[] line in kernel)
    {
        foreach (double value in line)
        {
            total += value;
        }
    }

    // A zero total is replaced by 1 so the result can be used as a divisor.
    if (total == 0)
    {
        return 1;
    }

    return total;
}

而且我认为它可以通过应用 SIMD 运算得到更多改进。

关于c# - 空间域卷积代码的性能,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/52037025/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com