gpt4 book ai didi

c# - 比较两个不同长度的整数数组时提高性能

转载 作者:太空狗 更新时间:2023-10-30 00:53:47 26 4
gpt4 key购买 nike

我有一个瓶颈(或者至少是我认为我可以做得更好的领域)这个比较器,它基本上是一个序数字符串比较器,但对整数(ushort,虽然我认为这不重要)数组有效。

数组可以有不同的长度,但长度只有在元素 [0..n] 匹配时才有意义,其中 n 是最短数组的长度。在这种情况下,较长的数组被认为“更大”。

1 2 3 < 1 2 4

1 2 3 5 < 1 2 4

1 2 5 3 > 1 2 4

1 2 3 4 > 1 2 3

    public int Compare(ushort[] x, ushort[] y)
{
int pos = 0;
int len = Math.Min(x.Length, y.Length);
while (pos < len && x[pos] == y[pos])
pos++;

return pos < len ?
x[pos].CompareTo(y[pos]) :
x.Length.CompareTo(y.Length);

}

关于如何优化它有什么想法吗?

更新

回应一位评论者关于我在这里实际做的事情:我意识到很久以前我问过一个与此相关的问题,它准确地显示了我在上下文中所做的事情。唯一的主要区别是我现在使用 ushort 数组而不是字符串作为键,因为它更紧凑。

使用整个路径作为键让我可以使用部分键从排序集中获取 View ,从而为子集查询提供高性能。我试图在构建索引时提高性能。 Data structure for indexed searches of subsets

顺便说一下,到目前为止,我对这里的回答印象深刻,多年来我就 SO 提出了很多问题,但这无疑是我见过的最周到、最有趣的答案集。我还不确定我的具体问题的正确答案是什么(即 - 短数组的数百万次比较),但他们中的每一个都教会了我一些我不知道的东西。

最佳答案

这是我想出的,我使用您的代码和一些并行代码的组合测试了大约 1600 万 (2^24)。

public int CompareParallel(ushort[]x, ushort[] y, int len, int segLen)
{
int compareArrLen = ( len / segLen ) + 1;
int [ ] compareArr = new int [ compareArrLen ];
Parallel.For ( 0 , compareArrLen ,
new Action<int , ParallelLoopState> ( ( i , state ) =>
{
if ( state.LowestBreakIteration.HasValue
&& state.LowestBreakIteration.Value < i )
return;
int segEnd = ( i + 1 ) * segLen;
int k = len < segEnd ? len : segEnd;
for ( int j = i * segLen ; j < k ; j++ )
if ( x [ j ] != y [ j ] )
{
compareArr [ i ] = ( x [ j ].CompareTo ( y [ j ] ) );
state.Break ( );
return;
}
} ) );
int r = compareArrLen - 1;
while ( r >= 0 )
{
if ( compareArr [ r ] != 0 )
return compareArr [ r ];
r--;
}
return x.Length.CompareTo ( y.Length );
}

public int CompareSequential ( ushort [ ] x , ushort [ ] y, int len )
{
int pos = 0;
while ( pos < len && x [ pos ] == y [ pos ] )
pos++;

return pos < len ?
x [ pos ].CompareTo ( y [ pos ] ) :
x.Length.CompareTo ( y.Length );

}

public int Compare( ushort [ ] x , ushort [ ] y )
{
//determined through testing to be the best on my machine
const int cutOff = 4096;
int len = x.Length < y.Length ? x.Length : y.Length;
//check if len is above a specific threshold
//and if first and a number in the middle are equal
//chose equal because we know that there is a chance that more
//then 50% of the list is equal, which would make the overhead
//worth the effort
if ( len > cutOff && x [ len - 1 ] == y [ len - 1 ]
&& x [ len/2 ] == y [ len/2 ] )
{
//segment length was determined to be best through testing
//at around 8% of the size of the array seemed to have the
//on my machine
return CompareParallel ( x , y , len , (len / 100)*8 );
}
return CompareSequential ( x , y, len );
}

这是我写的测试:

class Program
{
[Flags]
private enum InfoLevel:byte
{
Detail=0x01, Summary=0x02
}

private static InfoLevel logLevel = InfoLevel.Summary;

private static void LogDetail ( string content )
{
LogInfo ( InfoLevel.Detail,content );
}

private static void LogSummary ( string content )
{
LogInfo ( InfoLevel.Summary , content );
}

private static void LogInfo ( InfoLevel level , string content )
{
if ( ( level & logLevel ) == level )
Console.WriteLine ( content );
}

private static void LogInfo ( InfoLevel level , string format,
params object[] arg )
{
if ( ( level & logLevel ) == level )
Console.WriteLine ( format:format, arg:arg );
}

private static void LogDetail ( string format , params object [ ] arg )
{
LogInfo ( InfoLevel.Detail , format, arg );
}

private static void LogSummary ( string format , params object [ ] arg )
{
LogInfo ( InfoLevel.Summary , format, arg );
}

const string _randTestResultHeader = "\r\nRandom Array Content\r\n";
const string _equalArrayResultHeader = "Only Length Different\r\n\r\n";
const string _summaryTestResultsHeader =
"Size\t\tOrig Elps\tPara Elps\tComp Elps\r\n";
const string _summaryBodyContent =
"{0}\t\t{1:0.0000}\t\t{2:0.0000}\t\t{3:0.00000}\r\n";

static void Main ( string [ ] args )
{
Console.SetOut(new StreamWriter(File.Create("out.txt")));

int segLen = 0;
int segPercent = 7;
Console.WriteLine ( "Algorithm Test, Time results in milliseconds" );
for ( ; segPercent < 13; segPercent ++ )
{
Console.WriteLine (
"Test Run with parallel Dynamic segment size at {0}%"
+" of Array Size (Comp always at 8%)\r\n" , segPercent);

StringBuilder _aggrRandResults = new StringBuilder ( );
StringBuilder _aggrEqualResults = new StringBuilder ( );

_aggrRandResults.Append ( _randTestResultHeader );
_aggrEqualResults.Append ( _equalArrayResultHeader );

_aggrEqualResults.Append ( _summaryTestResultsHeader );
_aggrRandResults.Append ( _summaryTestResultsHeader );


for ( int i = 10 ; i < 25 ; i++ )
{
int baseLen = ( int ) Math.Pow ( 2 , i );
segLen = ( baseLen / 100 ) * segPercent;

var testName = "Equal Length ";
var equalTestAverage = RandomRunTest ( testName , baseLen ,
baseLen, segLen );
testName = "Left Side Larger";
var lslargerTestAverage=RandomRunTest(testName,baseLen+10,
baseLen, segLen );
testName = "Right Side Larger";
var rslargerTestAverage = RandomRunTest ( testName , baseLen ,
baseLen + 10, segLen );

double [ ] completelyRandomTestAvg = new double [ 3 ];
for ( int l = 0 ; l < completelyRandomTestAvg.Length ; l++ )
completelyRandomTestAvg [ l ] = ( equalTestAverage [ l ] +
lslargerTestAverage [ l ] +
rslargerTestAverage [ l ] ) / 3;

LogDetail ( "\r\nRandom Test Results:" );
LogDetail ("Original Composite Test Average: {0}" ,
completelyRandomTestAvg [ 0 ] );
LogDetail ( "Parallel Composite Test Average: {0}" ,
completelyRandomTestAvg [ 1 ] );

_aggrRandResults.AppendFormat ( _summaryBodyContent ,
baseLen ,
completelyRandomTestAvg [ 0 ] ,
completelyRandomTestAvg [ 1 ] ,
completelyRandomTestAvg [ 2 ]);

testName = "Equal Len And Values";
var equalEqualTest = EqualTill ( testName , baseLen ,
baseLen, segLen );

testName = "LHS Larger";
var equalLHSLargerTest = EqualTill ( testName , baseLen + 10 ,
baseLen, segLen );

testName = "RHS Larger";
var equalRHSLargerTest = EqualTill ( testName , baseLen ,
baseLen + 10, segLen );

double [ ] mostlyEqualTestAvg = new double [ 3 ];
for ( int l = 0 ; l < mostlyEqualTestAvg.Length ; l++ )
mostlyEqualTestAvg [ l ] = ( ( equalEqualTest [ l ] +
equalLHSLargerTest [ l ] +
equalRHSLargerTest [ l ] ) / 3 );

LogDetail( "\r\nLength Different Test Results" );
LogDetail( "Original Composite Test Average: {0}" ,
mostlyEqualTestAvg [ 0 ] );
LogDetail( "Parallel Composite Test Average: {0}" ,
mostlyEqualTestAvg [ 1 ] );

_aggrEqualResults.AppendFormat ( _summaryBodyContent ,
baseLen ,
mostlyEqualTestAvg [ 0 ] ,
mostlyEqualTestAvg [ 1 ] ,
mostlyEqualTestAvg [ 2 ]);
}

LogSummary ( _aggrRandResults.ToString() + "\r\n");
LogSummary ( _aggrEqualResults.ToString()+ "\r\n");

}
Console.Out.Flush ( );
}


private const string _testBody =
"\r\n\tOriginal:: Result:{0}, Elapsed:{1}"
+"\r\n\tParallel:: Result:{2}, Elapsed:{3}"
+"\r\n\tComposite:: Result:{4}, Elapsed:{5}";
private const string _testHeader =
"\r\nTesting {0}, Array Lengths: {1}, {2}";
public static double[] RandomRunTest(string testName, int shortArr1Len,
int shortArr2Len, int parallelSegLen)
{

var shortArr1 = new ushort [ shortArr1Len ];
var shortArr2 = new ushort [ shortArr2Len ];
double [ ] avgTimes = new double [ 3 ];

LogDetail ( _testHeader , testName , shortArr1Len , shortArr2Len ) ;
for ( int i = 0 ; i < 10 ; i++ )
{
int arrlen1 = shortArr1.Length , arrlen2 = shortArr2.Length;

double[] currResults = new double [ 3 ];

FillCompareArray ( shortArr1 , shortArr1.Length );
FillCompareArray ( shortArr2 , shortArr2.Length );

var sw = new Stopwatch ( );

//Force Garbage Collection
//to avoid having it effect
//the test results this way
//test 2 may have to garbage
//collect due to running second
GC.Collect ( );
sw.Start ( );
int origResult = Compare ( shortArr1 , shortArr2 );
sw.Stop ( );
currResults[0] = sw.Elapsed.TotalMilliseconds;
sw.Reset ( );

GC.Collect ( );
sw.Start ( );
int parallelResult = CompareParallelOnly ( shortArr1 , shortArr2,
parallelSegLen );
sw.Stop ( );
currResults [ 1 ] = sw.Elapsed.TotalMilliseconds;
sw.Reset ( );

GC.Collect ( );
sw.Start ( );
int compositeResults = CompareComposite ( shortArr1 , shortArr2 );
sw.Stop ( );
currResults [ 2 ] = sw.Elapsed.TotalMilliseconds;

LogDetail ( _testBody, origResult , currResults[0] ,
parallelResult , currResults[1],
compositeResults, currResults[2]);

for ( int l = 0 ; l < currResults.Length ; l++ )
avgTimes [ l ] = ( ( avgTimes[l]*i)+currResults[l])
/ ( i + 1 );
}
LogDetail ( "\r\nAverage Run Time Original: {0}" , avgTimes[0]);
LogDetail ( "Average Run Time Parallel: {0}" , avgTimes[1]);
LogDetail ( "Average Run Time Composite: {0}" , avgTimes [ 2 ] );

return avgTimes;
}

public static double [ ] EqualTill ( string testName, int shortArr1Len ,
int shortArr2Len, int parallelSegLen)
{

const string _testHeader =
"\r\nTesting When Array Difference is "
+"Only Length({0}), Array Lengths: {1}, {2}";

int baseLen = shortArr1Len > shortArr2Len
? shortArr2Len : shortArr1Len;

var shortArr1 = new ushort [ shortArr1Len ];
var shortArr2 = new ushort [ shortArr2Len ];
double [ ] avgTimes = new double [ 3 ];

LogDetail( _testHeader , testName , shortArr1Len , shortArr2Len );
for ( int i = 0 ; i < 10 ; i++ )
{

FillCompareArray ( shortArr1 , shortArr1Len);
Array.Copy ( shortArr1 , shortArr2, baseLen );
double [ ] currElapsedTime = new double [ 3 ];
var sw = new Stopwatch ( );
//See previous explaination
GC.Collect ( );
sw.Start ( );
int origResult = Compare ( shortArr1 , shortArr2 );
sw.Stop ( );
currElapsedTime[0] = sw.Elapsed.TotalMilliseconds;
sw.Reset ( );

GC.Collect ( );
sw.Start ( );
int parallelResult = CompareParallelOnly ( shortArr1, shortArr2,
parallelSegLen );
sw.Stop ( );
currElapsedTime[1] = sw.Elapsed.TotalMilliseconds;
sw.Reset ( );

GC.Collect ( );
sw.Start ( );
var compositeResult = CompareComposite ( shortArr1 , shortArr2 );
sw.Stop ( );
currElapsedTime [ 2 ] = sw.Elapsed.TotalMilliseconds;

LogDetail ( _testBody , origResult , currElapsedTime[0] ,
parallelResult , currElapsedTime[1],
compositeResult,currElapsedTime[2]);

for ( int l = 0 ; l < currElapsedTime.Length ; l++ )
avgTimes [ l ] = ( ( avgTimes [ l ] * i )
+ currElapsedTime[l])/(i + 1);
}
LogDetail ( "\r\nAverage Run Time Original: {0}" , avgTimes [ 0 ] );
LogDetail ( "Average Run Time Parallel: {0}" , avgTimes [ 1 ] );
LogDetail ( "Average Run Time Composite: {0}" , avgTimes [ 2 ] );
return avgTimes;
}


static Random rand = new Random ( );
public static void FillCompareArray ( ushort[] compareArray, int length )
{
var retVals = new byte[length];
( rand ).NextBytes ( retVals );
Array.Copy ( retVals , compareArray , length);
}

public static int CompareParallelOnly ( ushort [ ] x , ushort[] y,
int segLen )
{
int len = x.Length<y.Length ? x.Length:y.Length;
int compareArrLen = (len/segLen)+1;
int[] compareArr = new int [ compareArrLen ];
Parallel.For ( 0 , compareArrLen ,
new Action<int , ParallelLoopState> ( ( i , state ) =>
{
if ( state.LowestBreakIteration.HasValue
&& state.LowestBreakIteration.Value < i )
return;
int segEnd = ( i + 1 ) * segLen;
int k = len<segEnd?len:segEnd;

for ( int j = i * segLen ; j < k ; j++ )
if ( x [ j ] != y [ j ] )
{
compareArr [ i ] = ( x [ j ].CompareTo ( y [ j ] ) );
state.Break ( );
return;
}
} ) );
int r=compareArrLen-1;
while ( r >= 0 )
{
if ( compareArr [ r ] != 0 )
return compareArr [ r ];
r--;
}
return x.Length.CompareTo ( y.Length );
}

public static int Compare ( ushort [ ] x , ushort [ ] y )
{
int pos = 0;
int len = Math.Min ( x.Length , y.Length );
while ( pos < len && x [ pos ] == y [ pos ] )
pos++;

return pos < len ?
x [ pos ].CompareTo ( y [ pos ] ) :
x.Length.CompareTo ( y.Length );

}

public static int CompareParallel ( ushort[] x, ushort[] y, int len,
int segLen )
{
int compareArrLen = ( len / segLen ) + 1;
int [ ] compareArr = new int [ compareArrLen ];
Parallel.For ( 0 , compareArrLen ,
new Action<int , ParallelLoopState> ( ( i , state ) =>
{
if ( state.LowestBreakIteration.HasValue
&& state.LowestBreakIteration.Value < i )
return;
int segEnd = ( i + 1 ) * segLen;
int k = len < segEnd ? len : segEnd;
for ( int j = i * segLen ; j < k ; j++ )
if ( x [ j ] != y [ j ] )
{
compareArr [ i ] = ( x [ j ].CompareTo ( y [ j ] ) );
state.Break ( );
return;
}
} ) );
int r = compareArrLen - 1;
while ( r >= 0 )
{
if ( compareArr [ r ] != 0 )
return compareArr [ r ];
r--;
}
return x.Length.CompareTo ( y.Length );
}

public static int CompareSequential(ushort [ ] x , ushort [ ] y, int len)
{
int pos = 0;
while ( pos < len && x [ pos ] == y [ pos ] )
pos++;

return pos < len ?
x [ pos ].CompareTo ( y [ pos ] ) :
x.Length.CompareTo ( y.Length );

}

public static int CompareComposite ( ushort [ ] x , ushort [ ] y )
{
const int cutOff = 4096;
int len = x.Length < y.Length ? x.Length : y.Length;

if ( len > cutOff && x [ len - 1 ] == y [ len - 1 ]
&& x [ len/2 ] == y [ len/2 ] )
return CompareParallel ( x , y , len , (len / 100)*8 );

return CompareSequential ( x , y, len );
}
}

注意:

确保您使用优化的代码进行构建,当我不包括这一步时,结果非常不同,它使并行代码看起来比实际的改进要大得多。

我得到的结果是,对于非常长的相同数字集,执行时间减少了大约 33%。它仍然随着输入的增加而线性增长,但速度较慢。对于小数据集(在我的机器上小于 4092),它的启动速度也较慢,但通常花费的时间足够小(在我的机器上为 .001 毫秒),如果你确实得到了,那么使用它是值得的一个几乎相等的大数组。

关于c# - 比较两个不同长度的整数数组时提高性能,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/15117859/

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com