- c - 在位数组中找到第一个零
- linux - Unix 显示有关匹配两种模式之一的文件的信息
- 正则表达式替换多个文件
- linux - 隐藏来自 xtrace 的命令
我有一个瓶颈(或者至少是我认为我可以做得更好的领域)这个比较器,它基本上是一个序数字符串比较器,但对整数(ushort,虽然我认为这不重要)数组有效。
数组可以有不同的长度,但长度只有在元素 [0..n] 匹配时才有意义,其中 n 是最短数组的长度。在这种情况下,较长的数组被认为“更大”。
1 2 3 < 1 2 4
1 2 3 5 < 1 2 4
1 2 5 3 > 1 2 4
1 2 3 4 > 1 2 3
public int Compare(ushort[] x, ushort[] y)
{
int pos = 0;
int len = Math.Min(x.Length, y.Length);
while (pos < len && x[pos] == y[pos])
pos++;
return pos < len ?
x[pos].CompareTo(y[pos]) :
x.Length.CompareTo(y.Length);
}
关于如何优化它有什么想法吗?
更新
回应一位评论者关于我在这里实际做的事情:我意识到很久以前我问过一个与此相关的问题,它准确地显示了我在上下文中所做的事情。唯一的主要区别是我现在使用 ushort
数组而不是字符串作为键,因为它更紧凑。
使用整个路径作为键让我可以使用部分键从排序集中获取 View ,从而为子集查询提供高性能。我试图在构建索引时提高性能。 Data structure for indexed searches of subsets
顺便说一下,到目前为止,我对这里的回答印象深刻,多年来我就 SO 提出了很多问题,但这无疑是我见过的最周到、最有趣的答案集。我还不确定我的具体问题的正确答案是什么(即 - 短数组的数百万次比较),但他们中的每一个都教会了我一些我不知道的东西。
最佳答案
这是我想出的,我使用您的代码和一些并行代码的组合测试了大约 1600 万 (2^24)。
public int CompareParallel(ushort[]x, ushort[] y, int len, int segLen)
{
int compareArrLen = ( len / segLen ) + 1;
int [ ] compareArr = new int [ compareArrLen ];
Parallel.For ( 0 , compareArrLen ,
new Action<int , ParallelLoopState> ( ( i , state ) =>
{
if ( state.LowestBreakIteration.HasValue
&& state.LowestBreakIteration.Value < i )
return;
int segEnd = ( i + 1 ) * segLen;
int k = len < segEnd ? len : segEnd;
for ( int j = i * segLen ; j < k ; j++ )
if ( x [ j ] != y [ j ] )
{
compareArr [ i ] = ( x [ j ].CompareTo ( y [ j ] ) );
state.Break ( );
return;
}
} ) );
int r = compareArrLen - 1;
while ( r >= 0 )
{
if ( compareArr [ r ] != 0 )
return compareArr [ r ];
r--;
}
return x.Length.CompareTo ( y.Length );
}
public int CompareSequential ( ushort [ ] x , ushort [ ] y, int len )
{
int pos = 0;
while ( pos < len && x [ pos ] == y [ pos ] )
pos++;
return pos < len ?
x [ pos ].CompareTo ( y [ pos ] ) :
x.Length.CompareTo ( y.Length );
}
public int Compare( ushort [ ] x , ushort [ ] y )
{
//determined through testing to be the best on my machine
const int cutOff = 4096;
int len = x.Length < y.Length ? x.Length : y.Length;
//check if len is above a specific threshold
//and if first and a number in the middle are equal
//chose equal because we know that there is a chance that more
//then 50% of the list is equal, which would make the overhead
//worth the effort
if ( len > cutOff && x [ len - 1 ] == y [ len - 1 ]
&& x [ len/2 ] == y [ len/2 ] )
{
//segment length was determined to be best through testing
//at around 8% of the size of the array seemed to have the
//on my machine
return CompareParallel ( x , y , len , (len / 100)*8 );
}
return CompareSequential ( x , y, len );
}
这是我写的测试:
class Program
{
[Flags]
private enum InfoLevel:byte
{
Detail=0x01, Summary=0x02
}
private static InfoLevel logLevel = InfoLevel.Summary;
private static void LogDetail ( string content )
{
LogInfo ( InfoLevel.Detail,content );
}
private static void LogSummary ( string content )
{
LogInfo ( InfoLevel.Summary , content );
}
private static void LogInfo ( InfoLevel level , string content )
{
if ( ( level & logLevel ) == level )
Console.WriteLine ( content );
}
private static void LogInfo ( InfoLevel level , string format,
params object[] arg )
{
if ( ( level & logLevel ) == level )
Console.WriteLine ( format:format, arg:arg );
}
private static void LogDetail ( string format , params object [ ] arg )
{
LogInfo ( InfoLevel.Detail , format, arg );
}
private static void LogSummary ( string format , params object [ ] arg )
{
LogInfo ( InfoLevel.Summary , format, arg );
}
const string _randTestResultHeader = "\r\nRandom Array Content\r\n";
const string _equalArrayResultHeader = "Only Length Different\r\n\r\n";
const string _summaryTestResultsHeader =
"Size\t\tOrig Elps\tPara Elps\tComp Elps\r\n";
const string _summaryBodyContent =
"{0}\t\t{1:0.0000}\t\t{2:0.0000}\t\t{3:0.00000}\r\n";
static void Main ( string [ ] args )
{
Console.SetOut(new StreamWriter(File.Create("out.txt")));
int segLen = 0;
int segPercent = 7;
Console.WriteLine ( "Algorithm Test, Time results in milliseconds" );
for ( ; segPercent < 13; segPercent ++ )
{
Console.WriteLine (
"Test Run with parallel Dynamic segment size at {0}%"
+" of Array Size (Comp always at 8%)\r\n" , segPercent);
StringBuilder _aggrRandResults = new StringBuilder ( );
StringBuilder _aggrEqualResults = new StringBuilder ( );
_aggrRandResults.Append ( _randTestResultHeader );
_aggrEqualResults.Append ( _equalArrayResultHeader );
_aggrEqualResults.Append ( _summaryTestResultsHeader );
_aggrRandResults.Append ( _summaryTestResultsHeader );
for ( int i = 10 ; i < 25 ; i++ )
{
int baseLen = ( int ) Math.Pow ( 2 , i );
segLen = ( baseLen / 100 ) * segPercent;
var testName = "Equal Length ";
var equalTestAverage = RandomRunTest ( testName , baseLen ,
baseLen, segLen );
testName = "Left Side Larger";
var lslargerTestAverage=RandomRunTest(testName,baseLen+10,
baseLen, segLen );
testName = "Right Side Larger";
var rslargerTestAverage = RandomRunTest ( testName , baseLen ,
baseLen + 10, segLen );
double [ ] completelyRandomTestAvg = new double [ 3 ];
for ( int l = 0 ; l < completelyRandomTestAvg.Length ; l++ )
completelyRandomTestAvg [ l ] = ( equalTestAverage [ l ] +
lslargerTestAverage [ l ] +
rslargerTestAverage [ l ] ) / 3;
LogDetail ( "\r\nRandom Test Results:" );
LogDetail ("Original Composite Test Average: {0}" ,
completelyRandomTestAvg [ 0 ] );
LogDetail ( "Parallel Composite Test Average: {0}" ,
completelyRandomTestAvg [ 1 ] );
_aggrRandResults.AppendFormat ( _summaryBodyContent ,
baseLen ,
completelyRandomTestAvg [ 0 ] ,
completelyRandomTestAvg [ 1 ] ,
completelyRandomTestAvg [ 2 ]);
testName = "Equal Len And Values";
var equalEqualTest = EqualTill ( testName , baseLen ,
baseLen, segLen );
testName = "LHS Larger";
var equalLHSLargerTest = EqualTill ( testName , baseLen + 10 ,
baseLen, segLen );
testName = "RHS Larger";
var equalRHSLargerTest = EqualTill ( testName , baseLen ,
baseLen + 10, segLen );
double [ ] mostlyEqualTestAvg = new double [ 3 ];
for ( int l = 0 ; l < mostlyEqualTestAvg.Length ; l++ )
mostlyEqualTestAvg [ l ] = ( ( equalEqualTest [ l ] +
equalLHSLargerTest [ l ] +
equalRHSLargerTest [ l ] ) / 3 );
LogDetail( "\r\nLength Different Test Results" );
LogDetail( "Original Composite Test Average: {0}" ,
mostlyEqualTestAvg [ 0 ] );
LogDetail( "Parallel Composite Test Average: {0}" ,
mostlyEqualTestAvg [ 1 ] );
_aggrEqualResults.AppendFormat ( _summaryBodyContent ,
baseLen ,
mostlyEqualTestAvg [ 0 ] ,
mostlyEqualTestAvg [ 1 ] ,
mostlyEqualTestAvg [ 2 ]);
}
LogSummary ( _aggrRandResults.ToString() + "\r\n");
LogSummary ( _aggrEqualResults.ToString()+ "\r\n");
}
Console.Out.Flush ( );
}
private const string _testBody =
"\r\n\tOriginal:: Result:{0}, Elapsed:{1}"
+"\r\n\tParallel:: Result:{2}, Elapsed:{3}"
+"\r\n\tComposite:: Result:{4}, Elapsed:{5}";
private const string _testHeader =
"\r\nTesting {0}, Array Lengths: {1}, {2}";
public static double[] RandomRunTest(string testName, int shortArr1Len,
int shortArr2Len, int parallelSegLen)
{
var shortArr1 = new ushort [ shortArr1Len ];
var shortArr2 = new ushort [ shortArr2Len ];
double [ ] avgTimes = new double [ 3 ];
LogDetail ( _testHeader , testName , shortArr1Len , shortArr2Len ) ;
for ( int i = 0 ; i < 10 ; i++ )
{
int arrlen1 = shortArr1.Length , arrlen2 = shortArr2.Length;
double[] currResults = new double [ 3 ];
FillCompareArray ( shortArr1 , shortArr1.Length );
FillCompareArray ( shortArr2 , shortArr2.Length );
var sw = new Stopwatch ( );
//Force Garbage Collection
//to avoid having it effect
//the test results this way
//test 2 may have to garbage
//collect due to running second
GC.Collect ( );
sw.Start ( );
int origResult = Compare ( shortArr1 , shortArr2 );
sw.Stop ( );
currResults[0] = sw.Elapsed.TotalMilliseconds;
sw.Reset ( );
GC.Collect ( );
sw.Start ( );
int parallelResult = CompareParallelOnly ( shortArr1 , shortArr2,
parallelSegLen );
sw.Stop ( );
currResults [ 1 ] = sw.Elapsed.TotalMilliseconds;
sw.Reset ( );
GC.Collect ( );
sw.Start ( );
int compositeResults = CompareComposite ( shortArr1 , shortArr2 );
sw.Stop ( );
currResults [ 2 ] = sw.Elapsed.TotalMilliseconds;
LogDetail ( _testBody, origResult , currResults[0] ,
parallelResult , currResults[1],
compositeResults, currResults[2]);
for ( int l = 0 ; l < currResults.Length ; l++ )
avgTimes [ l ] = ( ( avgTimes[l]*i)+currResults[l])
/ ( i + 1 );
}
LogDetail ( "\r\nAverage Run Time Original: {0}" , avgTimes[0]);
LogDetail ( "Average Run Time Parallel: {0}" , avgTimes[1]);
LogDetail ( "Average Run Time Composite: {0}" , avgTimes [ 2 ] );
return avgTimes;
}
public static double [ ] EqualTill ( string testName, int shortArr1Len ,
int shortArr2Len, int parallelSegLen)
{
const string _testHeader =
"\r\nTesting When Array Difference is "
+"Only Length({0}), Array Lengths: {1}, {2}";
int baseLen = shortArr1Len > shortArr2Len
? shortArr2Len : shortArr1Len;
var shortArr1 = new ushort [ shortArr1Len ];
var shortArr2 = new ushort [ shortArr2Len ];
double [ ] avgTimes = new double [ 3 ];
LogDetail( _testHeader , testName , shortArr1Len , shortArr2Len );
for ( int i = 0 ; i < 10 ; i++ )
{
FillCompareArray ( shortArr1 , shortArr1Len);
Array.Copy ( shortArr1 , shortArr2, baseLen );
double [ ] currElapsedTime = new double [ 3 ];
var sw = new Stopwatch ( );
//See previous explaination
GC.Collect ( );
sw.Start ( );
int origResult = Compare ( shortArr1 , shortArr2 );
sw.Stop ( );
currElapsedTime[0] = sw.Elapsed.TotalMilliseconds;
sw.Reset ( );
GC.Collect ( );
sw.Start ( );
int parallelResult = CompareParallelOnly ( shortArr1, shortArr2,
parallelSegLen );
sw.Stop ( );
currElapsedTime[1] = sw.Elapsed.TotalMilliseconds;
sw.Reset ( );
GC.Collect ( );
sw.Start ( );
var compositeResult = CompareComposite ( shortArr1 , shortArr2 );
sw.Stop ( );
currElapsedTime [ 2 ] = sw.Elapsed.TotalMilliseconds;
LogDetail ( _testBody , origResult , currElapsedTime[0] ,
parallelResult , currElapsedTime[1],
compositeResult,currElapsedTime[2]);
for ( int l = 0 ; l < currElapsedTime.Length ; l++ )
avgTimes [ l ] = ( ( avgTimes [ l ] * i )
+ currElapsedTime[l])/(i + 1);
}
LogDetail ( "\r\nAverage Run Time Original: {0}" , avgTimes [ 0 ] );
LogDetail ( "Average Run Time Parallel: {0}" , avgTimes [ 1 ] );
LogDetail ( "Average Run Time Composite: {0}" , avgTimes [ 2 ] );
return avgTimes;
}
static Random rand = new Random ( );
public static void FillCompareArray ( ushort[] compareArray, int length )
{
var retVals = new byte[length];
( rand ).NextBytes ( retVals );
Array.Copy ( retVals , compareArray , length);
}
public static int CompareParallelOnly ( ushort [ ] x , ushort[] y,
int segLen )
{
int len = x.Length<y.Length ? x.Length:y.Length;
int compareArrLen = (len/segLen)+1;
int[] compareArr = new int [ compareArrLen ];
Parallel.For ( 0 , compareArrLen ,
new Action<int , ParallelLoopState> ( ( i , state ) =>
{
if ( state.LowestBreakIteration.HasValue
&& state.LowestBreakIteration.Value < i )
return;
int segEnd = ( i + 1 ) * segLen;
int k = len<segEnd?len:segEnd;
for ( int j = i * segLen ; j < k ; j++ )
if ( x [ j ] != y [ j ] )
{
compareArr [ i ] = ( x [ j ].CompareTo ( y [ j ] ) );
state.Break ( );
return;
}
} ) );
int r=compareArrLen-1;
while ( r >= 0 )
{
if ( compareArr [ r ] != 0 )
return compareArr [ r ];
r--;
}
return x.Length.CompareTo ( y.Length );
}
public static int Compare ( ushort [ ] x , ushort [ ] y )
{
int pos = 0;
int len = Math.Min ( x.Length , y.Length );
while ( pos < len && x [ pos ] == y [ pos ] )
pos++;
return pos < len ?
x [ pos ].CompareTo ( y [ pos ] ) :
x.Length.CompareTo ( y.Length );
}
public static int CompareParallel ( ushort[] x, ushort[] y, int len,
int segLen )
{
int compareArrLen = ( len / segLen ) + 1;
int [ ] compareArr = new int [ compareArrLen ];
Parallel.For ( 0 , compareArrLen ,
new Action<int , ParallelLoopState> ( ( i , state ) =>
{
if ( state.LowestBreakIteration.HasValue
&& state.LowestBreakIteration.Value < i )
return;
int segEnd = ( i + 1 ) * segLen;
int k = len < segEnd ? len : segEnd;
for ( int j = i * segLen ; j < k ; j++ )
if ( x [ j ] != y [ j ] )
{
compareArr [ i ] = ( x [ j ].CompareTo ( y [ j ] ) );
state.Break ( );
return;
}
} ) );
int r = compareArrLen - 1;
while ( r >= 0 )
{
if ( compareArr [ r ] != 0 )
return compareArr [ r ];
r--;
}
return x.Length.CompareTo ( y.Length );
}
public static int CompareSequential(ushort [ ] x , ushort [ ] y, int len)
{
int pos = 0;
while ( pos < len && x [ pos ] == y [ pos ] )
pos++;
return pos < len ?
x [ pos ].CompareTo ( y [ pos ] ) :
x.Length.CompareTo ( y.Length );
}
public static int CompareComposite ( ushort [ ] x , ushort [ ] y )
{
const int cutOff = 4096;
int len = x.Length < y.Length ? x.Length : y.Length;
if ( len > cutOff && x [ len - 1 ] == y [ len - 1 ]
&& x [ len/2 ] == y [ len/2 ] )
return CompareParallel ( x , y , len , (len / 100)*8 );
return CompareSequential ( x , y, len );
}
}
注意:
确保您使用优化的代码进行构建,当我不包括这一步时,结果非常不同,它使并行代码看起来比实际的改进要大得多。
我得到的结果是,对于非常长的相同数字集,执行时间减少了大约 33%。它仍然随着输入的增加而线性增长,但速度较慢。对于小数据集(在我的机器上小于 4092),它的启动速度也较慢,但通常花费的时间足够小(在我的机器上为 .001 毫秒),如果你确实得到了,那么使用它是值得的一个几乎相等的大数组。
关于c# - 比较两个不同长度的整数数组时提高性能,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/15117859/
将 KLV 字符串拆分为键、长度、值作为元素的列表/元组的更有效方法是什么? 为了添加一点背景,前 3 位数字作为键,接下来的 2 位表示值的长度。 我已经能够使用以下代码解决该问题。但我不认为我的代
首先,我试图从文件中提取视频持续时间,然后在无需实际上传文件的情况下显示它。 当用户选择视频时 - 信息将显示在其下方,包括文件名、文件大小、文件类型。不管我的技能多么糟糕 - 我无法显示持续时间。我
我是 Scala 编程新手,这是我的问题:如何计算每行的字符串数量?我的数据框由一列 Array[String] 类型组成。 friendsDF: org.apache.spark.sql.DataF
我有一个React Web应用程序(create-react-app),该应用程序使用react-hook-forms上传歌曲并使用axios将其发送到我的Node / express服务器。 我想确
如果给你一个网络掩码(例如 255.255.255.0),你如何在 Java 中获得它的长度/位(例如 8)? 最佳答案 如果您想找出整数低端有多少个零位,请尝试 Integer.numberOfTr
我需要使用 jQuery 获取 div 数量的长度。 我可以得到它,但在两个单击事件中声明变量,但这似乎是错误的,然后我还需要使用它来根据数字显示隐藏按钮。我觉得我不必将代码加倍。 在这里摆弄 htt
我对此感到非常绝望,到目前为止我在 www 上找不到任何东西。 情况如下: 我正在使用 Python。 我有 3 个数组:x 坐标、y 坐标和半径。 我想使用给定的 x 和 y 坐标创建散点图。 到目
我有一个表单,我通过 jQuery 的加载函数动态添加新的输入和选择元素。有时加载的元素故意为空,在这种情况下我想隐藏容器 div,这样它就不会破坏样式。 问题是,我似乎无法计算加载的元素,因此不知道
我决定通过替换来使我的代码更清晰 if (wrappedSet.length > 0) 类似 if (wrappedSet.exists()) 是否有任何 native jq 函数可以实现此目的?或者
简单的问题。如果我有一个如下表: CREATE TABLE `exampletable` ( `id` int(11) NOT NULL AUTO_INCREMENT, `textfield`
我正在使用经典 ASP/MySQL 将长用户输入插入到我的数据库中,该输入是从富文本编辑器生成的。该列设置为 LONG-TEXT。 作为参数化查询(准备语句)的新手,我不确定用于此特定查询的数据长度。
我正在获取 Stripe 交易费用的值(value)并通过禁用的文本字段显示它。 由于输入文本域,句子出现较大空隙 This is the amount $3.50____________that n
我有一个 div,其背景图像的大小设置为包含。但是,图像是视网膜计算机(Macbook Pro 等)的双分辨率图像,所以我希望能够以某种方式让页面知道即使我说的是背景大小:包含 200x200 图像,
我正在开发一个具有“已保存”和“已完成”模块的小部件。当我删除元素时,它会从 dom 中删除/淡化它,但是当我将其标记为完成时,它会将其克隆到已完成的选项卡。这工作很棒,但顶部括号内的数字不适合我。这
我有一个来自 json 提要的数组,我知道在 jArray 中有一个联盟,但我需要计算出该数组的计数,以防稍后将第二个添加到提要中。目前 log cat 没有注销“teamFeedStructure”
目标:给定一个混合类型的数组,确定每个级别的元素数量。如果同一层有两个子数组,则它们的每个元素都计入该层元素的总数。 方法: Array.prototype.elementsAtLevels = fu
我需要帮助为 Java 中的单链表制作 int size(); 方法。 这是我目前所拥有的,但它没有返回正确的列表大小。 public int size() { int size = 0;
我正在为学校作业创建一个文件服务器应用程序。我目前拥有的是一个简单的 Client 类,它通过 TCP 发送图像,还有一个 Server 类接收图像并将其写入文件。 这是我的客户端代码 import
我有这对功能 (,) length :: Foldable t => t a -> b -> (Int, b) 和, head :: [a] -> a 我想了解的类型 (,) length he
我正在GitHub Pages上使用Jekyll来构建博客,并希望获得传递给YAML前题中Liquid模板的page.title字符串的长度,该字符串在每个帖子的YAML主题中。我还没有找到一种简单的
我是一名优秀的程序员,十分优秀!