- c - 在位数组中找到第一个零
- linux - Unix 显示有关匹配两种模式之一的文件的信息
- 正则表达式替换多个文件
- linux - 隐藏来自 xtrace 的命令
当我遇到一个有趣的结果时,我正在做一些有趣的实验计算:
Completed 1024x1024 pixels with 700 points in...
For Loop (Inline): 19636ms
For Loop: 12612ms
Parallel.For Loop: 3835ms
这不是我所期望的。
系统:Windows 7 64位,i3 2120 [双核,4线程],Visual Studio 2010。
构建:优化开启, Release模式 [无调试器],32 位。
次要的兴趣是令人失望的 64 位性能。虽然它在比率方面更符合我的预期,但它通过整体变慢来实现这一点。
Completed 1024x1024 pixels with 700 points in...
For Loop (Inline): 23409ms
For Loop: 24373ms
Parallel.For Loop: 6839ms
计算很简单:对于索引 x 和 y,找到最接近的 Vector3 并将其存储在二维数组中。
这个问题(如果你敢接受挑战的话)是:请解释为什么内联的 for 循环会这么慢。如果还能解释 64 位版本性能不佳的原因,额外加分。
using System;
using System.Diagnostics;
using System.Threading.Tasks;
namespace TextureFromPoints
{
    /// <summary>
    /// Nearest-point benchmark: for every pixel of a textureSize x textureSize grid,
    /// the closest of numPoints random points is looked up and stored. The same work
    /// is timed three ways, all from inside <see cref="Main"/>: a loop written inline,
    /// a loop that calls <see cref="Computation"/>, and a Parallel.For that calls
    /// <see cref="Computation"/>.
    /// </summary>
    class Program
    {
        const int numPoints = 700;
        const int textureSize = 1024;

        // Single generator shared by every randomly constructed Vector3.
        static Random rnd = new Random();

        /// <summary>
        /// Runs the three timed variants, prints the timings and a consistency check,
        /// then waits for a line of input before repeating. The loop never exits.
        /// </summary>
        static void Main(string[] args)
        {
            while (true)
            {
                Console.WriteLine("Starting");
                Console.WriteLine();

                // Fresh random point cloud for this round.
                var cloud = new Vector3[numPoints];
                for (int idx = 0; idx < numPoints; idx++)
                {
                    cloud[idx] = new Vector3(textureSize);
                }

                // One output grid per variant so they can be compared afterwards.
                var inlineGrid = new Vector3[textureSize, textureSize];
                var callGrid = new Vector3[textureSize, textureSize];
                var parallelGrid = new Vector3[textureSize, textureSize];

                // Variant 1: the search loop is written out right here.
                var inlineTimer = Stopwatch.StartNew();
                for (int px = 0; px < textureSize; px++)
                {
                    for (int py = 0; py < textureSize; py++)
                    {
                        var pixel = new Vector3(px, py, 0);
                        var closest = cloud[0];
                        var closestDistance = closest.DistanceToPoint(pixel);
                        for (int idx = 1; idx < numPoints; idx++)
                        {
                            var candidate = cloud[idx];
                            var candidateDistance = candidate.DistanceToPoint(pixel);
                            if (candidateDistance < closestDistance)
                            {
                                closest = candidate;
                                closestDistance = candidateDistance;
                            }
                        }
                        inlineGrid[px, py] = closest;
                    }
                }
                inlineTimer.Stop();

                // Variant 2: same search, delegated to Computation per pixel.
                var callTimer = Stopwatch.StartNew();
                for (int px = 0; px < textureSize; px++)
                {
                    for (int py = 0; py < textureSize; py++)
                    {
                        Computation(cloud, callGrid, px, py);
                    }
                }
                callTimer.Stop();

                // Variant 3: the outer (x) loop is handed to Parallel.For.
                var parallelTimer = Stopwatch.StartNew();
                Parallel.For(0, textureSize, px =>
                {
                    for (int py = 0; py < textureSize; py++)
                    {
                        Computation(cloud, parallelGrid, px, py);
                    }
                });
                parallelTimer.Stop();

                Console.WriteLine("Completed {0}x{0} pixels with {1} points in...", textureSize, numPoints);
                Console.WriteLine("{0}: {1}ms", "For Loop (Inline)", inlineTimer.ElapsedMilliseconds);
                Console.WriteLine("{0}: {1}ms", "For Loop", callTimer.ElapsedMilliseconds);
                Console.WriteLine("{0}: {1}ms", "Parallel.For Loop", parallelTimer.ElapsedMilliseconds);
                Console.WriteLine();

                // The second comparison is skipped when the first one already fails.
                Console.Write("Verifying Data: ");
                bool allAgree = CheckResults(inlineGrid, callGrid) && CheckResults(inlineGrid, parallelGrid);
                Console.WriteLine(allAgree ? "Valid" : "Error");
                Console.WriteLine();
                Console.WriteLine();
                Console.ReadLine();
            }
        }

        /// <summary>
        /// Compares two grids cell by cell over the textureSize x textureSize range.
        /// </summary>
        /// <returns>false at the first cell whose values are not Equals; true otherwise.</returns>
        private static bool CheckResults(Vector3[,] lhs, Vector3[,] rhs)
        {
            for (int px = 0; px < textureSize; px++)
            {
                for (int py = 0; py < textureSize; py++)
                {
                    if (!lhs[px, py].Equals(rhs[px, py]))
                    {
                        return false;
                    }
                }
            }
            return true;
        }

        /// <summary>
        /// Finds the entry of <paramref name="pointCloud"/> with the smallest
        /// DistanceToPoint to the pixel (x, y, 0) and writes it to result[x, y].
        /// On equal distances the earlier entry is kept (strict less-than test).
        /// </summary>
        private static void Computation(Vector3[] pointCloud, Vector3[,] result, int x, int y)
        {
            var pixel = new Vector3(x, y, 0);
            var closest = pointCloud[0];
            var closestDistance = closest.DistanceToPoint(pixel);
            for (int idx = 1; idx < numPoints; idx++)
            {
                var candidate = pointCloud[idx];
                var candidateDistance = candidate.DistanceToPoint(pixel);
                if (candidateDistance < closestDistance)
                {
                    closest = candidate;
                    closestDistance = candidateDistance;
                }
            }
            result[x, y] = closest;
        }

        /// <summary>Three-float value type used for both the points and the pixels.</summary>
        struct Vector3
        {
            public float x;
            public float y;
            public float z;

            /// <summary>Creates a vector from explicit components.</summary>
            public Vector3(float x, float y, float z)
            {
                this.x = x;
                this.y = y;
                this.z = z;
            }

            /// <summary>
            /// Creates a vector whose components are each rnd.NextDouble() scaled by
            /// <paramref name="randomDistance"/>, drawn in x, y, z order.
            /// </summary>
            public Vector3(float randomDistance)
            {
                this.x = (float)rnd.NextDouble() * randomDistance;
                this.y = (float)rnd.NextDouble() * randomDistance;
                this.z = (float)rnd.NextDouble() * randomDistance;
            }

            /// <summary>Component-wise difference a - b.</summary>
            public static Vector3 operator -(Vector3 a, Vector3 b)
            {
                return new Vector3(a.x - b.x, a.y - b.y, a.z - b.z);
            }

            /// <summary>Sum of the squared components.</summary>
            public float sqrMagnitude()
            {
                return x * x + y * y + z * z;
            }

            /// <summary>
            /// Squared magnitude of (this - point). Despite the name, no square root
            /// is taken.
            /// </summary>
            public float DistanceToPoint(Vector3 point)
            {
                return (this - point).sqrMagnitude();
            }
        }
    }
}
更新:感谢 Drew Marsh 的努力,我们现在有了这个内联所有 V3 操作的 super 优化版本。
using System;
using System.Diagnostics;
using System.Threading.Tasks;
namespace TextureFromPoints
{
    /// <summary>
    /// Revised nearest-point benchmark in which each timed variant lives in its own
    /// method and all Vector3 arithmetic is written out by hand (no operator or
    /// helper-method calls on Vector3 in the search loops).
    /// </summary>
    class RevisedProgram
    {
        const int numPoints = 700;
        const int textureSize = 1024;

        // Single generator shared by every randomly constructed Vector3.
        static Random rnd = new Random();

        /// <summary>
        /// Builds a random point cloud, runs the three variants, prints their timings
        /// and a consistency check, then waits for a line of input. Never exits.
        /// </summary>
        static void Main(string[] args)
        {
            while (true)
            {
                Console.WriteLine("Starting REVISED");
                Console.WriteLine();

                var cloud = new Vector3[numPoints];
                for (int idx = 0; idx < numPoints; idx++)
                {
                    cloud[idx] = new Vector3(textureSize);
                }

                // One output grid per variant so they can be compared afterwards.
                var inlineGrid = new Vector3[textureSize, textureSize];
                var callGrid = new Vector3[textureSize, textureSize];
                var parallelGrid = new Vector3[textureSize, textureSize];

                var inlineTimer = Inline(cloud, inlineGrid);
                var callTimer = NotInline(cloud, callGrid);
                var parallelTimer = Parallelized(cloud, parallelGrid);

                Console.WriteLine("Completed {0}x{0} pixels with {1} points in...", textureSize, numPoints);
                Console.WriteLine("{0}: {1}ms", "For Loop (Inline)", inlineTimer.ElapsedMilliseconds);
                Console.WriteLine("{0}: {1}ms", "For Loop", callTimer.ElapsedMilliseconds);
                Console.WriteLine("{0}: {1}ms", "Parallel.For Loop", parallelTimer.ElapsedMilliseconds);
                Console.WriteLine();

                // The second comparison is skipped when the first one already fails.
                Console.Write("Verifying Data: ");
                bool allAgree = CheckResults(inlineGrid, callGrid) && CheckResults(inlineGrid, parallelGrid);
                Console.WriteLine(allAgree ? "Valid" : "Error");
                Console.WriteLine();
                Console.WriteLine();
                Console.ReadLine();
            }
        }

        /// <summary>
        /// Fills <paramref name="result1"/> with the search loop written directly in
        /// this method.
        /// </summary>
        /// <returns>The stopped Stopwatch that timed the fill.</returns>
        private static Stopwatch Inline(Vector3[] pointCloud, Vector3[,] result1)
        {
            var timer = Stopwatch.StartNew();
            for (int px = 0; px < textureSize; px++)
            {
                for (int py = 0; py < textureSize; py++)
                {
                    var pixel = new Vector3(px, py, 0);
                    var closest = pointCloud[0];
                    Vector3 firstDelta = new Vector3(closest.x - pixel.x, closest.y - pixel.y, closest.z - pixel.z);
                    var closestDistance = firstDelta.x * firstDelta.x + firstDelta.y * firstDelta.y + firstDelta.z * firstDelta.z;
                    for (int idx = 1; idx < numPoints; idx++)
                    {
                        var candidate = pointCloud[idx];
                        Vector3 delta = new Vector3(candidate.x - pixel.x, candidate.y - pixel.y, candidate.z - pixel.z);
                        var candidateDistance = delta.x * delta.x + delta.y * delta.y + delta.z * delta.z;
                        if (candidateDistance < closestDistance)
                        {
                            closest = candidate;
                            closestDistance = candidateDistance;
                        }
                    }
                    result1[px, py] = closest;
                }
            }
            timer.Stop();
            return timer;
        }

        /// <summary>
        /// Fills <paramref name="result2"/> by calling <see cref="Computation"/> once
        /// per pixel from a sequential double loop.
        /// </summary>
        /// <returns>The stopped Stopwatch that timed the fill.</returns>
        private static Stopwatch NotInline(Vector3[] pointCloud, Vector3[,] result2)
        {
            var timer = Stopwatch.StartNew();
            for (int px = 0; px < textureSize; px++)
            {
                for (int py = 0; py < textureSize; py++)
                {
                    Computation(pointCloud, result2, px, py);
                }
            }
            timer.Stop();
            return timer;
        }

        /// <summary>
        /// Fills <paramref name="result3"/> with the outer (x) loop run through
        /// Parallel.For; each iteration walks every y and calls <see cref="Computation"/>.
        /// </summary>
        /// <returns>The stopped Stopwatch that timed the fill.</returns>
        private static Stopwatch Parallelized(Vector3[] pointCloud, Vector3[,] result3)
        {
            var timer = Stopwatch.StartNew();
            Parallel.For(0, textureSize, px =>
            {
                for (int py = 0; py < textureSize; py++)
                {
                    Computation(pointCloud, result3, px, py);
                }
            });
            timer.Stop();
            return timer;
        }

        /// <summary>
        /// Compares two grids cell by cell over the textureSize x textureSize range.
        /// </summary>
        /// <returns>false at the first cell whose values are not Equals; true otherwise.</returns>
        private static bool CheckResults(Vector3[,] lhs, Vector3[,] rhs)
        {
            for (int px = 0; px < textureSize; px++)
            {
                for (int py = 0; py < textureSize; py++)
                {
                    if (!lhs[px, py].Equals(rhs[px, py]))
                    {
                        return false;
                    }
                }
            }
            return true;
        }

        /// <summary>
        /// Finds the entry of <paramref name="pointCloud"/> with the smallest squared
        /// distance to the pixel (x, y, 0) and writes it to result[x, y]. The
        /// subtraction and squared length are spelled out component by component.
        /// On equal distances the earlier entry is kept (strict less-than test).
        /// </summary>
        private static void Computation(Vector3[] pointCloud, Vector3[,] result, int x, int y)
        {
            var pixel = new Vector3(x, y, 0);
            var closest = pointCloud[0];
            Vector3 firstDelta = new Vector3(closest.x - pixel.x, closest.y - pixel.y, closest.z - pixel.z);
            var closestDistance = firstDelta.x * firstDelta.x + firstDelta.y * firstDelta.y + firstDelta.z * firstDelta.z;
            for (int idx = 1; idx < numPoints; idx++)
            {
                var candidate = pointCloud[idx];
                Vector3 delta = new Vector3(candidate.x - pixel.x, candidate.y - pixel.y, candidate.z - pixel.z);
                var candidateDistance = delta.x * delta.x + delta.y * delta.y + delta.z * delta.z;
                if (candidateDistance < closestDistance)
                {
                    closest = candidate;
                    closestDistance = candidateDistance;
                }
            }
            result[x, y] = closest;
        }

        /// <summary>Three-float value type; this version carries data and constructors only.</summary>
        struct Vector3
        {
            public float x;
            public float y;
            public float z;

            /// <summary>Creates a vector from explicit components.</summary>
            public Vector3(float x, float y, float z)
            {
                this.x = x;
                this.y = y;
                this.z = z;
            }

            /// <summary>
            /// Creates a vector whose components are each rnd.NextDouble() scaled by
            /// <paramref name="randomDistance"/>, drawn in x, y, z order.
            /// </summary>
            public Vector3(float randomDistance)
            {
                this.x = (float)rnd.NextDouble() * randomDistance;
                this.y = (float)rnd.NextDouble() * randomDistance;
                this.z = (float)rnd.NextDouble() * randomDistance;
            }
        }
    }
}
结果如下:
x86
Completed 1024x1024 pixels with 700 points in...
For Loop (Inline): 3820ms
For Loop: 3962ms
Parallel.For Loop: 1681ms
x64
Completed 1024x1024 pixels with 700 points in...
For Loop (Inline): 10978ms
For Loop: 10924ms
Parallel.For Loop: 3073ms
所以好消息是我们可以大幅提高此代码的性能 - 并使单线程版本的运行速度与其并行表亲保持一致。
坏消息是,这意味着完全放弃 x64 并手动内联所有数学。
在这个阶段,我对编译器的性能感到非常失望 - 我希望它们会好得多。
结论
这真是荒唐可悲……虽然我们并不确切知道原因,但可以有根据地猜测这是由一个愚蠢的编译器引起的。只需将编译器从 x64 更改为 x86 并进行一些手动内联,即可将 24 秒缩短为 3.8 秒,这并不是我所期望的。然而,我已经完成了我正在编写的概念证明,多亏了一个简单的空间哈希,我可以在 0.7 秒内计算出一个 1024 x 1024 的图像,其中有 70,000 个“点”——比我原来的 x64 场景快 ~340000%,并且没有使用多线程或手动内联。因此,我已经接受了一个答案 - 迫切的需求已经消失,尽管我仍在调查这个问题。
最佳答案
所有数据来自 8 核 i7、Win7、x64
令人惊讶的是,您确实获得了 5 倍。您编写的此测试的一个问题是,您已将所有三种方法都放在 Main 方法中,这迫使编译器必须生成一堆乱七八糟的代码(gobbledygook)并保持同步,以满足 Parallel.For 中使用的闭包的需要,而这妨碍了内联方法。如果按如下方式分解工作,您会发现所有三种实现的性能都明显提高……至少对于 x86:
x86 之前:
For Loop (Inline): 24313ms
For Loop: 25236ms
Parallel.For Loop: 3840ms
x86 之后:
For Loop (Inline): 13007ms
For Loop: 13013ms
Parallel.For Loop: 2208ms
因此,查看我的 x86 Parallel.For 结果,您会发现它的扩展约为 ~5.9 倍,并且每个版本在隔离时都快得多。
接下来,值得注意的是,经过同样的更改后 x64 绝对没有任何增益。事实上,在 3 项测试中的 2 项测试中,每次运行的结果都略高。
x64 之前
For Loop (Inline): 24222ms
For Loop: 25197ms
Parallel.For Loop: 3810ms
x64 之后
For Loop (Inline): 25302ms
For Loop: 25209ms
Parallel.For Loop: 3821ms
除了人们一直想出这样的代码使 x64 JIT 看起来很糟糕之外,我没有直接的答案为什么 x64 会如此糟糕,所以也许其他人可以插话。
也就是说,在这样的实现中,还有一件事您可能值得研究:缓存行失效(cache line invalidation)。这里有一篇由 @StephenToub 撰写的很棒的 MSDN 文章,解释了这一切。长话短说(TL;DR):由于您的所有数据都存储在同一个数组中,而拥有各自本地 (L2) 缓存的不同内核会修改该数组的不同部分,因此它们必须与数据区域重叠的其他内核同步数据。如果不同内核正在处理的部分靠得太近,最终就会产生大量这样的同步,从而吞噬并行带来的收益。这篇文章展示了一种技术:在工作数组中额外分配足够的空间,把各内核实际要处理的数据段彼此隔开,这样这些内核在处理数据时就不必使其他内核的缓存失效。这或许可以解释为什么我的结果相对于 for 循环只有约 5.9 倍的提升,而不是更接近 8 倍。我敢打赌,如果您投入精力解决缓存行失效问题,还可以从中再挤出 10% 以上。请记住,在设置和协调并行工作时总会有一些开销,因此您永远不会获得 100% 的完美扩展。
这是您的程序的修订版,其中每种方法都分解为单独的方法:
using System;
using System.Diagnostics;
using System.Threading.Tasks;
namespace TextureFromPoints
{
    /// <summary>
    /// Revised nearest-point benchmark in which each timed variant is factored into
    /// its own method instead of living inside <see cref="Main"/>. Distances are
    /// still obtained through Vector3.DistanceToPoint.
    /// </summary>
    class RevisedProgram
    {
        const int numPoints = 700;
        const int textureSize = 1024;

        // Single generator shared by every randomly constructed Vector3.
        static Random rnd = new Random();

        /// <summary>
        /// Builds a random point cloud, runs the three variants, prints their timings
        /// and a consistency check, then waits for a line of input. Never exits.
        /// </summary>
        static void Main(string[] args)
        {
            while (true)
            {
                Console.WriteLine("Starting REVISED");
                Console.WriteLine();

                var cloud = new Vector3[numPoints];
                for (int idx = 0; idx < numPoints; idx++)
                {
                    cloud[idx] = new Vector3(textureSize);
                }

                // One output grid per variant so they can be compared afterwards.
                var inlineGrid = new Vector3[textureSize, textureSize];
                var callGrid = new Vector3[textureSize, textureSize];
                var parallelGrid = new Vector3[textureSize, textureSize];

                var inlineTimer = Inline(cloud, inlineGrid);
                var callTimer = NotInline(cloud, callGrid);
                var parallelTimer = Parallelized(cloud, parallelGrid);

                Console.WriteLine("Completed {0}x{0} pixels with {1} points in...", textureSize, numPoints);
                Console.WriteLine("{0}: {1}ms", "For Loop (Inline)", inlineTimer.ElapsedMilliseconds);
                Console.WriteLine("{0}: {1}ms", "For Loop", callTimer.ElapsedMilliseconds);
                Console.WriteLine("{0}: {1}ms", "Parallel.For Loop", parallelTimer.ElapsedMilliseconds);
                Console.WriteLine();

                // The second comparison is skipped when the first one already fails.
                Console.Write("Verifying Data: ");
                bool allAgree = CheckResults(inlineGrid, callGrid) && CheckResults(inlineGrid, parallelGrid);
                Console.WriteLine(allAgree ? "Valid" : "Error");
                Console.WriteLine();
                Console.WriteLine();
                Console.ReadLine();
            }
        }

        /// <summary>
        /// Fills <paramref name="result1"/> with the search loop written directly in
        /// this method.
        /// </summary>
        /// <returns>The stopped Stopwatch that timed the fill.</returns>
        private static Stopwatch Inline(Vector3[] pointCloud, Vector3[,] result1)
        {
            var timer = Stopwatch.StartNew();
            for (int px = 0; px < textureSize; px++)
            {
                for (int py = 0; py < textureSize; py++)
                {
                    var pixel = new Vector3(px, py, 0);
                    var closest = pointCloud[0];
                    var closestDistance = closest.DistanceToPoint(pixel);
                    for (int idx = 1; idx < numPoints; idx++)
                    {
                        var candidate = pointCloud[idx];
                        var candidateDistance = candidate.DistanceToPoint(pixel);
                        if (candidateDistance < closestDistance)
                        {
                            closest = candidate;
                            closestDistance = candidateDistance;
                        }
                    }
                    result1[px, py] = closest;
                }
            }
            timer.Stop();
            return timer;
        }

        /// <summary>
        /// Fills <paramref name="result2"/> by calling <see cref="Computation"/> once
        /// per pixel from a sequential double loop.
        /// </summary>
        /// <returns>The stopped Stopwatch that timed the fill.</returns>
        private static Stopwatch NotInline(Vector3[] pointCloud, Vector3[,] result2)
        {
            var timer = Stopwatch.StartNew();
            for (int px = 0; px < textureSize; px++)
            {
                for (int py = 0; py < textureSize; py++)
                {
                    Computation(pointCloud, result2, px, py);
                }
            }
            timer.Stop();
            return timer;
        }

        /// <summary>
        /// Fills <paramref name="result3"/> with the outer (x) loop run through
        /// Parallel.For; each iteration walks every y and calls <see cref="Computation"/>.
        /// </summary>
        /// <returns>The stopped Stopwatch that timed the fill.</returns>
        private static Stopwatch Parallelized(Vector3[] pointCloud, Vector3[,] result3)
        {
            var timer = Stopwatch.StartNew();
            Parallel.For(0, textureSize, px =>
            {
                for (int py = 0; py < textureSize; py++)
                {
                    Computation(pointCloud, result3, px, py);
                }
            });
            timer.Stop();
            return timer;
        }

        /// <summary>
        /// Compares two grids cell by cell over the textureSize x textureSize range.
        /// </summary>
        /// <returns>false at the first cell whose values are not Equals; true otherwise.</returns>
        private static bool CheckResults(Vector3[,] lhs, Vector3[,] rhs)
        {
            for (int px = 0; px < textureSize; px++)
            {
                for (int py = 0; py < textureSize; py++)
                {
                    if (!lhs[px, py].Equals(rhs[px, py]))
                    {
                        return false;
                    }
                }
            }
            return true;
        }

        /// <summary>
        /// Finds the entry of <paramref name="pointCloud"/> with the smallest
        /// DistanceToPoint to the pixel (x, y, 0) and writes it to result[x, y].
        /// On equal distances the earlier entry is kept (strict less-than test).
        /// </summary>
        private static void Computation(Vector3[] pointCloud, Vector3[,] result, int x, int y)
        {
            var pixel = new Vector3(x, y, 0);
            var closest = pointCloud[0];
            var closestDistance = closest.DistanceToPoint(pixel);
            for (int idx = 1; idx < numPoints; idx++)
            {
                var candidate = pointCloud[idx];
                var candidateDistance = candidate.DistanceToPoint(pixel);
                if (candidateDistance < closestDistance)
                {
                    closest = candidate;
                    closestDistance = candidateDistance;
                }
            }
            result[x, y] = closest;
        }

        /// <summary>Three-float value type used for both the points and the pixels.</summary>
        struct Vector3
        {
            public float x;
            public float y;
            public float z;

            /// <summary>Creates a vector from explicit components.</summary>
            public Vector3(float x, float y, float z)
            {
                this.x = x;
                this.y = y;
                this.z = z;
            }

            /// <summary>
            /// Creates a vector whose components are each rnd.NextDouble() scaled by
            /// <paramref name="randomDistance"/>, drawn in x, y, z order.
            /// </summary>
            public Vector3(float randomDistance)
            {
                this.x = (float)rnd.NextDouble() * randomDistance;
                this.y = (float)rnd.NextDouble() * randomDistance;
                this.z = (float)rnd.NextDouble() * randomDistance;
            }

            /// <summary>Component-wise difference a - b.</summary>
            public static Vector3 operator -(Vector3 a, Vector3 b)
            {
                return new Vector3(a.x - b.x, a.y - b.y, a.z - b.z);
            }

            /// <summary>Sum of the squared components.</summary>
            public float sqrMagnitude()
            {
                return x * x + y * y + z * z;
            }

            /// <summary>
            /// Squared magnitude of (this - point). Despite the name, no square root
            /// is taken.
            /// </summary>
            public float DistanceToPoint(Vector3 point)
            {
                return (this - point).sqrMagnitude();
            }
        }
    }
}
根据 Feng Yuan 指出的 x64 JIT 未内联的方法,您可以将程序更改为内联计算,并从 x64 版本获得比 x86 版本更好的性能。这显然很糟糕,但这是我以前见过 x64 JIT 破坏的那种东西。这是新数字:
内联 x64 之后:
For Loop (Inline): 19032ms
For Loop: 19209ms
Parallel.For Loop: 3015ms
代码的内联版本:
using System;
using System.Diagnostics;
using System.Threading.Tasks;
namespace TextureFromPoints
{
    /// <summary>
    /// Revised nearest-point benchmark, "inline" flavour: the search loops take the
    /// difference with Vector3's operator - and then compute the squared length by
    /// hand instead of calling a distance method on Vector3.
    /// </summary>
    class RevisedProgram
    {
        const int numPoints = 700;
        const int textureSize = 1024;

        // Single generator shared by every randomly constructed Vector3.
        static Random rnd = new Random();

        /// <summary>
        /// Builds a random point cloud, runs the three variants, prints their timings
        /// and a consistency check, then waits for a line of input. Never exits.
        /// </summary>
        static void Main(string[] args)
        {
            while (true)
            {
                Console.WriteLine("Starting REVISED");
                Console.WriteLine();

                var cloud = new Vector3[numPoints];
                for (int idx = 0; idx < numPoints; idx++)
                {
                    cloud[idx] = new Vector3(textureSize);
                }

                // One output grid per variant so they can be compared afterwards.
                var inlineGrid = new Vector3[textureSize, textureSize];
                var callGrid = new Vector3[textureSize, textureSize];
                var parallelGrid = new Vector3[textureSize, textureSize];

                var inlineTimer = Inline(cloud, inlineGrid);
                var callTimer = NotInline(cloud, callGrid);
                var parallelTimer = Parallelized(cloud, parallelGrid);

                Console.WriteLine("Completed {0}x{0} pixels with {1} points in...", textureSize, numPoints);
                Console.WriteLine("{0}: {1}ms", "For Loop (Inline)", inlineTimer.ElapsedMilliseconds);
                Console.WriteLine("{0}: {1}ms", "For Loop", callTimer.ElapsedMilliseconds);
                Console.WriteLine("{0}: {1}ms", "Parallel.For Loop", parallelTimer.ElapsedMilliseconds);
                Console.WriteLine();

                // The second comparison is skipped when the first one already fails.
                Console.Write("Verifying Data: ");
                bool allAgree = CheckResults(inlineGrid, callGrid) && CheckResults(inlineGrid, parallelGrid);
                Console.WriteLine(allAgree ? "Valid" : "Error");
                Console.WriteLine();
                Console.WriteLine();
                Console.ReadLine();
            }
        }

        /// <summary>
        /// Fills <paramref name="result1"/> with the search loop written directly in
        /// this method.
        /// </summary>
        /// <returns>The stopped Stopwatch that timed the fill.</returns>
        private static Stopwatch Inline(Vector3[] pointCloud, Vector3[,] result1)
        {
            var timer = Stopwatch.StartNew();
            for (int px = 0; px < textureSize; px++)
            {
                for (int py = 0; py < textureSize; py++)
                {
                    var pixel = new Vector3(px, py, 0);
                    var closest = pointCloud[0];
                    Vector3 firstDelta = closest - pixel;
                    var closestDistance = firstDelta.x * firstDelta.x + firstDelta.y * firstDelta.y + firstDelta.z * firstDelta.z;
                    for (int idx = 1; idx < numPoints; idx++)
                    {
                        var candidate = pointCloud[idx];
                        Vector3 delta = candidate - pixel;
                        var candidateDistance = delta.x * delta.x + delta.y * delta.y + delta.z * delta.z;
                        if (candidateDistance < closestDistance)
                        {
                            closest = candidate;
                            closestDistance = candidateDistance;
                        }
                    }
                    result1[px, py] = closest;
                }
            }
            timer.Stop();
            return timer;
        }

        /// <summary>
        /// Fills <paramref name="result2"/> by calling <see cref="Computation"/> once
        /// per pixel from a sequential double loop.
        /// </summary>
        /// <returns>The stopped Stopwatch that timed the fill.</returns>
        private static Stopwatch NotInline(Vector3[] pointCloud, Vector3[,] result2)
        {
            var timer = Stopwatch.StartNew();
            for (int px = 0; px < textureSize; px++)
            {
                for (int py = 0; py < textureSize; py++)
                {
                    Computation(pointCloud, result2, px, py);
                }
            }
            timer.Stop();
            return timer;
        }

        /// <summary>
        /// Fills <paramref name="result3"/> with the outer (x) loop run through
        /// Parallel.For; each iteration walks every y and calls <see cref="Computation"/>.
        /// </summary>
        /// <returns>The stopped Stopwatch that timed the fill.</returns>
        private static Stopwatch Parallelized(Vector3[] pointCloud, Vector3[,] result3)
        {
            var timer = Stopwatch.StartNew();
            Parallel.For(0, textureSize, px =>
            {
                for (int py = 0; py < textureSize; py++)
                {
                    Computation(pointCloud, result3, px, py);
                }
            });
            timer.Stop();
            return timer;
        }

        /// <summary>
        /// Compares two grids cell by cell over the textureSize x textureSize range.
        /// </summary>
        /// <returns>false at the first cell whose values are not Equals; true otherwise.</returns>
        private static bool CheckResults(Vector3[,] lhs, Vector3[,] rhs)
        {
            for (int px = 0; px < textureSize; px++)
            {
                for (int py = 0; py < textureSize; py++)
                {
                    if (!lhs[px, py].Equals(rhs[px, py]))
                    {
                        return false;
                    }
                }
            }
            return true;
        }

        /// <summary>
        /// Finds the entry of <paramref name="pointCloud"/> with the smallest squared
        /// distance to the pixel (x, y, 0) and writes it to result[x, y]. The
        /// difference uses operator -; the squared length is computed by hand.
        /// On equal distances the earlier entry is kept (strict less-than test).
        /// </summary>
        private static void Computation(Vector3[] pointCloud, Vector3[,] result, int x, int y)
        {
            var pixel = new Vector3(x, y, 0);
            var closest = pointCloud[0];
            Vector3 firstDelta = closest - pixel;
            var closestDistance = firstDelta.x * firstDelta.x + firstDelta.y * firstDelta.y + firstDelta.z * firstDelta.z;
            for (int idx = 1; idx < numPoints; idx++)
            {
                var candidate = pointCloud[idx];
                Vector3 delta = candidate - pixel;
                var candidateDistance = delta.x * delta.x + delta.y * delta.y + delta.z * delta.z;
                if (candidateDistance < closestDistance)
                {
                    closest = candidate;
                    closestDistance = candidateDistance;
                }
            }
            result[x, y] = closest;
        }

        /// <summary>
        /// Squared length of (vector - point). Nothing else in this class calls it;
        /// the search loops repeat the same arithmetic in place.
        /// </summary>
        private static float DistanceToPoint(Vector3 vector, Vector3 point)
        {
            Vector3 final = vector - point;
            return final.x * final.x + final.y * final.y + final.z * final.z;
        }

        /// <summary>Three-float value type with constructors and a subtraction operator.</summary>
        struct Vector3
        {
            public float x;
            public float y;
            public float z;

            /// <summary>Creates a vector from explicit components.</summary>
            public Vector3(float x, float y, float z)
            {
                this.x = x;
                this.y = y;
                this.z = z;
            }

            /// <summary>
            /// Creates a vector whose components are each rnd.NextDouble() scaled by
            /// <paramref name="randomDistance"/>, drawn in x, y, z order.
            /// </summary>
            public Vector3(float randomDistance)
            {
                this.x = (float)rnd.NextDouble() * randomDistance;
                this.y = (float)rnd.NextDouble() * randomDistance;
                this.z = (float)rnd.NextDouble() * randomDistance;
            }

            /// <summary>Component-wise difference a - b.</summary>
            public static Vector3 operator -(Vector3 a, Vector3 b)
            {
                return new Vector3(a.x - b.x, a.y - b.y, a.z - b.z);
            }
        }
    }
}
关于c# - 在双核上使用 Parallel.For... 实现 5 倍性能?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/11572635/
在 Oracle 中,PARALLEL 被广泛使用。提示 PARALLEL、PARALLEL(8) 和 PARALLEL(a,8) 有什么区别。如何选择最佳的查询提示? SELECT /*+ PARA
好的,我希望以前没有问过这个问题,因为在搜索中很难找到。 我查看了 F95 手册,但仍然觉得这很模糊: For the simple case of: DO i=0,99 END DO 我正
我有一个 C-shell 脚本,其中有一个名为 $hosts_string 的变量,格式为: host1,host2,...,hostN 我还有一个名为 $chrs_string 的变量,其形式为:
是否可以从由gnu parallel产生的脚本的多次运行中调用gnu parallel? 我有一个python脚本,可以运行100个顺序顺序迭代,并且在每次迭代中的某处,并行计算4个值(使用gnu p
我想在几个输入上运行几个长时间运行的进程。例如。: solver_a problem_1 solver_b problem_1 ... solver_b problem_18 solver_c pro
TParallel.&For 和 TParallel.For 之间有区别吗? 两者都可以在 Delphi 10 Seattle 中编译。那么我应该坚持哪一个呢? 最佳答案 TParallel.&For
我第一次使用 julia 进行并行计算.我有点头疼。所以假设我开始 julia如下:julia -p 4 .然后我为所有处理器声明 a 函数,然后将它与 pmap 一起使用还有@parallel fo
关闭。这个问题是off-topic .它目前不接受答案。 想改善这个问题吗? Update the question所以它是 on-topic对于堆栈溢出。 10年前关闭。 Improve this
我有一堆相互排斥的方法,因此可以并行运行。有这样做的好方法吗?到目前为止,我有以下两种实现方式,但我不确定是否应该选择其中一种。 使用 Parallel.For : Parallel.For(0, 2
我对并行运行脚本很感兴趣,并且我已经开始查看 GNU 并行工具,但是我遇到了一些麻烦。我的脚本 doSomething 有 3 个参数,我想在参数的不同值上并行运行脚本。我该怎么做? 我试过:para
我需要在多核(和多线程)机器上运行多个作业。我正在使用 GNU Parallel utility跨核心分配作业以加速任务。要执行的命令在名为“命令”的文件中可用。我使用以下命令运行 GNU Paral
我正在尝试使用如下两个输入运行 Python 脚本。我得到了大约 300 个这两个输入,所以我想知道是否有人可以建议如何并行运行它们。 单次运行看起来像: python stable.py KOG_1
每天我都必须更新一堆存储库,并在其中一些中执行另一个命令(来自 CARTON,Perl 模块依赖管理器)。我总是使用循环来执行此操作,但我想与 并行执行GNU 并行 如果可能,但我不太了解它的tuto
正如标题所说:@parallel 之间究竟有什么区别?和 pmap ?我的意思不是明显的一个是循环的宏,另一个适用于函数,我的意思是它们的实现究竟有什么不同,我应该如何使用这些知识在它们之间进行选择?
我有一些矩阵乘法运算。我想通过多个处理器并行执行这些操作。这可以使用 MPI(消息传递接口(interface))在高性能计算集群上完成。 同样,我可以使用多个辅助角色在云中进行一些并行化吗?有什么办
joblib模块提供了一个简单的帮助程序类,以使用多处理并行编写循环的循环。 这段代码使用列表推导来完成这项工作: import time from math import sqrt from job
我的问题是这样的one .但我想做一些不同的事情... 例如,在我的并行区域内,我想在 4 个线程上运行我的代码。当每个线程进入 for 循环时,我想在 8 个线程上运行我的代码。像 #pramga
我正在尝试使用 ipython 并行库中的并行计算。但是我对此知之甚少,而且我发现很难从对并行计算一无所知的人那里阅读该文档。 有趣的是,我发现的所有教程都只是重复使用文档中的示例,并使用相同的解释,
我的项目结构看起来像 Root + subproj1 + subproj2 在每个子项目中定义了自己的任务 run(){}。 我想要做的是从 Root 项目的运行任务并行运行 :subpro
我有一个 Foo ID 的列表。我需要为每个 ID 调用一个存储过程。 例如 Guid[] siteIds = ...; // typically contains 100 to 300 elemen
我是一名优秀的程序员,十分优秀!