gpt4 book ai didi

c++ - OpenMP 并行代码运行速度较慢

转载 作者:太空宇宙 更新时间:2023-11-04 04:48:51 25 4
gpt4 key购买 nike

我看过很多这样的帖子,尽管阅读了大量内容,但我似乎无法在 OpenMP 中正确并行化以下代码,因为串行版本目前运行速度比这快得多:

static double red_black_parallel_for_step(simulation* simObj, double stepSize, double* red, double* black){
double tmp = 0.0;
double avg = 0.0;
double old = 0.0;
double max = -HUGE_VAL;
#pragma omp parallel \
shared(black, red, max) \
firstprivate(old, avg, tmp)
{
double priv_max = -HUGE_VAL;
#pragma omp for
for(unsigned int j = 0; j < (*simObj).NY+2; j++){
for(unsigned int i = 0; i < (int)floor((double)((*simObj).NX+2.0)/2.0); i++){
for(unsigned int k = 1; k < (*simObj).NZ; k++){
if(red[IX3] == HUGE_VAL) continue;
old = red[IX3];
avg = 0.0;
const int x1 = ( black[IX3+IX3_XR1STEP] != HUGE_VAL );
const int x2 = ( black[IX3+IX3_XR2STEP] != HUGE_VAL );
const int y1 = ( black[IX3+IX3_YSTEP] != HUGE_VAL );
const int y2 = ( black[IX3-IX3_YSTEP] != HUGE_VAL );
const int z1 = ( black[IX3+IX3_ZSTEP] != HUGE_VAL );
const int z2 = ( black[IX3-IX3_ZSTEP] != HUGE_VAL );
if (x1) avg += black[IX3+IX3_XR1STEP];
if (x2) avg += black[IX3+IX3_XR2STEP];
if (y1) avg += black[IX3+IX3_YSTEP];
if (y2) avg += black[IX3-IX3_YSTEP];
if (z1) avg += black[IX3+IX3_ZSTEP];
if (z2) avg += black[IX3-IX3_ZSTEP];
avg /= (double) (x1+x2+y1+y2+z1+z2);
red[IX3] = old + stepSize * (avg - old);
tmp = fabs(old - red[IX3]) / fabs(old);
if( tmp > priv_max ) priv_max = tmp;
}
}
}
#pragma omp flush (max)
if ( priv_max > max ) {
#pragma omp critical
{
if ( priv_max > max ) max = priv_max;
}
}
}
#pragma omp parallel \
shared(black, red, max) \
firstprivate(old, avg, tmp)
{
double priv_max = -HUGE_VAL;
#pragma omp for
for(unsigned int j = 0; j < (*simObj).NY+2; j++){
for(unsigned int i = 0; i < (int)floor((double)((*simObj).NX+2)/2.0); i++){
for (unsigned int k = 1; k < (*simObj).NZ; k++ ){
if ( black[IX3] == HUGE_VAL ) continue;
old = black[IX3];
avg = 0.0;
const int x1 = ( red[IX3+IX3_XB1STEP] != HUGE_VAL );
const int x2 = ( red[IX3+IX3_XB2STEP] != HUGE_VAL );
const int y1 = ( red[IX3+IX3_YSTEP] != HUGE_VAL );
const int y2 = ( red[IX3-IX3_YSTEP] != HUGE_VAL );
const int z1 = ( red[IX3+IX3_ZSTEP] != HUGE_VAL );
const int z2 = ( red[IX3-IX3_ZSTEP] != HUGE_VAL );
if (x1) avg += red[IX3+IX3_XB1STEP];
if (x2) avg += red[IX3+IX3_XB2STEP];
if (y1) avg += red[IX3+IX3_YSTEP];
if (y2) avg += red[IX3-IX3_YSTEP];
if (z1) avg += red[IX3+IX3_ZSTEP];
if (z2) avg += red[IX3-IX3_ZSTEP];
avg /= (double) (x1+x2+y1+y2+z1+z2);
black[IX3] = old + stepSize * (avg - old);
tmp = fabs(old - black[IX3]) / fabs(old);
if( tmp > priv_max ) priv_max = tmp;
}
}
}
#pragma omp flush (max)
if ( priv_max > max ) {
#pragma omp critical
{
if ( priv_max > max ) max = priv_max;
}
}
}
return max;
}

复杂的因素是我需要跟踪红色/黑色迭代之间的最大相对变化 (max)。任何帮助将非常感激。

最佳答案

仅在比较之后尝试刷新,并且仅在关键 block 内:

/* not here: #pragma omp flush (max) */
if ( priv_max > max ) { // this should filter out most of the flush operations
#pragma omp critical
{
if ( priv_max > max ) max = priv_max; // now flush; this operation will be exclusive/"critical"
#pragma omp flush (max)
}
}

关于c++ - OpenMP 并行代码运行速度较慢,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/18070146/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com