gpt4 book ai didi

c - #pragma omp parallel for 降低了速度而不是提高速度 - 格子玻尔兹曼(Lattice Boltzmann)代码中

转载 作者:行者123 更新时间:2023-11-30 17:46:08 25 4
gpt4 key购买 nike

我是 OpenMP 新手,我正在尝试并行化此循环:

/* Collision step of a lattice-Boltzmann update (fragment of a larger function).
** For every cell that is not an obstacle it:
**   1. sums the NSPEEDS values in tmp_cells to get the local density,
**   2. derives the x and y velocity components from the directional values,
**   3. builds the equilibrium values d_equ[] from the weights w0/w1/w2,
**   4. writes the relaxed result into cells, using params.omega.
** Cells are addressed with the flat index ii*params.nx + jj, where ii runs
** over params.ny and jj over params.nx. */
int ii,jj,kk;                 /* generic counters */
const double c_sq = 1.0/3.0; /* square of speed of sound */
const double w0 = 4.0/9.0; /* weighting factor */
const double w1 = 1.0/9.0; /* weighting factor */
const double w2 = 1.0/36.0; /* weighting factor */
double u_x,u_y; /* av. velocities in x and y directions */
/* u[0] is never written or read in this fragment; only u[1]..u[8] are used */
double u[NSPEEDS]; /* directional velocities */
double d_equ[NSPEEDS]; /* equilibrium densities */
double u_sq; /* squared velocity */
double local_density; /* sum of densities in a particular cell */

/* loop over the cells in the grid
** NB the collision step is called after
** the propagate step and so values of interest
** are in the scratch-space grid */
/* NOTE(review): the commented-out pragma below privatizes only ii, jj, kk
** and d_equ. local_density, u_x, u_y, u_sq and u[] are declared outside the
** loop and written on every iteration, so with the pragma enabled they
** would be shared between threads under OpenMP's default data-sharing
** rules, i.e. a data race. */
//#pragma omp parallel for private (ii, jj, kk, d_equ) shared (cells, tmp_cells)
for(ii=0;ii<params.ny;ii++) {
for(jj=0;jj<params.nx;jj++) {
/* don't consider occupied cells */
if(!obstacles[ii*params.nx + jj]) {
/* compute local density total */
local_density = 0.0;
for(kk=0;kk<NSPEEDS;kk++) {
local_density += tmp_cells[ii*params.nx + jj].speeds[kk];
}
/* compute x velocity component */
/* directions with an eastward part (1, 5, 8) minus those with a
** westward part (3, 6, 7), divided by the local density */
u_x = (tmp_cells[ii*params.nx + jj].speeds[1] +
tmp_cells[ii*params.nx + jj].speeds[5] +
tmp_cells[ii*params.nx + jj].speeds[8]
- (tmp_cells[ii*params.nx + jj].speeds[3] +
tmp_cells[ii*params.nx + jj].speeds[6] +
tmp_cells[ii*params.nx + jj].speeds[7]))
/ local_density;
/* compute y velocity component */
/* directions with a northward part (2, 5, 6) minus those with a
** southward part (4, 7, 8), divided by the local density */
u_y = (tmp_cells[ii*params.nx + jj].speeds[2] +
tmp_cells[ii*params.nx + jj].speeds[5] +
tmp_cells[ii*params.nx + jj].speeds[6]
- (tmp_cells[ii*params.nx + jj].speeds[4] +
tmp_cells[ii*params.nx + jj].speeds[7] +
tmp_cells[ii*params.nx + jj].speeds[8]))
/ local_density;
/* velocity squared */
u_sq = u_x * u_x + u_y * u_y;
/* directional velocity components */
u[1] = u_x; /* east */
u[2] = u_y; /* north */
u[3] = - u_x; /* west */
u[4] = - u_y; /* south */
u[5] = u_x + u_y; /* north-east */
u[6] = - u_x + u_y; /* north-west */
u[7] = - u_x - u_y; /* south-west */
u[8] = u_x - u_y; /* south-east */
/* equilibrium densities */
/* zero velocity density: weight w0 */
d_equ[0] = w0 * local_density * (1.0 - u_sq / (2.0 * c_sq));
/* axis speeds: weight w1 */
d_equ[1] = w1 * local_density * (1.0 + u[1] / c_sq
+ (u[1] * u[1]) / (2.0 * c_sq * c_sq)
- u_sq / (2.0 * c_sq));
d_equ[2] = w1 * local_density * (1.0 + u[2] / c_sq
+ (u[2] * u[2]) / (2.0 * c_sq * c_sq)
- u_sq / (2.0 * c_sq));
d_equ[3] = w1 * local_density * (1.0 + u[3] / c_sq
+ (u[3] * u[3]) / (2.0 * c_sq * c_sq)
- u_sq / (2.0 * c_sq));
d_equ[4] = w1 * local_density * (1.0 + u[4] / c_sq
+ (u[4] * u[4]) / (2.0 * c_sq * c_sq)
- u_sq / (2.0 * c_sq));
/* diagonal speeds: weight w2 */
d_equ[5] = w2 * local_density * (1.0 + u[5] / c_sq
+ (u[5] * u[5]) / (2.0 * c_sq * c_sq)
- u_sq / (2.0 * c_sq));
d_equ[6] = w2 * local_density * (1.0 + u[6] / c_sq
+ (u[6] * u[6]) / (2.0 * c_sq * c_sq)
- u_sq / (2.0 * c_sq));
d_equ[7] = w2 * local_density * (1.0 + u[7] / c_sq
+ (u[7] * u[7]) / (2.0 * c_sq * c_sq)
- u_sq / (2.0 * c_sq));
d_equ[8] = w2 * local_density * (1.0 + u[8] / c_sq
+ (u[8] * u[8]) / (2.0 * c_sq * c_sq)
- u_sq / (2.0 * c_sq));
/* relaxation step */
/* new value = old value + omega * (equilibrium - old value);
** read from tmp_cells, written to cells */
for(kk=0;kk<NSPEEDS;kk++) {
cells[ii*params.nx + jj].speeds[kk] = (tmp_cells[ii*params.nx + jj].speeds[kk]
+ params.omega *
(d_equ[kk] - tmp_cells[ii*params.nx + jj].speeds[kk]));
}
}
}
}

对于 300x200 的 d2q9 LB 立方体,params.nx = 300、params.ny = 200。我注释掉的那条 pragma 语句只会让程序变慢而不是变快,并且还会使雷诺数的结果出错。为了避免可能的伪共享(false sharing),我尝试把两层 for 循环合并成一层,改成如下形式:

/* Second attempt (elided fragment): the two nested loops are replaced by a
** single loop over the flat cell index 0 .. params.nx*params.ny - 1. */
c=params.nx*params.ny;
/* NOTE(review): this directive is `omp for` without `parallel`. Unless the
** fragment sits inside an enclosing `#pragma omp parallel` region (not
** visible here), the loop is executed by a single thread. */
#pragma omp for private (ii,jj,kk,d_equ) shared (cells, tmp_cells)
for(ii=0;ii<c;ii++) {
/* only occupied (obstacle) cells are handled in this branch */
if(obstacles[ii]) {
/* called after propagate, so taking values from scratch space
** mirroring, and writing into main grid */
/* in the lines shown, a direction receives the value of its opposite
** direction: 1 <- 3, 2 <- 4 */
cells[ii].speeds[1] = tmp_cells[ii].speeds[3];
cells[ii].speeds[2] = tmp_cells[ii].speeds[4];
......
....
}

但这条 pragma 仍然没能让程序变快……不过在这种写法下我得到了正确的结果。从上周六起我就一直在研究这个问题,但始终没有取得任何进展,在网上也没有找到太多有帮助的资料。非常感谢任何帮助……

最佳答案

这是我的疯狂猜测:

  • 您的 #pragma 指令(第二个代码片段中的 #pragma omp for)可能缺少 parallel 关键字,从而导致循环没有被并行化。
  • 您在代码开头声明的大多数变量都没有定义为私有(private)变量,因此它们被隐式定义为共享变量。这使得它们在第一种情况下容易受到竞争条件的影响(但在第二种情况下则不会,因为您的代码是按顺序运行的)。您应该将它们定义为私有(private)或(更好)在 for 循环内声明它们,从而使它们成为私有(private):
/* Suggested shape (illustrative, elided): one loop over the flat cell index,
** with the per-cell scratch variables declared inside the loop body so that
** every iteration, and therefore every thread, works on its own copy.
** The loop counter i is assumed to be declared by the surrounding code. */
for(i = 0; i < params.nx * params.ny ; i++) {
double u[NSPEEDS];
double d_equ[NSPEEDS];
...
int kk;
}

关于c - #pragma omp parallel for 降低了速度而不是提高速度(格子玻尔兹曼代码中),我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/19439048/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com