我正在实现红黑 SOR 的并行版本。
我想获得每个进程的最大误差的 MPI_Allreduce 部分不起作用。它永远不会改变,即使只有一个过程,它也会给出高于 2.0 的值。怎么回事??
这是代码,有趣的部分在 sor 例程的末尾。
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#include <mpi.h>
#define M 700
int numtasks, rank, inext, iprev, ista, iend;
double unew[M + 2][M + 2] = {{ 0 }};
double solution[M + 2][M + 2] = {{ 0 }};
double uold[M + 2][M + 2] = {{ 0 }};
double bufs1[M + 2];
double bufs2[M + 2];
double bufr1[M + 2];
double bufr2[M + 2];
MPI_Request ireqs1;
MPI_Request ireqs2;
MPI_Request ireqr1;
MPI_Request ireqr2;
MPI_Status istatus;
double compute_error(double solution[][M + 2], double u[][M + 2], const int m);
int sor(double unew[][M + 2], double uold[][M + 2], double solution[][M + 2], const double omega, const double tol, const int m);
void para_range(const int n1, const int n2, const int nprocs, const int irank, int * restrict ista, int * restrict iend);
void shift(const int iflg);
inline int min(const int a, const int b);
int main(int argc, char * argv[])
{
const int m = M;
int ierr = MPI_Init(&argc, &argv);
if(ierr != MPI_SUCCESS)
{
perror("MPI init failed. Terminating T800.\n");
exit(1);
}
int i, j;
const double begin = MPI_Wtime();
const double pi = 4.0 * atan(1.0);
const double h = pi / (m + 1);
MPI_Comm_size(MPI_COMM_WORLD, &numtasks);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
for(i = 0; i < m + 2; ++i)
{
uold[i][M + 1] = sin(i * h);
}
for(i = 0; i < m + 2; ++i)
{
for(j = 0; j < m + 1; ++j)
{
uold[i][j] = j * h * uold[i][M + 1];
}
}
for(i = 0; i < m + 2; ++i)
{
for(j = 0; j < m + 2; ++j)
{
solution[i][j] = sinh(j * h) * sin(i * h) / sinh(pi);
}
}
para_range(0, m, numtasks, rank, &ista, &iend);
//printf("rank %d: from %d to %d\n", rank, ista, iend);
inext = rank + 1;
iprev = rank - 1;
if(inext == numtasks)
{
inext = MPI_PROC_NULL;
}
if(iprev == -1)
{
iprev = MPI_PROC_NULL;
}
const double omega = 2.0 / ( 1.0 + sin(pi / (m + 1)) );
const double tol = 0.001;
const int iters = sor(unew, uold, solution, omega, tol, m);
const double end = MPI_Wtime();
MPI_Finalize();
if(rank == 0)
{
printf(" \n");
printf(" Omega = %f\n", omega);
printf(" It took %d iterations.\n", iters);
printf("Total time = %f\n\n\n", end - begin);
}
return 0;
}
double compute_error(double solution[][M + 2], double u[][M + 2], const int m)
{
double error = 0.0;
int i, j;
for(i = 1; i < m + 1; ++i)
{
for(j = 1; j < m + 1; ++j)
{
const double abs_diff = fabs(solution[i][j] - u[i][j]);
if(error < abs_diff)
{
error = abs_diff;
}
}
}
return error;
}
int sor(double unew[][M + 2], double uold[][M + 2], double solution[][M + 2], const double omega, const double tol, const int m)
{
int i, j;
int iters = 0;
double error = compute_error(solution, uold, m);
double error2 = error;
double temp;
while(error2 > tol)
{
shift(0);
for(i = 1; i < m + 1; ++i)
{
for(j = (i % 2) + 1; j < m + 1; j += 2)
{
temp = 0.25 * (uold[i][j - 1] + uold[i - 1][j]
+ uold[i + 1][j] + uold[i][j + 1]) - uold[i][j];
uold[i][j] += omega * temp;
}
}
shift(1);
for(i = 1; i < m + 1; ++i)
{
for(j = ((i + 1) % 2) + 1; j < m + 1; j += 2)
{
temp = 0.25 * (uold[i][j - 1] + uold[i - 1][j]
+ uold[i + 1][j] + uold[i][j + 1]) - uold[i][j];
uold[i][j] += omega * temp;
}
}
++iters;
if(iters % 20 == 0)
{
// THIS IS NOT COOL
error = compute_error(solution, uold, m);
MPI_Allreduce(&error, &error2, 1, MPI_REAL, MPI_MAX, MPI_COMM_WORLD);
//printf("%d=%f\n", rank, error2);
}
}
return iters;
}
void para_range(const int n1, const int n2, const int nprocs, const int irank, int * restrict ista, int * restrict iend)
{
const int iwork = ((n2 - n1) / nprocs) + 1;
*ista = min(irank * iwork + n1, n2 + 1);
*iend = min(*ista + iwork - 1, n2);
}
void shift(const int iflg)
{
const int is1 = ((ista + iflg) % 2) + 1;
const int is2 = ((iend + iflg) % 2) + 1;
const int ir1 = 3 - is1;
const int ir2 = 3 - is2;
int i, icnt1=0, icnt2=0;
if(rank != 0)
{
icnt1 = 0;
for(i = is1; i < M; i += 2)
{
++icnt1;
bufs1[icnt1] = unew[i][ista];
}
}
if (rank != numtasks - 1)
{
icnt2 = 0;
for(i = is2; i < M; i += 2)
{
++icnt2;
bufs2[icnt2] = unew[i][iend];
}
}
MPI_Isend(bufs1, icnt1, MPI_REAL, iprev, 1, MPI_COMM_WORLD, &ireqs1);
MPI_Isend(bufs2, icnt2, MPI_REAL, inext, 1, MPI_COMM_WORLD, &ireqs2);
MPI_Irecv(bufr1, M + 2, MPI_REAL, iprev, 1, MPI_COMM_WORLD, &ireqr1);
MPI_Irecv(bufr2, M + 2, MPI_REAL, inext, 1, MPI_COMM_WORLD, &ireqr2);
MPI_Wait(&ireqs1, &istatus);
MPI_Wait(&ireqs2, &istatus);
MPI_Wait(&ireqr1, &istatus);
MPI_Wait(&ireqr2, &istatus);
int icnt;
if(rank != 0)
{
icnt = 0;
for(i = ir1; i < M; i += 2)
{
++icnt;
unew[i][ista - 1] = bufr1[icnt];
}
}
if(rank != numtasks - 1)
{
icnt = 0;
for(i = ir2; i < M; i += 2)
{
++icnt;
unew[i][iend + 1] = bufr2[icnt];
}
}
}
inline int min(const int a, const int b)
{
if(a > b)
{
return b;
}
return a;
}
我是一名优秀的程序员,十分优秀!