multithreading - OpenMP:增加线程数时的明显竞争状态-6ren

multithreading - OpenMP:增加线程数时的明显竞争状态

转载作者：行者123 更新时间：2023-12-03 13:02:09

我有下面这段代码(省略了位于并行区域之外、篇幅很长的变量初始化部分)。我在本地计算机(4个物理核心，8个线程)上对其进行测试，并将速度和结果与串行版本进行比较。当我使用4个以上的线程运行代码时，有时似乎会出现竞争条件：并行版本与串行版本的最终输出(在并行区域之后写入磁盘的变量T)不一致。当我使用4个或更少的线程运行时，一切正常，两个版本的迭代次数相同，最终结果也相同。
根据文档，每个OMP DO块的末尾都有一个隐式同步(除非您指定nowait)。

! Iterates an explicit five-point stencil update of T_old into T inside a
! single OpenMP parallel region, until the largest per-cell delta of a sweep
! is no longer in the open interval (errtol, 10).
!
! NOTE: this is the version that shows the race described in the surrounding
! text; see the comment on the re-initialisation of `change` below.
program test

! Interior grid size; the arrays carry one extra layer of cells on each side.
integer :: nx=500,ny=500
integer :: i,j,iteration

! T receives the updated values of a sweep, T_old holds the previous ones.
double precision, allocatable, dimension(:,:) :: T, T_old
double precision :: dx,dy,dt
! error   : value tested by the DO WHILE condition (copy of the last `change`)
! change  : largest delta found in the current sweep (reduction variable)
! delta   : per-cell increment, private to each thread
double precision :: error,change,delta,errtol

allocate(T(0:nx+1,0:ny+1))
allocate(T_old(0:nx+1,0:ny+1))

! initialisation of T, T_old, dt, dx, dy and errtol

! Start with a value that lets the loop be entered (assuming errtol < 1).
error=1.0d0
iteration=0

! T, T_old, dt, dx, dy, errtol, nx and ny are not listed in any clause and are
! therefore shared by default.
!$OMP PARALLEL SHARED(error,iteration,change) private(i,j,delta)

! Every thread executes this loop and evaluates the condition on the shared
! `error`.
do while (error.gt.errtol.and.error.lt.10.0d0)
  ! RACE: `change` is shared, yet this store is executed by every thread with
  ! no synchronisation before the reduction loop below.  A thread arriving
  ! late can zero `change` after another thread has already contributed its
  ! partial maximum, which is the problem the surrounding text reports.
  change=0.0d0
!$OMP DO schedule(static) reduction(max:change)
  do j=1,ny
    do i=1,nx
      ! Second differences of T_old in i and j, scaled by dt/dx**2 and dt/dy**2.
      delta=dt*( (T_old(i+1,j)-2.0d0*T_old(i,j)+T_old(i-1,j))/dx**2 + &
                  (T_old(i,j+1)-2.0d0*T_old(i,j)+T_old(i,j-1))/dy**2  )
      T(i,j)=T_old(i,j)+delta
      ! NOTE(review): the signed delta is used here, not abs(delta), so
      ! negative increments never raise `change` -- confirm this is intended.
      change=max(delta,change)
    enddo
  enddo
!$OMP END DO
! implicit barrier (implies FLUSH) at end of parallel do region (unless you specify nowait clause)


!$OMP SINGLE
  ! Publish the result of the sweep to the loop condition.
  error=change
! just one thread updates iteration
  iteration=iteration+1
  ! write(*,*) iteration, error
!$OMP END SINGLE

!$OMP DO schedule(static)
  ! update T_old
  do j=1,ny
    do i=1,nx
      T_old(i,j)=T(i,j)
    enddo
  enddo
!$OMP END DO
enddo
!$OMP END PARALLEL

! write T to disk

deallocate(T,T_old)

end program test

编辑:以下是修正后的代码，参见@Gilles的评论:

! Iterates an explicit five-point stencil update of T_old into T inside a
! single OpenMP parallel region.  A sweep is repeated while the largest
! absolute per-cell change lies strictly between errtol and max_error.
program test

implicit none

! Working precision: identical to DOUBLE PRECISION.
integer, parameter :: dp = kind(1.0d0)
! Interior grid size; the arrays carry one extra layer of cells on each side.
integer, parameter :: nx = 500, ny = 500
! The iteration is abandoned once the largest change reaches this value.
real(dp), parameter :: max_error = 10.0_dp

integer :: i, j, iteration

! T receives the updated values of a sweep, T_old holds the previous ones.
real(dp), allocatable, dimension(:,:) :: T, T_old
real(dp) :: dx, dy, dt
! error   : value tested by the DO WHILE condition (copy of the last `change`)
! change  : largest |delta| found in the current sweep (reduction variable)
! delta   : per-cell increment, private to each thread
real(dp) :: error, change, delta, errtol

allocate(T(0:nx+1,0:ny+1))
allocate(T_old(0:nx+1,0:ny+1))

! initialisation of T, T_old, dt, dx, dy and errtol

error = 1.0_dp
iteration = 0
! `change` is zeroed once here, before any thread exists; inside the parallel
! region it is only ever reset by a single thread (see the SINGLE block).
change = 0.0_dp

! DEFAULT(NONE) forces every variable used in the region to be classified
! explicitly, so an accidental shared scalar is caught at compile time.
!$OMP PARALLEL DEFAULT(NONE) &
!$OMP SHARED(T,T_old,dt,dx,dy,errtol,error,iteration,change) &
!$OMP PRIVATE(i,j,delta)

! Every thread executes this loop and evaluates the condition on the shared
! `error`, which is only written inside the SINGLE block below.
do while (error > errtol .and. error < max_error)
!$OMP DO SCHEDULE(STATIC) REDUCTION(MAX:change)
  do j = 1, ny
    do i = 1, nx
      ! Second differences of T_old in i and j, scaled by dt/dx**2 and dt/dy**2.
      delta = dt*( (T_old(i+1,j) - 2.0_dp*T_old(i,j) + T_old(i-1,j))/dx**2 + &
                   (T_old(i,j+1) - 2.0_dp*T_old(i,j) + T_old(i,j-1))/dy**2 )
      T(i,j) = T_old(i,j) + delta
      ! Use the magnitude: with the signed value a sweep whose increments are
      ! all negative would leave `change` at zero and look converged.
      change = max(abs(delta), change)
    end do
  end do
!$OMP END DO
! implicit barrier (implies FLUSH) at the end of the worksharing loop, so the
! reduced value of `change` is complete before it is read below

!$OMP SINGLE
  error = change
  ! Reset for the next sweep.  Doing it here, by one thread and behind the
  ! barrier above, removes the race of every thread zeroing `change` itself.
  change = 0.0_dp
  ! just one thread updates iteration
  iteration = iteration + 1
  ! write(*,*) iteration, error
!$OMP END SINGLE

!$OMP DO SCHEDULE(STATIC)
  ! update T_old
  do j = 1, ny
    do i = 1, nx
      T_old(i,j) = T(i,j)
    end do
  end do
!$OMP END DO
end do
!$OMP END PARALLEL

! write T to disk

deallocate(T, T_old)

end program test

最佳答案

竞争条件出在DO WHILE循环中对变量change的重新初始化：每个线程都会在没有任何同步的情况下把共享变量change置零，较慢的线程可能因此覆盖其他线程已经归约进去的结果。解决方法是在并行区域之外初始化change，并使用!$OMP SINGLE指令保护其在并行区域中的重置，如下所示。

! Iterates an explicit five-point stencil update of T_old into T inside a
! single OpenMP parallel region.  A sweep is repeated while the largest
! absolute per-cell change lies strictly between errtol and max_error.
program test

implicit none

! Working precision: identical to DOUBLE PRECISION.
integer, parameter :: dp = kind(1.0d0)
! Interior grid size; the arrays carry one extra layer of cells on each side.
integer, parameter :: nx = 500, ny = 500
! The iteration is abandoned once the largest change reaches this value.
real(dp), parameter :: max_error = 10.0_dp

integer :: i, j, iteration

! T receives the updated values of a sweep, T_old holds the previous ones.
real(dp), allocatable, dimension(:,:) :: T, T_old
real(dp) :: dx, dy, dt
! error   : value tested by the DO WHILE condition (copy of the last `change`)
! change  : largest |delta| found in the current sweep (reduction variable)
! delta   : per-cell increment, private to each thread
real(dp) :: error, change, delta, errtol

allocate(T(0:nx+1,0:ny+1))
allocate(T_old(0:nx+1,0:ny+1))

! initialisation of T, T_old, dt, dx, dy and errtol

error = 1.0_dp
iteration = 0
! `change` is zeroed once here, before any thread exists; inside the parallel
! region it is only ever reset by a single thread (see the SINGLE block).
change = 0.0_dp

! DEFAULT(NONE) forces every variable used in the region to be classified
! explicitly, so an accidental shared scalar is caught at compile time.
!$OMP PARALLEL DEFAULT(NONE) &
!$OMP SHARED(T,T_old,dt,dx,dy,errtol,error,iteration,change) &
!$OMP PRIVATE(i,j,delta)

! Every thread executes this loop and evaluates the condition on the shared
! `error`, which is only written inside the SINGLE block below.
do while (error > errtol .and. error < max_error)
!$OMP DO SCHEDULE(STATIC) REDUCTION(MAX:change)
  do j = 1, ny
    do i = 1, nx
      ! Second differences of T_old in i and j, scaled by dt/dx**2 and dt/dy**2.
      delta = dt*( (T_old(i+1,j) - 2.0_dp*T_old(i,j) + T_old(i-1,j))/dx**2 + &
                   (T_old(i,j+1) - 2.0_dp*T_old(i,j) + T_old(i,j-1))/dy**2 )
      T(i,j) = T_old(i,j) + delta
      ! Use the magnitude: with the signed value a sweep whose increments are
      ! all negative would leave `change` at zero and look converged.
      change = max(abs(delta), change)
    end do
  end do
!$OMP END DO
! implicit barrier (implies FLUSH) at the end of the worksharing loop, so the
! reduced value of `change` is complete before it is read below

!$OMP SINGLE
  error = change
  ! Reset for the next sweep.  Doing it here, by one thread and behind the
  ! barrier above, removes the race of every thread zeroing `change` itself.
  change = 0.0_dp
  ! just one thread updates iteration
  iteration = iteration + 1
  ! write(*,*) iteration, error
!$OMP END SINGLE

!$OMP DO SCHEDULE(STATIC)
  ! update T_old
  do j = 1, ny
    do i = 1, nx
      T_old(i,j) = T(i,j)
    end do
  end do
!$OMP END DO
end do
!$OMP END PARALLEL

! write T to disk

deallocate(T, T_old)

end program test