gpt4 book ai didi

c - 多个节点上的 MPI_Bcast 错误

转载 作者:太空狗 更新时间:2023-10-29 12:16:15 25 4
gpt4 key购买 nike

背景:我正在编写 I/O 系统调用的 MPI 版本,它基于 collfs项目。

代码在单个节点上的多个处理器上运行无误。

但是,在多个节点上运行会导致段错误...具有 2 个进程的错误消息,每个节点 1 个进程如下:

$ qsub test.sub
$ cat test.e291810
0: pasc_open(./libSDL.so, 0, 0)
1: pasc_open(./libSDL.so, 0, 0)
1: mptr[0]=0 mptr[len-1]=0
1: MPI_Bcast(mptr=eed11000, len=435104, MPI_BYTE, 0, MPI_COMM_WORLD)
0: mptr[0]=127 mptr[len-1]=0
0: MPI_Bcast(mptr=eeb11000, len=435104, MPI_BYTE, 0, MPI_COMM_WORLD)
_pmiu_daemon(SIGCHLD): [NID 00632] [c3-0c0s14n0] [Sun May 18 13:10:30 2014] PE RANK 0 exit signal Segmentation fault
[NID 00632] 2014-05-18 13:10:30 Apid 8283706: initiated application termination

The function where the error occurs is the following:

static int nextfd = BASE_FD;
#define next_fd() (nextfd++)

int pasc_open(const char *pathname, int flags, mode_t mode)
{
int rank;
int err;

if(!init)
return ((pasc_open_fp) def.open)(pathname, flags, mode);

if(MPI_Comm_rank(MPI_COMM_WORLD, &rank) != MPI_SUCCESS)
return -1;
dprintf("%d: %s(%s, %x, %x)\n", rank, __FUNCTION__, pathname, flags, mode);

/* Handle just read-only access for now. */
if(flags == O_RDONLY || flags == (O_RDONLY | O_CLOEXEC)) {
int fd, len, xlen, mptr_is_null;
void *mptr;
struct mpi_buf { int len, en; } buf;
struct file_entry *file;

if(rank == 0) {
len = -1;
fd = ((pasc_open_fp) def.open)(pathname, flags, mode);
/* Call stat to get file size and check for errors */
if(fd >= 0) {
struct stat st;
if(fstat(fd, &st) >= 0)
len = st.st_size;
else
((pasc_close_fp) def.close)(fd);
}
/* Record them */
buf.len = len;
buf.en = errno;
}
/* Propagate file size and errno */
if(MPI_Bcast(&buf, 2, MPI_INT, 0, MPI_COMM_WORLD) != MPI_SUCCESS)
return -1;
len = buf.len;
if(len < 0) {
dprintf("error opening file, len < 0");
return -1;
}
/* Get the page-aligned size */
xlen = page_extend(len);
/* `mmap` the file into memory */
if(rank == 0) {
mptr = ((pasc_mmap_fp) def.mmap)(0, xlen, PROT_READ, MAP_PRIVATE,
fd, 0);
} else {
fd = next_fd();
mptr = ((pasc_mmap_fp) def.mmap)(0, xlen, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, fd, 0);
}
((pasc_lseek_fp) def.lseek)(fd, 0, SEEK_SET);
/* Ensure success on all aux. processes */
if(rank != 0)
mptr_is_null = !mptr;
MPI_Allreduce(MPI_IN_PLACE, &mptr_is_null, 1, MPI_INT, MPI_LAND,
MPI_COMM_WORLD);
if(mptr_is_null) {
if(mptr)
((pasc_munmap_fp) def.munmap)(mptr, xlen);
dprintf("%d: error: mmap/malloc error\n", rank);
return -1;
}
dprintf("%d: mptr[0]=%d mptr[len-1]=%d\n", rank, ((char*)mptr)[0], ((char*)mptr)[len-1]);
/* Propagate file contents */
dprintf("%d: MPI_Bcast(mptr=%x, len=%d, MPI_BYTE, 0, MPI_COMM_WORLD)\n",
rank, mptr, len);
if(MPI_Bcast(mptr, len, MPI_BYTE, 0, MPI_COMM_WORLD) != MPI_SUCCESS)
return -1;
if(rank != 0)
fd = next_fd();
/* Register the file in the linked list */
file = malloc(sizeof(struct file_entry));
file->fd = fd;
file->refcnt = 1;
strncpy(file->fn, pathname, PASC_FNMAX);
file->mptr = mptr;
file->len = len;
file->xlen = xlen;
file->offset = 0;
/* Reverse stack */
file->next = open_files;
open_files = file;
return fd;

}
/* Fall back to independent access */
return ((pasc_open_fp) def.open)(pathname, flags, mode);
}

错误发生在最后的 MPI_Bcast 调用中。我不知道为什么会这样:我可以很好地取消引用它复制的内存。

我在运行 SUSE Linux x86_64 的自定义 Cray XC30 机器上使用 MPICH。

谢谢!


编辑:我尝试用 MPI_Send/MPI_Recv 对替换 MPI_Bcast 调用,结果是一样的。

最佳答案

出于性能原因,Cray MPI 实现可能有一些神奇之处。在不了解内部结构的情况下,大部分答案都是猜测。

节点间通信可能不使用网络堆栈,依赖于某种共享内存通信。当您尝试通过网络堆栈发送 mmap-ed 缓冲区时,某处出现问题 - DMA 引擎(我在这里疯狂猜测)无法处理这种情况。

您可以尝试对 mmaped 缓冲区进行页锁定 - 也许 mlock 可以正常工作。如果失败,则继续将数据复制到 malloced 缓冲区中。

关于c - 多个节点上的 MPI_Bcast 错误,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/23722165/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com