gpt4 book ai didi

c++ - mpirun 无法找到指定的可执行文件

转载 作者:太空宇宙 更新时间:2023-11-04 13:41:28 27 4
gpt4 key购买 nike

我在使用 OpenMPI 编译此代码时遇到问题。由于我对使用 OpenMPI 的概念有点陌生,如果你们中的任何人能给我提示以指出此处的错误,那就太好了。编译工作正常,但如果我运行代码,我会收到此消息:

mpirun was unable to find the specified executable file, and therefore
did not launch the job. This error was first reported for process
rank 0; it may have occurred for other processes as well.

NOTE: A common cause for this error is misspelling a mpirun command
line parameter option (remember that mpirun interprets the first
unrecognized command line token as the executable).

我正在编译使用:

mpic++ matmult.cpp -o matmult

并运行它:

mpirun -n 2 matmult

...这是使用的代码:

#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#define MASTER 0
#define FROM_MASTER 1
#define FROM_WORKER 2

// ---------------------------------------------------------------------------
// Allocate a zero-initialised matrix A[row][col] of floats.
//
// All elements live in one contiguous block, so they can be accessed as
// - A[r][c]           (two-dimensional view)
// - A[0][r*col + c]   (flat view over the whole block)
//
// Returns NULL when row or col is not positive, when row*col does not fit in
// size_t, or when an allocation fails.
// The caller owns the result and releases it with: free(A[0]); free(A);

float **alloc_mat(int row, int col)
{
    if (row <= 0 || col <= 0)
        return NULL;

    size_t nrows = (size_t)row;
    size_t ncols = (size_t)col;

    // Guard the element count; calloc itself checks count * sizeof(float).
    if (nrows > (size_t)-1 / ncols)
        return NULL;

    float **row_ptrs = (float **)calloc(nrows, sizeof(float *)); // pointer on rows
    float *cells = (float *)calloc(nrows * ncols, sizeof(float)); // all matrix elements
    if (row_ptrs == NULL || cells == NULL) {
        // free(NULL) is a no-op, so both can be released unconditionally.
        free(cells);
        free(row_ptrs);
        return NULL;
    }

    // Point every row at its slice of the contiguous element block.
    for (size_t r = 0; r < nrows; r++)
        row_ptrs[r] = cells + r * ncols;

    return row_ptrs;
}

// ---------------------------------------------------------------------------
// Fill all row*col elements of A with pseudo-random whole numbers in [0..9],
// taken from rand(). The elements are written through the flat view A[0][...].

void init_mat(float **A, int row, int col)
{
    const int total = row * col;
    int filled = 0;

    while (filled < total) {
        A[0][filled] = (float)(rand() % 10);
        ++filled;
    }
}

// ---------------------------------------------------------------------------
// DEBUG FUNCTION: print all elements of the row x col matrix A to stdout.
// Output is a "Matrix <tag>:" heading followed by one line per matrix row.
// tag is only read, so it is const-qualified; this lets callers pass string
// literals without a const-discarding conversion.

void print_mat(float **A, int row, int col, const char *tag)
{
    printf("Matrix %s:\n", tag);
    for (int i = 0; i < row; i++) {
        for (int j = 0; j < col; j++)
            printf("%6.1f ", A[i][j]);
        printf("\n");
    }
}

// ---------------------------------------------------------------------------

int main(int argc, char *argv[]) {
int numtasks;
int taskid;
int numworkers;
int source;
int dest;
int mtype;
int rows;
int averow, extra, offset;
double starttime, endtime;
float **A, **B, **C; // matrices
int d1, d2, d3; // dimensions of matrices
int i, j, k, rc; // loop variables


MPI_Status status;
MPI_Init(&argc,&argv);
MPI_Comm_rank(MPI_COMM_WORLD,&taskid);
MPI_Comm_size(MPI_COMM_WORLD,&numtasks);

if (argc != 4) {
printf ("Matrix multiplication: C = A x B\n");
printf ("Usage: %s <NumRowA> <NumColA> <NumColB>\n", argv[0]);
return 0;
}

if (numtasks < 2 ) {
printf("Need at least two MPI tasks. Quitting...\n");
MPI_Abort(MPI_COMM_WORLD,rc);
exit(1);
}

/* read user input */
d1 = atoi(argv[1]); // rows of A and C d1
d2 = atoi(argv[2]); // cols of A and rows of B d2
d3 = atoi(argv[3]); // cols of B and C d3

printf("Matrix sizes C[%d][%d] = A[%d][%d] x B[%d][%d]\n", d1, d3, d1, d2, d2, d3);

/* prepare matrices */
A = alloc_mat(d1, d2);
init_mat(A, d1, d2);
B = alloc_mat(d2, d3);
init_mat(B, d2, d3);
C = alloc_mat(d1, d3);


/* Code für den Manager */
if (taskid == MASTER) {
/*printf("matrix multiplikation withMPI\n");
printf("initializing arrays ...\n");
for (i=0; i<d1; i++)
for (j=0; j<d2; j++)
A[i][j]=i+j;


for (i=0; i<d2; i++)
for (j=0; j<d3; j++)
B[i][j]=i*j;*/



/* Matrizen versenden */
averow = d1/numworkers;
extra = d1%numworkers;
offset = 0;
mtype = FROM_MASTER;

starttime=MPI_Wtime();

for (dest=1;dest<=numworkers;dest++) {
rows = (dest <= extra) ? averow+1 :averow;
printf("Sending %drows to task %doffset=%d\n",rows,dest,offset);
MPI_Send(&offset, 1, MPI_INT,dest,mtype, MPI_COMM_WORLD);
MPI_Send(&rows, 1, MPI_INT,dest,mtype, MPI_COMM_WORLD);
MPI_Send(&A[offset][0],rows*d2, MPI_DOUBLE,dest,mtype, MPI_COMM_WORLD);
MPI_Send(&B, d2*d3, MPI_DOUBLE,dest,mtype, MPI_COMM_WORLD);
offset =offset+rows;
}

/* Ergebnisse empfangen */
mtype = FROM_WORKER;

for (i=1; i<=numworkers; i++) {
source = i;
MPI_Recv(&offset, 1, MPI_INT,source,mtype, MPI_COMM_WORLD, &status);
MPI_Recv(&rows, 1, MPI_INT,source,mtype, MPI_COMM_WORLD, &status);
MPI_Recv(&C[offset][0],rows*d3,
MPI_DOUBLE,source,mtype,MPI_COMM_WORLD,&status);
printf("Received results from task %d\n",source);
}

endtime=MPI_Wtime();
printf("\nIt took %fseconds.\n",endtime-starttime);
}

/* Code für die Arbeiter */

if (taskid > MASTER) {
mtype = FROM_MASTER;

MPI_Recv(&offset, 1, MPI_INT, MASTER,mtype, MPI_COMM_WORLD, &status);
MPI_Recv(&d1, 1, MPI_INT, MASTER,mtype, MPI_COMM_WORLD, &status);
MPI_Recv(&A,rows*d2, MPI_DOUBLE, MASTER,mtype, MPI_COMM_WORLD, &status);
MPI_Recv(&B, d2*d3, MPI_DOUBLE, MASTER,mtype, MPI_COMM_WORLD, &status);

/* print user instruction */


// no initialisation of C, because it gets filled by matmult

/* serial version of matmult */
printf("Perform matrix multiplication...\n");
for (i = 0; i < d1; i++)
for (j = 0; j < d3; j++)
for (k = 0; k < d2; k++)
C[i][j] += A[i][k] * B[k][j];

mtype = FROM_WORKER;
MPI_Send(&offset, 1, MPI_INT, MASTER,mtype, MPI_COMM_WORLD);
MPI_Send(&d1, 1, MPI_INT, MASTER,mtype, MPI_COMM_WORLD);
MPI_Send(&C,rows*d3, MPI_DOUBLE, MASTER,mtype, MPI_COMM_WORLD);

}

MPI_Finalize();


/* test output
print_mat(A, d1, d2, "A");
print_mat(B, d2, d3, "B");
print_mat(C, d1, d3, "C"); */

printf ("\nDone.\n");


//return 0;
}


运行结果 mpirun matmult (默认设置,单进程):

mpirun has exited due to process rank 0 with PID 77202 on node juliuss-mbp-3 exiting improperly. There are three reasons this could occur:

  1. this process did not call "init" before exiting, but others in the job did. This can cause a job to hang indefinitely while it waits for all processes to call "init". By rule, if one process calls "init", then ALL processes must call "init" prior to termination.

  2. this process called "init", but exited without calling "finalize". By rule, all processes that call "init" MUST call "finalize" prior to exiting or it will be considered an "abnormal termination"

  3. this process called "MPI_Abort" or "orte_abort" and the mca parameter orte_create_session_dirs is set to false. In this case, the run-time cannot detect that the abort call was an abnormal termination. Hence, the only error message you will receive is this one. This may have caused other processes in the application to be terminated by signals sent by mpirun (as reported here). You can avoid this message by specifying -quiet on the mpirun command line.

最佳答案

次要问题(仍然很重要):

您的程序需要 4 个参数,即程序名加上传入的 3 个参数,这一点来自这段代码:

// Quoted from the question's main(): this check sits after MPI_Init(), and the
// branch returns without calling MPI_Finalize() or MPI_Abort().
if (argc != 4) {
printf ("Matrix multiplication: C = A x B\n");
printf ("Usage: %s <NumRowA> <NumColA> <NumColB>\n", argv[0]);
return 0;
}

由于此分支直接返回 0,而没有先调用 MPI_Abort(...) 或 MPI_Finalize(),您会收到如下 MPI 错误:

mpirun has exited due to process rank 0 with PID 77202 on node juliuss-mbp-3 exiting improperly.

在 return 0 之前添加 MPI_Abort(MPI_COMM_WORLD,rc);,我相信您的程序就不会再出现这个错误了。

// Proposed fix: terminate the MPI job explicitly before leaving main().
if (argc != 4) {
printf ("Matrix multiplication: C = A x B\n");
printf ("Usage: %s <NumRowA> <NumColA> <NumColB>\n", argv[0]);
// NOTE(review): rc is declared in the question's main() but never assigned
// there; give it a defined error code (e.g. rc = 1) before this call.
MPI_Abort(MPI_COMM_WORLD,rc);
return 0;
}


主要问题:

但是我们应该解决问题的主要原因,即:当您运行 mpirun -np 2 matmult 或 mpirun matmult 时,您需要向程序传递 3 个参数。应该采用这种格式:

mpirun -np 2 matmult parameter1 parameter2 parameter3

mpirun matmult parameter1 parameter2 parameter3

根据您的代码,参数(arguments)应该是:

参数 1 = A 和 C 的行数
参数 2 = A 的列数和 B 的行数
参数 3 = B 和 C 的列数

您的运行命令可能如下所示:

mpirun -np 2 matmult 2 2 2

关于c++ - mpirun 无法找到指定的可执行文件,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/27539807/

27 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com