gpt4 book ai didi

matrix - MPI block 矩阵乘法

转载 作者:行者123 更新时间:2023-12-04 21:16:25 24 4
gpt4 key购买 nike

我试图生成两个大小为 n 的矩阵 A&B,将它们划分为 s*s 子矩阵,并在将它们分散通过处理器后,在块矩阵之间执行乘法。我已经能够通过处理器成功生成和分散子矩阵;但是,我一直在对每个处理器的子矩阵进行乘法运算。我的代码与以下帖子中的代码(答案部分中的代码)非常相似,但我针对两个矩阵对其进行了修改:
MPI partition matrix into blocks

你能告诉我如何修改它来执行乘法吗?

我保留了相同的标签,以便于跟进。

    #include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <time.h>

#define COLSa 10
#define ROWSa 10

#define COLSb 10
#define ROWSb 10
#define s 2

int main(int argc, char **argv) {

MPI_Init(&argc, &argv);
int p, rank;
MPI_Comm_size(MPI_COMM_WORLD, &p);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);

char i;
char j;

char a[ROWSa*COLSa];
char b[ROWSb*COLSb];
char c[ROWSa*COLSb]; // c=a*b

const int NPROWS=s; /* number of rows in _decomposition_ */
const int NPCOLS=s; /* number of cols in _decomposition_ */

const int BLOCKROWSa = ROWSa/NPROWS; /* number of rows in _block_ */
const int BLOCKCOLSa = COLSa/NPCOLS; /* number of cols in _block_ */

const int BLOCKROWSb = ROWSb/NPROWS; /* number of rows in _block_ */
const int BLOCKCOLSb= COLSb/NPCOLS; /* number of cols in _block_ */

if (rank == 0) {

for (int ii=0; ii<ROWSa*COLSa; ii++) {
a[ii]=rand() %10 ;
}

for (int ii=0; ii<ROWSb*COLSb; ii++) {
b[ii]=rand() %10 ;

}
}

char BLa[BLOCKROWSa*BLOCKCOLSa];
for (int ii=0; ii<BLOCKROWSa*BLOCKCOLSa; ii++)
BLa[ii] = 0;

char BLb[BLOCKROWSb*BLOCKCOLSb];
for (int ii=0; ii<BLOCKROWSb*BLOCKCOLSb; ii++)
BLb[ii] = 0;

char BLc[BLOCKROWSa*BLOCKCOLSb];
for (int ii=0; ii<BLOCKROWSa*BLOCKCOLSb; ii++)
BLc[ii] = 0;

MPI_Datatype blocktype;
MPI_Datatype blocktype2;

MPI_Type_vector(BLOCKROWSa, BLOCKCOLSa, COLSa, MPI_CHAR, &blocktype2);
MPI_Type_vector(BLOCKROWSb, BLOCKCOLSb, COLSb, MPI_CHAR, &blocktype2);

MPI_Type_create_resized( blocktype2, 0, sizeof(char), &blocktype);
MPI_Type_commit(&blocktype);

int dispsa[NPROWS*NPCOLS];
int countsa[NPROWS*NPCOLS];
int dispsb[NPROWS*NPCOLS];
int countsb[NPROWS*NPCOLS];

//*******************************Start Time Record****************//

clock_t t;
t=clock();

for (int ii=0; ii<NPROWS; ii++) {
for (int jj=0; jj<NPCOLS; jj++) {
dispsa[ii*NPCOLS+jj] = ii*COLSa*BLOCKROWSa+jj*BLOCKCOLSa;
countsa [ii*NPCOLS+jj] = 1;
}
}

MPI_Scatterv(a, countsa, dispsa, blocktype, BLa, BLOCKROWSa*BLOCKCOLSa, MPI_CHAR, 0, MPI_COMM_WORLD);


for (int ii=0; ii<NPROWS; ii++) {
for (int jj=0; jj<NPCOLS; jj++) {
dispsb[ii*NPCOLS+jj] = ii*COLSb*BLOCKROWSb+jj*BLOCKCOLSb;
countsb [ii*NPCOLS+jj] = 1;
}
}

MPI_Scatterv(b, countsb, dispsb, blocktype, BLb, BLOCKROWSb*BLOCKCOLSb, MPI_CHAR, 0, MPI_COMM_WORLD);




for (int proc=0; proc<p; proc++) {
if (proc == rank) {

printf("Rank = %d\n", rank);

if (rank == 0) {
printf("Global matrix A : \n");

for (int ii=0; ii<ROWSa; ii++) {
for (int jj=0; jj<COLSa; jj++) {
printf("%3d ",(int)a[ii*COLSa+jj]);
}
printf("\n");
}
printf("\n");
printf("Global matrix B : \n");

for (int ii=0; ii<ROWSb; ii++) {
for (int jj=0; jj<COLSb; jj++) {
printf("%3d ",(int)b[ii*COLSb+jj]);
}
printf("\n");
}
printf("\n");
printf("Local Matrix A:\n");
for (int ii=0; ii<BLOCKROWSa; ii++) {
for (int jj=0; jj<BLOCKCOLSa; jj++) {
printf("%3d ",(int)BLa[ii*BLOCKCOLSa+jj]);

}

printf("\n");
}

printf("\n");
printf("Local Matrix B:\n");
for (int ii=0; ii<BLOCKROWSb; ii++) {
for (int jj=0; jj<BLOCKCOLSb; jj++) {
printf("%3d ",(int)BLb[ii*BLOCKCOLSb+jj]);

}

printf("\n");
}
}


printf("Local Matrix A:\n");
for (int ii=0; ii<BLOCKROWSa; ii++) {
for (int jj=0; jj<BLOCKCOLSa; jj++) {
printf("%3d ",(int)BLa[ii*BLOCKCOLSa+jj]);
}

printf("\n");
}

printf("Local Matrix B:\n");
for (int ii=0; ii<BLOCKROWSb; ii++) {
for (int jj=0; jj<BLOCKCOLSb; jj++) {
printf("%3d ",(int)BLb[ii*BLOCKCOLSb+jj]);
}

printf("\n");
}

//**********************Multiplication***********************//

for (int i = 0; i < BLOCKROWSa; i++) {
for (j = 0; j < BLOCKCOLSb; j++) {

for (k = 0; k < BLOCKCOLSb; k++) { //I am considering square matrices with the same sizes
BLc[i + j*BLOCKROWSa] += BLa[i + k*BLOCKROWSa]*BLb[k + BLOCKCOLb*j];
printf("%3d ",(int)BLc[i+j*BLOCKROWSa]);
}
printf("\n");

}

printf("\n");

}

}

MPI_Barrier(MPI_COMM_WORLD);
}

MPI_Finalize();

//**********************End Time Record************************//

t=clock()-t;
printf("It took %f seconds (%d clicks).\n",t,((float)t)/CLOCKS_PER_SEC);


return 0;
}

最佳答案

要将各个块收集回 proc 0 上的矩阵中，可以使用与 MPI_Scatterv() 相反的操作 MPI_Gatherv() http://www.mpich.org/static/docs/latest/www3/MPI_Gatherv.html :

MPI_Gatherv(BLc, BLOCKROWSb*BLOCKCOLSb,MPI_CHAR, c, countsb, dispsb,blocktype, 0, MPI_COMM_WORLD);

if (rank == 0) {
printf("Global matrix C : \n");

for (int ii=0; ii<ROWSa; ii++) {
for (int jj=0; jj<COLSa; jj++) {
printf("%3d ",(int)c[ii*COLSa+jj]);
}
printf("\n");
}
}

请记住,您正在执行分块乘法,这与矩阵乘法不同。

再见,

弗朗西斯

关于matrix - MPI block 矩阵乘法,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/22432342/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com