gpt4 book ai didi

c - Strassen 矩阵乘法的内存管理

转载 作者:太空宇宙 更新时间:2023-11-04 02:00:23 25 4
gpt4 key购买 nike

作为作业的一部分,我试图找出 Strassen 矩阵乘法和朴素乘法算法的交叉点。但同样,当矩阵变为 256x256 时,我无法继续。有人可以建议我适当的内存管理技术,以便能够处理更大的输入。

C语言代码如下:

#include<stdio.h>
#include<stdlib.h>
#include<math.h>
#include<time.h>

void strassenMul(double* X, double* Y, double* Z, int m);
void matMul(double* A, double* B, double* C, int n);
void matAdd(double* A, double* B, double* C, int m);
void matSub(double* A, double* B, double* C, int m);

int idx = 0;

int main()
{
int N;
int count = 0;
int i, j;
clock_t start, end;
double elapsed;

int total = 15;
double tnaive[total];
double tstrassen[total];
printf("-------------------------------------------------------------------------\n\n");
for (count = 0; count < total; count++)
{
N = pow(2, count);
printf("Matrix size = %2d\t",N);
double X[N][N], Y[N][N], Z[N][N], W[N][N];
srand(time(NULL));
for (i = 0; i < N; i++)
{
for (j = 0; j < N; j++)
{
X[i][j] = rand()/(RAND_MAX + 1.);
Y[i][j] = rand()/(RAND_MAX + 1.);
}
}
start = clock();
matMul((double *)X, (double *)Y, (double *)W, N);
end = clock();
elapsed = ((double) (end - start))*100/ CLOCKS_PER_SEC;
tnaive[count] = elapsed;
printf("naive = %5.4f\t\t",tnaive[count]);

start = clock();
strassenMul((double *)X, (double *)Y, (double *)Z, N);
end = clock();
elapsed = ((double) (end - start))*100/ CLOCKS_PER_SEC;
tstrassen[count] = elapsed;
printf("strassen = %5.4f\n",tstrassen[count]);
}
printf("-------------------------------------------------------------------\n\n\n");

while (tnaive[idx+1] <= tstrassen[idx+1] && idx < 14) idx++;

printf("Optimum input size to switch from normal multiplication to Strassen's is above %d\n\n", idx);

printf("Please enter the size of array as a power of 2\n");
scanf("%d",&N);
double A[N][N], B[N][N], C[N][N];
srand(time(NULL));
for (i = 0; i < N; i++)
{
for (j = 0; j < N; j++)
{
A[i][j] = rand()/(RAND_MAX + 1.);
B[i][j] = rand()/(RAND_MAX + 1.);
}
}

printf("------------------- Input Matrices A and B ---------------------------\n\n");
for (i = 0; i < N; i++)
{
for (j = 0; j < N; j++)
printf("%5.4f ",A[i][j]);
printf("\n");
}
printf("\n");

for (i = 0; i < N; i++)
{
for (j = 0; j < N; j++)
printf("%5.4f ",B[i][j]);
printf("\n");
}
printf("\n------- Output matrix by Strassen's method after optimization -----------\n\n");

strassenMul((double *)A, (double *)B, (double *)C, N);

for (i = 0; i < N; i++)
{
for (j = 0; j < N; j++)
printf("%5.4f ",C[i][j]);
printf("\n");
}
return(0);
}

void strassenMul(double *X, double *Y, double *Z, int m)
{
if (m <= idx)
{
matMul((double *)X, (double *)Y, (double *)Z, m);
return;
}
if (m == 1)
{
*Z = *X * *Y;
return;
}
int row = 0, col = 0;
int n = m/2;
int i = 0, j = 0;
double x11[n][n], x12[n][n], x21[n][n], x22[n][n];
double y11[n][n], y12[n][n], y21[n][n], y22[n][n];
double P1[n][n], P2[n][n], P3[n][n], P4[n][n], P5[n][n], P6[n][n], P7[n][n];
double C11[n][n], C12[n][n], C21[n][n], C22[n][n];
double S1[n][n], S2[n][n], S3[n][n], S4[n][n], S5[n][n], S6[n][n], S7[n][n];
double S8[n][n], S9[n][n], S10[n][n], S11[n][n], S12[n][n], S13[n][n], S14[n][n];

for (row = 0, i = 0; row < n; row++, i++)
{
for (col = 0, j = 0; col < n; col++, j++)
{
x11[i][j] = *((X+row*m)+col);
y11[i][j] = *((Y+row*m)+col);
}
for (col = n, j = 0; col < m; col++, j++)
{
x12[i][j] = *((X+row*m)+col);
y12[i][j] = *((Y+row*m)+col);
}
}

for (row = n, i = 0; row < m; row++, i++)
{
for (col = 0, j = 0; col < n; col++, j++)
{
x21[i][j] = *((X+row*m)+col);
y21[i][j] = *((Y+row*m)+col);
}
for (col = n, j = 0; col < m; col++, j++)
{
x22[i][j] = *((X+row*m)+col);
y22[i][j] = *((Y+row*m)+col);
}
}

// Calculating P1
matAdd((double *)x11, (double *)x22, (double *)S1, n);
matAdd((double *)y11, (double *)y22, (double *)S2, n);
strassenMul((double *)S1, (double *)S2, (double *)P1, n);

// Calculating P2
matAdd((double *)x21, (double *)x22, (double *)S3, n);
strassenMul((double *)S3, (double *)y11, (double *)P2, n);

// Calculating P3
matSub((double *)y12, (double *)y22, (double *)S4, n);
strassenMul((double *)x11, (double *)S4, (double *)P3, n);

// Calculating P4
matSub((double *)y21, (double *)y11, (double *)S5, n);
strassenMul((double *)x22, (double *)S5, (double *)P4, n);

// Calculating P5
matAdd((double *)x11, (double *)x12, (double *)S6, n);
strassenMul((double *)S6, (double *)y22, (double *)P5, n);

// Calculating P6
matSub((double *)x21, (double *)x11, (double *)S7, n);
matAdd((double *)y11, (double *)y12, (double *)S8, n);
strassenMul((double *)S7, (double *)S8, (double *)P6, n);

// Calculating P7
matSub((double *)x12, (double *)x22, (double *)S9, n);
matAdd((double *)y21, (double *)y22, (double *)S10, n);
strassenMul((double *)S9, (double *)S10, (double *)P7, n);

// Calculating C11
matAdd((double *)P1, (double *)P4, (double *)S11, n);
matSub((double *)S11, (double *)P5, (double *)S12, n);
matAdd((double *)S12, (double *)P7, (double *)C11, n);

// Calculating C12
matAdd((double *)P3, (double *)P5, (double *)C12, n);

// Calculating C21
matAdd((double *)P2, (double *)P4, (double *)C21, n);

// Calculating C22
matAdd((double *)P1, (double *)P3, (double *)S13, n);
matSub((double *)S13, (double *)P2, (double *)S14, n);
matAdd((double *)S14, (double *)P6, (double *)C22, n);

for (row = 0, i = 0; row < n; row++, i++)
{
for (col = 0, j = 0; col < n; col++, j++)
*((Z+row*m)+col) = C11[i][j];
for (col = n, j = 0; col < m; col++, j++)
*((Z+row*m)+col) = C12[i][j];
}
for (row = n, i = 0; row < m; row++, i++)
{
for (col = 0, j = 0; col < n; col++, j++)
*((Z+row*m)+col) = C21[i][j];
for (col = n, j = 0; col < m; col++, j++)
*((Z+row*m)+col) = C22[i][j];
}
}

void matMul(double *A, double *B, double *C, int n)
{
int i = 0, j = 0, k = 0, row = 0, col = 0;
double sum;
for (i = 0; i < n; i++)
{
for (j = 0; j < n; j++)
{
sum = 0.0;
for (k = 0; k < n; k++)
{
sum += *((A+i*n)+k) * *((B+k*n)+j);
}
*((C+i*n)+j) = sum;
}
}
}

void matAdd(double *A, double *B, double *C, int m)
{
int row = 0, col = 0;
for (row = 0; row < m; row++)
for (col = 0; col < m; col++)
*((C+row*m)+col) = *((A+row*m)+col) + *((B+row*m)+col);
}

void matSub(double *A, double *B, double *C, int m)
{
int row = 0, col = 0;
for (row = 0; row < m; row++)
for (col = 0; col < m; col++)
*((C+row*m)+col) = *((A+row*m)+col) - *((B+row*m)+col);
}

稍后添加 如果我尝试使用 malloc 语句进行内存分配,代码如下。但问题是它在朴素的矩阵乘法方法之后停止,甚至没有进行到 N=1 的 Strassen 方法。它显示了关闭程序的提示。

for (count = 0; count < total; count++)
{
N = pow(2, count);
printf("Matrix size = %2d\t",N);
//double X[N][N], Y[N][N], Z[N][N], W[N][N];
double **X, **Y, **Z, **W;
X = malloc(N * sizeof(double*));
if (X == NULL){
perror("Failed malloc() in X");
return 1;
}
Y = malloc(N * sizeof(double*));
if (Y == NULL){
perror("Failed malloc() in Y");
return 1;
}
Z = malloc(N * sizeof(double*));
if (Z == NULL){
perror("Failed malloc() in Z");
return 1;
}
W = malloc(N * sizeof(double*));
if (W == NULL){
perror("Failed malloc() in W");
return 1;
}
for (j = 0; j < N; j++)
{
X[j] = malloc(N * sizeof(double*));
if (X[j] == NULL){
perror("Failed malloc() in X[j]");
return 1;
}
Y[j] = malloc(N * sizeof(double*));
if (Y[j] == NULL){
perror("Failed malloc() in Y[j]");
return 1;
}
Z[j] = malloc(N * sizeof(double*));
if (Z[j] == NULL){
perror("Failed malloc() in Z[j]");
return 1;
}
W[j] = malloc(N * sizeof(double*));
if (W[j] == NULL){
perror("Failed malloc() in W[j]");
return 1;
}
}
srand(time(NULL));
for (i = 0; i < N; i++)
{
for (j = 0; j < N; j++)
{
X[i][j] = rand()/(RAND_MAX + 1.);
Y[i][j] = rand()/(RAND_MAX + 1.);
}
}
start = clock();
matMul((double *)X, (double *)Y, (double *)W, N);
end = clock();
elapsed = ((double) (end - start))*100/ CLOCKS_PER_SEC;
tnaive[count] = elapsed;
printf("naive = %5.4f\t\t",tnaive[count]);

start = clock();
strassenMul((double *)X, (double *)Y, (double *)Z, N);
end = clock();
elapsed = ((double) (end - start))*100/ CLOCKS_PER_SEC;
tstrassen[count] = elapsed;
for (j = 0; j < N; j++)
{
free(X[j]);
free(Y[j]);
free(Z[j]);
free(W[j]);
}

free(X); free(Y); free(Z); free(W);

printf("strassen = %5.4f\n",tstrassen[count]);
}

最佳答案

我已经重写了答案。我之前逐行分配内存的答案不起作用,因为 OP 在传递给函数时已将二维数组转换为一维数组。这是我重新编写的代码,并进行了一些简化,例如将所有矩阵数组保持为一维。

我不确定 Strassen 的方法到底做了什么,尽管递归将矩阵维度减半。所以我想知道是否打算在访问传递的数组时使用 row*2col*2

我希望这些技术对您有用 - 即使它有效!所有矩阵数组现在都在堆上。

#include<stdio.h>
#include<stdlib.h>
#include<math.h>
#include<time.h>

#define total 4 //15

void strassenMul(double* X, double* Y, double* Z, int m);
void matMul(double* A, double* B, double* C, int n);
void matAdd(double* A, double* B, double* C, int m);
void matSub(double* A, double* B, double* C, int m);

enum array { x11, x12, x21, x22, y11, y12, y21, y22,
P1, P2, P3, P4, P5, P6, P7, C11, C12, C21, C22,
S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, arrs };

int idx = 0;

int main()
{
int N;
int count = 0;
int i, j;
clock_t start, end;
double elapsed;

double tnaive[total];
double tstrassen[total];
double *X, *Y, *Z, *W, *A, *B, *C;

printf("-------------------------------------------------------------------------\n\n");
for (count = 0; count < total; count++)
{
N = (int)pow(2, count);
printf("Matrix size = %2d\t",N);
X = malloc(N*N*sizeof(double));
Y = malloc(N*N*sizeof(double));
Z = malloc(N*N*sizeof(double));
W = malloc(N*N*sizeof(double));
if (X==NULL || Y==NULL || Z==NULL || W==NULL) {
printf("Out of memory (1)\n");
return 1;
}
srand((unsigned)time(NULL));
for (i=0; i<N*N; i++)
{
X[i] = rand()/(RAND_MAX + 1.);
Y[i] = rand()/(RAND_MAX + 1.);
}
start = clock();
matMul(X, Y, W, N);
end = clock();
elapsed = ((double) (end - start))*100/ CLOCKS_PER_SEC;
tnaive[count] = elapsed;
printf("naive = %5.4f\t\t",tnaive[count]);

start = clock();
strassenMul(X, Y, Z, N);
free(W);
free(Z);
free(Y);
free(X);
end = clock();
elapsed = ((double) (end - start))*100/ CLOCKS_PER_SEC;
tstrassen[count] = elapsed;
printf("strassen = %5.4f\n",tstrassen[count]);
}
printf("-------------------------------------------------------------------\n\n\n");

while (tnaive[idx+1] <= tstrassen[idx+1] && idx < 14) idx++;

printf("Optimum input size to switch from normal multiplication to Strassen's is above %d\n\n", idx);

printf("Please enter the size of array as a power of 2\n");
scanf("%d",&N);
A = malloc(N*N*sizeof(double));
B = malloc(N*N*sizeof(double));
C = malloc(N*N*sizeof(double));
if (A==NULL || B==NULL || C==NULL) {
printf("Out of memory (2)\n");
return 1;
}
srand((unsigned)time(NULL));
for (i=0; i<N*N; i++)
{
A[i] = rand()/(RAND_MAX + 1.);
B[i] = rand()/(RAND_MAX + 1.);
}

printf("------------------- Input Matrices A and B ---------------------------\n\n");
for (i = 0; i < N; i++)
{
for (j = 0; j < N; j++)
printf("%5.4f ",A[i*N+j]);
printf("\n");
}
printf("\n");

for (i = 0; i < N; i++)
{
for (j = 0; j < N; j++)
printf("%5.4f ",B[i*N+j]);
printf("\n");
}
printf("\n------- Output matrix by Strassen's method after optimization -----------\n\n");

strassenMul(A, B, C, N);

for (i = 0; i < N; i++)
{
for (j = 0; j < N; j++)
printf("%5.4f ",C[i*N+j]);
printf("\n");
}
free(C);
free(B);
free(A);
return(0);
}

void strassenMul(double *X, double *Y, double *Z, int m)
{
int row = 0, col = 0;
int n = m/2;
int i = 0, j = 0;
double *arr[arrs]; // each matrix mem ptr

if (m <= idx)
{
matMul(X, Y, Z, m);
return;
}
if (m == 1)
{
*Z = *X * *Y;
return;
}

for (i=0; i<arrs; i++) { // memory for arrays
arr[i] = malloc(n*n*sizeof(double));
if (arr[i] == NULL) {
printf("Out of memory (1)\n");
exit (1); // brutal
}
}

for (row = 0, i = 0; row < n; row++, i++)
{
for (col = 0, j = 0; col < n; col++, j++)
{
arr[x11][i*n+j] = X[row*m+col];
arr[y11][i*n+j] = Y[row*m+col];
}
for (col = n, j = 0; col < m; col++, j++)
{
arr[x12][i*n+j] = X[row*m+col];
arr[y12][i*n+j] = Y[row*m+col];
}
}

for (row = n, i = 0; row < m; row++, i++)
{
for (col = 0, j = 0; col < n; col++, j++)
{
arr[x21][i*n+j] = X[row*m+col];
arr[y21][i*n+j] = Y[row*m+col];
}
for (col = n, j = 0; col < m; col++, j++)
{
arr[x22][i*n+j] = X[row*m+col];
arr[y22][i*n+j] = Y[row*m+col];
}
}

// Calculating P1
matAdd(arr[x11], arr[x22], arr[S1], n);
matAdd(arr[y11], arr[y22], arr[S2], n);
strassenMul(arr[S1], arr[S2], arr[P1], n);

// Calculating P2
matAdd(arr[x21], arr[x22], arr[S3], n);
strassenMul(arr[S3], arr[y11], arr[P2], n);

// Calculating P3
matSub(arr[y12], arr[y22], arr[S4], n);
strassenMul(arr[x11], arr[S4], arr[P3], n);

// Calculating P4
matSub(arr[y21], arr[y11], arr[S5], n);
strassenMul(arr[x22], arr[S5], arr[P4], n);

// Calculating P5
matAdd(arr[x11], arr[x12], arr[S6], n);
strassenMul(arr[S6], arr[y22], arr[P5], n);

// Calculating P6
matSub(arr[x21], arr[x11], arr[S7], n);
matAdd(arr[y11], arr[y12], arr[S8], n);
strassenMul(arr[S7], arr[S8], arr[P6], n);

// Calculating P7
matSub(arr[x12], arr[x22], arr[S9], n);
matAdd(arr[y21], arr[y22], arr[S10], n);
strassenMul(arr[S9], arr[S10], arr[P7], n);

// Calculating C11
matAdd(arr[P1], arr[P4], arr[S11], n);
matSub(arr[S11], arr[P5], arr[S12], n);
matAdd(arr[S12], arr[P7], arr[C11], n);

// Calculating C12
matAdd(arr[P3], arr[P5], arr[C12], n);

// Calculating C21
matAdd(arr[P2], arr[P4], arr[C21], n);

// Calculating C22
matAdd(arr[P1], arr[P3], arr[S13], n);
matSub(arr[S13], arr[P2], arr[S14], n);
matAdd(arr[S14], arr[P6], arr[C22], n);

for (row = 0, i = 0; row < n; row++, i++)
{
for (col = 0, j = 0; col < n; col++, j++)
Z[row*m+col] = arr[C11][i*n+j];
for (col = n, j = 0; col < m; col++, j++)
Z[row*m+col] = arr[C12][i*n+j];
}
for (row = n, i = 0; row < m; row++, i++)
{
for (col = 0, j = 0; col < n; col++, j++)
Z[row*m+col] = arr[C21][i*n+j];
for (col = n, j = 0; col < m; col++, j++)
Z[row*m+col] = arr[C22][i*n+j];
}

for (i=0; i<arrs; i++)
free (arr[i]);
}

void matMul(double *A, double *B, double *C, int n)
{
int i = 0, j = 0, k = 0, row = 0, col = 0;
double sum;
for (i = 0; i < n; i++)
{
for (j = 0; j < n; j++)
{
sum = 0.0;
for (k = 0; k < n; k++)
{
sum += A[i*n+k] * B[k*n+j];
}
C[i*n+j] = sum;
}
}
}

void matAdd(double *A, double *B, double *C, int m)
{
int row = 0, col = 0;
for (row = 0; row < m; row++)
for (col = 0; col < m; col++)
C[row*m+col] = A[row*m+col] + B[row*m+col];
}

void matSub(double *A, double *B, double *C, int m)
{
int row = 0, col = 0;
for (row = 0; row < m; row++)
for (col = 0; col < m; col++)
C[row*m+col] = A[row*m+col] - B[row*m+col];
}

关于c - Strassen 矩阵乘法的内存管理,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/27908663/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com