c - Strassen 矩阵乘法的内存管理-6ren

c - Strassen 矩阵乘法的内存管理

转载作者：太空宇宙更新时间：2023-11-04 02:00:23

作为作业的一部分，我试图找出 Strassen 矩阵乘法与朴素矩阵乘法算法的性能交叉点。但是，当矩阵达到 256x256 时，程序就无法继续运行了。有人能建议合适的内存管理技术，让程序能够处理更大的输入吗？

C语言代码如下:

#include<stdio.h>
#include<stdlib.h>
#include<math.h>
#include<time.h>

/* Strassen product Z = X * Y for m x m matrices passed as flat row-major arrays. */
void strassenMul(double* X, double* Y, double* Z, int m);
/* Naive triple-loop product C = A * B for n x n matrices. */
void matMul(double* A, double* B, double* C, int n);
/* Element-wise sum C = A + B of two m x m matrices. */
void matAdd(double* A, double* B, double* C, int m);
/* Element-wise difference C = A - B of two m x m matrices. */
void matSub(double* A, double* B, double* C, int m);

/* Cut-over threshold: strassenMul hands any matrix with m <= idx to matMul.
   It stays 0 during the timing runs; main sets it from the measurements afterwards. */
int idx = 0;

/*
 * Benchmark driver.
 *
 * 1. Times matMul and strassenMul on random N x N matrices for
 *    N = 2^0 .. 2^(total-1); times are stored in hundredths of a second.
 * 2. Derives the cut-over size from the timings and stores it in idx.
 * 3. Reads a size from stdin, multiplies two random matrices of that size
 *    with strassenMul and prints the operands and the product.
 *
 * Returns 0 on success, 1 if the size read from stdin is unusable.
 */
int main()
{
    int N;
    int count = 0;
    int i, j;
    int k = 0;                  /* last table index where naive is not slower */
    clock_t start, end;
    double elapsed;

    int total = 15;
    double tnaive[total];
    double tstrassen[total];

    /* Seed once for the whole run instead of reseeding on every pass. */
    srand(time(NULL));

    printf("-------------------------------------------------------------------------\n\n");
    for (count = 0; count < total; count++)
    {
        /* Integer shift yields the exact power of two; pow() works in double. */
        N = 1 << count;
        printf("Matrix size = %2d\t",N);
        /* Four automatic N x N arrays: 4*N*N*sizeof(double) bytes of automatic
           storage per pass, which reaches gigabytes for the largest sizes. */
        double X[N][N], Y[N][N], Z[N][N], W[N][N];
        for (i = 0; i < N; i++)
        {
            for (j = 0; j < N; j++)
            {
                X[i][j] = rand()/(RAND_MAX + 1.);
                Y[i][j] = rand()/(RAND_MAX + 1.);
            }
        }
        start = clock();
        matMul((double *)X, (double *)Y, (double *)W, N);
        end = clock();
        elapsed = ((double) (end - start))*100/ CLOCKS_PER_SEC;
        tnaive[count] = elapsed;
        printf("naive = %5.4f\t\t",tnaive[count]);

        start = clock();
        strassenMul((double *)X, (double *)Y, (double *)Z, N);
        end = clock();
        elapsed = ((double) (end - start))*100/ CLOCKS_PER_SEC;
        tstrassen[count] = elapsed;
        printf("strassen = %5.4f\n",tstrassen[count]);
    }
    printf("-------------------------------------------------------------------\n\n\n");

    /* Walk forward while the naive method is at least as fast.  The bound is
       tested before the tables are read so index k+1 never leaves them. */
    while (k + 1 < total && tnaive[k+1] <= tstrassen[k+1]) k++;
    /* strassenMul compares idx with a matrix dimension, so store the size 2^k
       rather than the table index k. */
    idx = 1 << k;

    printf("Optimum input size to switch from normal multiplication to Strassen's is above %d\n\n", idx);

    printf("Please enter the size of array as a power of 2\n");
    /* Reject unreadable input and sizes that cannot be halved down to 1:
       they would give zero-sized arrays or unequal quadrants. */
    if (scanf("%d",&N) != 1 || N < 1 || (N & (N - 1)) != 0)
    {
        printf("Invalid size: expected a positive power of 2\n");
        return 1;
    }
    double A[N][N], B[N][N], C[N][N];
    for (i = 0; i < N; i++)
    {
        for (j = 0; j < N; j++)
        {
            A[i][j] = rand()/(RAND_MAX + 1.);
            B[i][j] = rand()/(RAND_MAX + 1.);
        }
    }

    printf("------------------- Input Matrices A and B ---------------------------\n\n");
    for (i = 0; i < N; i++)
    {
        for (j = 0; j < N; j++)
            printf("%5.4f  ",A[i][j]);
        printf("\n");
    }
    printf("\n");

    for (i = 0; i < N; i++)
    {
        for (j = 0; j < N; j++)
            printf("%5.4f  ",B[i][j]);
        printf("\n");
    }
    printf("\n------- Output matrix by Strassen's method after optimization -----------\n\n");

    strassenMul((double *)A, (double *)B, (double *)C, N);

    for (i = 0; i < N; i++)
    {
        for (j = 0; j < N; j++)
            printf("%5.4f  ",C[i][j]);
        printf("\n");
    }
    return(0);
}

/*
 * Z = X * Y for m x m row-major matrices, using Strassen's seven products.
 *
 * X, Y : input matrices, m*m doubles each.
 * Z    : output matrix, m*m doubles; written only after X and Y were read.
 * m    : matrix dimension.
 *
 * Sizes at or below the global idx, and odd sizes (which cannot be cut into
 * four equal quadrants), are delegated to matMul.
 *
 * All 33 temporaries are automatic n x n arrays (n = m/2), so every level of
 * recursion needs 33*n*n*sizeof(double) bytes of automatic storage.
 */
void strassenMul(double *X, double *Y, double *Z, int m)
{
    if (m <= idx)
    {
        matMul((double *)X, (double *)Y, (double *)Z, m);
        return;
    }
    if (m == 1)
    {
        *Z = *X * *Y;
        return;
    }
    if (m % 2 != 0)
    {
        /* Halving an odd size would leave quadrants of unequal width and the
           copies below would run past the n x n temporaries. */
        matMul(X, Y, Z, m);
        return;
    }
    int n = m/2;
    int i = 0, j = 0;
    double x11[n][n], x12[n][n], x21[n][n], x22[n][n];
    double y11[n][n], y12[n][n], y21[n][n], y22[n][n];
    double P1[n][n], P2[n][n], P3[n][n], P4[n][n], P5[n][n], P6[n][n], P7[n][n];
    double C11[n][n], C12[n][n], C21[n][n], C22[n][n];
    double S1[n][n], S2[n][n], S3[n][n], S4[n][n], S5[n][n], S6[n][n], S7[n][n];
    double S8[n][n], S9[n][n], S10[n][n], S11[n][n], S12[n][n], S13[n][n], S14[n][n];

    // Split X and Y into quadrants: 11 top-left, 12 top-right,
    // 21 bottom-left, 22 bottom-right.
    for (i = 0; i < n; i++)
    {
        for (j = 0; j < n; j++)
        {
            x11[i][j] = X[i*m + j];
            x12[i][j] = X[i*m + n + j];
            x21[i][j] = X[(n + i)*m + j];
            x22[i][j] = X[(n + i)*m + n + j];

            y11[i][j] = Y[i*m + j];
            y12[i][j] = Y[i*m + n + j];
            y21[i][j] = Y[(n + i)*m + j];
            y22[i][j] = Y[(n + i)*m + n + j];
        }
    }

    // P1 = (x11 + x22) * (y11 + y22)
    matAdd((double *)x11, (double *)x22, (double *)S1, n);
    matAdd((double *)y11, (double *)y22, (double *)S2, n);
    strassenMul((double *)S1, (double *)S2, (double *)P1, n);

    // P2 = (x21 + x22) * y11
    matAdd((double *)x21, (double *)x22, (double *)S3, n);
    strassenMul((double *)S3, (double *)y11, (double *)P2, n);

    // P3 = x11 * (y12 - y22)
    matSub((double *)y12, (double *)y22, (double *)S4, n);
    strassenMul((double *)x11, (double *)S4, (double *)P3, n);

    // P4 = x22 * (y21 - y11)
    matSub((double *)y21, (double *)y11, (double *)S5, n);
    strassenMul((double *)x22, (double *)S5, (double *)P4, n);

    // P5 = (x11 + x12) * y22
    matAdd((double *)x11, (double *)x12, (double *)S6, n);
    strassenMul((double *)S6, (double *)y22, (double *)P5, n);

    // P6 = (x21 - x11) * (y11 + y12)
    matSub((double *)x21, (double *)x11, (double *)S7, n);
    matAdd((double *)y11, (double *)y12, (double *)S8, n);
    strassenMul((double *)S7, (double *)S8, (double *)P6, n);

    // P7 = (x12 - x22) * (y21 + y22)
    matSub((double *)x12, (double *)x22, (double *)S9, n);
    matAdd((double *)y21, (double *)y22, (double *)S10, n);
    strassenMul((double *)S9, (double *)S10, (double *)P7, n);

    // C11 = P1 + P4 - P5 + P7
    matAdd((double *)P1, (double *)P4, (double *)S11, n);
    matSub((double *)S11, (double *)P5, (double *)S12, n);
    matAdd((double *)S12, (double *)P7, (double *)C11, n);

    // C12 = P3 + P5
    matAdd((double *)P3, (double *)P5, (double *)C12, n);

    // C21 = P2 + P4
    matAdd((double *)P2, (double *)P4, (double *)C21, n);

    // C22 = P1 + P3 - P2 + P6
    matAdd((double *)P1, (double *)P3, (double *)S13, n);
    matSub((double *)S13, (double *)P2, (double *)S14, n);
    matAdd((double *)S14, (double *)P6, (double *)C22, n);

    // Reassemble the four result quadrants into Z.
    for (i = 0; i < n; i++)
    {
        for (j = 0; j < n; j++)
        {
            Z[i*m + j]           = C11[i][j];
            Z[i*m + n + j]       = C12[i][j];
            Z[(n + i)*m + j]     = C21[i][j];
            Z[(n + i)*m + n + j] = C22[i][j];
        }
    }
}

/*
 * Naive matrix product C = A * B.
 *
 * A, B : n x n input matrices, row-major, n*n doubles each.
 * C    : n x n output matrix; every element is overwritten.  It must not
 *        share storage with A or B, because inputs are re-read after
 *        earlier outputs have been stored.
 * n    : matrix dimension; nothing is written when n <= 0.
 */
void matMul(double *A, double *B, double *C, int n)
{
    int i, j, k;
    double sum;
    for (i = 0; i < n; i++)
    {
        for (j = 0; j < n; j++)
        {
            /* Dot product of row i of A with column j of B. */
            sum = 0.0;
            for (k = 0; k < n; k++)
            {
                sum += A[i*n + k] * B[k*n + j];
            }
            C[i*n + j] = sum;
        }
    }
}

/*
 * Element-wise sum C = A + B of two m x m row-major matrices.
 * Each output element depends only on the matching input elements.
 */
void matAdd(double *A, double *B, double *C, int m)
{
    int r, c;
    for (r = 0; r < m; r++)
    {
        /* Start of row r in each of the three flat arrays. */
        double *lhs = A + r*m;
        double *rhs = B + r*m;
        double *out = C + r*m;
        for (c = 0; c < m; c++)
        {
            out[c] = lhs[c] + rhs[c];
        }
    }
}

/*
 * Element-wise difference C = A - B of two m x m row-major matrices.
 * Each output element depends only on the matching input elements.
 */
void matSub(double *A, double *B, double *C, int m)
{
    int r, c;
    for (r = 0; r < m; r++)
    {
        /* Start of row r in each of the three flat arrays. */
        double *lhs = A + r*m;
        double *rhs = B + r*m;
        double *out = C + r*m;
        for (c = 0; c < m; c++)
        {
            out[c] = lhs[c] - rhs[c];
        }
    }
}

后续补充：如果改用 malloc 语句进行内存分配，代码如下。但问题是，程序在朴素矩阵乘法之后就停止了，甚至在 N=1 时都没有执行到 Strassen 方法，系统随即弹出了关闭程序的提示。

/*
 * Benchmark loop variant that builds every matrix as a table of N row
 * pointers (double **), each row obtained from its own malloc call, then
 * times matMul and strassenMul on them and frees everything again.
 */
for (count = 0; count < total; count++)
{
    N = pow(2, count);
    printf("Matrix size = %2d\t",N);
    //double X[N][N], Y[N][N], Z[N][N], W[N][N];
    double **X, **Y, **Z, **W;
    /* Row-pointer tables: N pointers per matrix. */
    X = malloc(N * sizeof(double*));
    if (X == NULL){
        perror("Failed malloc() in X");
        return 1;
    }
    Y = malloc(N * sizeof(double*));
            if (Y == NULL){
                perror("Failed malloc() in Y");
                return 1;
    }
    Z = malloc(N * sizeof(double*));
            if (Z == NULL){
                perror("Failed malloc() in Z");
                return 1;
    }
    W = malloc(N * sizeof(double*));
            if (W == NULL){
                perror("Failed malloc() in W");
                return 1;
    }
    /* One separately allocated row per table entry.
       NOTE(review): the rows store doubles, but their size is computed with
       sizeof(double*); sizeof(double) looks like what was intended. */
    for (j = 0; j < N; j++)
    {
        X[j] = malloc(N * sizeof(double*));
        if (X[j] == NULL){
            perror("Failed malloc() in X[j]");
            return 1;
        }
        Y[j] = malloc(N * sizeof(double*));
                    if (Y[j] == NULL){
                        perror("Failed malloc() in Y[j]");
                        return 1;
        }
        Z[j] = malloc(N * sizeof(double*));
                    if (Z[j] == NULL){
                        perror("Failed malloc() in Z[j]");
                        return 1;
        }
        W[j] = malloc(N * sizeof(double*));
                    if (W[j] == NULL){
                        perror("Failed malloc() in W[j]");
                        return 1;
        }
    }
    srand(time(NULL));
    /* Fill both operands with random values. */
    for (i = 0; i < N; i++)
    {
        for (j = 0; j < N; j++)
        {
            X[i][j] = rand()/(RAND_MAX + 1.);
            Y[i][j] = rand()/(RAND_MAX + 1.);
        }
    }
    start = clock();
    /* NOTE(review): X, Y and W are tables of row pointers, yet they are cast
       to double* here.  matMul indexes its arguments as one contiguous block
       of n*n doubles, so it reads the pointer table as matrix data and writes
       results over W's row pointers. */
    matMul((double *)X, (double *)Y, (double *)W, N);
    end = clock();
    elapsed = ((double) (end - start))*100/ CLOCKS_PER_SEC;
    tnaive[count] = elapsed;
    printf("naive = %5.4f\t\t",tnaive[count]);

    start = clock();
    /* NOTE(review): same double** -> double* cast as in the matMul call above. */
    strassenMul((double *)X, (double *)Y, (double *)Z, N);
    end = clock();
    elapsed = ((double) (end - start))*100/ CLOCKS_PER_SEC;
    tstrassen[count] = elapsed;
    /* Release the rows first, then the row-pointer tables. */
    for (j = 0; j < N; j++)
    {
        free(X[j]);
        free(Y[j]);
        free(Z[j]);
        free(W[j]);
    }

    free(X); free(Y); free(Z); free(W);

    printf("strassen = %5.4f\n",tstrassen[count]);
}

最佳答案

我已经重写了答案。我之前逐行分配内存的答案不起作用，因为 OP 在传递给函数时已将二维数组转换为一维数组。这是我重新编写的代码，并进行了一些简化，例如将所有矩阵数组保持为一维。

我不确定 Strassen 的方法到底做了什么，尽管递归将矩阵维度减半。所以我想知道是否打算在访问传递的数组时使用 row*2 和 col*2。

我希望这些技术对您有用 - 即使它有效!所有矩阵数组现在都在堆上。

#include<stdio.h>
#include<stdlib.h>
#include<math.h>
#include<time.h>

// Number of benchmark passes in main; pass k uses matrices of size 2^k.
// The question's program used 15.
#define total   4       //15

/* Strassen product Z = X * Y for m x m matrices passed as flat row-major arrays. */
void strassenMul(double* X, double* Y, double* Z, int m);
/* Naive triple-loop product C = A * B for n x n matrices. */
void matMul(double* A, double* B, double* C, int n);
/* Element-wise sum C = A + B of two m x m matrices. */
void matAdd(double* A, double* B, double* C, int m);
/* Element-wise difference C = A - B of two m x m matrices. */
void matSub(double* A, double* B, double* C, int m);

/* Slots of the arr[] table of temporaries inside strassenMul: input
   quadrants (x.., y..), the seven products (P..), result quadrants (C..)
   and intermediate sums/differences (S..).  arrs is the number of slots. */
enum array { x11, x12, x21, x22, y11, y12, y21, y22,
    P1, P2, P3, P4, P5, P6, P7, C11, C12, C21, C22,
    S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, arrs };

/* Cut-over threshold: strassenMul hands any matrix with m <= idx to matMul.
   It stays 0 during the timing runs; main sets it from the measurements afterwards. */
int idx = 0;

/*
 * Benchmark driver with all matrices on the heap as flat row-major arrays.
 *
 * 1. Times matMul and strassenMul on random N x N matrices for
 *    N = 2^0 .. 2^(total-1); times are stored in hundredths of a second.
 * 2. Derives the cut-over size from the timings and stores it in idx.
 * 3. Reads a size from stdin, multiplies two random matrices of that size
 *    with strassenMul and prints the operands and the product.
 *
 * Returns 0 on success, 1 on allocation failure or unusable input.
 */
int main()
{
    int N;
    int count = 0;
    int i, j;
    int k = 0;                  // last table index where naive is not slower
    size_t cells, p;            // element count of one matrix, flat index
    clock_t start, end;
    double elapsed;

    double tnaive[total];
    double tstrassen[total];
    double *X, *Y, *Z, *W, *A, *B, *C;

    // Seed once for the whole run instead of reseeding on every pass.
    srand((unsigned)time(NULL));

    printf("-------------------------------------------------------------------------\n\n");
    for (count = 0; count < total; count++)
    {
        // Integer shift yields the exact power of two; pow() works in double.
        N = 1 << count;
        cells = (size_t)N * (size_t)N;
        printf("Matrix size = %2d\t",N);
        X = malloc(cells * sizeof *X);
        Y = malloc(cells * sizeof *Y);
        Z = malloc(cells * sizeof *Z);
        W = malloc(cells * sizeof *W);
        if (X==NULL || Y==NULL || Z==NULL || W==NULL) {
            printf("Out of memory (1)\n");
            // Release whichever of the four did succeed.
            free(W);
            free(Z);
            free(Y);
            free(X);
            return 1;
        }
        for (p = 0; p < cells; p++)
        {
            X[p] = rand()/(RAND_MAX + 1.);
            Y[p] = rand()/(RAND_MAX + 1.);
        }
        start = clock();
        matMul(X, Y, W, N);
        end = clock();
        elapsed = ((double) (end - start))*100/ CLOCKS_PER_SEC;
        tnaive[count] = elapsed;
        printf("naive = %5.4f\t\t",tnaive[count]);

        start = clock();
        strassenMul(X, Y, Z, N);
        end = clock();
        elapsed = ((double) (end - start))*100/ CLOCKS_PER_SEC;
        tstrassen[count] = elapsed;
        printf("strassen = %5.4f\n",tstrassen[count]);

        // Freed only after the clock was stopped so that deallocation time
        // is not charged to the Strassen measurement.
        free(W);
        free(Z);
        free(Y);
        free(X);
    }
    printf("-------------------------------------------------------------------\n\n\n");

    // Walk forward while the naive method is at least as fast.  The bound is
    // tested before the tables are read so index k+1 never leaves them.
    while (k + 1 < total && tnaive[k+1] <= tstrassen[k+1]) k++;
    // strassenMul compares idx with a matrix dimension, so store the size 2^k
    // rather than the table index k.
    idx = 1 << k;

    printf("Optimum input size to switch from normal multiplication to Strassen's is above %d\n\n", idx);

    printf("Please enter the size of array as a power of 2\n");
    // Reject unreadable input and sizes that cannot be halved down to 1.
    if (scanf("%d",&N) != 1 || N < 1 || (N & (N - 1)) != 0) {
        printf("Invalid size: expected a positive power of 2\n");
        return 1;
    }
    // Refuse sizes whose byte count does not fit in size_t.
    if ((size_t)N > ((size_t)-1) / sizeof(double) / (size_t)N) {
        printf("Out of memory (2)\n");
        return 1;
    }
    cells = (size_t)N * (size_t)N;
    A = malloc(cells * sizeof *A);
    B = malloc(cells * sizeof *B);
    C = malloc(cells * sizeof *C);
    if (A==NULL || B==NULL || C==NULL) {
        printf("Out of memory (2)\n");
        free(C);
        free(B);
        free(A);
        return 1;
    }
    for (p = 0; p < cells; p++)
    {
        A[p] = rand()/(RAND_MAX + 1.);
        B[p] = rand()/(RAND_MAX + 1.);
    }

    printf("------------------- Input Matrices A and B ---------------------------\n\n");
    for (i = 0; i < N; i++)
    {
        for (j = 0; j < N; j++)
            printf("%5.4f  ",A[(size_t)i*N+j]);
        printf("\n");
    }
    printf("\n");

    for (i = 0; i < N; i++)
    {
        for (j = 0; j < N; j++)
            printf("%5.4f  ",B[(size_t)i*N+j]);
        printf("\n");
    }
    printf("\n------- Output matrix by Strassen's method after optimization -----------\n\n");

    strassenMul(A, B, C, N);

    for (i = 0; i < N; i++)
    {
        for (j = 0; j < N; j++)
            printf("%5.4f  ",C[(size_t)i*N+j]);
        printf("\n");
    }
    free(C);
    free(B);
    free(A);
    return(0);
}

/*
 * Z = X * Y for m x m row-major matrices, using Strassen's seven products.
 *
 * X, Y : input matrices, m*m doubles each.
 * Z    : output matrix, m*m doubles; written only after X and Y were read.
 * m    : matrix dimension.
 *
 * Sizes at or below the global idx, and odd sizes (which cannot be cut into
 * four equal quadrants), are delegated to matMul.
 *
 * Each call that recurses mallocs `arrs` temporaries of n*n doubles
 * (n = m/2) and frees them before returning.  If an allocation fails the
 * function prints a message and terminates the process with exit(1).
 */
void strassenMul(double *X, double *Y, double *Z, int m)
{
    int n = m/2;
    int i = 0, j = 0;
    size_t bytes;
    double *arr[arrs];                      // each matrix mem ptr

    if (m <= idx)
    {
        matMul(X, Y, Z, m);
        return;
    }
    if (m == 1)
    {
        *Z = *X * *Y;
        return;
    }
    if (m % 2 != 0)
    {
        // Halving an odd size would leave quadrants of unequal width and the
        // copies below would run past the n*n temporaries.
        matMul(X, Y, Z, m);
        return;
    }

    // Size computed in size_t so that n*n cannot overflow int.
    bytes = (size_t)n * (size_t)n * sizeof *arr[0];
    for (i=0; i<arrs; i++) {                // memory for arrays
        arr[i] = malloc(bytes);
        if (arr[i] == NULL) {
            printf("Out of memory (1)\n");
            exit (1);                       // brutal
        }
    }

    // Split X and Y into quadrants: 11 top-left, 12 top-right,
    // 21 bottom-left, 22 bottom-right.
    for (i = 0; i < n; i++)
    {
        for (j = 0; j < n; j++)
        {
            arr[x11][i*n+j] = X[i*m + j];
            arr[x12][i*n+j] = X[i*m + n + j];
            arr[x21][i*n+j] = X[(n + i)*m + j];
            arr[x22][i*n+j] = X[(n + i)*m + n + j];

            arr[y11][i*n+j] = Y[i*m + j];
            arr[y12][i*n+j] = Y[i*m + n + j];
            arr[y21][i*n+j] = Y[(n + i)*m + j];
            arr[y22][i*n+j] = Y[(n + i)*m + n + j];
        }
    }

    // P1 = (x11 + x22) * (y11 + y22)
    matAdd(arr[x11], arr[x22], arr[S1], n);
    matAdd(arr[y11], arr[y22], arr[S2], n);
    strassenMul(arr[S1], arr[S2], arr[P1], n);

    // P2 = (x21 + x22) * y11
    matAdd(arr[x21], arr[x22], arr[S3], n);
    strassenMul(arr[S3], arr[y11], arr[P2], n);

    // P3 = x11 * (y12 - y22)
    matSub(arr[y12], arr[y22], arr[S4], n);
    strassenMul(arr[x11], arr[S4], arr[P3], n);

    // P4 = x22 * (y21 - y11)
    matSub(arr[y21], arr[y11], arr[S5], n);
    strassenMul(arr[x22], arr[S5], arr[P4], n);

    // P5 = (x11 + x12) * y22
    matAdd(arr[x11], arr[x12], arr[S6], n);
    strassenMul(arr[S6], arr[y22], arr[P5], n);

    // P6 = (x21 - x11) * (y11 + y12)
    matSub(arr[x21], arr[x11], arr[S7], n);
    matAdd(arr[y11], arr[y12], arr[S8], n);
    strassenMul(arr[S7], arr[S8], arr[P6], n);

    // P7 = (x12 - x22) * (y21 + y22)
    matSub(arr[x12], arr[x22], arr[S9], n);
    matAdd(arr[y21], arr[y22], arr[S10], n);
    strassenMul(arr[S9], arr[S10], arr[P7], n);

    // C11 = P1 + P4 - P5 + P7
    matAdd(arr[P1], arr[P4], arr[S11], n);
    matSub(arr[S11], arr[P5], arr[S12], n);
    matAdd(arr[S12], arr[P7], arr[C11], n);

    // C12 = P3 + P5
    matAdd(arr[P3], arr[P5], arr[C12], n);

    // C21 = P2 + P4
    matAdd(arr[P2], arr[P4], arr[C21], n);

    // C22 = P1 + P3 - P2 + P6
    matAdd(arr[P1], arr[P3], arr[S13], n);
    matSub(arr[S13], arr[P2], arr[S14], n);
    matAdd(arr[S14], arr[P6], arr[C22], n);

    // Reassemble the four result quadrants into Z.
    for (i = 0; i < n; i++)
    {
        for (j = 0; j < n; j++)
        {
            Z[i*m + j]           = arr[C11][i*n+j];
            Z[i*m + n + j]       = arr[C12][i*n+j];
            Z[(n + i)*m + j]     = arr[C21][i*n+j];
            Z[(n + i)*m + n + j] = arr[C22][i*n+j];
        }
    }

    for (i=0; i<arrs; i++)
        free (arr[i]);
}

/*
 * Naive matrix product C = A * B.
 *
 * A, B : n x n input matrices, row-major, n*n doubles each.
 * C    : n x n output matrix; every element is overwritten.  It must not
 *        share storage with A or B, because inputs are re-read after
 *        earlier outputs have been stored.
 * n    : matrix dimension; nothing is written when n <= 0.
 */
void matMul(double *A, double *B, double *C, int n)
{
    int i, j, k;
    double sum;
    for (i = 0; i < n; i++)
    {
        for (j = 0; j < n; j++)
        {
            // Dot product of row i of A with column j of B.
            sum = 0.0;
            for (k = 0; k < n; k++)
            {
                sum += A[i*n+k] * B[k*n+j];
            }
            C[i*n+j] = sum;
        }
    }
}

/*
 * Element-wise sum C = A + B of two m x m row-major matrices.
 * Each output element depends only on the matching input elements.
 */
void matAdd(double *A, double *B, double *C, int m)
{
    int r, c;
    for (r = 0; r < m; r++)
    {
        int base = r*m;                     // flat index of row r, column 0
        for (c = 0; c < m; c++)
        {
            int at = base + c;
            C[at] = A[at] + B[at];
        }
    }
}

/*
 * Element-wise difference C = A - B of two m x m row-major matrices.
 * Each output element depends only on the matching input elements.
 */
void matSub(double *A, double *B, double *C, int m)
{
    int r, c;
    for (r = 0; r < m; r++)
    {
        int base = r*m;                     // flat index of row r, column 0
        for (c = 0; c < m; c++)
        {
            int at = base + c;
            C[at] = A[at] - B[at];
        }
    }
}

关于c - Strassen 矩阵乘法的内存管理，我们在Stack Overflow上找到一个类似的问题： https://stackoverflow.com/questions/27908663/

文章推荐： c - typedef 结构中的指针

文章推荐： css - 导致空白和水平滚动条的图像

文章推荐： html - amp-img 和在媒体大小的上下文中定义大小

文章推荐： c++ - 链接时不包含宏定义

java - Strassen 算法的矩阵划分
假设我有一个 NxN 矩阵，其中充满 1 到 10 范围内的随机整数。现在我想打电话PROC(A(1:n/2, 1:n/2)+A(n/2+1:n, n/2+1:n)... 其中 n 是矩阵的大小。换句
c - Strassen 矩阵乘法的内存管理
作为作业的一部分，我试图找出 Strassen 矩阵乘法和朴素乘法算法的交叉点。但同样，当矩阵变为 256x256 时，我无法继续。有人可以建议我适当的内存管理技术，以便能够处理更大的输入。 C语言代
java - Strassen 的算法归零
我们的想法是创建一个计时器，该计时器将返回执行特定功能所需的时间。我坐下来编写了一个矩阵类和一个 Strass 函数，应该将我输入其中的值相乘。定时器函数工作正常，因为它返回执行 Strass 函数
algorithm - Strassen 的算法效率帮助
您好，我正在尝试提高 Strassen 算法的效率，但需要一些帮助。该算法的递归关系如下: A(n) = 7A(n/2)+18(n/2)^2, for n>1, A(1) = 0. 我已经解决了这个问
algorithm - Strassen 计算矩阵平方的方法有什么问题？
使用与 Strassen's 相同的方法仅 5 次乘法就足以计算矩阵的平方。如果 A[2][2] = [a, b, c, d]，则乘法为 a * a、d * d、b * (a + d)、c * (a
c - Strassen 奇数矩阵的优化静态填充
我正在尝试解决 Strassen 算法的奇数矩阵问题。我的实现在某个点截断递归，称之为 Q，然后切换到标准实现。因此，在进行静态填充时，我实际上不需要填充到 2 的下一个幂。我只需要填充到至少大于输入
algorithm - Strassen 的算法证明
我一直在阅读关于矩阵乘法的 Strassen 算法。正如 Cormen 在算法导论中提到的，该算法并不直观。但是，我很想知道是否存在任何严格的算法数学证明以及算法设计中实际采用的内容。我尝试在 G
algorithm - Strassen 算法不是最快的？
我从某处复制了 strassen 的算法，然后执行了它。这是输出 n = 256 classical took 360ms strassen 1 took 33609ms strassen2 took
algorithm - strassen 的矩阵乘法在哪里有用？
Strassen 的矩阵乘法算法仅比传统的 O(N^3) 算法略有改进。它具有更高的常数因子并且更难实现。考虑到这些缺点，strassens 算法是否真的有用，它是否在任何用于矩阵乘法的库中实现？此外
c++ - Strassen 算法中的递归
我想知道您将如何在 Strassen 算法中进行递归调用，以及它们究竟在哪里需要。我知道 7 个乘法器比 8 个乘法器更有效，但我对如何递归计算这些乘法器感到困惑。特别是，如果我们遵循分而治之的范式
python - Schonhage-Strassen 乘法实现错误
我正在尝试使用 NTT 实现 Schonhage-Strassen 乘法算法，但遇到了一个问题，即最终生成的向量实际上并不等于它应有的值。对于两个输入向量 a 和 b，每个向量由 N 个“数字”组成
algorithm - 带递归的 Strassen 子三次矩阵乘法算法
我很难构思如何实现 Strassen 版本的该算法。对于背景，我有以下迭代版本的伪代码: def Matrix(a,b): result = [] for i in range(0,
c - 如何提高此 Strassen 算法实现的速度？
这个问题不太可能帮助任何 future 的访问者；它只与一个小的地理区域、一个特定的时间点或一个非常狭窄的情况有关，这些情况并不普遍适用于互联网的全局受众。为了帮助使这个问题更广泛地适用，visit
algorithm - 交叉点 : Strassen's Algorithm
就效率而言，Strassen 算法应该停止递归并应用乘法的最佳交叉点是多少？我知道这与具体的实现和硬件密切相关，但对于一般情况应该有某种指南或某人的一些实验结果。在网上搜索了一下，问了一些他们认为
c++ - 任何输入的 strassen 算法
我们怎样才能改变 Strassen algorithm以便它适用于任何大小的矩阵(例如 n=5)？最佳答案您所要做的就是用 0 的行和列填充矩阵，直到它们成为大小为 2 的幂的方阵。或者换句话说:
c++ - Strassen-Winograd 算法
我接到了一项任务，要用 C++ 编写 Strassen-Winograd 算法。我已经写了两次，但我的代码的第一个版本不起作用。结果矩阵左下角的结果是正确的。我的第二个版本运行速度比原始算法慢，即使
c++ - 为什么 Strassen 矩阵乘法比标准矩阵乘法慢得多？
我用 C++、Python 和 Java 编写了矩阵乘法程序，并测试了它们对两个 2000 x 2000 矩阵相乘的速度(参见 post)。标准 ikj 实现 - 在中- 拍摄: C++:15 秒(
c++ - 为什么我的 Strassen 矩阵乘法很慢？
我用 C++ 编写了两个矩阵乘法程序:Regular MM (source) , 和 Strassen 的 MM (source) ，它们都在大小为 2^k x 2^k 的方阵上运行(换句话说，是偶数
python - Strassen 矩阵乘法——接近，但仍然存在错误
我正在尝试在 Python 中实现 Strassen 矩阵乘法。我已经让它发挥了一些作用。这是我的代码: a = [[1,1,1,1],[2,2,2,2],[3,3,3,3],[4,4,4,4]] b
python - python 实现中的 Strassen 算法错误
我通过 Strassen 算法和 Python 3 中的朴素嵌套 for 循环实现得到了不同的矩阵乘法输出。代码: def new_matrix(r, c): """Create a new

太空宇宙

个人简介

我是一名优秀的程序员,十分优秀！

作者热门文章

滴滴打车优惠券免费领取

全站热门文章

首页

博学

6Ren·AI

商城

c - Strassen 矩阵乘法的内存管理