gpt4 book ai didi

c - mex 函数执行时间的巨大差异取决于内存分配的位置

转载 作者:行者123 更新时间:2023-11-30 16:41:31 25 4
gpt4 key购买 nike

我有一个 Matlab mex 函数,它重复调用名为 calculate(). 的 C 函数我制作了两个版本的函数:

版本A:每次mex()来电 calculate() ,它只传递输入参数,以及calculate()所需的所有内存。在 calculate() 内分配和释放 - 每一次!

版本B:calculate()所需的内存分配在 mex() 的开头,并且指针被传递到 calculate() 。仅在 mex() 结束时释放内存。换句话说,整个业务的分配/释放只进行一次。

我创建版本 B 的天真的假设是,根据内存分配需要时间的概念,这应该会提高速度。但实际上它的执行时间增加了大约 5 倍!那里发生了什么?

这是版本 B 的实际代码 - 您可以从注释中看到我最初为版本 A 分配的内存。

/* MEX business*/
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]){
/* mexPrintf("Inside the mex function of CalcMz2.\n");
mexEvalString("drawnow;");*/

/*declare local variables*/
struct InputParameters voxelPars;
InputParameters *point2pars;
struct Zcontrast contrast;
struct Zcontrast* ptr2contrast;
ptr2contrast = &contrast;
mxArray *in, *out;
mxArray *temp;
double *output;
int nFields;
int i,j,k;

in = mxDuplicateArray(prhs[0]); /*'in' now is a copy of the input struct*/
nFields = mxGetNumberOfFields(prhs[0]);
if (nFields != 39) mexPrintf("Error: the number of fields in the input struct is incorrect.\n");

//associate outputs
out = plhs[0] = mxCreateDoubleScalar(0.0);
output = mxGetPr(out);

/*Passed variables memory allocation*/
double **pntrA;
pntrA = (double **)mxCalloc(18, sizeof(double*));
for (i=0; i <18; i++){
pntrA[i] = (double *) mxCalloc(18, sizeof(double));
}
double *pntrAinvB;
pntrAinvB = (double *) mxCalloc(18, sizeof(double));


/*used by MatrInv()*/
double **CopyOfMatrix;
CopyOfMatrix = (double**)mxCalloc(18, sizeof(double*));
for (i=0; i<18; i++){
CopyOfMatrix[i] = (double*)mxCalloc(18, sizeof(double));
}

double *vector;
double *col;
int *indx;
vector = (double *)mxCalloc(18, sizeof(double));
indx = (int *)mxCalloc(18, sizeof(int));
col = (double *)mxCalloc(18, sizeof(double));

/*used by CalculateY()*/
double *Aat;
Aat = (double*)mxCalloc(18*18, sizeof(double));

double *Aate;
Aate = (double*)mxCalloc(18*18, sizeof(double));

double *sum;
sum = (double*)mxCalloc(18, sizeof(double));
double *product;
product = (double*)mxCalloc(18, sizeof(double));

/*Reassign values passed from input struct*/
temp = mxGetFieldByNumber(prhs[0],0,0);
voxelPars.ampl = mxGetPr(temp);
temp = mxGetFieldByNumber(prhs[0],0,1);
voxelPars.phi = mxGetPr(temp);
temp = mxGetFieldByNumber(prhs[0],0,2);
voxelPars.count1 = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,3);
voxelPars.pw1 = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,4);
voxelPars.b1 = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,5);
voxelPars.puloffsetppm = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,6);
voxelPars.pw1dc = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,7);
voxelPars.cf = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,8);
voxelPars.M0w = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,9);
voxelPars.T1a = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,10);
voxelPars.T2a = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,11);
voxelPars.offsetappm = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,12);
voxelPars.bwfraction = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,13);
voxelPars.M0a = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,14);
voxelPars.M0bw = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,15);
voxelPars.T1bw = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,16);
voxelPars.T2bw = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,17);
voxelPars.offsetbwppm = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,18);
voxelPars.exratebw = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,19);
voxelPars.M0b = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,20);
voxelPars.T1b = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,21);
voxelPars.T2b = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,22);
voxelPars.offsetbppm = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,23);
voxelPars.exrateb = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,24);
voxelPars.M0c = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,25);
voxelPars.T1c = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,26);
voxelPars.T2c = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,27);
voxelPars.offsetcppm = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,28);
voxelPars.exratec = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,29);
voxelPars.M0d = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,30);
voxelPars.T1d = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,31);
voxelPars.T2d = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,32);
voxelPars.offsetdppm = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,33);
voxelPars.exrated = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,34);
voxelPars.M0e = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,35);
voxelPars.T1e = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,36);
voxelPars.T2e = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,37);
voxelPars.offseteppm = mxGetScalar(temp);
temp = mxGetFieldByNumber(prhs[0],0,38);
voxelPars.exratee = mxGetScalar(temp);

point2pars = &voxelPars;


/*Initialize contrast's values to zero*/
ptr2contrast->SimMza = ptr2contrast->SimMzb = ptr2contrast->contrastz = 1.0;

/* call calculate()*/
calculate(point2pars, ptr2contrast, pntrA, pntrAinvB, CopyOfMatrix,vector, col, indx, Aat, Aate, sum, product);
*output = ptr2contrast->contrastz;
//mexPrintf("The contrast has been calculated as :%f.\n", ptr2contrast->contrastz);


/*Free memory
mxFree(pntrA); mxFree(pntrAinvB);*/
mxFree(CopyOfMatrix); mxFree(Aat); mxFree(Aate);
mxFree(vector);mxFree(indx);mxFree(col);
mxFree(sum); mxFree(product);
}//end mex function



void calculate (struct InputParameters *voxelPars1, struct Zcontrast *contrast, double **pntrA,double *pntrAinvB, double **CopyOfMatrix, double *vector, double *col,int *indx, double *Aat, double *Aate, double *sum, double *product){

/*Local variables*/
int i, j, k, n1, m;
double pwms, pw1delay;
int npul;
double timeStepSize;
double W;
double Wa, Wbw, Wb, Wc, Wd, We;
double Cbw, Cb, Cc, Cd, Ce;
double M0a, M0bw, M0b, M0c, M0d, M0e; //declaring these as locals just to make life easier
double pa, pbw, pb, pc, pd, pe;
double Cabw, Cab, Cac, Cad, Cae;
double k1a, k1bw, k1b, k1c, k1d, k1e;
double k2a, k2bw, k2b, k2c, k2d, k2e;

// double **pntrA;
// pntrA = (double **)mxCalloc(18, sizeof(double*));
// for (i=0; i <18; i++){
// pntrA[i] = (double *) mxCalloc(18, sizeof(double));


// }
//
double AdinvB[18];
double AinvB[18];
for(i=0; i<18; i++){
AdinvB[i] = 0.0;
AinvB[i] = 0.0;
}
//
// /*Passed variables memory allocation*/
// double **CopyOfMatrix; /*used by MatriInv()*/
// CopyOfMatrix = (double**)mxCalloc(18, sizeof(double*));
// for (i=0; i<18; i++){
// CopyOfMatrix[i] = (double*)mxCalloc(18, sizeof(double));
// }
//
// double *vector;
// double *col;
// int *indx;
// vector = (double *)mxCalloc(18, sizeof(double));
// indx = (int *)mxCalloc(18, sizeof(int));
// col = (double *)mxCalloc(18, sizeof(double));
//
// /*used by CalculateY()*/
// double *Aat;
// Aat = (double*)mxCalloc(18*18, sizeof(double));
// /* for (i=0; i<18; i++){
// Aat[i] = (double*)mxCalloc(18, sizeof(double));
// }*/
//
// double *Aate;
// Aate = (double*)mxCalloc(18*18, sizeof(double));
// /* for (i=0; i<18; i++){
// Aate[i] = (double*)mxCalloc(18, sizeof(double));
// } */
//
// double *sum;
// sum = (double*)mxCalloc(18, sizeof(double));
// double *product;
// product = (double*)mxCalloc(18, sizeof(double));

double W1[voxelPars1->count1]; //Now THIS might require a pointer, since count1 is only passed in the argument struct. We'll see.
double W1x[voxelPars1->count1];
double W1y[voxelPars1->count1];
/* mexPrintf("Declared all local variables in calculate().\n");
mexEvalString("drawnow;");*/

/*Fill local variables with values from input struct voxelPars*/

....计算之后继续进行

最佳答案

我最近也遇到了类似的问题。虽然我对 matlab 内存管理工具没有任何了解,但我可以说,对我来说,切换到 C 原生 malloc/free 大约可实现 5 倍的加速。

看来在你的情况下,也不需要管理Matlab堆上的内存(使用mxCalloc和类似的) - 因为内存分配仅供mex函数内部使用。建议您尝试一下:只需将 mxCalloc 替换为 malloc,将 mxFree 替换为 free 即可。

关于c - mex 函数执行时间的巨大差异取决于内存分配的位置,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/46247567/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com