c - 结构中的指针是否会减慢我的代码速度？-6ren

c - 结构中的指针是否会减慢我的代码速度？

转载作者：行者123 更新时间：2023-12-04 11:56:33

我正在寻找一些帮助或提示来加速我的代码。
我已经实现了一个例程，用于从一组球谐系数 C_{n,m} 和 S_{n,m} 计算空间中某个点 (r,phi,lambda) 的引力势。等式显示在下面的链接中:

并且包括与纬度 (phi) 相关的关联勒让德多项式 P_{n,m} 的递归计算，从前两个值 P{0,0} 和 P_{1,1} 开始。
起初，我将其作为 MATLAB C-MEX 代码实现，只有我代码的核心部分是用 C 语言编写的。我想做一个纯C程序，但发现代码运行速度慢了3-5倍，这让我想知道为什么。这可能是我定义结构并使用指向中央代码中的指针的方式吗？
看起来它是需要额外时间的核心计算部分，但该部分没有改变，尽管在我将指针直接传递给变量之前，现在我在结构内部使用指针。
任何帮助表示赞赏!
在下文中，我将尝试解释我的代码并展示一些摘录:
在程序开始时，我定义了三个结构。一个保存球谐系数，C_{n,m} 和 S_{n,m}, (ggm_struct)，一个保存计算坐标 (comp_struct)，一个保存结果 (func_struct):

// Define constant variables
const double deg2rad = M_PI/180.0;                 // degrees to radians conversion factor
const double sfac = 1.0000E-280;                   // scaling factor
const double sqrt2 = 1.414213562373095;            // sqrt(2)
const double sqrt3 = 1.732050807568877;            // sqrt(3)

// Define structure to hold geopotential model
struct ggm_struct {
    char   product_type[100], modelname[100], errors[100], norm[100], tide_system[100];
    double GM, R, *C, *S;
    int    max_degree, ncoef;
};
// Define structure to hold computation coordinates
struct comp_struct {
    double *lat, *lon, *h;
    double *r, *phi;
    int    nlat, nlon;
};
/* Define structure to hold results */
struct func_struct {
    double *rval;
    int    npoints;
};

然后我有一个(子)函数，它首先分配空间，然后从 ascii 文件加载系数作为

int read_gfc(char mfile[100], int *nmax, int *mmax, struct ggm_struct *ggm)
{

    // Set file identifier
    FILE *fid;

    // Declare variables
    char   str[200], var[100];
    int    n, m, nid, l00 = 0, l10 = 0, l11 = 0;
    double c, s;

    // Determine number of coefficients
    ggm->ncoef = (*nmax+2)*(*nmax+1)/2;

    // Allocate memory for coefficients
    ggm->C = (double*) malloc(ggm->ncoef*sizeof(double));
    if (ggm->C == NULL){
        printf("Error: Memory for C not allocated!");
        return -ENOMEM;
    }
    ggm->S = (double*) malloc(ggm->ncoef*sizeof(double));
    if (ggm->S == NULL){
        printf("Error: Memory for S not allocated!");
        return -ENOMEM;
    }

    // Open file
    fid = fopen(mfile,"r");

    // Check that file was opened correctly
    if (fid == NULL){
        printf("Error: opening file %s!",mfile);
        return -ENOMEM;
    }

    // Read file header
    while (fgets(str,200,fid) != NULL && strncmp(str,"end_of_head",11) != 0){

        // Extract model parameters
        if (strncmp(str,"product_type",12) == 0){ sscanf(str,"%s %s",var,ggm->product_type); }
        if (strncmp(str,"modelname",9) == 0){ sscanf(str,"%s %s",var,ggm->modelname); }
        if (strncmp(str,"earth_gravity_constant",22) == 0){ sscanf(str,"%s %lf",var,&ggm->GM); }
        if (strncmp(str,"radius",6) == 0){ sscanf(str,"%s %lf",var,&ggm->R); }
        if (strncmp(str,"max_degree",10) == 0){ sscanf(str,"%s %d",var,&ggm->max_degree); }
        if (strncmp(str,"errors",6) == 0){ sscanf(str,"%s %s",var,ggm->errors); }
        if (strncmp(str,"norm",4) == 0){ sscanf(str,"%s %s",var,ggm->norm); }
        if (strncmp(str,"tide_system",11) == 0){ sscanf(str,"%s %s",var,ggm->tide_system); }

    }

    // Read coefficients
    while (fgets(str,200,fid) != NULL){

        // Extract parameters
        sscanf(str,"%s %d %d %lf %lf",var,&n,&m,&c,&s);

        // Store parameters
        if (n <= *nmax && m <= *mmax) {

            // Determine index
            nid = (n+1)*n/2 + m;

            // Store values
            *(ggm->C+nid) = c;
            *(ggm->S+nid) = s;

        }

    }

    // Close fil
    fclose(fid);

    // Return from function
    return 0;

}

之后，计算网格由七个组件的数组定义。例如，数组 [-90 90 -180 180 1 1 0] 定义了从 -90 到 90 度纬度以 1 度增量和从 -180 到 180 度经度以 1 度增量的网格。高度为零。从这个数组，在(子)函数中生成一个计算网格:

int make_grid(double *grid, struct comp_struct *inp)
{

    // Declare variables
    int n;

    /* Echo routine */
    printf("Creating grid of coordinates\n");
    printf("             [lat1,lat2,dlat] = [%f,%f,%f]\n", *grid, *(grid+1), *(grid+4) );
    printf("             [lon1,lon2,dlon] = [%f,%f,%f]\n", *(grid+2), *(grid+3), *(grid+5) );
    printf("             h                = %f\n", *(grid+6));

    /* Latitude ------------------------------------------------------------- */

    // Determine number of increments
    inp->nlat = ceil( ( *(grid+1) - *grid + *(grid+4) ) / *(grid+4) );

    // Allocate memory
    inp->lat = (double*) malloc(inp->nlat*sizeof(double));
    if (inp->lat== NULL){
        printf("Error: Memory for LATITUDE (inp.lat) points not allocated!");
        return -ENOMEM;
    }

    // Fill in values
    *(inp->lat) = *(grid+1);
    for (n = 1; n < inp->nlat-1; n++) {
        *(inp->lat+n) = *(inp->lat+n-1) - *(grid+4);
    }
    *(inp->lat+inp->nlat-1) = *grid;

    /* Longitude ------------------------------------------------------------ */

    // Determine number of increments
    inp->nlon = ceil( ( *(grid+3) - *(grid+2) + *(grid+5) ) / *(grid+5) );

    // Allocate memory
    inp->lon = (double*) malloc(inp->nlon*sizeof(double));
    if (inp->lon== NULL){
        printf("Error: Memory for LONGITUDE (inp.lon) points not allocated!");
        return -ENOMEM;
    }

    // Fill in values
    *(inp->lon) = *(grid+2);
    for (n = 1; n < inp->nlon-1; n++) {
        *(inp->lon+n) = *(inp->lon+n-1) + *(grid+5);
    }
    *(inp->lon+inp->nlon-1) = *(grid+3);

    /* Height --------------------------------------------------------------- */

    // Allocate memory
    inp->h = (double*) malloc(inp->nlat*sizeof(double));
    if (inp->h== NULL){
        printf("Error: Memory for HEIGHT (inp.h) points not allocated!");
        return -ENOMEM;
    }

    // Fill in values
    for (n = 0; n < inp->nlat; n++) {
        *(inp->h+n) = *(inp->h+n-1) + *(grid+6);
    }

    // Return from function
    return 0;

}

然后将这些地理坐标转换为球面坐标以使用另一个(子)例程进行计算

int geo2sph(struct comp_struct *inp, int *lgrid)
{

    // Declare variables
    double a = 6378137.0, e2 = 6.69437999014E-3; /* WGS84 parameters */
    double x, y, z, sinlat, coslat, sinlon, coslon, R_E;
    int    i, j, nid;

    /* Allocate space ------------------------------------------------------- */
    
    // radius
    inp->r = (double*) malloc(inp->nlat*sizeof(double));
    if (inp->r== NULL){
        printf("Error: Memory for SPHERICAL DISTANCE (inp.r) points not allocated!");
        return -ENOMEM;
    }

    // phi
    inp->phi = (double*) malloc(inp->nlat*sizeof(double));
    if (inp->phi== NULL){
        printf("Error: Memory for SPHERICAL LATITUDE (inp.phi) points not allocated!");
        return -ENOMEM;
    }

    /* Loop over latitude =================================================== */
    for (i = 0; i < inp->nlat; i++) {

        // Compute sine and cosine of latitude
        sinlat = sin(*(inp->lat+i));
        coslat = cos(*(inp->lat+i));

        // Compute radius of curvature
        R_E = a / sqrt( 1.0 - e2*sinlat*sinlat );

        // Compute sine and cosine of longitude
        sinlon = sin(*(inp->lon));
        coslon = cos(*(inp->lon));

        // Compute rectangular coordinates
        x = ( R_E + *(inp->h+i) ) * coslat * coslon;
        y = ( R_E + *(inp->h+i) ) * coslat * sinlon;
        z = ( R_E*(1.0-e2) + *(inp->h+i) ) * sinlat;

        // Compute sqrt( x^2 + y^2 )
        R_E = sqrt( x*x + y*y );

        // Derive radial distance
        *(inp->r+i) = sqrt( R_E * R_E + z*z );

        // Derive spherical latitude
        if (R_E < 1) {
            if (z > 0) { *(inp->phi+i) = M_PI/2.0; }
            else { *(inp->phi+i) = -M_PI/2.0; }
        }
        else {
            *(inp->phi+i) = asin( z / *(inp->r+i) );
        }

    }

    // Return from function
    return 0;

}

最后，引力势是在自己的(子)函数内计算的。这是代码的核心部分，与 MATLAB C-MEX 函数或多或少相同。唯一的区别似乎是之前(在 MATLAB MEX 中)一切都被定义为(简单的)双变量 - 现在变量位于包含指针的结构内。

int gravpot(struct comp_struct *inp, struct ggm_struct *ggm, int *nmax,
    int *mmax, int *lgrid, struct func_struct *out)
{

    // Declare variables
    double GMr, ar, t, u, u2, arn, gnm, hnm, P, Pp1, Pp2, msum;
    double Pmm[*nmax+1], CPnm[*mmax+1], SPnm[*mmax+1];
    int i, j, n, m, id;

    // Allocate memory
    out->rval = (double*) malloc(inp->nlat*inp->nlon*sizeof(double));
    if (out->rval== NULL){
        printf("Error: Memory for OUTPUT (out.rval) not allocated!");
        return -ENOMEM;
    }

    /* Compute sectorial values of associated Legendre polynomials ========== */

    // Define seed values ( divided by u^m )
    Pmm[0] = sfac;
    Pmm[1] = sqrt3 * sfac;

    // Compute sectorial values, [1] eq. 13 and 28 ( divided by u^m )
    for (m = 2; m <= *nmax; m++) {
        Pmm[m] = sqrt( (2.0*m+1.0) / (2.0*m) ) * Pmm[m-1];
    }

    /* ====================================================================== */

    /* Loop over latitude =================================================== */
    for (i = 0; i < inp->nlat; i++) {
      
        // Compute ratios to be used in summation
        GMr = ggm->GM / *(inp->r+i);
        ar  = ggm->R / *(inp->r+i);

        /* ---------------------------------------------------------------------
         * Compute product of Legendre values and spherical harmonic coefficients.
         * Products of similar degree are summed together, resulting in mmax
         * values. The degree terms are latitude dependent, such that these mmax
         * sums are valid for every point with the same latitude.
         * The values of the associated Legendre polynomials, Pnm, are scaled by
         * sfac = 10^(-280) and divided by u^m in order to prevent underflow and
         * overflow of the coefficients.
         * ------------------------------------------------------------------ */

        // Form coefficients for Legendre recursive algorithm
        t   = sin(*(inp->phi+i));
        u   = cos(*(inp->phi+i));
        u2  = u * u;
        arn = ar;

        /* Degree n = 0 terms ----------------------------------------------- */

        // Compute order m = 0 term (S term is zero)
        CPnm[0] = Pmm[0] * *(ggm->C);

        /* Degree n = 1 terms ----------------------------------------------- */

        // Compute (1,1) terms, [1] eq. 3
        CPnm[1] = ar * Pmm[1] * *(ggm->C+2);
        SPnm[1] = ar * Pmm[1] * *(ggm->S+2);

        // Compute (1,0) Legendre value, [1] eq. 18 and 27
        P = t * Pmm[1];

        // Add (1,0) terms to sum (S term is zero), [1] eq. 3
        CPnm[0] = CPnm[0] + ar * P * *(ggm->C+1);

        /* Degree n = [2,n_max] --------------------------------------------- */
        for (n = 2; n <= *nmax; n++) {

            // Compute power term
            arn = arn * ar;

            /* Compute sectorial (m=n) terms ++++++++++++++++++++++++++++++++ */
        
            // Extract associated Legendre value
            Pp1 = Pmm[n];

            // Compute product terms, [1] eq. 3
            if (n <= *mmax) {
                id = (n+1)*n/2 + n;
                CPnm[n] = arn * Pp1 * *(ggm->C+id);
                SPnm[n] = arn * Pp1 * *(ggm->S+id);
            }

            /* Compute first non-sectorial terms (m=n-1) ++++++++++++++++++++ */

            // Compute associated Legendre value, [1] eq. 18 and 27
            gnm = sqrt( 2.0*n );
            P   = gnm * t * Pp1;
        
            // Add terms to summation, eq. 3 in [1]
            if (n-1 <= *mmax) {
                id = (n+1)*n/2 + n - 1;
                CPnm[n-1] = CPnm[n-1] + arn * P * *(ggm->C+id);
                SPnm[n-1] = SPnm[n-1] + arn * P * *(ggm->S+id);
            }

            /* Compute terms of order m = [n-2,1] +++++++++++++++++++++++++++ */
            for (m = n-2; m > 0; m--) {

                // Set previous values
                Pp2 = Pp1;
                Pp1 = P;
        
                // Compute associated Legendre value, [1] eq. 18, 19 and 27
                gnm = 2.0*(m+1.0) / sqrt( (n-m)*(n+m+1.0) );
                hnm = sqrt( (n+m+2.0)*(n-m-1.0)/(n-m)/(n+m+1.0) );
                P   = gnm * t * Pp1 - hnm * u2 * Pp2;

                // Add product terms to summation, eq. 3 in [1]
                if (m <= *mmax) {
                    id = (n+1)*n/2 + m;
                    CPnm[m] = CPnm[m] + arn * P * *(ggm->C+id);
                    SPnm[m] = SPnm[m] + arn * P * *(ggm->S+id);
                }

            }

            /* Compute zonal terms (m=0) ++++++++++++++++++++++++++++++++++++ */

            // Compute associated Legendre value, [1] eq. 18, 19 and 27
            gnm = 2.0 / sqrt( n*(n+1.0) );
            hnm = sqrt( (n+2.0)*(n-1.0)/n/(n+1.0) );
            P   = ( gnm * t * P - hnm * u2 * Pp1 ) / sqrt2;

            // Add terms to summation (S term is zero), [1] eq. 3
            id = (n+1)*n/2;
            CPnm[0] = CPnm[0] + arn * P * *(ggm->C+id);

        } /* ---------------------------------------------------------------- */

        /* Loop over longitude ============================================== */
        for (j = 0; j < inp->nlon; j++) {
        
            /* -----------------------------------------------------------------
             * All associated Legendre polynomials (latitude dependent) are now
             * computed and multiplied by the corresponding spherical harmonic
             * coefficient. These products are scaled by u^m, meaning that
             * Horner's scheme is used in the following summation.
             * -------------------------------------------------------------- */
      
            // Initialise order-dependent sum
            msum = 0.0;

            // Derive longitude id
            id = j + i * *lgrid;
            
            // Loop over order (m > 0)
            for (m = *mmax; m > 0; m--) {

                // Add to order-dependent sum using Horner's scheme, [1] eq. 2, 3 and 31
                msum = ( msum + cos( m * *(inp->lon+id) ) * CPnm[m] 
                    + sin( m * *(inp->lon+id) ) * SPnm[m] ) * u;
      
            }

            // Add zero order term to sum
            msum = msum + CPnm[0];

            // Rescale value into gravitational potential, [1] eq. 1
            *(out->rval+i+j*inp->nlat) = GMr * msum / sfac;
    
        } /* ================================================================ */

    } /* ==================================================================== */

    // Return from function
    return 0;

}

再次，非常感谢任何帮助，如果相关，可以提供其他信息，但这已经成为一个很长的帖子。我很难接受纯 c 代码的运行速度比 MATLAB C-MEX 代码慢。

最佳答案

简而言之，是的，指针可以防止某些编译器优化，从而导致潜在的减速。至少，对于 ICC 和 GCC 来说显然是这种情况。程序的性能受到 的强烈影响指针别名和矢量化 .
实际上，编译器无法轻易知道所提供的指针是否彼此别名或与所提供数据结构的某些字段的地址别名。结果，编译器趋于保守，并假设指向的值可以随时更改并经常重新加载它们。这可以防止优化，例如在 gravpot 中拆分某些循环。 (使用 GCC——见 this modified code 的第 119 行)。此外，间接和别名往往会阻止热循环的矢量化(即使用目标处理器提供的 SIMD 指令)。矢量化可以强烈影响代码的性能。
举个例子，here是geo2sph的初始代码和 here是一个稍微修改的实现。在第一种情况下，ICC 生成缓慢的标量实现，而在第二种情况下，ICC 生成明显更快的矢量化实现。两种实现的唯一区别是使用了 restrict 关键词。这个关键字告诉编译器，在指针的生命周期内，只有指针本身或直接派生自它的值(如指针+1)将用于访问它指向的对象(更多信息请参见 here信息)。注意使用restrict关键字是危险的，使用它时应该非常小心，因为如果限制提示错误(很难调试)，编译器可能会生成错误的代码。或者，您可以使用 帮助编译器生成矢量化代码。 OpenMP SIMD 指令 #pragma omp simd (结果见 here)。请注意，您应该确保目标代码可以安全地向量化(例如，的迭代必须是独立的)。

关于c - 结构中的指针是否会减慢我的代码速度？，我们在Stack Overflow上找到一个类似的问题： https://stackoverflow.com/questions/68648181/

文章推荐： haskell - 索引单子(monad)的高阶编码如何工作？

文章推荐： c++ - 将指向部分构造对象的指针转换为指向基类的指针是否合法？

c - 我尝试理解 [c 代码 -> 汇编] 代码
我尝试理解[c代码 -> 汇编]代码 void node::Check( data & _data1, vector& _data2) { -> push ebp -> mov ebp,esp ->
c# - 在当前表单(代码)的上下文中从字符串动态运行 C# 代码
我需要在当前表单(代码)的上下文中运行文本文件中的代码。其中一项要求是让代码创建新控件并将其添加到当前窗体。例如，在Form1.cs中: using System.Windows.Forms; ..
c# - c++代码(malloc方法)到c#代码
我有此 C++ 代码并将其转换为 C# (.net Framework 4) 代码。有没有人给我一些关于 malloc、free 和 sprintf 方法的提示？ int monate = ee; d
C 代码，简单的 Web 服务器(代码 OK)
我的网络服务器代码有问题 #include #include #include #include #include #include #include int
html - 将特定列表元素置于斜体的 CSS 代码(不更改 html 代码)
给定以下 html 代码，将列表中的第三个元素(即“美丽”一词)以斜体显示的 CSS 代码是什么？当然，我可以给这个元素一个 id 或一个 class，但 html 代码必须保持不变。谢谢
javascript - 是否有一些库可用于 IQR 代码(不是 QR 代码)？
关闭。这个问题不符合Stack Overflow guidelines .它目前不接受答案。我们不允许提问寻求书籍、工具、软件库等的推荐。您可以编辑问题，以便用事实和引用来回答。关闭 7 年前。
macros - 在 Inno Setup [代码] 部分将宏扩展为 Pascal 代码
我试图制作一个宏来避免重复代码和注释。我试过这个: #define GrowOnPage(any Page, any Component) Component.Width := Page.Surfa
c# - 我正在尝试将我的旧 c++ 代码 "translate"转换为 c# 代码
我正在尝试将我的旧 C++ 代码“翻译”成头条新闻所暗示的 C# 代码。问题是我是 C# 中的新手，并不是所有的东西都像 C++ 中那样。在 C++ 中这些解决方案运行良好，但在 C# 中只是不能。我
r - 让 Visual Studio 代码(自动)格式化 R 代码
在 Windows 10 上工作，R 语言的格式化程序似乎没有在 Visual Studio Code 中完成它的工作。我试过R support for Visual Studio Code和 R-T
dynamic - 是否可以在 Python 脚本中生成和执行 Python 代码？ [动态 Python 代码]
我正在处理一些报告(计数)，我必须获取不同参数的计数。非常简单但乏味。一个参数的示例查询: qCountsEmployee = ( "select count(*) from %s wher
ios - 随机和偶然的网络错误(NSURLErrorDomain 代码=-1001 和 NSURLErrorDomain 代码=-1005)
最近几天我尝试从 d00m 调试网络错误。我开始用尽想法/线索，我希望其他 SO 用户拥有可能有用的宝贵经验。我希望能够提供所有相关信息，但我个人无法控制服务器环境。整个事情始于用户注意到我们应用程
javascript - visual studio 代码 intellisense 不适用于 dojo amd 代码
我有一个 app.js 文件，其中包含如下 dojo amd 模式代码: require(["dojo/dom", ..], function(dom){ dom.byId('someId').i
cuda - 'code=sm_X' 是否仅嵌入二进制(cubin)代码，或 PTX 代码，或两者？
我对“-gencode”语句中的“code=sm_X”选项有点困惑。一个例子:NVCC 编译器选项有什么作用 -gencode arch=compute_13,code=sm_13 嵌入库中？只有
javascript - 在 Javascript 下拉列表中添加 HTML 代码，Javascript 不评估 HTML 代码
我为我的表格使用 X-editable 框架。但是我有一些问题。 $(document).ready(function() { $('.access').editable({
python - 在 linux 中运行 flask/python 代码？基本 flask 代码
我一直在通过本教程学习 flask/python http://blog.miguelgrinberg.com/post/the-flask-mega-tutorial-part-i-hello-wo
vim - G 代码 M 代码 VI 和 EMACS 的 CNC 语法
我想将 Vim 和 EMACS 用于 CNC、G 代码和 M 代码。 Vim 或 EMACS 是否有任何语法或模式来处理这种类型的代码？最佳答案一些快速搜索使我找到了 this vim 和 thi
iphone - 寻找关于将 Pre-Storyboard 代码 (XCode4) 移动到 Storyboard 代码 (XCode5) 的教程
关闭。这个问题不符合Stack Overflow guidelines .它目前不接受答案。想改进这个问题？更新问题，使其成为 on-topic对于堆栈溢出。 7年前关闭。 Improve this
vim - 如何让 Vim 理解 *.md 文件包含 Markdown 代码，而不是 Modula-2 代码？
这个问题在这里已经有了答案: Enabling markdown highlighting in Vim (5 个回答) 6年前关闭。当我在 Vim 中编辑包含 Markdown 代码的 READM
ios - 错误域=AVFoundationErrorDomain 代码=-11800 "The operation could not be completed"{错误域=NSOSStatusErrorDomain 代码=-16976 "(null)"}
我正在 Swift3 iOS 中开发视频应用程序。基本上我必须将视频 Assets 和音频与淡入淡出效果合并为一个并将其保存到 iPhone 画廊。为此，我使用以下方法: private func d
jenkins - 无法通过 Jenkins 管道作业的 jenkinsfile 中的 Groovy 代码(或 java 代码)创建文件
pipeline { agent any stages { stage('Build') { steps { e

行者123

个人简介

我是一名优秀的程序员,十分优秀！

作者热门文章

滴滴打车优惠券免费领取

全站热门文章

首页

博学

6Ren·AI

商城

c - 结构中的指针是否会减慢我的代码速度？