在 C 中计算互补误差函数 erfcinv() 的反函数-6ren

在 C 中计算互补误差函数 erfcinv() 的反函数

转载作者：行者123 更新时间：2023-12-02 13:21:38

ISO C99 引入了erf()和erfc()函数及其精度特定的类似物，但令人费解的是没有添加它们的逆函数。互补误差函数的反函数 erfcinv ，在统计和概率计算中特别有用，例如作为标准正态分布的 CDF 的逆的构建 block ，normcdfinv .

我检查了document archive ISO/IEC JTC1/SC22/WG14 C 工作组的成员，目前仍不支持 erfcinv() ISO C18 中似乎也没有任何计划将其添加到目前正在制定的 ISO C2x 中。

C 程序员只能创建自己的实现。计算erfcinv()准确地说是具有挑战性的，而在数学上erfcinv (x) = erfinv (1 - x) ，当x时，这在有限精度浮点运算中具有较差的数值特性。接近于零。实现 4 ulp 误差范围(与英特尔 vector 数学库中 LA 配置文件的规范相同)似乎是一个合理的精度目标。如果计算允许大量使用常见处理器架构(例如 x86-64、ARM64、PowerPC 和 GPU)提供的融合乘加运算 (FMA)，这也将是有利的。最后，如果该算法适合使用某些架构中可用的快速倒数或倒数平方根计算，那么这将是有用的。

几年前，我在《应用晶体学杂志》上的一篇论文中确定了一个合适的候选人。通过纯单精度计算、FMA 和快速倒数的使用，我能够实现 3.18707 ulp 的最大误差。直到后来我才发现这篇论文的实现是基于 ACM 算法 602 使用的 Fortran 子例程 MERFI 的 C 翻译:

Theodore Fessler、William F. Ford、David A. Smith，“HURRY:标量序列和级数的加速算法”，ACM Transactions on Mathematical Software，卷。 9、第3期，1983年9月，第 355-357 页 ( online )

有什么好的算法可以实现单精度erfcinvf() ，考虑到上述设计目标？

最佳答案

不久前，我发布了一个answer展示如何用 C 语言编写精确且高效的逆误差函数 erfinvf() 的单精度实现。虽然数值问题阻止我们从 计算 erfcinvf erfinvf 在其整个受支持的输入域 [0,2] 上，我们可以重复使用之前针对中心区域中的 erfcinvf() 的答案中的核心近似值 [2.1875e- 3, 1.998125] 对输入参数进行简单的转换。这增加了不到一半的 ulp的错误。

关于 erfcinvf() 尾部的计算，我们观察到函数对称性使我们能够将近似值限制为 [0, 2.1875e-3)，并且对于此区间 erfcinv(x) 非常粗略 sqrt (-log(x)) 。使用它来转换输入参数会产生一个相当容易用多项式近似的函数。极小极大近似可以借助 Remez 算法(正如我在此处所做的那样)或使用常见的数学软件(如 Maple 或 Mathematica)来制作。

在大多数平台上，sqrtf() 直接映射到根据 IEEE-754 或等效软件模拟计算正确舍入平方根的硬件指令。 logf() 的实现之间通常存在更多可变性，因此在下面的示例性 C 代码中，我添加了自己的实现 my_logf() 以帮助再现性。但是，logf() 的任何忠实舍入实现(即误差界限为 1 ulp)应该会产生与下面的 erfcinvf() 代码非常相似的精度，实现了 3.13 ulp 错误界限。除了纯 C 实现之外，我还演示了如何利用 SSE 的倒数和倒数平方根功能来实现 erfcinvf()。

#include <math.h>
#include <stdint.h>

#define PORTABLE  (1)

float my_logf (float a);
#if !PORTABLE
#include "immintrin.h"
float sse_recipf (float a);
float sse_rsqrtf (float a);
#endif // !PORTABLE

/* Compute inverse of the complementary error function. For the central region,
   re-use the polynomial approximation for erfinv. For the tail regions, use an
   approximation based on the observation that erfcinv(x) is very approximately
   sqrt(-log(x)).

   PORTABLE=1 max. ulp err. = 3.12017
   PORTABLE=0 max. ulp err. = 3.13523
*/
float my_erfcinvf (float a)
{
    float r;

    if ((a >= 2.1875e-3f) && (a <= 1.998125f)) { // max. ulp err. = 2.77667
        float p, t;
        t = fmaf (-a, a, a + a);
        t = my_logf (t);
        p =              5.43877832e-9f;  //  0x1.75c000p-28 
        p = fmaf (p, t,  1.43286059e-7f); //  0x1.33b458p-23 
        p = fmaf (p, t,  1.22775396e-6f); //  0x1.49929cp-20 
        p = fmaf (p, t,  1.12962631e-7f); //  0x1.e52bbap-24 
        p = fmaf (p, t, -5.61531961e-5f); // -0x1.d70c12p-15 
        p = fmaf (p, t, -1.47697705e-4f); // -0x1.35be9ap-13 
        p = fmaf (p, t,  2.31468701e-3f); //  0x1.2f6402p-9 
        p = fmaf (p, t,  1.15392562e-2f); //  0x1.7a1e4cp-7 
        p = fmaf (p, t, -2.32015476e-1f); // -0x1.db2aeep-3 
        t = fmaf (p, t,  8.86226892e-1f); //  0x1.c5bf88p-1 
        r = fmaf (t, -a, t);
    } else {
        float p, q, s, t;
        t = (a >= 1.0f) ? (2.0f - a) : a;
        t = 0.0f - my_logf (t);
#if PORTABLE
        s = sqrtf (1.0f / t);
#else // PORTABLE
        s = sse_rsqrtf (t);
#endif // PORTABLE
        p =              2.23100796e+1f;  //  0x1.64f616p+4
        p = fmaf (p, s, -5.23008537e+1f); // -0x1.a26826p+5
        p = fmaf (p, s,  5.44409714e+1f); //  0x1.b3871cp+5
        p = fmaf (p, s, -3.35030403e+1f); // -0x1.0c063ap+5
        p = fmaf (p, s,  1.38580027e+1f); //  0x1.bb74c2p+3
        p = fmaf (p, s, -4.37277269e+0f); // -0x1.17db82p+2
        p = fmaf (p, s,  1.53075826e+0f); //  0x1.87dfc6p+0
        p = fmaf (p, s,  2.97993328e-2f); //  0x1.e83b76p-6
        p = fmaf (p, s, -3.71997419e-4f); // -0x1.86114cp-12
        p = fmaf (p, s, s);
#if PORTABLE
        r = 1.0f / p;
#else // PORTABLE
        r = sse_recipf (p);
        if (t == INFINITY) r = t;
#endif // PORTABLE
        if (a >= 1.0f) r = 0.0f - r;
    }
    return r;
}

/* Compute inverse of the CDF of the standard normal distribution.
   max ulp err = 4.08385
*/
float my_normcdfinvf (float a)
{
    return fmaf (-1.41421356f, my_erfcinvf (a + a), 0.0f);
}

/* natural logarithm. max ulp err = 0.85089 */
float my_logf (float a)
{
    float m, r, s, t, i, f;
    int32_t e;
    const float cutoff_f = 0.666666667f;
    if ((a > 0.0f) && (a <=  0x1.fffffep+127f)) { // 3.40282347e+38
        m = frexpf (a, &e);
        if (m < cutoff_f) {
            m = m + m;
            e = e - 1;
        }
        i = (float)e;
        f = m - 1.0f;
        s = f * f;
        /* Compute log1p(f) for f in [-1/3, 1/3] */
        r =             -0x1.0ae000p-3f;  // -0.130310059
        t =              0x1.208000p-3f;  //  0.140869141
        r = fmaf (r, s, -0x1.f1988ap-4f); // -0.121483363
        t = fmaf (t, s,  0x1.1e5740p-3f); //  0.139814854
        r = fmaf (r, s, -0x1.55b36ep-3f); // -0.166846141
        t = fmaf (t, s,  0x1.99d8b2p-3f); //  0.200120345
        r = fmaf (r, s, -0x1.fffe02p-3f); // -0.249996200
        r = fmaf (t, f, r);
        r = fmaf (r, f,  0x1.5554fap-2f); //  0.333331972
        r = fmaf (r, f, -0x1.000000p-1f); // -0.500000000
        r = fmaf (r, s, f);
        r = fmaf (i, 0x1.62e430p-01f, r); //  0.693147182 // log(2)
    } else {
        r = a + a;  // silence NaNs if necessary
        if (a  < 0.0f) r = 0.0f/0.0f; // QNaN INDEFINITE
        if (a == 0.0f) r = -INFINITY; // -INF
    }
    return r;
}

float sse_recipf (float a)
{
    __m128 t;
    float e, r;
    t = _mm_set_ss (a);
    t = _mm_rcp_ss (t);
    _mm_store_ss (&r, t);
    e = fmaf (0.0f - a, r, 1.0f);
    e = fmaf (e, e, e);
    r = fmaf (e, r, r);
    return r;
}


float sse_rsqrtf (float a)
{
    __m128 t;
    float e, r;
    t = _mm_set_ss (a);
    t = _mm_rsqrt_ss (t);
    _mm_store_ss (&r, t);
    e = fmaf (0.0f - a, r * r, 1.0f);
    r = fmaf (fmaf (0.375f, e, 0.5f), e * r, r);
    return r;
}

关于在 C 中计算互补误差函数 erfcinv() 的反函数，我们在Stack Overflow上找到一个类似的问题： https://stackoverflow.com/questions/60472139/

文章推荐： xaml - TextBlock 中的多种颜色

文章推荐： perl - 在提示用户输入时如何提供可编辑的默认值？

r - 如何使用 ggplot 绘制反向(互补)ecdf？
我目前使用 stat_ecdf 来绘制累积频率图。这是我使用的代码 cumu_plot <- ggplot(house_total_year, aes(download_speed, col
python - pandas 操作是否与 groupby 互补(相反)？
我有一个包含很多列的表(数据框)。现在我想对其中一列的平均值进行计算。这意味着我需要对所有列进行分组，但我需要对其进行平均的列除外。我当然可以写: df.groupby(['col1', 'col2'

行者123

个人简介

我是一名优秀的程序员,十分优秀！

作者热门文章

滴滴打车优惠券免费领取

全站热门文章

首页

博学

6Ren·AI

商城

在 C 中计算互补误差函数 erfcinv() 的反函数