2 * Double-precision erf(x) function.
4 * Copyright (c) 2020, Arm Limited.
5 * SPDX-License-Identifier: MIT
8 #include "math_config.h"
12 #define TwoOverSqrtPiMinusOne 0x1.06eba8214db69p-3
13 #define C 0x1.b0ac16p-1
14 #define PA __erf_data.erf_poly_A
15 #define NA __erf_data.erf_ratio_N_A
16 #define DA __erf_data.erf_ratio_D_A
17 #define NB __erf_data.erf_ratio_N_B
18 #define DB __erf_data.erf_ratio_D_B
19 #define PC __erf_data.erfc_poly_C
20 #define PD __erf_data.erfc_poly_D
21 #define PE __erf_data.erfc_poly_E
22 #define PF __erf_data.erfc_poly_F
24 /* Top 32 bits of a double. */
25 static inline uint32_t
28 return asuint64 (x) >> 32;
31 /* Fast erf implementation using a mix of
32 rational and polynomial approximations.
33 Highest measured error is 1.01 ULPs at 0x1.39956ac43382fp+0. */
37 /* Get top word and sign. */
38 uint32_t ix = top32 (x);
39 uint32_t ia = ix & 0x7fffffff;
40 uint32_t sign = ix >> 31;
42 /* Normalized and subnormal cases */
44 { /* a = |x| < 0.84375. */
49 { /* a < 2^(-1015). */
50 double y = fma (TwoOverSqrtPiMinusOne, x, x);
51 return check_uflow (y);
53 return x + TwoOverSqrtPiMinusOne * x;
59 { /* a < 0.5 - Use polynomial approximation. */
60 double r1 = fma (x2, PA[1], PA[0]);
61 double r2 = fma (x2, PA[3], PA[2]);
62 double r3 = fma (x2, PA[5], PA[4]);
63 double r4 = fma (x2, PA[7], PA[6]);
64 double r5 = fma (x2, PA[9], PA[8]);
71 return fma (r, x, x); /* This fma is crucial for accuracy. */
74 { /* 0.5 <= a < 0.84375 - Use rational approximation. */
75 double x4, x8, r1n, r2n, r1d, r2d, r3d;
77 r1n = fma (x2, NA[1], NA[0]);
79 r2n = fma (x2, NA[3], NA[2]);
81 r1d = fma (x2, DA[0], 1.0);
82 r2d = fma (x2, DA[2], DA[1]);
83 r3d = fma (x2, DA[4], DA[3]);
84 double P = r1n + x4 * r2n + x8 * NA[4];
85 double Q = r1d + x4 * r2d + x8 * r3d;
86 return fma (P / Q, x, x);
89 else if (ia < 0x3ff40000)
90 { /* 0.84375 <= |x| < 1.25. */
91 double a2, a4, a6, r1n, r2n, r3n, r4n, r1d, r2d, r3d, r4d;
92 double a = fabs (x) - 1.0;
93 r1n = fma (a, NB[1], NB[0]);
95 r1d = fma (a, DB[0], 1.0);
97 r2n = fma (a, NB[3], NB[2]);
99 r2d = fma (a, DB[2], DB[1]);
100 r3n = fma (a, NB[5], NB[4]);
101 r3d = fma (a, DB[4], DB[3]);
104 double P = r1n + a2 * r2n + a4 * r3n + a6 * r4n;
105 double Q = r1d + a2 * r2d + a4 * r3d + a6 * r4d;
111 else if (ia < 0x40000000)
112 { /* 1.25 <= |x| < 2.0. */
116 double r1 = fma (a, PC[1], PC[0]);
117 double r2 = fma (a, PC[3], PC[2]);
118 double r3 = fma (a, PC[5], PC[4]);
119 double r4 = fma (a, PC[7], PC[6]);
120 double r5 = fma (a, PC[9], PC[8]);
121 double r6 = fma (a, PC[11], PC[10]);
122 double r7 = fma (a, PC[13], PC[12]);
123 double r8 = fma (a, PC[15], PC[14]);
141 else if (ia < 0x400a0000)
142 { /* 2 <= |x| < 3.25. */
144 a = fma (0.5, a, -1.0);
146 double r1 = fma (a, PD[1], PD[0]);
147 double r2 = fma (a, PD[3], PD[2]);
148 double r3 = fma (a, PD[5], PD[4]);
149 double r4 = fma (a, PD[7], PD[6]);
150 double r5 = fma (a, PD[9], PD[8]);
151 double r6 = fma (a, PD[11], PD[10]);
152 double r7 = fma (a, PD[13], PD[12]);
153 double r8 = fma (a, PD[15], PD[14]);
154 double r9 = fma (a, PD[17], PD[16]);
173 else if (ia < 0x40100000)
174 { /* 3.25 <= |x| < 4.0. */
178 double r1 = fma (a, PE[1], PE[0]);
179 double r2 = fma (a, PE[3], PE[2]);
180 double r3 = fma (a, PE[5], PE[4]);
181 double r4 = fma (a, PE[7], PE[6]);
182 double r5 = fma (a, PE[9], PE[8]);
183 double r6 = fma (a, PE[11], PE[10]);
184 double r7 = fma (a, PE[13], PE[12]);
201 else if (ia < 0x4017a000)
202 { /* 4 <= |x| < 5.90625. */
204 a = fma (0.5, a, -2.0);
206 double r1 = fma (a, PF[1], PF[0]);
207 double r2 = fma (a, PF[3], PF[2]);
208 double r3 = fma (a, PF[5], PF[4]);
209 double r4 = fma (a, PF[7], PF[6]);
210 double r5 = fma (a, PF[9], PF[8]);
211 double r6 = fma (a, PF[11], PF[10]);
212 double r7 = fma (a, PF[13], PF[12]);
213 double r8 = fma (a, PF[15], PF[14]);
235 /* Special cases : erf(nan)=nan, erf(+inf)=+1 and erf(-inf)=-1. */
236 if (unlikely (ia >= 0x7ff00000))
237 return (double) (1.0 - (sign << 1)) + 1.0 / x;