]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - lib/msun/src/e_rem_pio2.c
Optimize the 9pi/2 < |x| <= 2**19pi/2 case on amd64 and i386 by avoiding
[FreeBSD/FreeBSD.git] / lib / msun / src / e_rem_pio2.c
1
2 /* @(#)e_rem_pio2.c 1.4 95/01/18 */
3 /*
4  * ====================================================
5  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
6  *
7  * Developed at SunSoft, a Sun Microsystems, Inc. business.
8  * Permission to use, copy, modify, and distribute this
9  * software is freely granted, provided that this notice 
10  * is preserved.
11  * ====================================================
12  *
13  */
14
15 #include <sys/cdefs.h>
16 __FBSDID("$FreeBSD$");
17
18 /* __ieee754_rem_pio2(x,y)
19  * 
20  * return the remainder of x rem pi/2 in y[0]+y[1] 
21  * use __kernel_rem_pio2()
22  */
23
24 #include <float.h>
25
26 #include "math.h"
27 #include "math_private.h"
28
/*
 * npio2_hw[k-1] is the high 32-bit word of the double representation of
 * k*pi/2, for k = 1 .. 32.  The medium-size reduction path compares the
 * high word of |x| against the entry for its computed multiple n to detect
 * arguments that lie close to n*pi/2, where the subtraction cancels heavily.
 */
static const int32_t npio2_hw[] = {
	0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB,	/* k =  1 ..  4 */
	0x401F6A7A, 0x4022D97C, 0x4025FDBB, 0x402921FB,	/* k =  5 ..  8 */
	0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C,	/* k =  9 .. 12 */
	0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB,	/* k = 13 .. 16 */
	0x403AB41B, 0x403C463A, 0x403DD85A, 0x403F6A7A,	/* k = 17 .. 20 */
	0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C,	/* k = 21 .. 24 */
	0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB,	/* k = 25 .. 28 */
	0x4046C6CB, 0x40478FDB, 0x404858EB, 0x404921FB,	/* k = 29 .. 32 */
};
37
/*
 * Constants used by the pi/2 argument reduction.  The trailing comment on
 * each line gives the high and low 32-bit words of the double.
 *
 * invpio2:  53 bits of 2/pi
 * pio2_1:   first  33 bits of pi/2
 * pio2_1t:  pi/2 - pio2_1
 * pio2_2:   second 33 bits of pi/2
 * pio2_2t:  pi/2 - (pio2_1+pio2_2)
 * pio2_3:   third  33 bits of pi/2
 * pio2_3t:  pi/2 - (pio2_1+pio2_2+pio2_3)
 */

static const double zero    =  0.00000000000000000000e+00; /* 0x00000000, 0x00000000 */
static const double half    =  5.00000000000000000000e-01; /* 0x3FE00000, 0x00000000 */
static const double two24   =  1.67772160000000000000e+07; /* 0x41700000, 0x00000000 */
static const double invpio2 =  6.36619772367581382433e-01; /* 0x3FE45F30, 0x6DC9C883 */
static const double pio2_1  =  1.57079632673412561417e+00; /* 0x3FF921FB, 0x54400000 */
static const double pio2_1t =  6.07710050650619224932e-11; /* 0x3DD0B461, 0x1A626331 */
static const double pio2_2  =  6.07710050630396597660e-11; /* 0x3DD0B461, 0x1A600000 */
static const double pio2_2t =  2.02226624879595063154e-21; /* 0x3BA3198A, 0x2E037073 */
static const double pio2_3  =  2.02226624871116645580e-21; /* 0x3BA3198A, 0x2E000000 */
static const double pio2_3t =  8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */
59
60 #ifdef INLINE_REM_PIO2
61 extern inline
62 #endif
63 int
64 __ieee754_rem_pio2(double x, double *y)
65 {
66         double z,w,t,r,fn;
67         double tx[3];
68         int32_t e0,i,j,nx,n,ix,hx;
69         u_int32_t low;
70
71         GET_HIGH_WORD(hx,x);            /* high word of x */
72         ix = hx&0x7fffffff;
73 #if 0 /* Must be handled in caller. */
74         if(ix<=0x3fe921fb)   /* |x| ~<= pi/4 , no need for reduction */
75             {y[0] = x; y[1] = 0; return 0;}
76 #endif
77         if (ix <= 0x400f6a7a) {         /* |x| ~<= 5pi/4 */
78             if ((ix & 0xfffff) == 0x921fb)  /* |x| ~= pi/2 or 2pi/2 */
79                 goto medium;            /* cancellation -- use medium case */
80             if (ix <= 0x4002d97c) {     /* |x| ~<= 3pi/4 */
81                 if (hx > 0) {
82                     z = x - pio2_1;     /* one round good to 85 bits */
83                     y[0] = z - pio2_1t;
84                     y[1] = (z-y[0])-pio2_1t;
85                     return 1;
86                 } else {
87                     z = x + pio2_1;
88                     y[0] = z + pio2_1t;
89                     y[1] = (z-y[0])+pio2_1t;
90                     return -1;
91                 }
92             } else {
93                 if (hx > 0) {
94                     z = x - 2*pio2_1;
95                     y[0] = z - 2*pio2_1t;
96                     y[1] = (z-y[0])-2*pio2_1t;
97                     return 2;
98                 } else {
99                     z = x + 2*pio2_1;
100                     y[0] = z + 2*pio2_1t;
101                     y[1] = (z-y[0])+2*pio2_1t;
102                     return -2;
103                 }
104             }
105         }
106         if (ix <= 0x401c463b) {         /* |x| ~<= 9pi/4 */
107             if (ix <= 0x4015fdbc) {     /* |x| ~<= 7pi/4 */
108                 if (ix == 0x4012d97c)   /* |x| ~= 3pi/2 */
109                     goto medium;
110                 if (hx > 0) {
111                     z = x - 3*pio2_1;
112                     y[0] = z - 3*pio2_1t;
113                     y[1] = (z-y[0])-3*pio2_1t;
114                     return 3;
115                 } else {
116                     z = x + 3*pio2_1;
117                     y[0] = z + 3*pio2_1t;
118                     y[1] = (z-y[0])+3*pio2_1t;
119                     return -3;
120                 }
121             } else {
122                 if (ix == 0x401921fb)   /* |x| ~= 4pi/2 */
123                     goto medium;
124                 if (hx > 0) {
125                     z = x - 4*pio2_1;
126                     y[0] = z - 4*pio2_1t;
127                     y[1] = (z-y[0])-4*pio2_1t;
128                     return 4;
129                 } else {
130                     z = x + 4*pio2_1;
131                     y[0] = z + 4*pio2_1t;
132                     y[1] = (z-y[0])+4*pio2_1t;
133                     return -4;
134                 }
135             }
136         }
137         if(ix<=0x413921fb) { /* |x| ~<= 2^19*(pi/2), medium size */
138 medium:
139             t  = fabs(x);
140 #ifdef HAVE_EFFICIENT_IRINT
141             /* Use a specialized rint() to get fn.  Assume round-to-nearest. */
142             STRICT_ASSIGN(double,fn,t*invpio2+0x1.8p52);
143             fn = fn-0x1.8p52;
144             n  = irint(fn);
145 #else
146             n  = (int32_t) (t*invpio2+half);
147             fn = (double)n;
148 #endif
149             r  = t-fn*pio2_1;
150             w  = fn*pio2_1t;    /* 1st round good to 85 bit */
151             if(n<32&&ix!=npio2_hw[n-1]) {       
152                 y[0] = r-w;     /* quick check no cancellation */
153             } else {
154                 u_int32_t high;
155                 j  = ix>>20;
156                 y[0] = r-w; 
157                 GET_HIGH_WORD(high,y[0]);
158                 i = j-((high>>20)&0x7ff);
159                 if(i>16) {  /* 2nd iteration needed, good to 118 */
160                     t  = r;
161                     w  = fn*pio2_2;     
162                     r  = t-w;
163                     w  = fn*pio2_2t-((t-r)-w);  
164                     y[0] = r-w;
165                     GET_HIGH_WORD(high,y[0]);
166                     i = j-((high>>20)&0x7ff);
167                     if(i>49)  { /* 3rd iteration need, 151 bits acc */
168                         t  = r; /* will cover all possible cases */
169                         w  = fn*pio2_3; 
170                         r  = t-w;
171                         w  = fn*pio2_3t-((t-r)-w);      
172                         y[0] = r-w;
173                     }
174                 }
175             }
176             y[1] = (r-y[0])-w;
177             if(hx<0)    {y[0] = -y[0]; y[1] = -y[1]; return -n;}
178             else         return n;
179         }
180     /* 
181      * all other (large) arguments
182      */
183         if(ix>=0x7ff00000) {            /* x is inf or NaN */
184             y[0]=y[1]=x-x; return 0;
185         }
186     /* set z = scalbn(|x|,ilogb(x)-23) */
187         GET_LOW_WORD(low,x);
188         SET_LOW_WORD(z,low);
189         e0      = (ix>>20)-1046;        /* e0 = ilogb(z)-23; */
190         SET_HIGH_WORD(z, ix - ((int32_t)(e0<<20)));
191         for(i=0;i<2;i++) {
192                 tx[i] = (double)((int32_t)(z));
193                 z     = (z-tx[i])*two24;
194         }
195         tx[2] = z;
196         nx = 3;
197         while(tx[nx-1]==zero) nx--;     /* skip zero term */
198         n  =  __kernel_rem_pio2(tx,y,e0,nx,2);
199         if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;}
200         return n;
201 }