lib/msun/src/e_sqrt.c

   1
   2 /* @(#)e_sqrt.c 1.3 95/01/18 */
   3 /*
   4  * ====================================================
   5  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
   6  *
   7  * Developed at SunSoft, a Sun Microsystems, Inc. business.
   8  * Permission to use, copy, modify, and distribute this
   9  * software is freely granted, provided that this notice
  10  * is preserved.
  11  * ====================================================
  12  */
  13
  14 #include <sys/cdefs.h>
  15 #include <float.h>
  16
  17 #include "math.h"
  18 #include "math_private.h"
  19
  20 #ifdef USE_BUILTIN_SQRT
  21 double
  22 sqrt(double x)
  23 {
  24         return (__builtin_sqrt(x));
  25 }
  26 #else
  27 /* sqrt(x)
  28  * Return correctly rounded sqrt.
  29  *           ------------------------------------------
  30  *           |  Use the hardware sqrt if you have one |
  31  *           ------------------------------------------
  32  * Method:
  33  *   Bit by bit method using integer arithmetic. (Slow, but portable)
  34  *   1. Normalization
  35  *      Scale x to y in [1,4) with even powers of 2:
  36  *      find an integer k such that  1 <= (y=x*2^(2k)) < 4, then
  37  *              sqrt(x) = 2^k * sqrt(y)
  38  *   2. Bit by bit computation
  39  *      Let q  = sqrt(y) truncated to i bit after binary point (q = 1),
  40  *           i                                                   0
  41  *                                     i+1         2
  42  *          s  = 2*q , and      y  =  2   * ( y - q  ).         (1)
  43  *           i      i            i                 i
  44  *
  45  *      To compute q    from q , one checks whether
  46  *                  i+1       i
  47  *
  48  *                            -(i+1) 2
  49  *                      (q + 2      ) <= y.                     (2)
  50  *                        i
  51  *                                                            -(i+1)
  52  *      If (2) is false, then q   = q ; otherwise q   = q  + 2      .
  53  *                             i+1   i             i+1   i
  54  *
  55  *      With some algebric manipulation, it is not difficult to see
  56  *      that (2) is equivalent to
  57  *                             -(i+1)
  58  *                      s  +  2       <= y                      (3)
  59  *                       i                i
  60  *
  61  *      The advantage of (3) is that s  and y  can be computed by
  62  *                                    i      i
  63  *      the following recurrence formula:
  64  *          if (3) is false
  65  *
  66  *          s     =  s  ,       y    = y   ;                    (4)
  67  *           i+1      i          i+1    i
  68  *
  69  *          otherwise,
  70  *                         -i                     -(i+1)
  71  *          s     =  s  + 2  ,  y    = y  -  s  - 2             (5)
  72  *           i+1      i          i+1    i     i
  73  *
  74  *      One may easily use induction to prove (4) and (5).
  75  *      Note. Since the left hand side of (3) contain only i+2 bits,
  76  *            it does not necessary to do a full (53-bit) comparison
  77  *            in (3).
  78  *   3. Final rounding
  79  *      After generating the 53 bits result, we compute one more bit.
  80  *      Together with the remainder, we can decide whether the
  81  *      result is exact, bigger than 1/2ulp, or less than 1/2ulp
  82  *      (it will never equal to 1/2ulp).
  83  *      The rounding mode can be detected by checking whether
  84  *      huge + tiny is equal to huge, and whether huge - tiny is
  85  *      equal to huge for some floating point number "huge" and "tiny".
  86  *
  87  * Special cases:
  88  *      sqrt(+-0) = +-0         ... exact
  89  *      sqrt(inf) = inf
  90  *      sqrt(-ve) = NaN         ... with invalid signal
  91  *      sqrt(NaN) = NaN         ... with invalid signal for signaling NaN
  92  *
  93  * Other methods : see the appended file at the end of the program below.
  94  *---------------
  95  */
  96
  97 static  const double    one     = 1.0, tiny=1.0e-300;
  98
  99 double
 100 sqrt(double x)
 101 {
 102         double z;
 103         int32_t sign = (int)0x80000000;
 104         int32_t ix0,s0,q,m,t,i;
 105         u_int32_t r,t1,s1,ix1,q1;
 106
 107         EXTRACT_WORDS(ix0,ix1,x);
 108
 109     /* take care of Inf and NaN */
 110         if((ix0&0x7ff00000)==0x7ff00000) {
 111             return x*x+x;               /* sqrt(NaN)=NaN, sqrt(+inf)=+inf
 112                                            sqrt(-inf)=sNaN */
 113         }
 114     /* take care of zero */
 115         if(ix0<=0) {
 116             if(((ix0&(~sign))|ix1)==0) return x;/* sqrt(+-0) = +-0 */
 117             else if(ix0<0)
 118                 return (x-x)/(x-x);             /* sqrt(-ve) = sNaN */
 119         }
 120     /* normalize x */
 121         m = (ix0>>20);
 122         if(m==0) {                              /* subnormal x */
 123             while(ix0==0) {
 124                 m -= 21;
 125                 ix0 |= (ix1>>11); ix1 <<= 21;
 126             }
 127             for(i=0;(ix0&0x00100000)==0;i++) ix0<<=1;
 128             m -= i-1;
 129             ix0 |= (ix1>>(32-i));
 130             ix1 <<= i;
 131         }
 132         m -= 1023;      /* unbias exponent */
 133         ix0 = (ix0&0x000fffff)|0x00100000;
 134         if(m&1){        /* odd m, double x to make it even */
 135             ix0 += ix0 + ((ix1&sign)>>31);
 136             ix1 += ix1;
 137         }
 138         m >>= 1;        /* m = [m/2] */
 139
 140     /* generate sqrt(x) bit by bit */
 141         ix0 += ix0 + ((ix1&sign)>>31);
 142         ix1 += ix1;
 143         q = q1 = s0 = s1 = 0;   /* [q,q1] = sqrt(x) */
 144         r = 0x00200000;         /* r = moving bit from right to left */
 145
 146         while(r!=0) {
 147             t = s0+r;
 148             if(t<=ix0) {
 149                 s0   = t+r;
 150                 ix0 -= t;
 151                 q   += r;
 152             }
 153             ix0 += ix0 + ((ix1&sign)>>31);
 154             ix1 += ix1;
 155             r>>=1;
 156         }
 157
 158         r = sign;
 159         while(r!=0) {
 160             t1 = s1+r;
 161             t  = s0;
 162             if((t<ix0)||((t==ix0)&&(t1<=ix1))) {
 163                 s1  = t1+r;
 164                 if(((t1&sign)==sign)&&(s1&sign)==0) s0 += 1;
 165                 ix0 -= t;
 166                 if (ix1 < t1) ix0 -= 1;
 167                 ix1 -= t1;
 168                 q1  += r;
 169             }
 170             ix0 += ix0 + ((ix1&sign)>>31);
 171             ix1 += ix1;
 172             r>>=1;
 173         }
 174
 175     /* use floating add to find out rounding direction */
 176         if((ix0|ix1)!=0) {
 177             z = one-tiny; /* trigger inexact flag */
 178             if (z>=one) {
 179                 z = one+tiny;
 180                 if (q1==(u_int32_t)0xffffffff) { q1=0; q += 1;}
 181                 else if (z>one) {
 182                     if (q1==(u_int32_t)0xfffffffe) q+=1;
 183                     q1+=2;
 184                 } else
 185                     q1 += (q1&1);
 186             }
 187         }
 188         ix0 = (q>>1)+0x3fe00000;
 189         ix1 =  q1>>1;
 190         if ((q&1)==1) ix1 |= sign;
 191         ix0 += (m <<20);
 192         INSERT_WORDS(z,ix0,ix1);
 193         return z;
 194 }
 195 #endif
 196
 197 #if (LDBL_MANT_DIG == 53)
 198 __weak_reference(sqrt, sqrtl);
 199 #endif
 200
 201 /*
 202 Other methods  (use floating-point arithmetic)
 203 -------------
 204 (This is a copy of a drafted paper by Prof W. Kahan
 205 and K.C. Ng, written in May, 1986)
 206
 207         Two algorithms are given here to implement sqrt(x)
 208         (IEEE double precision arithmetic) in software.
 209         Both supply sqrt(x) correctly rounded. The first algorithm (in
 210         Section A) uses newton iterations and involves four divisions.
 211         The second one uses reciproot iterations to avoid division, but
 212         requires more multiplications. Both algorithms need the ability
 213         to chop results of arithmetic operations instead of round them,
 214         and the INEXACT flag to indicate when an arithmetic operation
 215         is executed exactly with no roundoff error, all part of the
 216         standard (IEEE 754-1985). The ability to perform shift, add,
 217         subtract and logical AND operations upon 32-bit words is needed
 218         too, though not part of the standard.
 219
 220 A.  sqrt(x) by Newton Iteration
 221
 222    (1)  Initial approximation
 223
 224         Let x0 and x1 be the leading and the trailing 32-bit words of
 225         a floating point number x (in IEEE double format) respectively
 226
 227             1    11                  52                           ...widths
 228            ------------------------------------------------------
 229         x: |s|    e     |             f                         |
 230            ------------------------------------------------------
 231               msb    lsb  msb                                 lsb ...order
 232
 233
 234              ------------------------        ------------------------
 235         x0:  |s|   e    |    f1     |    x1: |          f2           |
 236              ------------------------        ------------------------
 237
 238         By performing shifts and subtracts on x0 and x1 (both regarded
 239         as integers), we obtain an 8-bit approximation of sqrt(x) as
 240         follows.
 241
 242                 k  := (x0>>1) + 0x1ff80000;
 243                 y0 := k - T1[31&(k>>15)].       ... y ~ sqrt(x) to 8 bits
 244         Here k is a 32-bit integer and T1[] is an integer array containing
 245         correction terms. Now magically the floating value of y (y's
 246         leading 32-bit word is y0, the value of its trailing word is 0)
 247         approximates sqrt(x) to almost 8-bit.
 248
 249         Value of T1:
 250         static int T1[32]= {
 251         0,      1024,   3062,   5746,   9193,   13348,  18162,  23592,
 252         29598,  36145,  43202,  50740,  58733,  67158,  75992,  85215,
 253         83599,  71378,  60428,  50647,  41945,  34246,  27478,  21581,
 254         16499,  12183,  8588,   5674,   3403,   1742,   661,    130,};
 255
 256     (2) Iterative refinement
 257
 258         Apply Heron's rule three times to y, we have y approximates
 259         sqrt(x) to within 1 ulp (Unit in the Last Place):
 260
 261                 y := (y+x/y)/2          ... almost 17 sig. bits
 262                 y := (y+x/y)/2          ... almost 35 sig. bits
 263                 y := y-(y-x/y)/2        ... within 1 ulp
 264
 265
 266         Remark 1.
 267             Another way to improve y to within 1 ulp is:
 268
 269                 y := (y+x/y)            ... almost 17 sig. bits to 2*sqrt(x)
 270                 y := y - 0x00100006     ... almost 18 sig. bits to sqrt(x)
 271
 272                                 2
 273                             (x-y )*y
 274                 y := y + 2* ----------  ...within 1 ulp
 275                                2
 276                              3y  + x
 277
 278
 279         This formula has one division fewer than the one above; however,
 280         it requires more multiplications and additions. Also x must be
 281         scaled in advance to avoid spurious overflow in evaluating the
 282         expression 3y*y+x. Hence it is not recommended uless division
 283         is slow. If division is very slow, then one should use the
 284         reciproot algorithm given in section B.
 285
 286     (3) Final adjustment
 287
 288         By twiddling y's last bit it is possible to force y to be
 289         correctly rounded according to the prevailing rounding mode
 290         as follows. Let r and i be copies of the rounding mode and
 291         inexact flag before entering the square root program. Also we
 292         use the expression y+-ulp for the next representable floating
 293         numbers (up and down) of y. Note that y+-ulp = either fixed
 294         point y+-1, or multiply y by nextafter(1,+-inf) in chopped
 295         mode.
 296
 297                 I := FALSE;     ... reset INEXACT flag I
 298                 R := RZ;        ... set rounding mode to round-toward-zero
 299                 z := x/y;       ... chopped quotient, possibly inexact
 300                 If(not I) then {        ... if the quotient is exact
 301                     if(z=y) {
 302                         I := i;  ... restore inexact flag
 303                         R := r;  ... restore rounded mode
 304                         return sqrt(x):=y.
 305                     } else {
 306                         z := z - ulp;   ... special rounding
 307                     }
 308                 }
 309                 i := TRUE;              ... sqrt(x) is inexact
 310                 If (r=RN) then z=z+ulp  ... rounded-to-nearest
 311                 If (r=RP) then {        ... round-toward-+inf
 312                     y = y+ulp; z=z+ulp;
 313                 }
 314                 y := y+z;               ... chopped sum
 315                 y0:=y0-0x00100000;      ... y := y/2 is correctly rounded.
 316                 I := i;                 ... restore inexact flag
 317                 R := r;                 ... restore rounded mode
 318                 return sqrt(x):=y.
 319
 320     (4) Special cases
 321
 322         Square root of +inf, +-0, or NaN is itself;
 323         Square root of a negative number is NaN with invalid signal.
 324
 325
 326 B.  sqrt(x) by Reciproot Iteration
 327
 328    (1)  Initial approximation
 329
 330         Let x0 and x1 be the leading and the trailing 32-bit words of
 331         a floating point number x (in IEEE double format) respectively
 332         (see section A). By performing shifs and subtracts on x0 and y0,
 333         we obtain a 7.8-bit approximation of 1/sqrt(x) as follows.
 334
 335             k := 0x5fe80000 - (x0>>1);
 336             y0:= k - T2[63&(k>>14)].    ... y ~ 1/sqrt(x) to 7.8 bits
 337
 338         Here k is a 32-bit integer and T2[] is an integer array
 339         containing correction terms. Now magically the floating
 340         value of y (y's leading 32-bit word is y0, the value of
 341         its trailing word y1 is set to zero) approximates 1/sqrt(x)
 342         to almost 7.8-bit.
 343
 344         Value of T2:
 345         static int T2[64]= {
 346         0x1500, 0x2ef8, 0x4d67, 0x6b02, 0x87be, 0xa395, 0xbe7a, 0xd866,
 347         0xf14a, 0x1091b,0x11fcd,0x13552,0x14999,0x15c98,0x16e34,0x17e5f,
 348         0x18d03,0x19a01,0x1a545,0x1ae8a,0x1b5c4,0x1bb01,0x1bfde,0x1c28d,
 349         0x1c2de,0x1c0db,0x1ba73,0x1b11c,0x1a4b5,0x1953d,0x18266,0x16be0,
 350         0x1683e,0x179d8,0x18a4d,0x19992,0x1a789,0x1b445,0x1bf61,0x1c989,
 351         0x1d16d,0x1d77b,0x1dddf,0x1e2ad,0x1e5bf,0x1e6e8,0x1e654,0x1e3cd,
 352         0x1df2a,0x1d635,0x1cb16,0x1be2c,0x1ae4e,0x19bde,0x1868e,0x16e2e,
 353         0x1527f,0x1334a,0x11051,0xe951, 0xbe01, 0x8e0d, 0x5924, 0x1edd,};
 354
 355     (2) Iterative refinement
 356
 357         Apply Reciproot iteration three times to y and multiply the
 358         result by x to get an approximation z that matches sqrt(x)
 359         to about 1 ulp. To be exact, we will have
 360                 -1ulp < sqrt(x)-z<1.0625ulp.
 361
 362         ... set rounding mode to Round-to-nearest
 363            y := y*(1.5-0.5*x*y*y)       ... almost 15 sig. bits to 1/sqrt(x)
 364            y := y*((1.5-2^-30)+0.5*x*y*y)... about 29 sig. bits to 1/sqrt(x)
 365         ... special arrangement for better accuracy
 366            z := x*y                     ... 29 bits to sqrt(x), with z*y<1
 367            z := z + 0.5*z*(1-z*y)       ... about 1 ulp to sqrt(x)
 368
 369         Remark 2. The constant 1.5-2^-30 is chosen to bias the error so that
 370         (a) the term z*y in the final iteration is always less than 1;
 371         (b) the error in the final result is biased upward so that
 372                 -1 ulp < sqrt(x) - z < 1.0625 ulp
 373             instead of |sqrt(x)-z|<1.03125ulp.
 374
 375     (3) Final adjustment
 376
 377         By twiddling y's last bit it is possible to force y to be
 378         correctly rounded according to the prevailing rounding mode
 379         as follows. Let r and i be copies of the rounding mode and
 380         inexact flag before entering the square root program. Also we
 381         use the expression y+-ulp for the next representable floating
 382         numbers (up and down) of y. Note that y+-ulp = either fixed
 383         point y+-1, or multiply y by nextafter(1,+-inf) in chopped
 384         mode.
 385
 386         R := RZ;                ... set rounding mode to round-toward-zero
 387         switch(r) {
 388             case RN:            ... round-to-nearest
 389                if(x<= z*(z-ulp)...chopped) z = z - ulp; else
 390                if(x<= z*(z+ulp)...chopped) z = z; else z = z+ulp;
 391                break;
 392             case RZ:case RM:    ... round-to-zero or round-to--inf
 393                R:=RP;           ... reset rounding mod to round-to-+inf
 394                if(x<z*z ... rounded up) z = z - ulp; else
 395                if(x>=(z+ulp)*(z+ulp) ...rounded up) z = z+ulp;
 396                break;
 397             case RP:            ... round-to-+inf
 398                if(x>(z+ulp)*(z+ulp)...chopped) z = z+2*ulp; else
 399                if(x>z*z ...chopped) z = z+ulp;
 400                break;
 401         }
 402
 403         Remark 3. The above comparisons can be done in fixed point. For
 404         example, to compare x and w=z*z chopped, it suffices to compare
 405         x1 and w1 (the trailing parts of x and w), regarding them as
 406         two's complement integers.
 407
 408         ...Is z an exact square root?
 409         To determine whether z is an exact square root of x, let z1 be the
 410         trailing part of z, and also let x0 and x1 be the leading and
 411         trailing parts of x.
 412
 413         If ((z1&0x03ffffff)!=0) ... not exact if trailing 26 bits of z!=0
 414             I := 1;             ... Raise Inexact flag: z is not exact
 415         else {
 416             j := 1 - [(x0>>20)&1]       ... j = logb(x) mod 2
 417             k := z1 >> 26;              ... get z's 25-th and 26-th
 418                                             fraction bits
 419             I := i or (k&j) or ((k&(j+j+1))!=(x1&3));
 420         }
 421         R:= r           ... restore rounded mode
 422         return sqrt(x):=z.
 423
 424         If multiplication is cheaper then the foregoing red tape, the
 425         Inexact flag can be evaluated by
 426
 427             I := i;
 428             I := (z*z!=x) or I.
 429
 430         Note that z*z can overwrite I; this value must be sensed if it is
 431         True.
 432
 433         Remark 4. If z*z = x exactly, then bit 25 to bit 0 of z1 must be
 434         zero.
 435
 436                     --------------------
 437                 z1: |        f2        |
 438                     --------------------
 439                 bit 31             bit 0
 440
 441         Further more, bit 27 and 26 of z1, bit 0 and 1 of x1, and the odd
 442         or even of logb(x) have the following relations:
 443
 444         -------------------------------------------------
 445         bit 27,26 of z1         bit 1,0 of x1   logb(x)
 446         -------------------------------------------------
 447         00                      00              odd and even
 448         01                      01              even
 449         10                      10              odd
 450         10                      00              even
 451         11                      01              even
 452         -------------------------------------------------
 453
 454     (4) Special cases (see (4) of Section A).
 455
 456  */
 457