lib/libc/softfloat/bits64/softfloat-macros

   1 /* $NetBSD: softfloat-macros,v 1.1 2002/05/21 23:51:08 bjh21 Exp $ */
   2 /* $FreeBSD$ */
   3
   4 /*
   5 ===============================================================================
   6
   7 This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
   8 Arithmetic Package, Release 2a.
   9
  10 Written by John R. Hauser.  This work was made possible in part by the
  11 International Computer Science Institute, located at Suite 600, 1947 Center
  12 Street, Berkeley, California 94704.  Funding was partially provided by the
  13 National Science Foundation under grant MIP-9311980.  The original version
  14 of this code was written as part of a project to build a fixed-point vector
  15 processor in collaboration with the University of California at Berkeley,
  16 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
  17 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
  18 arithmetic/SoftFloat.html'.
  19
  20 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
  21 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
  22 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
  23 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
  24 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
  25
  26 Derivative works are acceptable, even for commercial purposes, so long as
  27 (1) they include prominent notice that the work is derivative, and (2) they
  28 include prominent notice akin to these four paragraphs for those parts of
  29 this code that are retained.
  30
  31 ===============================================================================
  32 */
  33
  34 /*
  35 -------------------------------------------------------------------------------
  36 Shifts `a' right by the number of bits given in `count'.  If any nonzero
  37 bits are shifted off, they are ``jammed'' into the least significant bit of
  38 the result by setting the least significant bit to 1.  The value of `count'
  39 can be arbitrarily large; in particular, if `count' is greater than 32, the
  40 result will be either 0 or 1, depending on whether `a' is zero or nonzero.
  41 The result is stored in the location pointed to by `zPtr'.
  42 -------------------------------------------------------------------------------
  43 */
  44 INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
  45 {
  46     bits32 z;
  47
  48     if ( count == 0 ) {
  49         z = a;
  50     }
  51     else if ( count < 32 ) {
  52         z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
  53     }
  54     else {
  55         z = ( a != 0 );
  56     }
  57     *zPtr = z;
  58
  59 }
  60
  61 /*
  62 -------------------------------------------------------------------------------
  63 Shifts `a' right by the number of bits given in `count'.  If any nonzero
  64 bits are shifted off, they are ``jammed'' into the least significant bit of
  65 the result by setting the least significant bit to 1.  The value of `count'
  66 can be arbitrarily large; in particular, if `count' is greater than 64, the
  67 result will be either 0 or 1, depending on whether `a' is zero or nonzero.
  68 The result is stored in the location pointed to by `zPtr'.
  69 -------------------------------------------------------------------------------
  70 */
  71 INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr )
  72 {
  73     bits64 z;
  74
  75     if ( count == 0 ) {
  76         z = a;
  77     }
  78     else if ( count < 64 ) {
  79         z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
  80     }
  81     else {
  82         z = ( a != 0 );
  83     }
  84     *zPtr = z;
  85
  86 }
  87
  88 /*
  89 -------------------------------------------------------------------------------
  90 Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
  91 _plus_ the number of bits given in `count'.  The shifted result is at most
  92 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'.  The
  93 bits shifted off form a second 64-bit result as follows:  The _last_ bit
  94 shifted off is the most-significant bit of the extra result, and the other
  95 63 bits of the extra result are all zero if and only if _all_but_the_last_
  96 bits shifted off were all zero.  This extra result is stored in the location
  97 pointed to by `z1Ptr'.  The value of `count' can be arbitrarily large.
  98     (This routine makes more sense if `a0' and `a1' are considered to form a
  99 fixed-point value with binary point between `a0' and `a1'.  This fixed-point
 100 value is shifted right by the number of bits given in `count', and the
 101 integer part of the result is returned at the location pointed to by
 102 `z0Ptr'.  The fractional part of the result may be slightly corrupted as
 103 described above, and is returned at the location pointed to by `z1Ptr'.)
 104 -------------------------------------------------------------------------------
 105 */
 106 INLINE void
 107  shift64ExtraRightJamming(
 108      bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
 109 {
 110     bits64 z0, z1;
 111     int8 negCount = ( - count ) & 63;
 112
 113     if ( count == 0 ) {
 114         z1 = a1;
 115         z0 = a0;
 116     }
 117     else if ( count < 64 ) {
 118         z1 = ( a0<<negCount ) | ( a1 != 0 );
 119         z0 = a0>>count;
 120     }
 121     else {
 122         if ( count == 64 ) {
 123             z1 = a0 | ( a1 != 0 );
 124         }
 125         else {
 126             z1 = ( ( a0 | a1 ) != 0 );
 127         }
 128         z0 = 0;
 129     }
 130     *z1Ptr = z1;
 131     *z0Ptr = z0;
 132
 133 }
 134
 135 /*
 136 -------------------------------------------------------------------------------
 137 Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
 138 number of bits given in `count'.  Any bits shifted off are lost.  The value
 139 of `count' can be arbitrarily large; in particular, if `count' is greater
 140 than 128, the result will be 0.  The result is broken into two 64-bit pieces
 141 which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
 142 -------------------------------------------------------------------------------
 143 */
 144 INLINE void
 145  shift128Right(
 146      bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
 147 {
 148     bits64 z0, z1;
 149     int8 negCount = ( - count ) & 63;
 150
 151     if ( count == 0 ) {
 152         z1 = a1;
 153         z0 = a0;
 154     }
 155     else if ( count < 64 ) {
 156         z1 = ( a0<<negCount ) | ( a1>>count );
 157         z0 = a0>>count;
 158     }
 159     else {
 160         z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0;
 161         z0 = 0;
 162     }
 163     *z1Ptr = z1;
 164     *z0Ptr = z0;
 165
 166 }
 167
 168 /*
 169 -------------------------------------------------------------------------------
 170 Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
 171 number of bits given in `count'.  If any nonzero bits are shifted off, they
 172 are ``jammed'' into the least significant bit of the result by setting the
 173 least significant bit to 1.  The value of `count' can be arbitrarily large;
 174 in particular, if `count' is greater than 128, the result will be either
 175 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
 176 nonzero.  The result is broken into two 64-bit pieces which are stored at
 177 the locations pointed to by `z0Ptr' and `z1Ptr'.
 178 -------------------------------------------------------------------------------
 179 */
 180 INLINE void
 181  shift128RightJamming(
 182      bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
 183 {
 184     bits64 z0, z1;
 185     int8 negCount = ( - count ) & 63;
 186
 187     if ( count == 0 ) {
 188         z1 = a1;
 189         z0 = a0;
 190     }
 191     else if ( count < 64 ) {
 192         z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
 193         z0 = a0>>count;
 194     }
 195     else {
 196         if ( count == 64 ) {
 197             z1 = a0 | ( a1 != 0 );
 198         }
 199         else if ( count < 128 ) {
 200             z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
 201         }
 202         else {
 203             z1 = ( ( a0 | a1 ) != 0 );
 204         }
 205         z0 = 0;
 206     }
 207     *z1Ptr = z1;
 208     *z0Ptr = z0;
 209
 210 }
 211
 212 /*
 213 -------------------------------------------------------------------------------
 214 Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
 215 by 64 _plus_ the number of bits given in `count'.  The shifted result is
 216 at most 128 nonzero bits; these are broken into two 64-bit pieces which are
 217 stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
 218 off form a third 64-bit result as follows:  The _last_ bit shifted off is
 219 the most-significant bit of the extra result, and the other 63 bits of the
 220 extra result are all zero if and only if _all_but_the_last_ bits shifted off
 221 were all zero.  This extra result is stored in the location pointed to by
 222 `z2Ptr'.  The value of `count' can be arbitrarily large.
 223     (This routine makes more sense if `a0', `a1', and `a2' are considered
 224 to form a fixed-point value with binary point between `a1' and `a2'.  This
 225 fixed-point value is shifted right by the number of bits given in `count',
 226 and the integer part of the result is returned at the locations pointed to
 227 by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
 228 corrupted as described above, and is returned at the location pointed to by
 229 `z2Ptr'.)
 230 -------------------------------------------------------------------------------
 231 */
 232 INLINE void
 233  shift128ExtraRightJamming(
 234      bits64 a0,
 235      bits64 a1,
 236      bits64 a2,
 237      int16 count,
 238      bits64 *z0Ptr,
 239      bits64 *z1Ptr,
 240      bits64 *z2Ptr
 241  )
 242 {
 243     bits64 z0, z1, z2;
 244     int8 negCount = ( - count ) & 63;
 245
 246     if ( count == 0 ) {
 247         z2 = a2;
 248         z1 = a1;
 249         z0 = a0;
 250     }
 251     else {
 252         if ( count < 64 ) {
 253             z2 = a1<<negCount;
 254             z1 = ( a0<<negCount ) | ( a1>>count );
 255             z0 = a0>>count;
 256         }
 257         else {
 258             if ( count == 64 ) {
 259                 z2 = a1;
 260                 z1 = a0;
 261             }
 262             else {
 263                 a2 |= a1;
 264                 if ( count < 128 ) {
 265                     z2 = a0<<negCount;
 266                     z1 = a0>>( count & 63 );
 267                 }
 268                 else {
 269                     z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
 270                     z1 = 0;
 271                 }
 272             }
 273             z0 = 0;
 274         }
 275         z2 |= ( a2 != 0 );
 276     }
 277     *z2Ptr = z2;
 278     *z1Ptr = z1;
 279     *z0Ptr = z0;
 280
 281 }
 282
 283 /*
 284 -------------------------------------------------------------------------------
 285 Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
 286 number of bits given in `count'.  Any bits shifted off are lost.  The value
 287 of `count' must be less than 64.  The result is broken into two 64-bit
 288 pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
 289 -------------------------------------------------------------------------------
 290 */
 291 INLINE void
 292  shortShift128Left(
 293      bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
 294 {
 295
 296     *z1Ptr = a1<<count;
 297     *z0Ptr =
 298         ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
 299
 300 }
 301
 302 /*
 303 -------------------------------------------------------------------------------
 304 Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
 305 by the number of bits given in `count'.  Any bits shifted off are lost.
 306 The value of `count' must be less than 64.  The result is broken into three
 307 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
 308 `z1Ptr', and `z2Ptr'.
 309 -------------------------------------------------------------------------------
 310 */
 311 INLINE void
 312  shortShift192Left(
 313      bits64 a0,
 314      bits64 a1,
 315      bits64 a2,
 316      int16 count,
 317      bits64 *z0Ptr,
 318      bits64 *z1Ptr,
 319      bits64 *z2Ptr
 320  )
 321 {
 322     bits64 z0, z1, z2;
 323     int8 negCount;
 324
 325     z2 = a2<<count;
 326     z1 = a1<<count;
 327     z0 = a0<<count;
 328     if ( 0 < count ) {
 329         negCount = ( ( - count ) & 63 );
 330         z1 |= a2>>negCount;
 331         z0 |= a1>>negCount;
 332     }
 333     *z2Ptr = z2;
 334     *z1Ptr = z1;
 335     *z0Ptr = z0;
 336
 337 }
 338
 339 /*
 340 -------------------------------------------------------------------------------
 341 Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
 342 value formed by concatenating `b0' and `b1'.  Addition is modulo 2^128, so
 343 any carry out is lost.  The result is broken into two 64-bit pieces which
 344 are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
 345 -------------------------------------------------------------------------------
 346 */
 347 INLINE void
 348  add128(
 349      bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
 350 {
 351     bits64 z1;
 352
 353     z1 = a1 + b1;
 354     *z1Ptr = z1;
 355     *z0Ptr = a0 + b0 + ( z1 < a1 );
 356
 357 }
 358
 359 /*
 360 -------------------------------------------------------------------------------
 361 Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
 362 192-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
 363 modulo 2^192, so any carry out is lost.  The result is broken into three
 364 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
 365 `z1Ptr', and `z2Ptr'.
 366 -------------------------------------------------------------------------------
 367 */
 368 INLINE void
 369  add192(
 370      bits64 a0,
 371      bits64 a1,
 372      bits64 a2,
 373      bits64 b0,
 374      bits64 b1,
 375      bits64 b2,
 376      bits64 *z0Ptr,
 377      bits64 *z1Ptr,
 378      bits64 *z2Ptr
 379  )
 380 {
 381     bits64 z0, z1, z2;
 382     int8 carry0, carry1;
 383
 384     z2 = a2 + b2;
 385     carry1 = ( z2 < a2 );
 386     z1 = a1 + b1;
 387     carry0 = ( z1 < a1 );
 388     z0 = a0 + b0;
 389     z1 += carry1;
 390     z0 += ( z1 < carry1 );
 391     z0 += carry0;
 392     *z2Ptr = z2;
 393     *z1Ptr = z1;
 394     *z0Ptr = z0;
 395
 396 }
 397
 398 /*
 399 -------------------------------------------------------------------------------
 400 Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
 401 128-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
 402 2^128, so any borrow out (carry out) is lost.  The result is broken into two
 403 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
 404 `z1Ptr'.
 405 -------------------------------------------------------------------------------
 406 */
 407 INLINE void
 408  sub128(
 409      bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
 410 {
 411
 412     *z1Ptr = a1 - b1;
 413     *z0Ptr = a0 - b0 - ( a1 < b1 );
 414
 415 }
 416
 417 /*
 418 -------------------------------------------------------------------------------
 419 Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
 420 from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
 421 Subtraction is modulo 2^192, so any borrow out (carry out) is lost.  The
 422 result is broken into three 64-bit pieces which are stored at the locations
 423 pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
 424 -------------------------------------------------------------------------------
 425 */
 426 INLINE void
 427  sub192(
 428      bits64 a0,
 429      bits64 a1,
 430      bits64 a2,
 431      bits64 b0,
 432      bits64 b1,
 433      bits64 b2,
 434      bits64 *z0Ptr,
 435      bits64 *z1Ptr,
 436      bits64 *z2Ptr
 437  )
 438 {
 439     bits64 z0, z1, z2;
 440     int8 borrow0, borrow1;
 441
 442     z2 = a2 - b2;
 443     borrow1 = ( a2 < b2 );
 444     z1 = a1 - b1;
 445     borrow0 = ( a1 < b1 );
 446     z0 = a0 - b0;
 447     z0 -= ( z1 < borrow1 );
 448     z1 -= borrow1;
 449     z0 -= borrow0;
 450     *z2Ptr = z2;
 451     *z1Ptr = z1;
 452     *z0Ptr = z0;
 453
 454 }
 455
 456 /*
 457 -------------------------------------------------------------------------------
 458 Multiplies `a' by `b' to obtain a 128-bit product.  The product is broken
 459 into two 64-bit pieces which are stored at the locations pointed to by
 460 `z0Ptr' and `z1Ptr'.
 461 -------------------------------------------------------------------------------
 462 */
 463 INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr )
 464 {
 465     bits32 aHigh, aLow, bHigh, bLow;
 466     bits64 z0, zMiddleA, zMiddleB, z1;
 467
 468     aLow = a;
 469     aHigh = a>>32;
 470     bLow = b;
 471     bHigh = b>>32;
 472     z1 = ( (bits64) aLow ) * bLow;
 473     zMiddleA = ( (bits64) aLow ) * bHigh;
 474     zMiddleB = ( (bits64) aHigh ) * bLow;
 475     z0 = ( (bits64) aHigh ) * bHigh;
 476     zMiddleA += zMiddleB;
 477     z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
 478     zMiddleA <<= 32;
 479     z1 += zMiddleA;
 480     z0 += ( z1 < zMiddleA );
 481     *z1Ptr = z1;
 482     *z0Ptr = z0;
 483
 484 }
 485
 486 /*
 487 -------------------------------------------------------------------------------
 488 Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
 489 `b' to obtain a 192-bit product.  The product is broken into three 64-bit
 490 pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
 491 `z2Ptr'.
 492 -------------------------------------------------------------------------------
 493 */
 494 INLINE void
 495  mul128By64To192(
 496      bits64 a0,
 497      bits64 a1,
 498      bits64 b,
 499      bits64 *z0Ptr,
 500      bits64 *z1Ptr,
 501      bits64 *z2Ptr
 502  )
 503 {
 504     bits64 z0, z1, z2, more1;
 505
 506     mul64To128( a1, b, &z1, &z2 );
 507     mul64To128( a0, b, &z0, &more1 );
 508     add128( z0, more1, 0, z1, &z0, &z1 );
 509     *z2Ptr = z2;
 510     *z1Ptr = z1;
 511     *z0Ptr = z0;
 512
 513 }
 514
 515 /*
 516 -------------------------------------------------------------------------------
 517 Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
 518 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
 519 product.  The product is broken into four 64-bit pieces which are stored at
 520 the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
 521 -------------------------------------------------------------------------------
 522 */
 523 INLINE void
 524  mul128To256(
 525      bits64 a0,
 526      bits64 a1,
 527      bits64 b0,
 528      bits64 b1,
 529      bits64 *z0Ptr,
 530      bits64 *z1Ptr,
 531      bits64 *z2Ptr,
 532      bits64 *z3Ptr
 533  )
 534 {
 535     bits64 z0, z1, z2, z3;
 536     bits64 more1, more2;
 537
 538     mul64To128( a1, b1, &z2, &z3 );
 539     mul64To128( a1, b0, &z1, &more2 );
 540     add128( z1, more2, 0, z2, &z1, &z2 );
 541     mul64To128( a0, b0, &z0, &more1 );
 542     add128( z0, more1, 0, z1, &z0, &z1 );
 543     mul64To128( a0, b1, &more1, &more2 );
 544     add128( more1, more2, 0, z2, &more1, &z2 );
 545     add128( z0, z1, 0, more1, &z0, &z1 );
 546     *z3Ptr = z3;
 547     *z2Ptr = z2;
 548     *z1Ptr = z1;
 549     *z0Ptr = z0;
 550
 551 }
 552
 553 /*
 554 -------------------------------------------------------------------------------
 555 Returns an approximation to the 64-bit integer quotient obtained by dividing
 556 `b' into the 128-bit value formed by concatenating `a0' and `a1'.  The
 557 divisor `b' must be at least 2^63.  If q is the exact quotient truncated
 558 toward zero, the approximation returned lies between q and q + 2 inclusive.
 559 If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
 560 unsigned integer is returned.
 561 -------------------------------------------------------------------------------
 562 */
 563 static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b )
 564 {
 565     bits64 b0, b1;
 566     bits64 rem0, rem1, term0, term1;
 567     bits64 z;
 568
 569     if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
 570     b0 = b>>32;
 571     z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32;
 572     mul64To128( b, z, &term0, &term1 );
 573     sub128( a0, a1, term0, term1, &rem0, &rem1 );
 574     while ( ( (sbits64) rem0 ) < 0 ) {
 575         z -= LIT64( 0x100000000 );
 576         b1 = b<<32;
 577         add128( rem0, rem1, b0, b1, &rem0, &rem1 );
 578     }
 579     rem0 = ( rem0<<32 ) | ( rem1>>32 );
 580     z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
 581     return z;
 582
 583 }
 584
 585 #if !defined(SOFTFLOAT_FOR_GCC) || defined(FLOATX80) || defined(FLOAT128)
 586 /*
 587 -------------------------------------------------------------------------------
 588 Returns an approximation to the square root of the 32-bit significand given
 589 by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
 590 `aExp' (the least significant bit) is 1, the integer returned approximates
 591 2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
 592 is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
 593 case, the approximation returned lies strictly within +/-2 of the exact
 594 value.
 595 -------------------------------------------------------------------------------
 596 */
 597 static bits32 estimateSqrt32( int16 aExp, bits32 a )
 598 {
 599     static const bits16 sqrtOddAdjustments[] = {
 600         0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
 601         0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
 602     };
 603     static const bits16 sqrtEvenAdjustments[] = {
 604         0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
 605         0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
 606     };
 607     int8 idx;
 608     bits32 z;
 609
 610     idx = ( a>>27 ) & 15;
 611     if ( aExp & 1 ) {
 612         z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ idx ];
 613         z = ( ( a / z )<<14 ) + ( z<<15 );
 614         a >>= 1;
 615     }
 616     else {
 617         z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ idx ];
 618         z = a / z + z;
 619         z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
 620         if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );
 621     }
 622     return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 );
 623
 624 }
 625 #endif
 626
 627 /*
 628 -------------------------------------------------------------------------------
 629 Returns the number of leading 0 bits before the most-significant 1 bit of
 630 `a'.  If `a' is zero, 32 is returned.
 631 -------------------------------------------------------------------------------
 632 */
 633 static int8 countLeadingZeros32( bits32 a )
 634 {
 635     static const int8 countLeadingZerosHigh[] = {
 636         8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
 637         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 638         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 639         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 640         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 641         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 642         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 643         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 644         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 645         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 646         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 647         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 648         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 649         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 650         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 651         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 652     };
 653     int8 shiftCount;
 654
 655     shiftCount = 0;
 656     if ( a < 0x10000 ) {
 657         shiftCount += 16;
 658         a <<= 16;
 659     }
 660     if ( a < 0x1000000 ) {
 661         shiftCount += 8;
 662         a <<= 8;
 663     }
 664     shiftCount += countLeadingZerosHigh[ a>>24 ];
 665     return shiftCount;
 666
 667 }
 668
 669 /*
 670 -------------------------------------------------------------------------------
 671 Returns the number of leading 0 bits before the most-significant 1 bit of
 672 `a'.  If `a' is zero, 64 is returned.
 673 -------------------------------------------------------------------------------
 674 */
 675 static int8 countLeadingZeros64( bits64 a )
 676 {
 677     int8 shiftCount;
 678
 679     shiftCount = 0;
 680     if ( a < ( (bits64) 1 )<<32 ) {
 681         shiftCount += 32;
 682     }
 683     else {
 684         a >>= 32;
 685     }
 686     shiftCount += countLeadingZeros32( a );
 687     return shiftCount;
 688
 689 }
 690
 691 /*
 692 -------------------------------------------------------------------------------
 693 Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
 694 is equal to the 128-bit value formed by concatenating `b0' and `b1'.
 695 Otherwise, returns 0.
 696 -------------------------------------------------------------------------------
 697 */
 698 INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
 699 {
 700
 701     return ( a0 == b0 ) && ( a1 == b1 );
 702
 703 }
 704
 705 /*
 706 -------------------------------------------------------------------------------
 707 Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
 708 than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
 709 Otherwise, returns 0.
 710 -------------------------------------------------------------------------------
 711 */
 712 INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
 713 {
 714
 715     return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
 716
 717 }
 718
 719 /*
 720 -------------------------------------------------------------------------------
 721 Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
 722 than the 128-bit value formed by concatenating `b0' and `b1'.  Otherwise,
 723 returns 0.
 724 -------------------------------------------------------------------------------
 725 */
 726 INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
 727 {
 728
 729     return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
 730
 731 }
 732
 733 /*
 734 -------------------------------------------------------------------------------
 735 Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
 736 not equal to the 128-bit value formed by concatenating `b0' and `b1'.
 737 Otherwise, returns 0.
 738 -------------------------------------------------------------------------------
 739 */
 740 INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
 741 {
 742
 743     return ( a0 != b0 ) || ( a1 != b1 );
 744
 745 }
 746