lib/libc/softfloat/bits32/softfloat.c

   1 /* $NetBSD: softfloat.c,v 1.1 2002/05/21 23:51:07 bjh21 Exp $ */
   2
   3 /*
   4  * This version hacked for use with gcc -msoft-float by bjh21.
   5  * (Mostly a case of #ifdefing out things GCC doesn't need or provides
   6  *  itself).
   7  */
   8
   9 /*
  10  * Things you may want to define:
  11  *
  12  * SOFTFLOAT_FOR_GCC - build only those functions necessary for GCC (with
  13  *   -msoft-float) to work.  Include "softfloat-for-gcc.h" to get them
  14  *   properly renamed.
  15  */
  16
  17 /*
  18  * This differs from the standard bits32/softfloat.c in that float64
  19  * is defined to be a 64-bit integer rather than a structure.  The
  20  * structure is float64s, with translation between the two going via
  21  * float64u.
  22  */
  23
  24 /*
  25 ===============================================================================
  26
  27 This C source file is part of the SoftFloat IEC/IEEE Floating-Point
  28 Arithmetic Package, Release 2a.
  29
  30 Written by John R. Hauser.  This work was made possible in part by the
  31 International Computer Science Institute, located at Suite 600, 1947 Center
  32 Street, Berkeley, California 94704.  Funding was partially provided by the
  33 National Science Foundation under grant MIP-9311980.  The original version
  34 of this code was written as part of a project to build a fixed-point vector
  35 processor in collaboration with the University of California at Berkeley,
  36 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
  37 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
  38 arithmetic/SoftFloat.html'.
  39
  40 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
  41 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
  42 TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
  43 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
  44 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
  45
  46 Derivative works are acceptable, even for commercial purposes, so long as
  47 (1) they include prominent notice that the work is derivative, and (2) they
  48 include prominent notice akin to these four paragraphs for those parts of
  49 this code that are retained.
  50
  51 ===============================================================================
  52 */
  53
  54 #include <sys/cdefs.h>
  55 __FBSDID("$FreeBSD$");
  56
  57 #ifdef SOFTFLOAT_FOR_GCC
  58 #include "softfloat-for-gcc.h"
  59 #endif
  60
  61 #include "milieu.h"
  62 #include "softfloat.h"
  63
  64 /*
  65  * Conversions between floats as stored in memory and floats as
  66  * SoftFloat uses them
  67  */
  68 #ifndef FLOAT64_DEMANGLE
  69 #define FLOAT64_DEMANGLE(a)     (a)
  70 #endif
  71 #ifndef FLOAT64_MANGLE
  72 #define FLOAT64_MANGLE(a)       (a)
  73 #endif
  74
  75 /*
  76 -------------------------------------------------------------------------------
  77 Floating-point rounding mode and exception flags.
  78 -------------------------------------------------------------------------------
  79 */
  80 fp_rnd_t float_rounding_mode = float_round_nearest_even;
  81 fp_except float_exception_flags = 0;
  82
  83 /*
  84 -------------------------------------------------------------------------------
  85 Primitive arithmetic functions, including multi-word arithmetic, and
  86 division and square root approximations.  (Can be specialized to target if
  87 desired.)
  88 -------------------------------------------------------------------------------
  89 */
  90 #include "softfloat-macros"
  91
  92 /*
  93 -------------------------------------------------------------------------------
  94 Functions and definitions to determine:  (1) whether tininess for underflow
  95 is detected before or after rounding by default, (2) what (if anything)
  96 happens when exceptions are raised, (3) how signaling NaNs are distinguished
  97 from quiet NaNs, (4) the default generated quiet NaNs, and (4) how NaNs
  98 are propagated from function inputs to output.  These details are target-
  99 specific.
 100 -------------------------------------------------------------------------------
 101 */
 102 #include "softfloat-specialize"
 103
 104 /*
 105 -------------------------------------------------------------------------------
 106 Returns the fraction bits of the single-precision floating-point value `a'.
 107 -------------------------------------------------------------------------------
 108 */
 109 INLINE bits32 extractFloat32Frac( float32 a )
 110 {
 111
 112     return a & 0x007FFFFF;
 113
 114 }
 115
 116 /*
 117 -------------------------------------------------------------------------------
 118 Returns the exponent bits of the single-precision floating-point value `a'.
 119 -------------------------------------------------------------------------------
 120 */
 121 INLINE int16 extractFloat32Exp( float32 a )
 122 {
 123
 124     return ( a>>23 ) & 0xFF;
 125
 126 }
 127
 128 /*
 129 -------------------------------------------------------------------------------
 130 Returns the sign bit of the single-precision floating-point value `a'.
 131 -------------------------------------------------------------------------------
 132 */
 133 INLINE flag extractFloat32Sign( float32 a )
 134 {
 135
 136     return a>>31;
 137
 138 }
 139
 140 /*
 141 -------------------------------------------------------------------------------
 142 Normalizes the subnormal single-precision floating-point value represented
 143 by the denormalized significand `aSig'.  The normalized exponent and
 144 significand are stored at the locations pointed to by `zExpPtr' and
 145 `zSigPtr', respectively.
 146 -------------------------------------------------------------------------------
 147 */
 148 static void
 149  normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
 150 {
 151     int8 shiftCount;
 152
 153     shiftCount = countLeadingZeros32( aSig ) - 8;
 154     *zSigPtr = aSig<<shiftCount;
 155     *zExpPtr = 1 - shiftCount;
 156
 157 }
 158
 159 /*
 160 -------------------------------------------------------------------------------
 161 Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
 162 single-precision floating-point value, returning the result.  After being
 163 shifted into the proper positions, the three fields are simply added
 164 together to form the result.  This means that any integer portion of `zSig'
 165 will be added into the exponent.  Since a properly normalized significand
 166 will have an integer portion equal to 1, the `zExp' input should be 1 less
 167 than the desired result exponent whenever `zSig' is a complete, normalized
 168 significand.
 169 -------------------------------------------------------------------------------
 170 */
 171 INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
 172 {
 173
 174     return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
 175
 176 }
 177
 178 /*
 179 -------------------------------------------------------------------------------
 180 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
 181 and significand `zSig', and returns the proper single-precision floating-
 182 point value corresponding to the abstract input.  Ordinarily, the abstract
 183 value is simply rounded and packed into the single-precision format, with
 184 the inexact exception raised if the abstract input cannot be represented
 185 exactly.  However, if the abstract value is too large, the overflow and
 186 inexact exceptions are raised and an infinity or maximal finite value is
 187 returned.  If the abstract value is too small, the input value is rounded to
 188 a subnormal number, and the underflow and inexact exceptions are raised if
 189 the abstract input cannot be represented exactly as a subnormal single-
 190 precision floating-point number.
 191     The input significand `zSig' has its binary point between bits 30
 192 and 29, which is 7 bits to the left of the usual location.  This shifted
 193 significand must be normalized or smaller.  If `zSig' is not normalized,
 194 `zExp' must be 0; in that case, the result returned is a subnormal number,
 195 and it must not require rounding.  In the usual case that `zSig' is
 196 normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
 197 The handling of underflow and overflow follows the IEC/IEEE Standard for
 198 Binary Floating-Point Arithmetic.
 199 -------------------------------------------------------------------------------
 200 */
 201 static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
 202 {
 203     int8 roundingMode;
 204     flag roundNearestEven;
 205     int8 roundIncrement, roundBits;
 206     flag isTiny;
 207
 208     roundingMode = float_rounding_mode;
 209     roundNearestEven = roundingMode == float_round_nearest_even;
 210     roundIncrement = 0x40;
 211     if ( ! roundNearestEven ) {
 212         if ( roundingMode == float_round_to_zero ) {
 213             roundIncrement = 0;
 214         }
 215         else {
 216             roundIncrement = 0x7F;
 217             if ( zSign ) {
 218                 if ( roundingMode == float_round_up ) roundIncrement = 0;
 219             }
 220             else {
 221                 if ( roundingMode == float_round_down ) roundIncrement = 0;
 222             }
 223         }
 224     }
 225     roundBits = zSig & 0x7F;
 226     if ( 0xFD <= (bits16) zExp ) {
 227         if (    ( 0xFD < zExp )
 228              || (    ( zExp == 0xFD )
 229                   && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
 230            ) {
 231             float_raise( float_flag_overflow | float_flag_inexact );
 232             return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
 233         }
 234         if ( zExp < 0 ) {
 235             isTiny =
 236                    ( float_detect_tininess == float_tininess_before_rounding )
 237                 || ( zExp < -1 )
 238                 || ( zSig + roundIncrement < 0x80000000 );
 239             shift32RightJamming( zSig, - zExp, &zSig );
 240             zExp = 0;
 241             roundBits = zSig & 0x7F;
 242             if ( isTiny && roundBits ) float_raise( float_flag_underflow );
 243         }
 244     }
 245     if ( roundBits ) float_exception_flags |= float_flag_inexact;
 246     zSig = ( zSig + roundIncrement )>>7;
 247     zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
 248     if ( zSig == 0 ) zExp = 0;
 249     return packFloat32( zSign, zExp, zSig );
 250
 251 }
 252
 253 /*
 254 -------------------------------------------------------------------------------
 255 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
 256 and significand `zSig', and returns the proper single-precision floating-
 257 point value corresponding to the abstract input.  This routine is just like
 258 `roundAndPackFloat32' except that `zSig' does not have to be normalized.
 259 Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
 260 floating-point exponent.
 261 -------------------------------------------------------------------------------
 262 */
 263 static float32
 264  normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
 265 {
 266     int8 shiftCount;
 267
 268     shiftCount = countLeadingZeros32( zSig ) - 1;
 269     return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount );
 270
 271 }
 272
 273 /*
 274 -------------------------------------------------------------------------------
 275 Returns the least-significant 32 fraction bits of the double-precision
 276 floating-point value `a'.
 277 -------------------------------------------------------------------------------
 278 */
 279 INLINE bits32 extractFloat64Frac1( float64 a )
 280 {
 281
 282     return FLOAT64_DEMANGLE(a) & LIT64( 0x00000000FFFFFFFF );
 283
 284 }
 285
 286 /*
 287 -------------------------------------------------------------------------------
 288 Returns the most-significant 20 fraction bits of the double-precision
 289 floating-point value `a'.
 290 -------------------------------------------------------------------------------
 291 */
 292 INLINE bits32 extractFloat64Frac0( float64 a )
 293 {
 294
 295     return ( FLOAT64_DEMANGLE(a)>>32 ) & 0x000FFFFF;
 296
 297 }
 298
 299 /*
 300 -------------------------------------------------------------------------------
 301 Returns the exponent bits of the double-precision floating-point value `a'.
 302 -------------------------------------------------------------------------------
 303 */
 304 INLINE int16 extractFloat64Exp( float64 a )
 305 {
 306
 307     return ( FLOAT64_DEMANGLE(a)>>52 ) & 0x7FF;
 308
 309 }
 310
 311 /*
 312 -------------------------------------------------------------------------------
 313 Returns the sign bit of the double-precision floating-point value `a'.
 314 -------------------------------------------------------------------------------
 315 */
 316 INLINE flag extractFloat64Sign( float64 a )
 317 {
 318
 319     return FLOAT64_DEMANGLE(a)>>63;
 320
 321 }
 322
 323 /*
 324 -------------------------------------------------------------------------------
 325 Normalizes the subnormal double-precision floating-point value represented
 326 by the denormalized significand formed by the concatenation of `aSig0' and
 327 `aSig1'.  The normalized exponent is stored at the location pointed to by
 328 `zExpPtr'.  The most significant 21 bits of the normalized significand are
 329 stored at the location pointed to by `zSig0Ptr', and the least significant
 330 32 bits of the normalized significand are stored at the location pointed to
 331 by `zSig1Ptr'.
 332 -------------------------------------------------------------------------------
 333 */
 334 static void
 335  normalizeFloat64Subnormal(
 336      bits32 aSig0,
 337      bits32 aSig1,
 338      int16 *zExpPtr,
 339      bits32 *zSig0Ptr,
 340      bits32 *zSig1Ptr
 341  )
 342 {
 343     int8 shiftCount;
 344
 345     if ( aSig0 == 0 ) {
 346         shiftCount = countLeadingZeros32( aSig1 ) - 11;
 347         if ( shiftCount < 0 ) {
 348             *zSig0Ptr = aSig1>>( - shiftCount );
 349             *zSig1Ptr = aSig1<<( shiftCount & 31 );
 350         }
 351         else {
 352             *zSig0Ptr = aSig1<<shiftCount;
 353             *zSig1Ptr = 0;
 354         }
 355         *zExpPtr = - shiftCount - 31;
 356     }
 357     else {
 358         shiftCount = countLeadingZeros32( aSig0 ) - 11;
 359         shortShift64Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
 360         *zExpPtr = 1 - shiftCount;
 361     }
 362
 363 }
 364
 365 /*
 366 -------------------------------------------------------------------------------
 367 Packs the sign `zSign', the exponent `zExp', and the significand formed by
 368 the concatenation of `zSig0' and `zSig1' into a double-precision floating-
 369 point value, returning the result.  After being shifted into the proper
 370 positions, the three fields `zSign', `zExp', and `zSig0' are simply added
 371 together to form the most significant 32 bits of the result.  This means
 372 that any integer portion of `zSig0' will be added into the exponent.  Since
 373 a properly normalized significand will have an integer portion equal to 1,
 374 the `zExp' input should be 1 less than the desired result exponent whenever
 375 `zSig0' and `zSig1' concatenated form a complete, normalized significand.
 376 -------------------------------------------------------------------------------
 377 */
 378 INLINE float64
 379  packFloat64( flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1 )
 380 {
 381
 382     return FLOAT64_MANGLE( ( ( (bits64) zSign )<<63 ) +
 383                            ( ( (bits64) zExp )<<52 ) +
 384                            ( ( (bits64) zSig0 )<<32 ) + zSig1 );
 385
 386
 387 }
 388
 389 /*
 390 -------------------------------------------------------------------------------
 391 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
 392 and extended significand formed by the concatenation of `zSig0', `zSig1',
 393 and `zSig2', and returns the proper double-precision floating-point value
 394 corresponding to the abstract input.  Ordinarily, the abstract value is
 395 simply rounded and packed into the double-precision format, with the inexact
 396 exception raised if the abstract input cannot be represented exactly.
 397 However, if the abstract value is too large, the overflow and inexact
 398 exceptions are raised and an infinity or maximal finite value is returned.
 399 If the abstract value is too small, the input value is rounded to a
 400 subnormal number, and the underflow and inexact exceptions are raised if the
 401 abstract input cannot be represented exactly as a subnormal double-precision
 402 floating-point number.
 403     The input significand must be normalized or smaller.  If the input
 404 significand is not normalized, `zExp' must be 0; in that case, the result
 405 returned is a subnormal number, and it must not require rounding.  In the
 406 usual case that the input significand is normalized, `zExp' must be 1 less
 407 than the ``true'' floating-point exponent.  The handling of underflow and
 408 overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 409 -------------------------------------------------------------------------------
 410 */
 411 static float64
 412  roundAndPackFloat64(
 413      flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1, bits32 zSig2 )
 414 {
 415     int8 roundingMode;
 416     flag roundNearestEven, increment, isTiny;
 417
 418     roundingMode = float_rounding_mode;
 419     roundNearestEven = ( roundingMode == float_round_nearest_even );
 420     increment = ( (sbits32) zSig2 < 0 );
 421     if ( ! roundNearestEven ) {
 422         if ( roundingMode == float_round_to_zero ) {
 423             increment = 0;
 424         }
 425         else {
 426             if ( zSign ) {
 427                 increment = ( roundingMode == float_round_down ) && zSig2;
 428             }
 429             else {
 430                 increment = ( roundingMode == float_round_up ) && zSig2;
 431             }
 432         }
 433     }
 434     if ( 0x7FD <= (bits16) zExp ) {
 435         if (    ( 0x7FD < zExp )
 436              || (    ( zExp == 0x7FD )
 437                   && eq64( 0x001FFFFF, 0xFFFFFFFF, zSig0, zSig1 )
 438                   && increment
 439                 )
 440            ) {
 441             float_raise( float_flag_overflow | float_flag_inexact );
 442             if (    ( roundingMode == float_round_to_zero )
 443                  || ( zSign && ( roundingMode == float_round_up ) )
 444                  || ( ! zSign && ( roundingMode == float_round_down ) )
 445                ) {
 446                 return packFloat64( zSign, 0x7FE, 0x000FFFFF, 0xFFFFFFFF );
 447             }
 448             return packFloat64( zSign, 0x7FF, 0, 0 );
 449         }
 450         if ( zExp < 0 ) {
 451             isTiny =
 452                    ( float_detect_tininess == float_tininess_before_rounding )
 453                 || ( zExp < -1 )
 454                 || ! increment
 455                 || lt64( zSig0, zSig1, 0x001FFFFF, 0xFFFFFFFF );
 456             shift64ExtraRightJamming(
 457                 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
 458             zExp = 0;
 459             if ( isTiny && zSig2 ) float_raise( float_flag_underflow );
 460             if ( roundNearestEven ) {
 461                 increment = ( (sbits32) zSig2 < 0 );
 462             }
 463             else {
 464                 if ( zSign ) {
 465                     increment = ( roundingMode == float_round_down ) && zSig2;
 466                 }
 467                 else {
 468                     increment = ( roundingMode == float_round_up ) && zSig2;
 469                 }
 470             }
 471         }
 472     }
 473     if ( zSig2 ) float_exception_flags |= float_flag_inexact;
 474     if ( increment ) {
 475         add64( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
 476         zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
 477     }
 478     else {
 479         if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
 480     }
 481     return packFloat64( zSign, zExp, zSig0, zSig1 );
 482
 483 }
 484
 485 /*
 486 -------------------------------------------------------------------------------
 487 Takes an abstract floating-point value having sign `zSign', exponent `zExp',
 488 and significand formed by the concatenation of `zSig0' and `zSig1', and
 489 returns the proper double-precision floating-point value corresponding
 490 to the abstract input.  This routine is just like `roundAndPackFloat64'
 491 except that the input significand has fewer bits and does not have to be
 492 normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
 493 point exponent.
 494 -------------------------------------------------------------------------------
 495 */
 496 static float64
 497  normalizeRoundAndPackFloat64(
 498      flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1 )
 499 {
 500     int8 shiftCount;
 501     bits32 zSig2;
 502
 503     if ( zSig0 == 0 ) {
 504         zSig0 = zSig1;
 505         zSig1 = 0;
 506         zExp -= 32;
 507     }
 508     shiftCount = countLeadingZeros32( zSig0 ) - 11;
 509     if ( 0 <= shiftCount ) {
 510         zSig2 = 0;
 511         shortShift64Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
 512     }
 513     else {
 514         shift64ExtraRightJamming(
 515             zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
 516     }
 517     zExp -= shiftCount;
 518     return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
 519
 520 }
 521
 522 /*
 523 -------------------------------------------------------------------------------
 524 Returns the result of converting the 32-bit two's complement integer `a' to
 525 the single-precision floating-point format.  The conversion is performed
 526 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 527 -------------------------------------------------------------------------------
 528 */
 529 float32 int32_to_float32( int32 a )
 530 {
 531     flag zSign;
 532
 533     if ( a == 0 ) return 0;
 534     if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
 535     zSign = ( a < 0 );
 536     return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a );
 537
 538 }
 539
 540 /*
 541 -------------------------------------------------------------------------------
 542 Returns the result of converting the 32-bit two's complement integer `a' to
 543 the double-precision floating-point format.  The conversion is performed
 544 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 545 -------------------------------------------------------------------------------
 546 */
 547 float64 int32_to_float64( int32 a )
 548 {
 549     flag zSign;
 550     bits32 absA;
 551     int8 shiftCount;
 552     bits32 zSig0, zSig1;
 553
 554     if ( a == 0 ) return packFloat64( 0, 0, 0, 0 );
 555     zSign = ( a < 0 );
 556     absA = zSign ? - a : a;
 557     shiftCount = countLeadingZeros32( absA ) - 11;
 558     if ( 0 <= shiftCount ) {
 559         zSig0 = absA<<shiftCount;
 560         zSig1 = 0;
 561     }
 562     else {
 563         shift64Right( absA, 0, - shiftCount, &zSig0, &zSig1 );
 564     }
 565     return packFloat64( zSign, 0x412 - shiftCount, zSig0, zSig1 );
 566
 567 }
 568
 569 #ifndef SOFTFLOAT_FOR_GCC
 570 /*
 571 -------------------------------------------------------------------------------
 572 Returns the result of converting the single-precision floating-point value
 573 `a' to the 32-bit two's complement integer format.  The conversion is
 574 performed according to the IEC/IEEE Standard for Binary Floating-Point
 575 Arithmetic---which means in particular that the conversion is rounded
 576 according to the current rounding mode.  If `a' is a NaN, the largest
 577 positive integer is returned.  Otherwise, if the conversion overflows, the
 578 largest integer with the same sign as `a' is returned.
 579 -------------------------------------------------------------------------------
 580 */
 581 int32 float32_to_int32( float32 a )
 582 {
 583     flag aSign;
 584     int16 aExp, shiftCount;
 585     bits32 aSig, aSigExtra;
 586     int32 z;
 587     int8 roundingMode;
 588
 589     aSig = extractFloat32Frac( a );
 590     aExp = extractFloat32Exp( a );
 591     aSign = extractFloat32Sign( a );
 592     shiftCount = aExp - 0x96;
 593     if ( 0 <= shiftCount ) {
 594         if ( 0x9E <= aExp ) {
 595             if ( a != 0xCF000000 ) {
 596                 float_raise( float_flag_invalid );
 597                 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
 598                     return 0x7FFFFFFF;
 599                 }
 600             }
 601             return (sbits32) 0x80000000;
 602         }
 603         z = ( aSig | 0x00800000 )<<shiftCount;
 604         if ( aSign ) z = - z;
 605     }
 606     else {
 607         if ( aExp < 0x7E ) {
 608             aSigExtra = aExp | aSig;
 609             z = 0;
 610         }
 611         else {
 612             aSig |= 0x00800000;
 613             aSigExtra = aSig<<( shiftCount & 31 );
 614             z = aSig>>( - shiftCount );
 615         }
 616         if ( aSigExtra ) float_exception_flags |= float_flag_inexact;
 617         roundingMode = float_rounding_mode;
 618         if ( roundingMode == float_round_nearest_even ) {
 619             if ( (sbits32) aSigExtra < 0 ) {
 620                 ++z;
 621                 if ( (bits32) ( aSigExtra<<1 ) == 0 ) z &= ~1;
 622             }
 623             if ( aSign ) z = - z;
 624         }
 625         else {
 626             aSigExtra = ( aSigExtra != 0 );
 627             if ( aSign ) {
 628                 z += ( roundingMode == float_round_down ) & aSigExtra;
 629                 z = - z;
 630             }
 631             else {
 632                 z += ( roundingMode == float_round_up ) & aSigExtra;
 633             }
 634         }
 635     }
 636     return z;
 637
 638 }
 639 #endif
 640
 641 /*
 642 -------------------------------------------------------------------------------
 643 Returns the result of converting the single-precision floating-point value
 644 `a' to the 32-bit two's complement integer format.  The conversion is
 645 performed according to the IEC/IEEE Standard for Binary Floating-Point
 646 Arithmetic, except that the conversion is always rounded toward zero.
 647 If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
 648 the conversion overflows, the largest integer with the same sign as `a' is
 649 returned.
 650 -------------------------------------------------------------------------------
 651 */
 652 int32 float32_to_int32_round_to_zero( float32 a )
 653 {
 654     flag aSign;
 655     int16 aExp, shiftCount;
 656     bits32 aSig;
 657     int32 z;
 658
 659     aSig = extractFloat32Frac( a );
 660     aExp = extractFloat32Exp( a );
 661     aSign = extractFloat32Sign( a );
 662     shiftCount = aExp - 0x9E;
 663     if ( 0 <= shiftCount ) {
 664         if ( a != 0xCF000000 ) {
 665             float_raise( float_flag_invalid );
 666             if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
 667         }
 668         return (sbits32) 0x80000000;
 669     }
 670     else if ( aExp <= 0x7E ) {
 671         if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
 672         return 0;
 673     }
 674     aSig = ( aSig | 0x00800000 )<<8;
 675     z = aSig>>( - shiftCount );
 676     if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
 677         float_exception_flags |= float_flag_inexact;
 678     }
 679     if ( aSign ) z = - z;
 680     return z;
 681
 682 }
 683
 684 /*
 685 -------------------------------------------------------------------------------
 686 Returns the result of converting the single-precision floating-point value
 687 `a' to the double-precision floating-point format.  The conversion is
 688 performed according to the IEC/IEEE Standard for Binary Floating-Point
 689 Arithmetic.
 690 -------------------------------------------------------------------------------
 691 */
 692 float64 float32_to_float64( float32 a )
 693 {
 694     flag aSign;
 695     int16 aExp;
 696     bits32 aSig, zSig0, zSig1;
 697
 698     aSig = extractFloat32Frac( a );
 699     aExp = extractFloat32Exp( a );
 700     aSign = extractFloat32Sign( a );
 701     if ( aExp == 0xFF ) {
 702         if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );
 703         return packFloat64( aSign, 0x7FF, 0, 0 );
 704     }
 705     if ( aExp == 0 ) {
 706         if ( aSig == 0 ) return packFloat64( aSign, 0, 0, 0 );
 707         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
 708         --aExp;
 709     }
 710     shift64Right( aSig, 0, 3, &zSig0, &zSig1 );
 711     return packFloat64( aSign, aExp + 0x380, zSig0, zSig1 );
 712
 713 }
 714
 715 #ifndef SOFTFLOAT_FOR_GCC
 716 /*
 717 -------------------------------------------------------------------------------
 718 Rounds the single-precision floating-point value `a' to an integer,
 719 and returns the result as a single-precision floating-point value.  The
 720 operation is performed according to the IEC/IEEE Standard for Binary
 721 Floating-Point Arithmetic.
 722 -------------------------------------------------------------------------------
 723 */
 724 float32 float32_round_to_int( float32 a )
 725 {
 726     flag aSign;
 727     int16 aExp;
 728     bits32 lastBitMask, roundBitsMask;
 729     int8 roundingMode;
 730     float32 z;
 731
 732     aExp = extractFloat32Exp( a );
 733     if ( 0x96 <= aExp ) {
 734         if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
 735             return propagateFloat32NaN( a, a );
 736         }
 737         return a;
 738     }
 739     if ( aExp <= 0x7E ) {
 740         if ( (bits32) ( a<<1 ) == 0 ) return a;
 741         float_exception_flags |= float_flag_inexact;
 742         aSign = extractFloat32Sign( a );
 743         switch ( float_rounding_mode ) {
 744          case float_round_nearest_even:
 745             if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
 746                 return packFloat32( aSign, 0x7F, 0 );
 747             }
 748             break;
 749          case float_round_to_zero:
 750             break;
 751          case float_round_down:
 752             return aSign ? 0xBF800000 : 0;
 753          case float_round_up:
 754             return aSign ? 0x80000000 : 0x3F800000;
 755         }
 756         return packFloat32( aSign, 0, 0 );
 757     }
 758     lastBitMask = 1;
 759     lastBitMask <<= 0x96 - aExp;
 760     roundBitsMask = lastBitMask - 1;
 761     z = a;
 762     roundingMode = float_rounding_mode;
 763     if ( roundingMode == float_round_nearest_even ) {
 764         z += lastBitMask>>1;
 765         if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
 766     }
 767     else if ( roundingMode != float_round_to_zero ) {
 768         if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
 769             z += roundBitsMask;
 770         }
 771     }
 772     z &= ~ roundBitsMask;
 773     if ( z != a ) float_exception_flags |= float_flag_inexact;
 774     return z;
 775
 776 }
 777 #endif
 778
 779 /*
 780 -------------------------------------------------------------------------------
 781 Returns the result of adding the absolute values of the single-precision
 782 floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
 783 before being returned.  `zSign' is ignored if the result is a NaN.
 784 The addition is performed according to the IEC/IEEE Standard for Binary
 785 Floating-Point Arithmetic.
 786 -------------------------------------------------------------------------------
 787 */
 788 static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
 789 {
 790     int16 aExp, bExp, zExp;
 791     bits32 aSig, bSig, zSig;
 792     int16 expDiff;
 793
 794     aSig = extractFloat32Frac( a );
 795     aExp = extractFloat32Exp( a );
 796     bSig = extractFloat32Frac( b );
 797     bExp = extractFloat32Exp( b );
 798     expDiff = aExp - bExp;
 799     aSig <<= 6;
 800     bSig <<= 6;
 801     if ( 0 < expDiff ) {
 802         if ( aExp == 0xFF ) {
 803             if ( aSig ) return propagateFloat32NaN( a, b );
 804             return a;
 805         }
 806         if ( bExp == 0 ) {
 807             --expDiff;
 808         }
 809         else {
 810             bSig |= 0x20000000;
 811         }
 812         shift32RightJamming( bSig, expDiff, &bSig );
 813         zExp = aExp;
 814     }
 815     else if ( expDiff < 0 ) {
 816         if ( bExp == 0xFF ) {
 817             if ( bSig ) return propagateFloat32NaN( a, b );
 818             return packFloat32( zSign, 0xFF, 0 );
 819         }
 820         if ( aExp == 0 ) {
 821             ++expDiff;
 822         }
 823         else {
 824             aSig |= 0x20000000;
 825         }
 826         shift32RightJamming( aSig, - expDiff, &aSig );
 827         zExp = bExp;
 828     }
 829     else {
 830         if ( aExp == 0xFF ) {
 831             if ( aSig | bSig ) return propagateFloat32NaN( a, b );
 832             return a;
 833         }
 834         if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
 835         zSig = 0x40000000 + aSig + bSig;
 836         zExp = aExp;
 837         goto roundAndPack;
 838     }
 839     aSig |= 0x20000000;
 840     zSig = ( aSig + bSig )<<1;
 841     --zExp;
 842     if ( (sbits32) zSig < 0 ) {
 843         zSig = aSig + bSig;
 844         ++zExp;
 845     }
 846  roundAndPack:
 847     return roundAndPackFloat32( zSign, zExp, zSig );
 848
 849 }
 850
 851 /*
 852 -------------------------------------------------------------------------------
 853 Returns the result of subtracting the absolute values of the single-
 854 precision floating-point values `a' and `b'.  If `zSign' is 1, the
 855 difference is negated before being returned.  `zSign' is ignored if the
 856 result is a NaN.  The subtraction is performed according to the IEC/IEEE
 857 Standard for Binary Floating-Point Arithmetic.
 858 -------------------------------------------------------------------------------
 859 */
 860 static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
 861 {
 862     int16 aExp, bExp, zExp;
 863     bits32 aSig, bSig, zSig;
 864     int16 expDiff;
 865
 866     aSig = extractFloat32Frac( a );
 867     aExp = extractFloat32Exp( a );
 868     bSig = extractFloat32Frac( b );
 869     bExp = extractFloat32Exp( b );
 870     expDiff = aExp - bExp;
 871     aSig <<= 7;
 872     bSig <<= 7;
 873     if ( 0 < expDiff ) goto aExpBigger;
 874     if ( expDiff < 0 ) goto bExpBigger;
 875     if ( aExp == 0xFF ) {
 876         if ( aSig | bSig ) return propagateFloat32NaN( a, b );
 877         float_raise( float_flag_invalid );
 878         return float32_default_nan;
 879     }
 880     if ( aExp == 0 ) {
 881         aExp = 1;
 882         bExp = 1;
 883     }
 884     if ( bSig < aSig ) goto aBigger;
 885     if ( aSig < bSig ) goto bBigger;
 886     return packFloat32( float_rounding_mode == float_round_down, 0, 0 );
 887  bExpBigger:
 888     if ( bExp == 0xFF ) {
 889         if ( bSig ) return propagateFloat32NaN( a, b );
 890         return packFloat32( zSign ^ 1, 0xFF, 0 );
 891     }
 892     if ( aExp == 0 ) {
 893         ++expDiff;
 894     }
 895     else {
 896         aSig |= 0x40000000;
 897     }
 898     shift32RightJamming( aSig, - expDiff, &aSig );
 899     bSig |= 0x40000000;
 900  bBigger:
 901     zSig = bSig - aSig;
 902     zExp = bExp;
 903     zSign ^= 1;
 904     goto normalizeRoundAndPack;
 905  aExpBigger:
 906     if ( aExp == 0xFF ) {
 907         if ( aSig ) return propagateFloat32NaN( a, b );
 908         return a;
 909     }
 910     if ( bExp == 0 ) {
 911         --expDiff;
 912     }
 913     else {
 914         bSig |= 0x40000000;
 915     }
 916     shift32RightJamming( bSig, expDiff, &bSig );
 917     aSig |= 0x40000000;
 918  aBigger:
 919     zSig = aSig - bSig;
 920     zExp = aExp;
 921  normalizeRoundAndPack:
 922     --zExp;
 923     return normalizeRoundAndPackFloat32( zSign, zExp, zSig );
 924
 925 }
 926
 927 /*
 928 -------------------------------------------------------------------------------
 929 Returns the result of adding the single-precision floating-point values `a'
 930 and `b'.  The operation is performed according to the IEC/IEEE Standard for
 931 Binary Floating-Point Arithmetic.
 932 -------------------------------------------------------------------------------
 933 */
 934 float32 float32_add( float32 a, float32 b )
 935 {
 936     flag aSign, bSign;
 937
 938     aSign = extractFloat32Sign( a );
 939     bSign = extractFloat32Sign( b );
 940     if ( aSign == bSign ) {
 941         return addFloat32Sigs( a, b, aSign );
 942     }
 943     else {
 944         return subFloat32Sigs( a, b, aSign );
 945     }
 946
 947 }
 948
 949 /*
 950 -------------------------------------------------------------------------------
 951 Returns the result of subtracting the single-precision floating-point values
 952 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
 953 for Binary Floating-Point Arithmetic.
 954 -------------------------------------------------------------------------------
 955 */
 956 float32 float32_sub( float32 a, float32 b )
 957 {
 958     flag aSign, bSign;
 959
 960     aSign = extractFloat32Sign( a );
 961     bSign = extractFloat32Sign( b );
 962     if ( aSign == bSign ) {
 963         return subFloat32Sigs( a, b, aSign );
 964     }
 965     else {
 966         return addFloat32Sigs( a, b, aSign );
 967     }
 968
 969 }
 970
 971 /*
 972 -------------------------------------------------------------------------------
 973 Returns the result of multiplying the single-precision floating-point values
 974 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
 975 for Binary Floating-Point Arithmetic.
 976 -------------------------------------------------------------------------------
 977 */
 978 float32 float32_mul( float32 a, float32 b )
 979 {
 980     flag aSign, bSign, zSign;
 981     int16 aExp, bExp, zExp;
 982     bits32 aSig, bSig, zSig0, zSig1;
 983
 984     aSig = extractFloat32Frac( a );
 985     aExp = extractFloat32Exp( a );
 986     aSign = extractFloat32Sign( a );
 987     bSig = extractFloat32Frac( b );
 988     bExp = extractFloat32Exp( b );
 989     bSign = extractFloat32Sign( b );
 990     zSign = aSign ^ bSign;
 991     if ( aExp == 0xFF ) {
 992         if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
 993             return propagateFloat32NaN( a, b );
 994         }
 995         if ( ( bExp | bSig ) == 0 ) {
 996             float_raise( float_flag_invalid );
 997             return float32_default_nan;
 998         }
 999         return packFloat32( zSign, 0xFF, 0 );
1000     }
1001     if ( bExp == 0xFF ) {
1002         if ( bSig ) return propagateFloat32NaN( a, b );
1003         if ( ( aExp | aSig ) == 0 ) {
1004             float_raise( float_flag_invalid );
1005             return float32_default_nan;
1006         }
1007         return packFloat32( zSign, 0xFF, 0 );
1008     }
1009     if ( aExp == 0 ) {
1010         if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1011         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1012     }
1013     if ( bExp == 0 ) {
1014         if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
1015         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1016     }
1017     zExp = aExp + bExp - 0x7F;
1018     aSig = ( aSig | 0x00800000 )<<7;
1019     bSig = ( bSig | 0x00800000 )<<8;
1020     mul32To64( aSig, bSig, &zSig0, &zSig1 );
1021     zSig0 |= ( zSig1 != 0 );
1022     if ( 0 <= (sbits32) ( zSig0<<1 ) ) {
1023         zSig0 <<= 1;
1024         --zExp;
1025     }
1026     return roundAndPackFloat32( zSign, zExp, zSig0 );
1027
1028 }
1029
1030 /*
1031 -------------------------------------------------------------------------------
1032 Returns the result of dividing the single-precision floating-point value `a'
1033 by the corresponding value `b'.  The operation is performed according to the
1034 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1035 -------------------------------------------------------------------------------
1036 */
1037 float32 float32_div( float32 a, float32 b )
1038 {
1039     flag aSign, bSign, zSign;
1040     int16 aExp, bExp, zExp;
1041     bits32 aSig, bSig, zSig, rem0, rem1, term0, term1;
1042
1043     aSig = extractFloat32Frac( a );
1044     aExp = extractFloat32Exp( a );
1045     aSign = extractFloat32Sign( a );
1046     bSig = extractFloat32Frac( b );
1047     bExp = extractFloat32Exp( b );
1048     bSign = extractFloat32Sign( b );
1049     zSign = aSign ^ bSign;
1050     if ( aExp == 0xFF ) {
1051         if ( aSig ) return propagateFloat32NaN( a, b );
1052         if ( bExp == 0xFF ) {
1053             if ( bSig ) return propagateFloat32NaN( a, b );
1054             float_raise( float_flag_invalid );
1055             return float32_default_nan;
1056         }
1057         return packFloat32( zSign, 0xFF, 0 );
1058     }
1059     if ( bExp == 0xFF ) {
1060         if ( bSig ) return propagateFloat32NaN( a, b );
1061         return packFloat32( zSign, 0, 0 );
1062     }
1063     if ( bExp == 0 ) {
1064         if ( bSig == 0 ) {
1065             if ( ( aExp | aSig ) == 0 ) {
1066                 float_raise( float_flag_invalid );
1067                 return float32_default_nan;
1068             }
1069             float_raise( float_flag_divbyzero );
1070             return packFloat32( zSign, 0xFF, 0 );
1071         }
1072         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1073     }
1074     if ( aExp == 0 ) {
1075         if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1076         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1077     }
1078     zExp = aExp - bExp + 0x7D;
1079     aSig = ( aSig | 0x00800000 )<<7;
1080     bSig = ( bSig | 0x00800000 )<<8;
1081     if ( bSig <= ( aSig + aSig ) ) {
1082         aSig >>= 1;
1083         ++zExp;
1084     }
1085     zSig = estimateDiv64To32( aSig, 0, bSig );
1086     if ( ( zSig & 0x3F ) <= 2 ) {
1087         mul32To64( bSig, zSig, &term0, &term1 );
1088         sub64( aSig, 0, term0, term1, &rem0, &rem1 );
1089         while ( (sbits32) rem0 < 0 ) {
1090             --zSig;
1091             add64( rem0, rem1, 0, bSig, &rem0, &rem1 );
1092         }
1093         zSig |= ( rem1 != 0 );
1094     }
1095     return roundAndPackFloat32( zSign, zExp, zSig );
1096
1097 }
1098
1099 #ifndef SOFTFLOAT_FOR_GCC
1100 /*
1101 -------------------------------------------------------------------------------
1102 Returns the remainder of the single-precision floating-point value `a'
1103 with respect to the corresponding value `b'.  The operation is performed
1104 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1105 -------------------------------------------------------------------------------
1106 */
1107 float32 float32_rem( float32 a, float32 b )
1108 {
1109     flag aSign, bSign, zSign;
1110     int16 aExp, bExp, expDiff;
1111     bits32 aSig, bSig, q, allZero, alternateASig;
1112     sbits32 sigMean;
1113
1114     aSig = extractFloat32Frac( a );
1115     aExp = extractFloat32Exp( a );
1116     aSign = extractFloat32Sign( a );
1117     bSig = extractFloat32Frac( b );
1118     bExp = extractFloat32Exp( b );
1119     bSign = extractFloat32Sign( b );
1120     if ( aExp == 0xFF ) {
1121         if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
1122             return propagateFloat32NaN( a, b );
1123         }
1124         float_raise( float_flag_invalid );
1125         return float32_default_nan;
1126     }
1127     if ( bExp == 0xFF ) {
1128         if ( bSig ) return propagateFloat32NaN( a, b );
1129         return a;
1130     }
1131     if ( bExp == 0 ) {
1132         if ( bSig == 0 ) {
1133             float_raise( float_flag_invalid );
1134             return float32_default_nan;
1135         }
1136         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1137     }
1138     if ( aExp == 0 ) {
1139         if ( aSig == 0 ) return a;
1140         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1141     }
1142     expDiff = aExp - bExp;
1143     aSig = ( aSig | 0x00800000 )<<8;
1144     bSig = ( bSig | 0x00800000 )<<8;
1145     if ( expDiff < 0 ) {
1146         if ( expDiff < -1 ) return a;
1147         aSig >>= 1;
1148     }
1149     q = ( bSig <= aSig );
1150     if ( q ) aSig -= bSig;
1151     expDiff -= 32;
1152     while ( 0 < expDiff ) {
1153         q = estimateDiv64To32( aSig, 0, bSig );
1154         q = ( 2 < q ) ? q - 2 : 0;
1155         aSig = - ( ( bSig>>2 ) * q );
1156         expDiff -= 30;
1157     }
1158     expDiff += 32;
1159     if ( 0 < expDiff ) {
1160         q = estimateDiv64To32( aSig, 0, bSig );
1161         q = ( 2 < q ) ? q - 2 : 0;
1162         q >>= 32 - expDiff;
1163         bSig >>= 2;
1164         aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
1165     }
1166     else {
1167         aSig >>= 2;
1168         bSig >>= 2;
1169     }
1170     do {
1171         alternateASig = aSig;
1172         ++q;
1173         aSig -= bSig;
1174     } while ( 0 <= (sbits32) aSig );
1175     sigMean = aSig + alternateASig;
1176     if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
1177         aSig = alternateASig;
1178     }
1179     zSign = ( (sbits32) aSig < 0 );
1180     if ( zSign ) aSig = - aSig;
1181     return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig );
1182
1183 }
1184 #endif
1185
1186 #ifndef SOFTFLOAT_FOR_GCC
1187 /*
1188 -------------------------------------------------------------------------------
1189 Returns the square root of the single-precision floating-point value `a'.
1190 The operation is performed according to the IEC/IEEE Standard for Binary
1191 Floating-Point Arithmetic.
1192 -------------------------------------------------------------------------------
1193 */
1194 float32 float32_sqrt( float32 a )
1195 {
1196     flag aSign;
1197     int16 aExp, zExp;
1198     bits32 aSig, zSig, rem0, rem1, term0, term1;
1199
1200     aSig = extractFloat32Frac( a );
1201     aExp = extractFloat32Exp( a );
1202     aSign = extractFloat32Sign( a );
1203     if ( aExp == 0xFF ) {
1204         if ( aSig ) return propagateFloat32NaN( a, 0 );
1205         if ( ! aSign ) return a;
1206         float_raise( float_flag_invalid );
1207         return float32_default_nan;
1208     }
1209     if ( aSign ) {
1210         if ( ( aExp | aSig ) == 0 ) return a;
1211         float_raise( float_flag_invalid );
1212         return float32_default_nan;
1213     }
1214     if ( aExp == 0 ) {
1215         if ( aSig == 0 ) return 0;
1216         normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1217     }
1218     zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
1219     aSig = ( aSig | 0x00800000 )<<8;
1220     zSig = estimateSqrt32( aExp, aSig ) + 2;
1221     if ( ( zSig & 0x7F ) <= 5 ) {
1222         if ( zSig < 2 ) {
1223             zSig = 0x7FFFFFFF;
1224             goto roundAndPack;
1225         }
1226         else {
1227             aSig >>= aExp & 1;
1228             mul32To64( zSig, zSig, &term0, &term1 );
1229             sub64( aSig, 0, term0, term1, &rem0, &rem1 );
1230             while ( (sbits32) rem0 < 0 ) {
1231                 --zSig;
1232                 shortShift64Left( 0, zSig, 1, &term0, &term1 );
1233                 term1 |= 1;
1234                 add64( rem0, rem1, term0, term1, &rem0, &rem1 );
1235             }
1236             zSig |= ( ( rem0 | rem1 ) != 0 );
1237         }
1238     }
1239     shift32RightJamming( zSig, 1, &zSig );
1240  roundAndPack:
1241     return roundAndPackFloat32( 0, zExp, zSig );
1242
1243 }
1244 #endif
1245
1246 /*
1247 -------------------------------------------------------------------------------
1248 Returns 1 if the single-precision floating-point value `a' is equal to
1249 the corresponding value `b', and 0 otherwise.  The comparison is performed
1250 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1251 -------------------------------------------------------------------------------
1252 */
1253 flag float32_eq( float32 a, float32 b )
1254 {
1255
1256     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1257          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1258        ) {
1259         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
1260             float_raise( float_flag_invalid );
1261         }
1262         return 0;
1263     }
1264     return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
1265
1266 }
1267
1268 /*
1269 -------------------------------------------------------------------------------
1270 Returns 1 if the single-precision floating-point value `a' is less than
1271 or equal to the corresponding value `b', and 0 otherwise.  The comparison
1272 is performed according to the IEC/IEEE Standard for Binary Floating-Point
1273 Arithmetic.
1274 -------------------------------------------------------------------------------
1275 */
1276 flag float32_le( float32 a, float32 b )
1277 {
1278     flag aSign, bSign;
1279
1280     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1281          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1282        ) {
1283         float_raise( float_flag_invalid );
1284         return 0;
1285     }
1286     aSign = extractFloat32Sign( a );
1287     bSign = extractFloat32Sign( b );
1288     if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
1289     return ( a == b ) || ( aSign ^ ( a < b ) );
1290
1291 }
1292
1293 /*
1294 -------------------------------------------------------------------------------
1295 Returns 1 if the single-precision floating-point value `a' is less than
1296 the corresponding value `b', and 0 otherwise.  The comparison is performed
1297 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1298 -------------------------------------------------------------------------------
1299 */
1300 flag float32_lt( float32 a, float32 b )
1301 {
1302     flag aSign, bSign;
1303
1304     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1305          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1306        ) {
1307         float_raise( float_flag_invalid );
1308         return 0;
1309     }
1310     aSign = extractFloat32Sign( a );
1311     bSign = extractFloat32Sign( b );
1312     if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
1313     return ( a != b ) && ( aSign ^ ( a < b ) );
1314
1315 }
1316
1317 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
1318 /*
1319 -------------------------------------------------------------------------------
1320 Returns 1 if the single-precision floating-point value `a' is equal to
1321 the corresponding value `b', and 0 otherwise.  The invalid exception is
1322 raised if either operand is a NaN.  Otherwise, the comparison is performed
1323 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1324 -------------------------------------------------------------------------------
1325 */
1326 flag float32_eq_signaling( float32 a, float32 b )
1327 {
1328
1329     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1330          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1331        ) {
1332         float_raise( float_flag_invalid );
1333         return 0;
1334     }
1335     return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
1336
1337 }
1338
1339 /*
1340 -------------------------------------------------------------------------------
1341 Returns 1 if the single-precision floating-point value `a' is less than or
1342 equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
1343 cause an exception.  Otherwise, the comparison is performed according to the
1344 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1345 -------------------------------------------------------------------------------
1346 */
1347 flag float32_le_quiet( float32 a, float32 b )
1348 {
1349     flag aSign, bSign;
1350     int16 aExp, bExp;
1351
1352     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1353          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1354        ) {
1355         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
1356             float_raise( float_flag_invalid );
1357         }
1358         return 0;
1359     }
1360     aSign = extractFloat32Sign( a );
1361     bSign = extractFloat32Sign( b );
1362     if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
1363     return ( a == b ) || ( aSign ^ ( a < b ) );
1364
1365 }
1366
1367 /*
1368 -------------------------------------------------------------------------------
1369 Returns 1 if the single-precision floating-point value `a' is less than
1370 the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
1371 exception.  Otherwise, the comparison is performed according to the IEC/IEEE
1372 Standard for Binary Floating-Point Arithmetic.
1373 -------------------------------------------------------------------------------
1374 */
1375 flag float32_lt_quiet( float32 a, float32 b )
1376 {
1377     flag aSign, bSign;
1378
1379     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1380          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1381        ) {
1382         if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
1383             float_raise( float_flag_invalid );
1384         }
1385         return 0;
1386     }
1387     aSign = extractFloat32Sign( a );
1388     bSign = extractFloat32Sign( b );
1389     if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
1390     return ( a != b ) && ( aSign ^ ( a < b ) );
1391
1392 }
1393 #endif /* !SOFTFLOAT_FOR_GCC */
1394
1395 #ifndef SOFTFLOAT_FOR_GCC /* Not needed */
1396 /*
1397 -------------------------------------------------------------------------------
1398 Returns the result of converting the double-precision floating-point value
1399 `a' to the 32-bit two's complement integer format.  The conversion is
1400 performed according to the IEC/IEEE Standard for Binary Floating-Point
1401 Arithmetic---which means in particular that the conversion is rounded
1402 according to the current rounding mode.  If `a' is a NaN, the largest
1403 positive integer is returned.  Otherwise, if the conversion overflows, the
1404 largest integer with the same sign as `a' is returned.
1405 -------------------------------------------------------------------------------
1406 */
1407 int32 float64_to_int32( float64 a )
1408 {
1409     flag aSign;
1410     int16 aExp, shiftCount;
1411     bits32 aSig0, aSig1, absZ, aSigExtra;
1412     int32 z;
1413     int8 roundingMode;
1414
1415     aSig1 = extractFloat64Frac1( a );
1416     aSig0 = extractFloat64Frac0( a );
1417     aExp = extractFloat64Exp( a );
1418     aSign = extractFloat64Sign( a );
1419     shiftCount = aExp - 0x413;
1420     if ( 0 <= shiftCount ) {
1421         if ( 0x41E < aExp ) {
1422             if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0;
1423             goto invalid;
1424         }
1425         shortShift64Left(
1426             aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra );
1427         if ( 0x80000000 < absZ ) goto invalid;
1428     }
1429     else {
1430         aSig1 = ( aSig1 != 0 );
1431         if ( aExp < 0x3FE ) {
1432             aSigExtra = aExp | aSig0 | aSig1;
1433             absZ = 0;
1434         }
1435         else {
1436             aSig0 |= 0x00100000;
1437             aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1;
1438             absZ = aSig0>>( - shiftCount );
1439         }
1440     }
1441     roundingMode = float_rounding_mode;
1442     if ( roundingMode == float_round_nearest_even ) {
1443         if ( (sbits32) aSigExtra < 0 ) {
1444             ++absZ;
1445             if ( (bits32) ( aSigExtra<<1 ) == 0 ) absZ &= ~1;
1446         }
1447         z = aSign ? - absZ : absZ;
1448     }
1449     else {
1450         aSigExtra = ( aSigExtra != 0 );
1451         if ( aSign ) {
1452             z = - (   absZ
1453                     + ( ( roundingMode == float_round_down ) & aSigExtra ) );
1454         }
1455         else {
1456             z = absZ + ( ( roundingMode == float_round_up ) & aSigExtra );
1457         }
1458     }
1459     if ( ( aSign ^ ( z < 0 ) ) && z ) {
1460  invalid:
1461         float_raise( float_flag_invalid );
1462         return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
1463     }
1464     if ( aSigExtra ) float_exception_flags |= float_flag_inexact;
1465     return z;
1466
1467 }
1468 #endif /* !SOFTFLOAT_FOR_GCC */
1469
1470 /*
1471 -------------------------------------------------------------------------------
1472 Returns the result of converting the double-precision floating-point value
1473 `a' to the 32-bit two's complement integer format.  The conversion is
1474 performed according to the IEC/IEEE Standard for Binary Floating-Point
1475 Arithmetic, except that the conversion is always rounded toward zero.
1476 If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
1477 the conversion overflows, the largest integer with the same sign as `a' is
1478 returned.
1479 -------------------------------------------------------------------------------
1480 */
1481 int32 float64_to_int32_round_to_zero( float64 a )
1482 {
1483     flag aSign;
1484     int16 aExp, shiftCount;
1485     bits32 aSig0, aSig1, absZ, aSigExtra;
1486     int32 z;
1487
1488     aSig1 = extractFloat64Frac1( a );
1489     aSig0 = extractFloat64Frac0( a );
1490     aExp = extractFloat64Exp( a );
1491     aSign = extractFloat64Sign( a );
1492     shiftCount = aExp - 0x413;
1493     if ( 0 <= shiftCount ) {
1494         if ( 0x41E < aExp ) {
1495             if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0;
1496             goto invalid;
1497         }
1498         shortShift64Left(
1499             aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra );
1500     }
1501     else {
1502         if ( aExp < 0x3FF ) {
1503             if ( aExp | aSig0 | aSig1 ) {
1504                 float_exception_flags |= float_flag_inexact;
1505             }
1506             return 0;
1507         }
1508         aSig0 |= 0x00100000;
1509         aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1;
1510         absZ = aSig0>>( - shiftCount );
1511     }
1512     z = aSign ? - absZ : absZ;
1513     if ( ( aSign ^ ( z < 0 ) ) && z ) {
1514  invalid:
1515         float_raise( float_flag_invalid );
1516         return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
1517     }
1518     if ( aSigExtra ) float_exception_flags |= float_flag_inexact;
1519     return z;
1520
1521 }
1522
1523 /*
1524 -------------------------------------------------------------------------------
1525 Returns the result of converting the double-precision floating-point value
1526 `a' to the single-precision floating-point format.  The conversion is
1527 performed according to the IEC/IEEE Standard for Binary Floating-Point
1528 Arithmetic.
1529 -------------------------------------------------------------------------------
1530 */
1531 float32 float64_to_float32( float64 a )
1532 {
1533     flag aSign;
1534     int16 aExp;
1535     bits32 aSig0, aSig1, zSig;
1536     bits32 allZero;
1537
1538     aSig1 = extractFloat64Frac1( a );
1539     aSig0 = extractFloat64Frac0( a );
1540     aExp = extractFloat64Exp( a );
1541     aSign = extractFloat64Sign( a );
1542     if ( aExp == 0x7FF ) {
1543         if ( aSig0 | aSig1 ) {
1544             return commonNaNToFloat32( float64ToCommonNaN( a ) );
1545         }
1546         return packFloat32( aSign, 0xFF, 0 );
1547     }
1548     shift64RightJamming( aSig0, aSig1, 22, &allZero, &zSig );
1549     if ( aExp ) zSig |= 0x40000000;
1550     return roundAndPackFloat32( aSign, aExp - 0x381, zSig );
1551
1552 }
1553
1554 #ifndef SOFTFLOAT_FOR_GCC
1555 /*
1556 -------------------------------------------------------------------------------
1557 Rounds the double-precision floating-point value `a' to an integer,
1558 and returns the result as a double-precision floating-point value.  The
1559 operation is performed according to the IEC/IEEE Standard for Binary
1560 Floating-Point Arithmetic.
1561 -------------------------------------------------------------------------------
1562 */
1563 float64 float64_round_to_int( float64 a )
1564 {
1565     flag aSign;
1566     int16 aExp;
1567     bits32 lastBitMask, roundBitsMask;
1568     int8 roundingMode;
1569     float64 z;
1570
1571     aExp = extractFloat64Exp( a );
1572     if ( 0x413 <= aExp ) {
1573         if ( 0x433 <= aExp ) {
1574             if (    ( aExp == 0x7FF )
1575                  && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) {
1576                 return propagateFloat64NaN( a, a );
1577             }
1578             return a;
1579         }
1580         lastBitMask = 1;
1581         lastBitMask = ( lastBitMask<<( 0x432 - aExp ) )<<1;
1582         roundBitsMask = lastBitMask - 1;
1583         z = a;
1584         roundingMode = float_rounding_mode;
1585         if ( roundingMode == float_round_nearest_even ) {
1586             if ( lastBitMask ) {
1587                 add64( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
1588                 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
1589             }
1590             else {
1591                 if ( (sbits32) z.low < 0 ) {
1592                     ++z.high;
1593                     if ( (bits32) ( z.low<<1 ) == 0 ) z.high &= ~1;
1594                 }
1595             }
1596         }
1597         else if ( roundingMode != float_round_to_zero ) {
1598             if (   extractFloat64Sign( z )
1599                  ^ ( roundingMode == float_round_up ) ) {
1600                 add64( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
1601             }
1602         }
1603         z.low &= ~ roundBitsMask;
1604     }
1605     else {
1606         if ( aExp <= 0x3FE ) {
1607             if ( ( ( (bits32) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
1608             float_exception_flags |= float_flag_inexact;
1609             aSign = extractFloat64Sign( a );
1610             switch ( float_rounding_mode ) {
1611              case float_round_nearest_even:
1612                 if (    ( aExp == 0x3FE )
1613                      && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) )
1614                    ) {
1615                     return packFloat64( aSign, 0x3FF, 0, 0 );
1616                 }
1617                 break;
1618              case float_round_down:
1619                 return
1620                       aSign ? packFloat64( 1, 0x3FF, 0, 0 )
1621                     : packFloat64( 0, 0, 0, 0 );
1622              case float_round_up:
1623                 return
1624                       aSign ? packFloat64( 1, 0, 0, 0 )
1625                     : packFloat64( 0, 0x3FF, 0, 0 );
1626             }
1627             return packFloat64( aSign, 0, 0, 0 );
1628         }
1629         lastBitMask = 1;
1630         lastBitMask <<= 0x413 - aExp;
1631         roundBitsMask = lastBitMask - 1;
1632         z.low = 0;
1633         z.high = a.high;
1634         roundingMode = float_rounding_mode;
1635         if ( roundingMode == float_round_nearest_even ) {
1636             z.high += lastBitMask>>1;
1637             if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
1638                 z.high &= ~ lastBitMask;
1639             }
1640         }
1641         else if ( roundingMode != float_round_to_zero ) {
1642             if (   extractFloat64Sign( z )
1643                  ^ ( roundingMode == float_round_up ) ) {
1644                 z.high |= ( a.low != 0 );
1645                 z.high += roundBitsMask;
1646             }
1647         }
1648         z.high &= ~ roundBitsMask;
1649     }
1650     if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
1651         float_exception_flags |= float_flag_inexact;
1652     }
1653     return z;
1654
1655 }
1656 #endif
1657
1658 /*
1659 -------------------------------------------------------------------------------
1660 Returns the result of adding the absolute values of the double-precision
1661 floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
1662 before being returned.  `zSign' is ignored if the result is a NaN.
1663 The addition is performed according to the IEC/IEEE Standard for Binary
1664 Floating-Point Arithmetic.
1665 -------------------------------------------------------------------------------
1666 */
1667 static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
1668 {
1669     int16 aExp, bExp, zExp;
1670     bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
1671     int16 expDiff;
1672
1673     aSig1 = extractFloat64Frac1( a );
1674     aSig0 = extractFloat64Frac0( a );
1675     aExp = extractFloat64Exp( a );
1676     bSig1 = extractFloat64Frac1( b );
1677     bSig0 = extractFloat64Frac0( b );
1678     bExp = extractFloat64Exp( b );
1679     expDiff = aExp - bExp;
1680     if ( 0 < expDiff ) {
1681         if ( aExp == 0x7FF ) {
1682             if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
1683             return a;
1684         }
1685         if ( bExp == 0 ) {
1686             --expDiff;
1687         }
1688         else {
1689             bSig0 |= 0x00100000;
1690         }
1691         shift64ExtraRightJamming(
1692             bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
1693         zExp = aExp;
1694     }
1695     else if ( expDiff < 0 ) {
1696         if ( bExp == 0x7FF ) {
1697             if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1698             return packFloat64( zSign, 0x7FF, 0, 0 );
1699         }
1700         if ( aExp == 0 ) {
1701             ++expDiff;
1702         }
1703         else {
1704             aSig0 |= 0x00100000;
1705         }
1706         shift64ExtraRightJamming(
1707             aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
1708         zExp = bExp;
1709     }
1710     else {
1711         if ( aExp == 0x7FF ) {
1712             if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
1713                 return propagateFloat64NaN( a, b );
1714             }
1715             return a;
1716         }
1717         add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
1718         if ( aExp == 0 ) return packFloat64( zSign, 0, zSig0, zSig1 );
1719         zSig2 = 0;
1720         zSig0 |= 0x00200000;
1721         zExp = aExp;
1722         goto shiftRight1;
1723     }
1724     aSig0 |= 0x00100000;
1725     add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
1726     --zExp;
1727     if ( zSig0 < 0x00200000 ) goto roundAndPack;
1728     ++zExp;
1729  shiftRight1:
1730     shift64ExtraRightJamming( zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
1731  roundAndPack:
1732     return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
1733
1734 }
1735
1736 /*
1737 -------------------------------------------------------------------------------
1738 Returns the result of subtracting the absolute values of the double-
1739 precision floating-point values `a' and `b'.  If `zSign' is 1, the
1740 difference is negated before being returned.  `zSign' is ignored if the
1741 result is a NaN.  The subtraction is performed according to the IEC/IEEE
1742 Standard for Binary Floating-Point Arithmetic.
1743 -------------------------------------------------------------------------------
1744 */
1745 static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
1746 {
1747     int16 aExp, bExp, zExp;
1748     bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
1749     int16 expDiff;
1750
1751     aSig1 = extractFloat64Frac1( a );
1752     aSig0 = extractFloat64Frac0( a );
1753     aExp = extractFloat64Exp( a );
1754     bSig1 = extractFloat64Frac1( b );
1755     bSig0 = extractFloat64Frac0( b );
1756     bExp = extractFloat64Exp( b );
1757     expDiff = aExp - bExp;
1758     shortShift64Left( aSig0, aSig1, 10, &aSig0, &aSig1 );
1759     shortShift64Left( bSig0, bSig1, 10, &bSig0, &bSig1 );
1760     if ( 0 < expDiff ) goto aExpBigger;
1761     if ( expDiff < 0 ) goto bExpBigger;
1762     if ( aExp == 0x7FF ) {
1763         if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
1764             return propagateFloat64NaN( a, b );
1765         }
1766         float_raise( float_flag_invalid );
1767         return float64_default_nan;
1768     }
1769     if ( aExp == 0 ) {
1770         aExp = 1;
1771         bExp = 1;
1772     }
1773     if ( bSig0 < aSig0 ) goto aBigger;
1774     if ( aSig0 < bSig0 ) goto bBigger;
1775     if ( bSig1 < aSig1 ) goto aBigger;
1776     if ( aSig1 < bSig1 ) goto bBigger;
1777     return packFloat64( float_rounding_mode == float_round_down, 0, 0, 0 );
1778  bExpBigger:
1779     if ( bExp == 0x7FF ) {
1780         if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1781         return packFloat64( zSign ^ 1, 0x7FF, 0, 0 );
1782     }
1783     if ( aExp == 0 ) {
1784         ++expDiff;
1785     }
1786     else {
1787         aSig0 |= 0x40000000;
1788     }
1789     shift64RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
1790     bSig0 |= 0x40000000;
1791  bBigger:
1792     sub64( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
1793     zExp = bExp;
1794     zSign ^= 1;
1795     goto normalizeRoundAndPack;
1796  aExpBigger:
1797     if ( aExp == 0x7FF ) {
1798         if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
1799         return a;
1800     }
1801     if ( bExp == 0 ) {
1802         --expDiff;
1803     }
1804     else {
1805         bSig0 |= 0x40000000;
1806     }
1807     shift64RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
1808     aSig0 |= 0x40000000;
1809  aBigger:
1810     sub64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
1811     zExp = aExp;
1812  normalizeRoundAndPack:
1813     --zExp;
1814     return normalizeRoundAndPackFloat64( zSign, zExp - 10, zSig0, zSig1 );
1815
1816 }
1817
1818 /*
1819 -------------------------------------------------------------------------------
1820 Returns the result of adding the double-precision floating-point values `a'
1821 and `b'.  The operation is performed according to the IEC/IEEE Standard for
1822 Binary Floating-Point Arithmetic.
1823 -------------------------------------------------------------------------------
1824 */
1825 float64 float64_add( float64 a, float64 b )
1826 {
1827     flag aSign, bSign;
1828
1829     aSign = extractFloat64Sign( a );
1830     bSign = extractFloat64Sign( b );
1831     if ( aSign == bSign ) {
1832         return addFloat64Sigs( a, b, aSign );
1833     }
1834     else {
1835         return subFloat64Sigs( a, b, aSign );
1836     }
1837
1838 }
1839
1840 /*
1841 -------------------------------------------------------------------------------
1842 Returns the result of subtracting the double-precision floating-point values
1843 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1844 for Binary Floating-Point Arithmetic.
1845 -------------------------------------------------------------------------------
1846 */
1847 float64 float64_sub( float64 a, float64 b )
1848 {
1849     flag aSign, bSign;
1850
1851     aSign = extractFloat64Sign( a );
1852     bSign = extractFloat64Sign( b );
1853     if ( aSign == bSign ) {
1854         return subFloat64Sigs( a, b, aSign );
1855     }
1856     else {
1857         return addFloat64Sigs( a, b, aSign );
1858     }
1859
1860 }
1861
1862 /*
1863 -------------------------------------------------------------------------------
1864 Returns the result of multiplying the double-precision floating-point values
1865 `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1866 for Binary Floating-Point Arithmetic.
1867 -------------------------------------------------------------------------------
1868 */
1869 float64 float64_mul( float64 a, float64 b )
1870 {
1871     flag aSign, bSign, zSign;
1872     int16 aExp, bExp, zExp;
1873     bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
1874
1875     aSig1 = extractFloat64Frac1( a );
1876     aSig0 = extractFloat64Frac0( a );
1877     aExp = extractFloat64Exp( a );
1878     aSign = extractFloat64Sign( a );
1879     bSig1 = extractFloat64Frac1( b );
1880     bSig0 = extractFloat64Frac0( b );
1881     bExp = extractFloat64Exp( b );
1882     bSign = extractFloat64Sign( b );
1883     zSign = aSign ^ bSign;
1884     if ( aExp == 0x7FF ) {
1885         if (    ( aSig0 | aSig1 )
1886              || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) {
1887             return propagateFloat64NaN( a, b );
1888         }
1889         if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
1890         return packFloat64( zSign, 0x7FF, 0, 0 );
1891     }
1892     if ( bExp == 0x7FF ) {
1893         if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1894         if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
1895  invalid:
1896             float_raise( float_flag_invalid );
1897             return float64_default_nan;
1898         }
1899         return packFloat64( zSign, 0x7FF, 0, 0 );
1900     }
1901     if ( aExp == 0 ) {
1902         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );
1903         normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
1904     }
1905     if ( bExp == 0 ) {
1906         if ( ( bSig0 | bSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );
1907         normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
1908     }
1909     zExp = aExp + bExp - 0x400;
1910     aSig0 |= 0x00100000;
1911     shortShift64Left( bSig0, bSig1, 12, &bSig0, &bSig1 );
1912     mul64To128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
1913     add64( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
1914     zSig2 |= ( zSig3 != 0 );
1915     if ( 0x00200000 <= zSig0 ) {
1916         shift64ExtraRightJamming(
1917             zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
1918         ++zExp;
1919     }
1920     return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
1921
1922 }
1923
1924 /*
1925 -------------------------------------------------------------------------------
1926 Returns the result of dividing the double-precision floating-point value `a'
1927 by the corresponding value `b'.  The operation is performed according to the
1928 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1929 -------------------------------------------------------------------------------
1930 */
1931 float64 float64_div( float64 a, float64 b )
1932 {
1933     flag aSign, bSign, zSign;
1934     int16 aExp, bExp, zExp;
1935     bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
1936     bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
1937
1938     aSig1 = extractFloat64Frac1( a );
1939     aSig0 = extractFloat64Frac0( a );
1940     aExp = extractFloat64Exp( a );
1941     aSign = extractFloat64Sign( a );
1942     bSig1 = extractFloat64Frac1( b );
1943     bSig0 = extractFloat64Frac0( b );
1944     bExp = extractFloat64Exp( b );
1945     bSign = extractFloat64Sign( b );
1946     zSign = aSign ^ bSign;
1947     if ( aExp == 0x7FF ) {
1948         if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
1949         if ( bExp == 0x7FF ) {
1950             if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1951             goto invalid;
1952         }
1953         return packFloat64( zSign, 0x7FF, 0, 0 );
1954     }
1955     if ( bExp == 0x7FF ) {
1956         if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1957         return packFloat64( zSign, 0, 0, 0 );
1958     }
1959     if ( bExp == 0 ) {
1960         if ( ( bSig0 | bSig1 ) == 0 ) {
1961             if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
1962  invalid:
1963                 float_raise( float_flag_invalid );
1964                 return float64_default_nan;
1965             }
1966             float_raise( float_flag_divbyzero );
1967             return packFloat64( zSign, 0x7FF, 0, 0 );
1968         }
1969         normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
1970     }
1971     if ( aExp == 0 ) {
1972         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );
1973         normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
1974     }
1975     zExp = aExp - bExp + 0x3FD;
1976     shortShift64Left( aSig0 | 0x00100000, aSig1, 11, &aSig0, &aSig1 );
1977     shortShift64Left( bSig0 | 0x00100000, bSig1, 11, &bSig0, &bSig1 );
1978     if ( le64( bSig0, bSig1, aSig0, aSig1 ) ) {
1979         shift64Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
1980         ++zExp;
1981     }
1982     zSig0 = estimateDiv64To32( aSig0, aSig1, bSig0 );
1983     mul64By32To96( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
1984     sub96( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
1985     while ( (sbits32) rem0 < 0 ) {
1986         --zSig0;
1987         add96( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
1988     }
1989     zSig1 = estimateDiv64To32( rem1, rem2, bSig0 );
1990     if ( ( zSig1 & 0x3FF ) <= 4 ) {
1991         mul64By32To96( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
1992         sub96( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
1993         while ( (sbits32) rem1 < 0 ) {
1994             --zSig1;
1995             add96( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
1996         }
1997         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
1998     }
1999     shift64ExtraRightJamming( zSig0, zSig1, 0, 11, &zSig0, &zSig1, &zSig2 );
2000     return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
2001
2002 }
2003
2004 #ifndef SOFTFLOAT_FOR_GCC
2005 /*
2006 -------------------------------------------------------------------------------
2007 Returns the remainder of the double-precision floating-point value `a'
2008 with respect to the corresponding value `b'.  The operation is performed
2009 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2010 -------------------------------------------------------------------------------
2011 */
2012 float64 float64_rem( float64 a, float64 b )
2013 {
2014     flag aSign, bSign, zSign;
2015     int16 aExp, bExp, expDiff;
2016     bits32 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
2017     bits32 allZero, alternateASig0, alternateASig1, sigMean1;
2018     sbits32 sigMean0;
2019     float64 z;
2020
2021     aSig1 = extractFloat64Frac1( a );
2022     aSig0 = extractFloat64Frac0( a );
2023     aExp = extractFloat64Exp( a );
2024     aSign = extractFloat64Sign( a );
2025     bSig1 = extractFloat64Frac1( b );
2026     bSig0 = extractFloat64Frac0( b );
2027     bExp = extractFloat64Exp( b );
2028     bSign = extractFloat64Sign( b );
2029     if ( aExp == 0x7FF ) {
2030         if (    ( aSig0 | aSig1 )
2031              || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) {
2032             return propagateFloat64NaN( a, b );
2033         }
2034         goto invalid;
2035     }
2036     if ( bExp == 0x7FF ) {
2037         if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
2038         return a;
2039     }
2040     if ( bExp == 0 ) {
2041         if ( ( bSig0 | bSig1 ) == 0 ) {
2042  invalid:
2043             float_raise( float_flag_invalid );
2044             return float64_default_nan;
2045         }
2046         normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
2047     }
2048     if ( aExp == 0 ) {
2049         if ( ( aSig0 | aSig1 ) == 0 ) return a;
2050         normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
2051     }
2052     expDiff = aExp - bExp;
2053     if ( expDiff < -1 ) return a;
2054     shortShift64Left(
2055         aSig0 | 0x00100000, aSig1, 11 - ( expDiff < 0 ), &aSig0, &aSig1 );
2056     shortShift64Left( bSig0 | 0x00100000, bSig1, 11, &bSig0, &bSig1 );
2057     q = le64( bSig0, bSig1, aSig0, aSig1 );
2058     if ( q ) sub64( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
2059     expDiff -= 32;
2060     while ( 0 < expDiff ) {
2061         q = estimateDiv64To32( aSig0, aSig1, bSig0 );
2062         q = ( 4 < q ) ? q - 4 : 0;
2063         mul64By32To96( bSig0, bSig1, q, &term0, &term1, &term2 );
2064         shortShift96Left( term0, term1, term2, 29, &term1, &term2, &allZero );
2065         shortShift64Left( aSig0, aSig1, 29, &aSig0, &allZero );
2066         sub64( aSig0, 0, term1, term2, &aSig0, &aSig1 );
2067         expDiff -= 29;
2068     }
2069     if ( -32 < expDiff ) {
2070         q = estimateDiv64To32( aSig0, aSig1, bSig0 );
2071         q = ( 4 < q ) ? q - 4 : 0;
2072         q >>= - expDiff;
2073         shift64Right( bSig0, bSig1, 8, &bSig0, &bSig1 );
2074         expDiff += 24;
2075         if ( expDiff < 0 ) {
2076             shift64Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
2077         }
2078         else {
2079             shortShift64Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
2080         }
2081         mul64By32To96( bSig0, bSig1, q, &term0, &term1, &term2 );
2082         sub64( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
2083     }
2084     else {
2085         shift64Right( aSig0, aSig1, 8, &aSig0, &aSig1 );
2086         shift64Right( bSig0, bSig1, 8, &bSig0, &bSig1 );
2087     }
2088     do {
2089         alternateASig0 = aSig0;
2090         alternateASig1 = aSig1;
2091         ++q;
2092         sub64( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
2093     } while ( 0 <= (sbits32) aSig0 );
2094     add64(
2095         aSig0, aSig1, alternateASig0, alternateASig1, &sigMean0, &sigMean1 );
2096     if (    ( sigMean0 < 0 )
2097          || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
2098         aSig0 = alternateASig0;
2099         aSig1 = alternateASig1;
2100     }
2101     zSign = ( (sbits32) aSig0 < 0 );
2102     if ( zSign ) sub64( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
2103     return
2104         normalizeRoundAndPackFloat64( aSign ^ zSign, bExp - 4, aSig0, aSig1 );
2105
2106 }
2107 #endif
2108
2109 #ifndef SOFTFLOAT_FOR_GCC
2110 /*
2111 -------------------------------------------------------------------------------
2112 Returns the square root of the double-precision floating-point value `a'.
2113 The operation is performed according to the IEC/IEEE Standard for Binary
2114 Floating-Point Arithmetic.
2115 -------------------------------------------------------------------------------
2116 */
2117 float64 float64_sqrt( float64 a )
2118 {
2119     flag aSign;
2120     int16 aExp, zExp;
2121     bits32 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
2122     bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
2123     float64 z;
2124
2125     aSig1 = extractFloat64Frac1( a );
2126     aSig0 = extractFloat64Frac0( a );
2127     aExp = extractFloat64Exp( a );
2128     aSign = extractFloat64Sign( a );
2129     if ( aExp == 0x7FF ) {
2130         if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, a );
2131         if ( ! aSign ) return a;
2132         goto invalid;
2133     }
2134     if ( aSign ) {
2135         if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
2136  invalid:
2137         float_raise( float_flag_invalid );
2138         return float64_default_nan;
2139     }
2140     if ( aExp == 0 ) {
2141         if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( 0, 0, 0, 0 );
2142         normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
2143     }
2144     zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
2145     aSig0 |= 0x00100000;
2146     shortShift64Left( aSig0, aSig1, 11, &term0, &term1 );
2147     zSig0 = ( estimateSqrt32( aExp, term0 )>>1 ) + 1;
2148     if ( zSig0 == 0 ) zSig0 = 0x7FFFFFFF;
2149     doubleZSig0 = zSig0 + zSig0;
2150     shortShift64Left( aSig0, aSig1, 9 - ( aExp & 1 ), &aSig0, &aSig1 );
2151     mul32To64( zSig0, zSig0, &term0, &term1 );
2152     sub64( aSig0, aSig1, term0, term1, &rem0, &rem1 );
2153     while ( (sbits32) rem0 < 0 ) {
2154         --zSig0;
2155         doubleZSig0 -= 2;
2156         add64( rem0, rem1, 0, doubleZSig0 | 1, &rem0, &rem1 );
2157     }
2158     zSig1 = estimateDiv64To32( rem1, 0, doubleZSig0 );
2159     if ( ( zSig1 & 0x1FF ) <= 5 ) {
2160         if ( zSig1 == 0 ) zSig1 = 1;
2161         mul32To64( doubleZSig0, zSig1, &term1, &term2 );
2162         sub64( rem1, 0, term1, term2, &rem1, &rem2 );
2163         mul32To64( zSig1, zSig1, &term2, &term3 );
2164         sub96( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
2165         while ( (sbits32) rem1 < 0 ) {
2166             --zSig1;
2167             shortShift64Left( 0, zSig1, 1, &term2, &term3 );
2168             term3 |= 1;
2169             term2 |= doubleZSig0;
2170             add96( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
2171         }
2172         zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
2173     }
2174     shift64ExtraRightJamming( zSig0, zSig1, 0, 10, &zSig0, &zSig1, &zSig2 );
2175     return roundAndPackFloat64( 0, zExp, zSig0, zSig1, zSig2 );
2176
2177 }
2178 #endif
2179
2180 /*
2181 -------------------------------------------------------------------------------
2182 Returns 1 if the double-precision floating-point value `a' is equal to
2183 the corresponding value `b', and 0 otherwise.  The comparison is performed
2184 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2185 -------------------------------------------------------------------------------
2186 */
2187 flag float64_eq( float64 a, float64 b )
2188 {
2189
2190     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2191               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2192          || (    ( extractFloat64Exp( b ) == 0x7FF )
2193               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2194        ) {
2195         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
2196             float_raise( float_flag_invalid );
2197         }
2198         return 0;
2199     }
2200     return ( a == b ) ||
2201         ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 0 );
2202
2203 }
2204
2205 /*
2206 -------------------------------------------------------------------------------
2207 Returns 1 if the double-precision floating-point value `a' is less than
2208 or equal to the corresponding value `b', and 0 otherwise.  The comparison
2209 is performed according to the IEC/IEEE Standard for Binary Floating-Point
2210 Arithmetic.
2211 -------------------------------------------------------------------------------
2212 */
2213 flag float64_le( float64 a, float64 b )
2214 {
2215     flag aSign, bSign;
2216
2217     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2218               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2219          || (    ( extractFloat64Exp( b ) == 0x7FF )
2220               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2221        ) {
2222         float_raise( float_flag_invalid );
2223         return 0;
2224     }
2225     aSign = extractFloat64Sign( a );
2226     bSign = extractFloat64Sign( b );
2227     if ( aSign != bSign )
2228         return aSign ||
2229             ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) ==
2230               0 );
2231     return ( a == b ) ||
2232         ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
2233 }
2234
2235 /*
2236 -------------------------------------------------------------------------------
2237 Returns 1 if the double-precision floating-point value `a' is less than
2238 the corresponding value `b', and 0 otherwise.  The comparison is performed
2239 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2240 -------------------------------------------------------------------------------
2241 */
2242 flag float64_lt( float64 a, float64 b )
2243 {
2244     flag aSign, bSign;
2245
2246     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2247               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2248          || (    ( extractFloat64Exp( b ) == 0x7FF )
2249               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2250        ) {
2251         float_raise( float_flag_invalid );
2252         return 0;
2253     }
2254     aSign = extractFloat64Sign( a );
2255     bSign = extractFloat64Sign( b );
2256     if ( aSign != bSign )
2257         return aSign &&
2258             ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) !=
2259               0 );
2260     return ( a != b ) &&
2261         ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
2262
2263 }
2264
2265 #ifndef SOFTFLOAT_FOR_GCC
2266 /*
2267 -------------------------------------------------------------------------------
2268 Returns 1 if the double-precision floating-point value `a' is equal to
2269 the corresponding value `b', and 0 otherwise.  The invalid exception is
2270 raised if either operand is a NaN.  Otherwise, the comparison is performed
2271 according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2272 -------------------------------------------------------------------------------
2273 */
2274 flag float64_eq_signaling( float64 a, float64 b )
2275 {
2276
2277     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2278               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2279          || (    ( extractFloat64Exp( b ) == 0x7FF )
2280               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2281        ) {
2282         float_raise( float_flag_invalid );
2283         return 0;
2284     }
2285     return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
2286
2287 }
2288
2289 /*
2290 -------------------------------------------------------------------------------
2291 Returns 1 if the double-precision floating-point value `a' is less than or
2292 equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
2293 cause an exception.  Otherwise, the comparison is performed according to the
2294 IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2295 -------------------------------------------------------------------------------
2296 */
2297 flag float64_le_quiet( float64 a, float64 b )
2298 {
2299     flag aSign, bSign;
2300
2301     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2302               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2303          || (    ( extractFloat64Exp( b ) == 0x7FF )
2304               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2305        ) {
2306         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
2307             float_raise( float_flag_invalid );
2308         }
2309         return 0;
2310     }
2311     aSign = extractFloat64Sign( a );
2312     bSign = extractFloat64Sign( b );
2313     if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
2314     return ( a == b ) || ( aSign ^ ( a < b ) );
2315
2316 }
2317
2318 /*
2319 -------------------------------------------------------------------------------
2320 Returns 1 if the double-precision floating-point value `a' is less than
2321 the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
2322 exception.  Otherwise, the comparison is performed according to the IEC/IEEE
2323 Standard for Binary Floating-Point Arithmetic.
2324 -------------------------------------------------------------------------------
2325 */
2326 flag float64_lt_quiet( float64 a, float64 b )
2327 {
2328     flag aSign, bSign;
2329
2330     if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2331               && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2332          || (    ( extractFloat64Exp( b ) == 0x7FF )
2333               && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2334        ) {
2335         if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
2336             float_raise( float_flag_invalid );
2337         }
2338         return 0;
2339     }
2340     aSign = extractFloat64Sign( a );
2341     bSign = extractFloat64Sign( b );
2342     if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
2343     return ( a != b ) && ( aSign ^ ( a < b ) );
2344
2345 }
2346
2347 #endif