3 ===============================================================================
5 This C source file is part of TestFloat, Release 2a, a package of programs
6 for testing the correctness of floating-point arithmetic complying to the
7 IEC/IEEE Standard for Floating-Point.
9 Written by John R. Hauser. More information is available through the Web
10 page `http://HTTP.CS.Berkeley.EDU/~jhauser/arithmetic/TestFloat.html'.
12 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
13 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
14 TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
15 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
16 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
18 Derivative works are acceptable, even for commercial purposes, so long as
19 (1) they include prominent notice that the work is derivative, and (2) they
20 include prominent notice akin to these four paragraphs for those parts of
21 this code that are retained.
23 ===============================================================================
26 int8 slow_float_rounding_mode;
27 int8 slow_float_exception_flags;
28 int8 slow_float_detect_tininess;
30 int8 slow_floatx80_rounding_precision;
46 static const floatX floatXNaN = { TRUE, FALSE, FALSE, FALSE, 0, { 0, 0 } };
47 static const floatX floatXPositiveZero =
48 { FALSE, FALSE, TRUE, FALSE, 0, { 0, 0 } };
49 static const floatX floatXNegativeZero =
50 { FALSE, FALSE, TRUE, TRUE, 0, { 0, 0 } };
52 static bits128X shortShift128Left( bits128X a, int8 shiftCount )
56 negShiftCount = ( - shiftCount & 63 );
57 a.a0 = ( a.a0<<shiftCount ) | ( a.a1>>negShiftCount );
63 static bits128X shortShift128RightJamming( bits128X a, int8 shiftCount )
68 negShiftCount = ( - shiftCount & 63 );
69 extra = a.a1<<negShiftCount;
70 a.a1 = ( a.a0<<negShiftCount ) | ( a.a1>>shiftCount ) | ( extra != 0 );
76 static bits128X neg128( bits128X a )
90 static bits128X add128( bits128X a, bits128X b )
94 a.a0 += b.a0 + ( a.a1 < b.a1 );
99 static flag eq128( bits128X a, bits128X b )
102 return ( a.a0 == b.a0 ) && ( a.a1 == b.a1 );
106 static flag le128( bits128X a, bits128X b )
109 return ( a.a0 < b.a0 ) || ( ( a.a0 == b.a0 ) && ( a.a1 <= b.a1 ) );
113 static flag lt128( bits128X a, bits128X b )
116 return ( a.a0 < b.a0 ) || ( ( a.a0 == b.a0 ) && ( a.a1 < b.a1 ) );
120 static floatX roundFloatXTo24( flag isTiny, floatX zx )
124 zx.sig.a0 |= ( zx.sig.a1 != 0 );
126 roundBits = zx.sig.a0 & 0xFFFFFFFF;
127 zx.sig.a0 -= roundBits;
129 slow_float_exception_flags |= float_flag_inexact;
130 if ( isTiny ) slow_float_exception_flags |= float_flag_underflow;
131 switch ( slow_float_rounding_mode ) {
132 case float_round_nearest_even:
133 if ( roundBits < 0x80000000 ) goto noIncrement;
134 if ( ( roundBits == 0x80000000 )
135 && ! ( zx.sig.a0 & LIT64( 0x100000000 ) ) ) {
139 case float_round_to_zero:
141 case float_round_down:
142 if ( ! zx.sign ) goto noIncrement;
145 if ( zx.sign ) goto noIncrement;
148 zx.sig.a0 += LIT64( 0x100000000 );
149 if ( zx.sig.a0 == LIT64( 0x0100000000000000 ) ) {
150 zx.sig.a0 = LIT64( 0x0080000000000000 );
159 static floatX roundFloatXTo53( flag isTiny, floatX zx )
163 zx.sig.a0 |= ( zx.sig.a1 != 0 );
165 roundBits = zx.sig.a0 & 7;
166 zx.sig.a0 -= roundBits;
168 slow_float_exception_flags |= float_flag_inexact;
169 if ( isTiny ) slow_float_exception_flags |= float_flag_underflow;
170 switch ( slow_float_rounding_mode ) {
171 case float_round_nearest_even:
172 if ( roundBits < 4 ) goto noIncrement;
173 if ( ( roundBits == 4 ) && ! ( zx.sig.a0 & 8 ) ) goto noIncrement;
175 case float_round_to_zero:
177 case float_round_down:
178 if ( ! zx.sign ) goto noIncrement;
181 if ( zx.sign ) goto noIncrement;
185 if ( zx.sig.a0 == LIT64( 0x0100000000000000 ) ) {
186 zx.sig.a0 = LIT64( 0x0080000000000000 );
195 static floatX roundFloatXTo64( flag isTiny, floatX zx )
199 roundBits = zx.sig.a1 & LIT64( 0x00FFFFFFFFFFFFFF );
200 zx.sig.a1 -= roundBits;
202 slow_float_exception_flags |= float_flag_inexact;
203 if ( isTiny ) slow_float_exception_flags |= float_flag_underflow;
204 switch ( slow_float_rounding_mode ) {
205 case float_round_nearest_even:
206 if ( roundBits < LIT64( 0x0080000000000000 ) ) goto noIncrement;
207 if ( ( roundBits == LIT64( 0x0080000000000000 ) )
208 && ! ( zx.sig.a1 & LIT64( 0x0100000000000000 ) ) ) {
212 case float_round_to_zero:
214 case float_round_down:
215 if ( ! zx.sign ) goto noIncrement;
218 if ( zx.sign ) goto noIncrement;
221 zx.sig.a1 += LIT64( 0x0100000000000000 );
222 zx.sig.a0 += ( zx.sig.a1 == 0 );
223 if ( zx.sig.a0 == LIT64( 0x0100000000000000 ) ) {
224 zx.sig.a0 = LIT64( 0x0080000000000000 );
233 static floatX roundFloatXTo113( flag isTiny, floatX zx )
237 roundBits = zx.sig.a1 & 0x7F;
238 zx.sig.a1 -= roundBits;
240 slow_float_exception_flags |= float_flag_inexact;
241 if ( isTiny ) slow_float_exception_flags |= float_flag_underflow;
242 switch ( slow_float_rounding_mode ) {
243 case float_round_nearest_even:
244 if ( roundBits < 0x40 ) goto noIncrement;
245 if ( ( roundBits == 0x40 )
246 && ! ( zx.sig.a1 & 0x80 ) ) goto noIncrement;
248 case float_round_to_zero:
250 case float_round_down:
251 if ( ! zx.sign ) goto noIncrement;
254 if ( zx.sign ) goto noIncrement;
258 zx.sig.a0 += ( zx.sig.a1 == 0 );
259 if ( zx.sig.a0 == LIT64( 0x0100000000000000 ) ) {
260 zx.sig.a0 = LIT64( 0x0080000000000000 );
269 static floatX int32ToFloatX( int32 a )
277 ax.sig.a0 = ax.sign ? - (bits64) a : a;
285 while ( ax.sig.a0 < LIT64( 0x0080000000000000 ) ) {
293 static int32 floatXToInt32( floatX ax )
295 int8 savedExceptionFlags;
299 if ( ax.isInf || ax.isNaN ) {
300 slow_float_exception_flags |= float_flag_invalid;
301 return ( ax.isInf & ax.sign ) ? (sbits32) 0x80000000 : 0x7FFFFFFF;
303 if ( ax.isZero ) return 0;
304 savedExceptionFlags = slow_float_exception_flags;
305 shiftCount = 52 - ax.exp;
306 if ( 56 < shiftCount ) {
311 while ( 0 < shiftCount ) {
312 ax.sig = shortShift128RightJamming( ax.sig, 1 );
316 ax = roundFloatXTo53( FALSE, ax );
317 ax.sig = shortShift128RightJamming( ax.sig, 3 );
319 if ( ax.sign ) z = - z;
320 if ( ( shiftCount < 0 )
322 || ( ( z != 0 ) && ( ( ax.sign ^ ( z < 0 ) ) != 0 ) )
324 slow_float_exception_flags = savedExceptionFlags | float_flag_invalid;
325 return ax.sign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
331 static floatX int64ToFloatX( int64 a )
339 ax.sig.a1 = ax.sign ? - a : a;
346 ax.sig = shortShift128Left( ax.sig, 56 );
348 while ( ax.sig.a0 < LIT64( 0x0080000000000000 ) ) {
349 ax.sig = shortShift128Left( ax.sig, 1 );
356 static int64 floatXToInt64( floatX ax )
358 int8 savedExceptionFlags;
362 if ( ax.isInf || ax.isNaN ) {
363 slow_float_exception_flags |= float_flag_invalid;
365 ( ax.isInf & ax.sign ) ? (sbits64) LIT64( 0x8000000000000000 )
366 : LIT64( 0x7FFFFFFFFFFFFFFF );
368 if ( ax.isZero ) return 0;
369 savedExceptionFlags = slow_float_exception_flags;
370 shiftCount = 112 - ax.exp;
371 if ( 116 < shiftCount ) {
376 while ( 0 < shiftCount ) {
377 ax.sig = shortShift128RightJamming( ax.sig, 1 );
381 ax = roundFloatXTo113( FALSE, ax );
382 ax.sig = shortShift128RightJamming( ax.sig, 7 );
384 if ( ax.sign ) z = - z;
385 if ( ( shiftCount < 0 )
387 || ( ( z != 0 ) && ( ( ax.sign ^ ( z < 0 ) ) != 0 ) )
389 slow_float_exception_flags = savedExceptionFlags | float_flag_invalid;
391 ax.sign ? (sbits64) LIT64( 0x8000000000000000 )
392 : LIT64( 0x7FFFFFFFFFFFFFFF );
398 static floatX float32ToFloatX( float32 a )
406 ax.sign = ( ( a & 0x80000000 ) != 0 );
407 expField = ( a>>23 ) & 0xFF;
409 ax.sig.a0 = a & 0x007FFFFF;
411 if ( expField == 0 ) {
412 if ( ax.sig.a0 == 0 ) {
420 } while ( ax.sig.a0 < LIT64( 0x0080000000000000 ) );
424 else if ( expField == 0xFF ) {
425 if ( ax.sig.a0 == 0 ) {
433 ax.sig.a0 |= LIT64( 0x0080000000000000 );
434 ax.exp = expField - 0x7F;
440 static float32 floatXToFloat32( floatX zx )
447 if ( zx.isZero ) return zx.sign ? 0x80000000 : 0;
448 if ( zx.isInf ) return zx.sign ? 0xFF800000 : 0x7F800000;
449 if ( zx.isNaN ) return 0xFFFFFFFF;
450 while ( LIT64( 0x0100000000000000 ) <= zx.sig.a0 ) {
451 zx.sig = shortShift128RightJamming( zx.sig, 1 );
454 while ( zx.sig.a0 < LIT64( 0x0080000000000000 ) ) {
455 zx.sig = shortShift128Left( zx.sig, 1 );
460 ( slow_float_detect_tininess == float_tininess_before_rounding )
461 && ( zx.exp + 0x7F <= 0 );
462 zx = roundFloatXTo24( isTiny, zx );
463 expField = zx.exp + 0x7F;
464 if ( 0xFF <= expField ) {
465 slow_float_exception_flags |=
466 float_flag_overflow | float_flag_inexact;
468 switch ( slow_float_rounding_mode ) {
469 case float_round_nearest_even:
470 case float_round_down:
473 case float_round_to_zero:
480 switch ( slow_float_rounding_mode ) {
481 case float_round_nearest_even:
485 case float_round_to_zero:
486 case float_round_down:
493 if ( expField <= 0 ) {
496 expField = zx.exp + 0x7F;
497 if ( expField < -27 ) {
498 zx.sig.a1 = ( zx.sig.a0 != 0 ) || ( zx.sig.a1 != 0 );
502 while ( expField <= 0 ) {
503 zx.sig = shortShift128RightJamming( zx.sig, 1 );
507 zx = roundFloatXTo24( isTiny, zx );
508 expField = ( LIT64( 0x0080000000000000 ) <= zx.sig.a0 ) ? 1 : 0;
512 if ( zx.sign ) z |= 0x80000000;
513 z |= ( zx.sig.a0>>32 ) & 0x007FFFFF;
518 static floatX float64ToFloatX( float64 a )
526 ax.sign = ( ( a & LIT64( 0x8000000000000000 ) ) != 0 );
527 expField = ( a>>52 ) & 0x7FF;
529 ax.sig.a0 = a & LIT64( 0x000FFFFFFFFFFFFF );
530 if ( expField == 0 ) {
531 if ( ax.sig.a0 == 0 ) {
535 expField = 1 - 0x3FF;
539 } while ( ax.sig.a0 < LIT64( 0x0010000000000000 ) );
543 else if ( expField == 0x7FF ) {
544 if ( ax.sig.a0 == 0 ) {
552 ax.exp = expField - 0x3FF;
553 ax.sig.a0 |= LIT64( 0x0010000000000000 );
560 static float64 floatXToFloat64( floatX zx )
567 if ( zx.isZero ) return zx.sign ? LIT64( 0x8000000000000000 ) : 0;
570 zx.sign ? LIT64( 0xFFF0000000000000 )
571 : LIT64( 0x7FF0000000000000 );
573 if ( zx.isNaN ) return LIT64( 0xFFFFFFFFFFFFFFFF );
574 while ( LIT64( 0x0100000000000000 ) <= zx.sig.a0 ) {
575 zx.sig = shortShift128RightJamming( zx.sig, 1 );
578 while ( zx.sig.a0 < LIT64( 0x0080000000000000 ) ) {
579 zx.sig = shortShift128Left( zx.sig, 1 );
584 ( slow_float_detect_tininess == float_tininess_before_rounding )
585 && ( zx.exp + 0x3FF <= 0 );
586 zx = roundFloatXTo53( isTiny, zx );
587 expField = zx.exp + 0x3FF;
588 if ( 0x7FF <= expField ) {
589 slow_float_exception_flags |=
590 float_flag_overflow | float_flag_inexact;
592 switch ( slow_float_rounding_mode ) {
593 case float_round_nearest_even:
594 case float_round_down:
595 z = LIT64( 0xFFF0000000000000 );
597 case float_round_to_zero:
599 z = LIT64( 0xFFEFFFFFFFFFFFFF );
604 switch ( slow_float_rounding_mode ) {
605 case float_round_nearest_even:
607 z = LIT64( 0x7FF0000000000000 );
609 case float_round_to_zero:
610 case float_round_down:
611 z = LIT64( 0x7FEFFFFFFFFFFFFF );
617 if ( expField <= 0 ) {
620 expField = zx.exp + 0x3FF;
621 if ( expField < -56 ) {
622 zx.sig.a1 = ( zx.sig.a0 != 0 ) || ( zx.sig.a1 != 0 );
626 while ( expField <= 0 ) {
627 zx.sig = shortShift128RightJamming( zx.sig, 1 );
631 zx = roundFloatXTo53( isTiny, zx );
632 expField = ( LIT64( 0x0080000000000000 ) <= zx.sig.a0 ) ? 1 : 0;
637 if ( zx.sign ) z |= LIT64( 0x8000000000000000 );
638 z |= zx.sig.a0 & LIT64( 0x000FFFFFFFFFFFFF );
645 static floatX floatx80ToFloatX( floatx80 a )
653 ax.sign = ( ( a.high & 0x8000 ) != 0 );
654 expField = a.high & 0x7FFF;
657 if ( expField == 0 ) {
658 if ( ax.sig.a1 == 0 ) {
662 expField = 1 - 0x3FFF;
663 while ( ax.sig.a1 < LIT64( 0x8000000000000000 ) ) {
670 else if ( expField == 0x7FFF ) {
671 if ( ( ax.sig.a1 & LIT64( 0x7FFFFFFFFFFFFFFF ) ) == 0 ) {
679 ax.exp = expField - 0x3FFF;
681 ax.sig = shortShift128Left( ax.sig, 56 );
686 static floatx80 floatXToFloatx80( floatX zx )
695 z.high = zx.sign ? 0x8000 : 0;
699 z.low = LIT64( 0x8000000000000000 );
700 z.high = zx.sign ? 0xFFFF : 0x7FFF;
704 z.low = LIT64( 0xFFFFFFFFFFFFFFFF );
708 while ( LIT64( 0x0100000000000000 ) <= zx.sig.a0 ) {
709 zx.sig = shortShift128RightJamming( zx.sig, 1 );
712 while ( zx.sig.a0 < LIT64( 0x0080000000000000 ) ) {
713 zx.sig = shortShift128Left( zx.sig, 1 );
718 ( slow_float_detect_tininess == float_tininess_before_rounding )
719 && ( zx.exp + 0x3FFF <= 0 );
720 switch ( slow_floatx80_rounding_precision ) {
722 zx = roundFloatXTo24( isTiny, zx );
725 zx = roundFloatXTo53( isTiny, zx );
728 zx = roundFloatXTo64( isTiny, zx );
731 expField = zx.exp + 0x3FFF;
732 if ( 0x7FFF <= expField ) {
733 slow_float_exception_flags |=
734 float_flag_overflow | float_flag_inexact;
736 switch ( slow_float_rounding_mode ) {
737 case float_round_nearest_even:
738 case float_round_down:
739 z.low = LIT64( 0x8000000000000000 );
742 case float_round_to_zero:
744 switch ( slow_floatx80_rounding_precision ) {
746 z.low = LIT64( 0xFFFFFF0000000000 );
749 z.low = LIT64( 0xFFFFFFFFFFFFF800 );
752 z.low = LIT64( 0xFFFFFFFFFFFFFFFF );
760 switch ( slow_float_rounding_mode ) {
761 case float_round_nearest_even:
763 z.low = LIT64( 0x8000000000000000 );
766 case float_round_to_zero:
767 case float_round_down:
768 switch ( slow_floatx80_rounding_precision ) {
770 z.low = LIT64( 0xFFFFFF0000000000 );
773 z.low = LIT64( 0xFFFFFFFFFFFFF800 );
776 z.low = LIT64( 0xFFFFFFFFFFFFFFFF );
785 if ( expField <= 0 ) {
788 expField = zx.exp + 0x3FFF;
789 if ( expField < -70 ) {
790 zx.sig.a1 = ( zx.sig.a0 != 0 ) || ( zx.sig.a1 != 0 );
794 while ( expField <= 0 ) {
795 zx.sig = shortShift128RightJamming( zx.sig, 1 );
799 switch ( slow_floatx80_rounding_precision ) {
801 zx = roundFloatXTo24( isTiny, zx );
804 zx = roundFloatXTo53( isTiny, zx );
807 zx = roundFloatXTo64( isTiny, zx );
810 expField = ( LIT64( 0x0080000000000000 ) <= zx.sig.a0 ) ? 1 : 0;
812 zx.sig = shortShift128RightJamming( zx.sig, 56 );
815 if ( zx.sign ) z.high |= 0x8000;
824 static floatX float128ToFloatX( float128 a )
832 ax.sign = ( ( a.high & LIT64( 0x8000000000000000 ) ) != 0 );
833 expField = ( a.high>>48 ) & 0x7FFF;
835 ax.sig.a0 = a.high & LIT64( 0x0000FFFFFFFFFFFF );
836 if ( expField == 0 ) {
837 if ( ( ax.sig.a0 == 0 ) && ( ax.sig.a1 == 0 ) ) {
841 expField = 1 - 0x3FFF;
843 ax.sig = shortShift128Left( ax.sig, 1 );
845 } while ( ax.sig.a0 < LIT64( 0x0001000000000000 ) );
849 else if ( expField == 0x7FFF ) {
850 if ( ( ax.sig.a0 == 0 ) && ( ax.sig.a1 == 0 ) ) {
858 ax.exp = expField - 0x3FFF;
859 ax.sig.a0 |= LIT64( 0x0001000000000000 );
861 ax.sig = shortShift128Left( ax.sig, 7 );
866 static float128 floatXToFloat128( floatX zx )
875 z.high = zx.sign ? LIT64( 0x8000000000000000 ) : 0;
881 zx.sign ? LIT64( 0xFFFF000000000000 )
882 : LIT64( 0x7FFF000000000000 );
886 z.high = z.low = LIT64( 0xFFFFFFFFFFFFFFFF );
889 while ( LIT64( 0x0100000000000000 ) <= zx.sig.a0 ) {
890 zx.sig = shortShift128RightJamming( zx.sig, 1 );
893 while ( zx.sig.a0 < LIT64( 0x0080000000000000 ) ) {
894 zx.sig = shortShift128Left( zx.sig, 1 );
899 ( slow_float_detect_tininess == float_tininess_before_rounding )
900 && ( zx.exp + 0x3FFF <= 0 );
901 zx = roundFloatXTo113( isTiny, zx );
902 expField = zx.exp + 0x3FFF;
903 if ( 0x7FFF <= expField ) {
904 slow_float_exception_flags |=
905 float_flag_overflow | float_flag_inexact;
907 switch ( slow_float_rounding_mode ) {
908 case float_round_nearest_even:
909 case float_round_down:
911 z.high = LIT64( 0xFFFF000000000000 );
913 case float_round_to_zero:
915 z.low = LIT64( 0xFFFFFFFFFFFFFFFF );
916 z.high = LIT64( 0xFFFEFFFFFFFFFFFF );
921 switch ( slow_float_rounding_mode ) {
922 case float_round_nearest_even:
925 z.high = LIT64( 0x7FFF000000000000 );
927 case float_round_to_zero:
928 case float_round_down:
929 z.low = LIT64( 0xFFFFFFFFFFFFFFFF );
930 z.high = LIT64( 0x7FFEFFFFFFFFFFFF );
936 if ( expField <= 0 ) {
939 expField = zx.exp + 0x3FFF;
940 if ( expField < -120 ) {
941 zx.sig.a1 = ( zx.sig.a0 != 0 ) || ( zx.sig.a1 != 0 );
945 while ( expField <= 0 ) {
946 zx.sig = shortShift128RightJamming( zx.sig, 1 );
950 zx = roundFloatXTo113( isTiny, zx );
951 expField = ( LIT64( 0x0080000000000000 ) <= zx.sig.a0 ) ? 1 : 0;
953 zx.sig = shortShift128RightJamming( zx.sig, 7 );
957 if ( zx.sign ) z.high |= LIT64( 0x8000000000000000 );
958 z.high |= zx.sig.a0 & LIT64( 0x0000FFFFFFFFFFFF );
965 static floatX floatXInvalid( void )
968 slow_float_exception_flags |= float_flag_invalid;
973 static floatX floatXRoundToInt( floatX ax )
977 if ( ax.isNaN || ax.isInf ) return ax;
978 shiftCount = 112 - ax.exp;
979 if ( shiftCount <= 0 ) return ax;
980 if ( 119 < shiftCount ) {
982 ax.sig.a1 = ! ax.isZero;
986 while ( 0 < shiftCount ) {
987 ax.sig = shortShift128RightJamming( ax.sig, 1 );
992 ax = roundFloatXTo113( FALSE, ax );
993 if ( ( ax.sig.a0 == 0 ) && ( ax.sig.a1 == 0 ) ) ax.isZero = TRUE;
998 static floatX floatXAdd( floatX ax, floatX bx )
1003 if ( ax.isNaN ) return ax;
1004 if ( bx.isNaN ) return bx;
1005 if ( ax.isInf && bx.isInf ) {
1006 if ( ax.sign == bx.sign ) return ax;
1007 return floatXInvalid();
1009 if ( ax.isInf ) return ax;
1010 if ( bx.isInf ) return bx;
1011 if ( ax.isZero && bx.isZero ) {
1012 if ( ax.sign == bx.sign ) return ax;
1013 goto completeCancellation;
1015 if ( ( ax.sign != bx.sign )
1016 && ( ax.exp == bx.exp )
1017 && eq128( ax.sig, bx.sig )
1019 completeCancellation:
1021 ( slow_float_rounding_mode == float_round_down ) ?
1023 : floatXPositiveZero;
1025 if ( ax.isZero ) return bx;
1026 if ( bx.isZero ) return ax;
1027 expDiff = ax.exp - bx.exp;
1028 if ( expDiff < 0 ) {
1031 if ( expDiff < -120 ) {
1036 while ( expDiff < 0 ) {
1037 zx.sig = shortShift128RightJamming( zx.sig, 1 );
1041 if ( ax.sign != bx.sign ) zx.sig = neg128( zx.sig );
1043 zx.sig = add128( zx.sig, bx.sig );
1048 if ( 120 < expDiff ) {
1053 while ( 0 < expDiff ) {
1054 zx.sig = shortShift128RightJamming( zx.sig, 1 );
1058 if ( ax.sign != bx.sign ) zx.sig = neg128( zx.sig );
1060 zx.sig = add128( zx.sig, ax.sig );
1062 if ( zx.sig.a0 & LIT64( 0x8000000000000000 ) ) {
1063 zx.sig = neg128( zx.sig );
1064 zx.sign = ! zx.sign;
1070 static floatX floatXMul( floatX ax, floatX bx )
1075 if ( ax.isNaN ) return ax;
1076 if ( bx.isNaN ) return bx;
1078 if ( bx.isZero ) return floatXInvalid();
1079 if ( bx.sign ) ax.sign = ! ax.sign;
1083 if ( ax.isZero ) return floatXInvalid();
1084 if ( ax.sign ) bx.sign = ! bx.sign;
1089 if ( ax.isZero || bx.isZero ) {
1090 return zx.sign ? floatXNegativeZero : floatXPositiveZero;
1092 zx.exp += bx.exp + 1;
1095 for ( bitNum = 0; bitNum < 119; ++bitNum ) {
1096 if ( bx.sig.a1 & 2 ) zx.sig = add128( zx.sig, ax.sig );
1097 bx.sig = shortShift128RightJamming( bx.sig, 1 );
1098 zx.sig = shortShift128RightJamming( zx.sig, 1 );
1104 static floatX floatXDiv( floatX ax, floatX bx )
1110 if ( ax.isNaN ) return ax;
1111 if ( bx.isNaN ) return bx;
1113 if ( bx.isInf ) return floatXInvalid();
1114 if ( bx.sign ) ax.sign = ! ax.sign;
1118 if ( ax.isZero ) return floatXInvalid();
1119 slow_float_exception_flags |= float_flag_divbyzero;
1120 if ( ax.sign ) bx.sign = ! bx.sign;
1127 if ( ax.isZero || bx.isInf ) {
1128 return zx.sign ? floatXNegativeZero : floatXPositiveZero;
1130 zx.exp -= bx.exp + 1;
1133 negBSig = neg128( bx.sig );
1134 for ( bitNum = 0; bitNum < 120; ++bitNum ) {
1135 if ( le128( bx.sig, ax.sig ) ) {
1137 ax.sig = add128( ax.sig, negBSig );
1139 ax.sig = shortShift128Left( ax.sig, 1 );
1140 zx.sig = shortShift128Left( zx.sig, 1 );
1142 if ( ax.sig.a0 || ax.sig.a1 ) zx.sig.a1 |= 1;
1147 static floatX floatXRem( floatX ax, floatX bx )
1150 flag lastQuotientBit;
1153 if ( ax.isNaN ) return ax;
1154 if ( bx.isNaN ) return bx;
1155 if ( ax.isInf || bx.isZero ) return floatXInvalid();
1156 if ( ax.isZero || bx.isInf ) return ax;
1158 if ( ax.exp < bx.exp ) return ax;
1159 bx.sig = shortShift128Left( bx.sig, 1 );
1160 negBSig = neg128( bx.sig );
1161 while ( bx.exp < ax.exp ) {
1162 if ( le128( bx.sig, ax.sig ) ) ax.sig = add128( ax.sig, negBSig );
1163 ax.sig = shortShift128Left( ax.sig, 1 );
1166 lastQuotientBit = le128( bx.sig, ax.sig );
1167 if ( lastQuotientBit ) ax.sig = add128( ax.sig, negBSig );
1169 ax.sig = neg128( add128( ax.sig, negBSig ) );
1170 if ( lt128( ax.sig, savedASig ) ) {
1171 ax.sign = ! ax.sign;
1173 else if ( lt128( savedASig, ax.sig ) ) {
1177 if ( lastQuotientBit ) {
1178 ax.sign = ! ax.sign;
1184 if ( ( ax.sig.a0 == 0 ) && ( ax.sig.a1 == 0 ) ) ax.isZero = TRUE;
1189 static floatX floatXSqrt( floatX ax )
1192 bits128X bitSig, savedASig;
1195 if ( ax.isNaN || ax.isZero ) return ax;
1196 if ( ax.sign ) return floatXInvalid();
1197 if ( ax.isInf ) return ax;
1200 if ( ( ax.exp & 1 ) == 0 ) ax.sig = shortShift128RightJamming( ax.sig, 1 );
1204 bitSig.a0 = LIT64( 0x0080000000000000 );
1205 for ( bitNum = 0; bitNum < 120; ++bitNum ) {
1207 ax.sig = add128( ax.sig, neg128( zx.sig ) );
1208 ax.sig = shortShift128Left( ax.sig, 1 );
1209 ax.sig = add128( ax.sig, neg128( bitSig ) );
1210 if ( ax.sig.a0 & LIT64( 0x8000000000000000 ) ) {
1211 ax.sig = shortShift128Left( savedASig, 1 );
1214 zx.sig.a1 |= bitSig.a1;
1215 zx.sig.a0 |= bitSig.a0;
1217 bitSig = shortShift128RightJamming( bitSig, 1 );
1219 if ( ax.sig.a0 || ax.sig.a1 ) zx.sig.a1 |= 1;
1224 static flag floatXEq( floatX ax, floatX bx )
1227 if ( ax.isNaN || bx.isNaN ) return FALSE;
1228 if ( ax.isZero && bx.isZero ) return TRUE;
1229 if ( ax.sign != bx.sign ) return FALSE;
1230 if ( ax.isInf || bx.isInf ) return ax.isInf && bx.isInf;
1231 return ( ax.exp == bx.exp ) && eq128( ax.sig, bx.sig );
1235 static flag floatXLe( floatX ax, floatX bx )
1238 if ( ax.isNaN || bx.isNaN ) return FALSE;
1239 if ( ax.isZero && bx.isZero ) return TRUE;
1240 if ( ax.sign != bx.sign ) return ax.sign;
1242 if ( ax.isInf || bx.isZero ) return TRUE;
1243 if ( bx.isInf || ax.isZero ) return FALSE;
1244 if ( bx.exp < ax.exp ) return TRUE;
1245 if ( ax.exp < bx.exp ) return FALSE;
1246 return le128( bx.sig, ax.sig );
1249 if ( bx.isInf || ax.isZero ) return TRUE;
1250 if ( ax.isInf || bx.isZero ) return FALSE;
1251 if ( ax.exp < bx.exp ) return TRUE;
1252 if ( bx.exp < ax.exp ) return FALSE;
1253 return le128( ax.sig, bx.sig );
1258 static flag floatXLt( floatX ax, floatX bx )
1261 if ( ax.isNaN || bx.isNaN ) return FALSE;
1262 if ( ax.isZero && bx.isZero ) return FALSE;
1263 if ( ax.sign != bx.sign ) return ax.sign;
1264 if ( ax.isInf && bx.isInf ) return FALSE;
1266 if ( ax.isInf || bx.isZero ) return TRUE;
1267 if ( bx.isInf || ax.isZero ) return FALSE;
1268 if ( bx.exp < ax.exp ) return TRUE;
1269 if ( ax.exp < bx.exp ) return FALSE;
1270 return lt128( bx.sig, ax.sig );
1273 if ( bx.isInf || ax.isZero ) return TRUE;
1274 if ( ax.isInf || bx.isZero ) return FALSE;
1275 if ( ax.exp < bx.exp ) return TRUE;
1276 if ( bx.exp < ax.exp ) return FALSE;
1277 return lt128( ax.sig, bx.sig );
1282 float32 slow_int32_to_float32( int32 a )
1285 return floatXToFloat32( int32ToFloatX( a ) );
1289 float64 slow_int32_to_float64( int32 a )
1292 return floatXToFloat64( int32ToFloatX( a ) );
1298 floatx80 slow_int32_to_floatx80( int32 a )
1301 return floatXToFloatx80( int32ToFloatX( a ) );
1309 float128 slow_int32_to_float128( int32 a )
1312 return floatXToFloat128( int32ToFloatX( a ) );
1318 float32 slow_int64_to_float32( int64 a )
1321 return floatXToFloat32( int64ToFloatX( a ) );
1325 float64 slow_int64_to_float64( int64 a )
1328 return floatXToFloat64( int64ToFloatX( a ) );
1334 floatx80 slow_int64_to_floatx80( int64 a )
1337 return floatXToFloatx80( int64ToFloatX( a ) );
1345 float128 slow_int64_to_float128( int64 a )
1348 return floatXToFloat128( int64ToFloatX( a ) );
1354 int32 slow_float32_to_int32( float32 a )
1357 return floatXToInt32( float32ToFloatX( a ) );
1361 int32 slow_float32_to_int32_round_to_zero( float32 a )
1363 int8 savedRoundingMode;
1366 savedRoundingMode = slow_float_rounding_mode;
1367 slow_float_rounding_mode = float_round_to_zero;
1368 z = floatXToInt32( float32ToFloatX( a ) );
1369 slow_float_rounding_mode = savedRoundingMode;
1374 int64 slow_float32_to_int64( float32 a )
1377 return floatXToInt64( float32ToFloatX( a ) );
1381 int64 slow_float32_to_int64_round_to_zero( float32 a )
1383 int8 savedRoundingMode;
1386 savedRoundingMode = slow_float_rounding_mode;
1387 slow_float_rounding_mode = float_round_to_zero;
1388 z = floatXToInt64( float32ToFloatX( a ) );
1389 slow_float_rounding_mode = savedRoundingMode;
1394 float64 slow_float32_to_float64( float32 a )
1397 return floatXToFloat64( float32ToFloatX( a ) );
1403 floatx80 slow_float32_to_floatx80( float32 a )
1406 return floatXToFloatx80( float32ToFloatX( a ) );
1414 float128 slow_float32_to_float128( float32 a )
1417 return floatXToFloat128( float32ToFloatX( a ) );
1423 float32 slow_float32_round_to_int( float32 a )
1426 return floatXToFloat32( floatXRoundToInt( float32ToFloatX( a ) ) );
1430 float32 slow_float32_add( float32 a, float32 b )
1435 floatXAdd( float32ToFloatX( a ), float32ToFloatX( b ) ) );
1439 float32 slow_float32_sub( float32 a, float32 b )
1445 floatXAdd( float32ToFloatX( a ), float32ToFloatX( b ) ) );
1449 float32 slow_float32_mul( float32 a, float32 b )
1454 floatXMul( float32ToFloatX( a ), float32ToFloatX( b ) ) );
1458 float32 slow_float32_div( float32 a, float32 b )
1463 floatXDiv( float32ToFloatX( a ), float32ToFloatX( b ) ) );
1467 float32 slow_float32_rem( float32 a, float32 b )
1472 floatXRem( float32ToFloatX( a ), float32ToFloatX( b ) ) );
1476 float32 slow_float32_sqrt( float32 a )
1479 return floatXToFloat32( floatXSqrt( float32ToFloatX( a ) ) );
1483 flag slow_float32_eq( float32 a, float32 b )
1486 return floatXEq( float32ToFloatX( a ), float32ToFloatX( b ) );
1490 flag slow_float32_le( float32 a, float32 b )
1494 ax = float32ToFloatX( a );
1495 bx = float32ToFloatX( b );
1496 if ( ax.isNaN || bx.isNaN ) {
1497 slow_float_exception_flags |= float_flag_invalid;
1499 return floatXLe( ax, bx );
1503 flag slow_float32_lt( float32 a, float32 b )
1507 ax = float32ToFloatX( a );
1508 bx = float32ToFloatX( b );
1509 if ( ax.isNaN || bx.isNaN ) {
1510 slow_float_exception_flags |= float_flag_invalid;
1512 return floatXLt( ax, bx );
1516 flag slow_float32_eq_signaling( float32 a, float32 b )
1520 ax = float32ToFloatX( a );
1521 bx = float32ToFloatX( b );
1522 if ( ax.isNaN || bx.isNaN ) {
1523 slow_float_exception_flags |= float_flag_invalid;
1525 return floatXEq( ax, bx );
1529 flag slow_float32_le_quiet( float32 a, float32 b )
1532 return floatXLe( float32ToFloatX( a ), float32ToFloatX( b ) );
1536 flag slow_float32_lt_quiet( float32 a, float32 b )
1539 return floatXLt( float32ToFloatX( a ), float32ToFloatX( b ) );
1543 int32 slow_float64_to_int32( float64 a )
1546 return floatXToInt32( float64ToFloatX( a ) );
1550 int32 slow_float64_to_int32_round_to_zero( float64 a )
1552 int8 savedRoundingMode;
1555 savedRoundingMode = slow_float_rounding_mode;
1556 slow_float_rounding_mode = float_round_to_zero;
1557 z = floatXToInt32( float64ToFloatX( a ) );
1558 slow_float_rounding_mode = savedRoundingMode;
1563 int64 slow_float64_to_int64( float64 a )
1566 return floatXToInt64( float64ToFloatX( a ) );
1570 int64 slow_float64_to_int64_round_to_zero( float64 a )
1572 int8 savedRoundingMode;
1575 savedRoundingMode = slow_float_rounding_mode;
1576 slow_float_rounding_mode = float_round_to_zero;
1577 z = floatXToInt64( float64ToFloatX( a ) );
1578 slow_float_rounding_mode = savedRoundingMode;
1583 float32 slow_float64_to_float32( float64 a )
1586 return floatXToFloat32( float64ToFloatX( a ) );
1592 floatx80 slow_float64_to_floatx80( float64 a )
1595 return floatXToFloatx80( float64ToFloatX( a ) );
1603 float128 slow_float64_to_float128( float64 a )
1606 return floatXToFloat128( float64ToFloatX( a ) );
1612 float64 slow_float64_round_to_int( float64 a )
1615 return floatXToFloat64( floatXRoundToInt( float64ToFloatX( a ) ) );
1619 float64 slow_float64_add( float64 a, float64 b )
1624 floatXAdd( float64ToFloatX( a ), float64ToFloatX( b ) ) );
1628 float64 slow_float64_sub( float64 a, float64 b )
1631 b ^= LIT64( 0x8000000000000000 );
1634 floatXAdd( float64ToFloatX( a ), float64ToFloatX( b ) ) );
1638 float64 slow_float64_mul( float64 a, float64 b )
1643 floatXMul( float64ToFloatX( a ), float64ToFloatX( b ) ) );
1647 float64 slow_float64_div( float64 a, float64 b )
1652 floatXDiv( float64ToFloatX( a ), float64ToFloatX( b ) ) );
1656 float64 slow_float64_rem( float64 a, float64 b )
1661 floatXRem( float64ToFloatX( a ), float64ToFloatX( b ) ) );
1665 float64 slow_float64_sqrt( float64 a )
1668 return floatXToFloat64( floatXSqrt( float64ToFloatX( a ) ) );
1672 flag slow_float64_eq( float64 a, float64 b )
1675 return floatXEq( float64ToFloatX( a ), float64ToFloatX( b ) );
1679 flag slow_float64_le( float64 a, float64 b )
1683 ax = float64ToFloatX( a );
1684 bx = float64ToFloatX( b );
1685 if ( ax.isNaN || bx.isNaN ) {
1686 slow_float_exception_flags |= float_flag_invalid;
1688 return floatXLe( ax, bx );
1692 flag slow_float64_lt( float64 a, float64 b )
1696 ax = float64ToFloatX( a );
1697 bx = float64ToFloatX( b );
1698 if ( ax.isNaN || bx.isNaN ) {
1699 slow_float_exception_flags |= float_flag_invalid;
1701 return floatXLt( ax, bx );
1705 flag slow_float64_eq_signaling( float64 a, float64 b )
1709 ax = float64ToFloatX( a );
1710 bx = float64ToFloatX( b );
1711 if ( ax.isNaN || bx.isNaN ) {
1712 slow_float_exception_flags |= float_flag_invalid;
1714 return floatXEq( ax, bx );
1718 flag slow_float64_le_quiet( float64 a, float64 b )
1721 return floatXLe( float64ToFloatX( a ), float64ToFloatX( b ) );
1725 flag slow_float64_lt_quiet( float64 a, float64 b )
1728 return floatXLt( float64ToFloatX( a ), float64ToFloatX( b ) );
1734 int32 slow_floatx80_to_int32( floatx80 a )
1737 return floatXToInt32( floatx80ToFloatX( a ) );
1741 int32 slow_floatx80_to_int32_round_to_zero( floatx80 a )
1743 int8 savedRoundingMode;
1746 savedRoundingMode = slow_float_rounding_mode;
1747 slow_float_rounding_mode = float_round_to_zero;
1748 z = floatXToInt32( floatx80ToFloatX( a ) );
1749 slow_float_rounding_mode = savedRoundingMode;
1754 int64 slow_floatx80_to_int64( floatx80 a )
1757 return floatXToInt64( floatx80ToFloatX( a ) );
1761 int64 slow_floatx80_to_int64_round_to_zero( floatx80 a )
1763 int8 savedRoundingMode;
1766 savedRoundingMode = slow_float_rounding_mode;
1767 slow_float_rounding_mode = float_round_to_zero;
1768 z = floatXToInt64( floatx80ToFloatX( a ) );
1769 slow_float_rounding_mode = savedRoundingMode;
1774 float32 slow_floatx80_to_float32( floatx80 a )
1777 return floatXToFloat32( floatx80ToFloatX( a ) );
1781 float64 slow_floatx80_to_float64( floatx80 a )
1784 return floatXToFloat64( floatx80ToFloatX( a ) );
1790 float128 slow_floatx80_to_float128( floatx80 a )
1793 return floatXToFloat128( floatx80ToFloatX( a ) );
1799 floatx80 slow_floatx80_round_to_int( floatx80 a )
1802 return floatXToFloatx80( floatXRoundToInt( floatx80ToFloatX( a ) ) );
1806 floatx80 slow_floatx80_add( floatx80 a, floatx80 b )
1811 floatXAdd( floatx80ToFloatX( a ), floatx80ToFloatX( b ) ) );
1815 floatx80 slow_floatx80_sub( floatx80 a, floatx80 b )
1821 floatXAdd( floatx80ToFloatX( a ), floatx80ToFloatX( b ) ) );
1825 floatx80 slow_floatx80_mul( floatx80 a, floatx80 b )
1830 floatXMul( floatx80ToFloatX( a ), floatx80ToFloatX( b ) ) );
1834 floatx80 slow_floatx80_div( floatx80 a, floatx80 b )
1839 floatXDiv( floatx80ToFloatX( a ), floatx80ToFloatX( b ) ) );
1843 floatx80 slow_floatx80_rem( floatx80 a, floatx80 b )
1848 floatXRem( floatx80ToFloatX( a ), floatx80ToFloatX( b ) ) );
1852 floatx80 slow_floatx80_sqrt( floatx80 a )
1855 return floatXToFloatx80( floatXSqrt( floatx80ToFloatX( a ) ) );
1859 flag slow_floatx80_eq( floatx80 a, floatx80 b )
1862 return floatXEq( floatx80ToFloatX( a ), floatx80ToFloatX( b ) );
1866 flag slow_floatx80_le( floatx80 a, floatx80 b )
1870 ax = floatx80ToFloatX( a );
1871 bx = floatx80ToFloatX( b );
1872 if ( ax.isNaN || bx.isNaN ) {
1873 slow_float_exception_flags |= float_flag_invalid;
1875 return floatXLe( ax, bx );
1879 flag slow_floatx80_lt( floatx80 a, floatx80 b )
1883 ax = floatx80ToFloatX( a );
1884 bx = floatx80ToFloatX( b );
1885 if ( ax.isNaN || bx.isNaN ) {
1886 slow_float_exception_flags |= float_flag_invalid;
1888 return floatXLt( ax, bx );
1892 flag slow_floatx80_eq_signaling( floatx80 a, floatx80 b )
1896 ax = floatx80ToFloatX( a );
1897 bx = floatx80ToFloatX( b );
1898 if ( ax.isNaN || bx.isNaN ) {
1899 slow_float_exception_flags |= float_flag_invalid;
1901 return floatXEq( ax, bx );
1905 flag slow_floatx80_le_quiet( floatx80 a, floatx80 b )
1908 return floatXLe( floatx80ToFloatX( a ), floatx80ToFloatX( b ) );
1912 flag slow_floatx80_lt_quiet( floatx80 a, floatx80 b )
1915 return floatXLt( floatx80ToFloatX( a ), floatx80ToFloatX( b ) );
1923 int32 slow_float128_to_int32( float128 a )
1926 return floatXToInt32( float128ToFloatX( a ) );
1930 int32 slow_float128_to_int32_round_to_zero( float128 a )
1932 int8 savedRoundingMode;
1935 savedRoundingMode = slow_float_rounding_mode;
1936 slow_float_rounding_mode = float_round_to_zero;
1937 z = floatXToInt32( float128ToFloatX( a ) );
1938 slow_float_rounding_mode = savedRoundingMode;
1943 int64 slow_float128_to_int64( float128 a )
1946 return floatXToInt64( float128ToFloatX( a ) );
1950 int64 slow_float128_to_int64_round_to_zero( float128 a )
1952 int8 savedRoundingMode;
1955 savedRoundingMode = slow_float_rounding_mode;
1956 slow_float_rounding_mode = float_round_to_zero;
1957 z = floatXToInt64( float128ToFloatX( a ) );
1958 slow_float_rounding_mode = savedRoundingMode;
1963 float32 slow_float128_to_float32( float128 a )
1966 return floatXToFloat32( float128ToFloatX( a ) );
1970 float64 slow_float128_to_float64( float128 a )
1973 return floatXToFloat64( float128ToFloatX( a ) );
1979 floatx80 slow_float128_to_floatx80( float128 a )
1982 return floatXToFloatx80( float128ToFloatX( a ) );
1988 float128 slow_float128_round_to_int( float128 a )
1991 return floatXToFloat128( floatXRoundToInt( float128ToFloatX( a ) ) );
1995 float128 slow_float128_add( float128 a, float128 b )
2000 floatXAdd( float128ToFloatX( a ), float128ToFloatX( b ) ) );
2004 float128 slow_float128_sub( float128 a, float128 b )
2007 b.high ^= LIT64( 0x8000000000000000 );
2010 floatXAdd( float128ToFloatX( a ), float128ToFloatX( b ) ) );
2014 float128 slow_float128_mul( float128 a, float128 b )
2019 floatXMul( float128ToFloatX( a ), float128ToFloatX( b ) ) );
2023 float128 slow_float128_div( float128 a, float128 b )
2028 floatXDiv( float128ToFloatX( a ), float128ToFloatX( b ) ) );
2032 float128 slow_float128_rem( float128 a, float128 b )
2037 floatXRem( float128ToFloatX( a ), float128ToFloatX( b ) ) );
2041 float128 slow_float128_sqrt( float128 a )
2044 return floatXToFloat128( floatXSqrt( float128ToFloatX( a ) ) );
2048 flag slow_float128_eq( float128 a, float128 b )
2051 return floatXEq( float128ToFloatX( a ), float128ToFloatX( b ) );
2055 flag slow_float128_le( float128 a, float128 b )
2059 ax = float128ToFloatX( a );
2060 bx = float128ToFloatX( b );
2061 if ( ax.isNaN || bx.isNaN ) {
2062 slow_float_exception_flags |= float_flag_invalid;
2064 return floatXLe( ax, bx );
2068 flag slow_float128_lt( float128 a, float128 b )
2072 ax = float128ToFloatX( a );
2073 bx = float128ToFloatX( b );
2074 if ( ax.isNaN || bx.isNaN ) {
2075 slow_float_exception_flags |= float_flag_invalid;
2077 return floatXLt( ax, bx );
2081 flag slow_float128_eq_signaling( float128 a, float128 b )
2085 ax = float128ToFloatX( a );
2086 bx = float128ToFloatX( b );
2087 if ( ax.isNaN || bx.isNaN ) {
2088 slow_float_exception_flags |= float_flag_invalid;
2090 return floatXEq( ax, bx );
2094 flag slow_float128_le_quiet( float128 a, float128 b )
2097 return floatXLe( float128ToFloatX( a ), float128ToFloatX( b ) );
2101 flag slow_float128_lt_quiet( float128 a, float128 b )
2104 return floatXLt( float128ToFloatX( a ), float128ToFloatX( b ) );