1 /* $NetBSD: softfloat-macros,v 1.1 2002/05/21 23:51:08 bjh21 Exp $ */
5 ===============================================================================
7 This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
8 Arithmetic Package, Release 2a.
10 Written by John R. Hauser. This work was made possible in part by the
11 International Computer Science Institute, located at Suite 600, 1947 Center
12 Street, Berkeley, California 94704. Funding was partially provided by the
13 National Science Foundation under grant MIP-9311980. The original version
14 of this code was written as part of a project to build a fixed-point vector
15 processor in collaboration with the University of California at Berkeley,
16 overseen by Profs. Nelson Morgan and John Wawrzynek. More information
17 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
18 arithmetic/SoftFloat.html'.
20 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
21 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
22 TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
23 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
24 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
26 Derivative works are acceptable, even for commercial purposes, so long as
27 (1) they include prominent notice that the work is derivative, and (2) they
28 include prominent notice akin to these four paragraphs for those parts of
29 this code that are retained.
31 ===============================================================================
35 -------------------------------------------------------------------------------
36 Shifts `a' right by the number of bits given in `count'. If any nonzero
37 bits are shifted off, they are ``jammed'' into the least significant bit of
38 the result by setting the least significant bit to 1. The value of `count'
39 can be arbitrarily large; in particular, if `count' is greater than 32, the
40 result will be either 0 or 1, depending on whether `a' is zero or nonzero.
41 The result is stored in the location pointed to by `zPtr'.
42 -------------------------------------------------------------------------------
44 INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
51 else if ( count < 32 ) {
52 z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
62 -------------------------------------------------------------------------------
63 Shifts `a' right by the number of bits given in `count'. If any nonzero
64 bits are shifted off, they are ``jammed'' into the least significant bit of
65 the result by setting the least significant bit to 1. The value of `count'
66 can be arbitrarily large; in particular, if `count' is greater than 64, the
67 result will be either 0 or 1, depending on whether `a' is zero or nonzero.
68 The result is stored in the location pointed to by `zPtr'.
69 -------------------------------------------------------------------------------
71 INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr )
78 else if ( count < 64 ) {
79 z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
89 -------------------------------------------------------------------------------
90 Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
91 _plus_ the number of bits given in `count'. The shifted result is at most
92 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The
93 bits shifted off form a second 64-bit result as follows: The _last_ bit
94 shifted off is the most-significant bit of the extra result, and the other
95 63 bits of the extra result are all zero if and only if _all_but_the_last_
96 bits shifted off were all zero. This extra result is stored in the location
97 pointed to by `z1Ptr'. The value of `count' can be arbitrarily large.
98 (This routine makes more sense if `a0' and `a1' are considered to form a
99 fixed-point value with binary point between `a0' and `a1'. This fixed-point
100 value is shifted right by the number of bits given in `count', and the
101 integer part of the result is returned at the location pointed to by
102 `z0Ptr'. The fractional part of the result may be slightly corrupted as
103 described above, and is returned at the location pointed to by `z1Ptr'.)
104 -------------------------------------------------------------------------------
107 shift64ExtraRightJamming(
108 bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
111 int8 negCount = ( - count ) & 63;
117 else if ( count < 64 ) {
118 z1 = ( a0<<negCount ) | ( a1 != 0 );
123 z1 = a0 | ( a1 != 0 );
126 z1 = ( ( a0 | a1 ) != 0 );
136 -------------------------------------------------------------------------------
137 Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
138 number of bits given in `count'. Any bits shifted off are lost. The value
139 of `count' can be arbitrarily large; in particular, if `count' is greater
140 than 128, the result will be 0. The result is broken into two 64-bit pieces
141 which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
142 -------------------------------------------------------------------------------
146 bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
149 int8 negCount = ( - count ) & 63;
155 else if ( count < 64 ) {
156 z1 = ( a0<<negCount ) | ( a1>>count );
160 z1 = ( count < 128 ) ? ( a0>>( count & 63 ) ) : 0; /* counts below 64 are handled by the branch above, so the bound here must be 128: for 64..127 the low word is a0 shifted right by count - 64, and only larger counts yield 0 */
169 -------------------------------------------------------------------------------
170 Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
171 number of bits given in `count'. If any nonzero bits are shifted off, they
172 are ``jammed'' into the least significant bit of the result by setting the
173 least significant bit to 1. The value of `count' can be arbitrarily large;
174 in particular, if `count' is greater than 128, the result will be either
175 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
176 nonzero. The result is broken into two 64-bit pieces which are stored at
177 the locations pointed to by `z0Ptr' and `z1Ptr'.
178 -------------------------------------------------------------------------------
181 shift128RightJamming(
182 bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
185 int8 negCount = ( - count ) & 63;
191 else if ( count < 64 ) {
192 z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
197 z1 = a0 | ( a1 != 0 );
199 else if ( count < 128 ) {
200 z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
203 z1 = ( ( a0 | a1 ) != 0 );
213 -------------------------------------------------------------------------------
214 Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
215 by 64 _plus_ the number of bits given in `count'. The shifted result is
216 at most 128 nonzero bits; these are broken into two 64-bit pieces which are
217 stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted
218 off form a third 64-bit result as follows: The _last_ bit shifted off is
219 the most-significant bit of the extra result, and the other 63 bits of the
220 extra result are all zero if and only if _all_but_the_last_ bits shifted off
221 were all zero. This extra result is stored in the location pointed to by
222 `z2Ptr'. The value of `count' can be arbitrarily large.
223 (This routine makes more sense if `a0', `a1', and `a2' are considered
224 to form a fixed-point value with binary point between `a1' and `a2'. This
225 fixed-point value is shifted right by the number of bits given in `count',
226 and the integer part of the result is returned at the locations pointed to
227 by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly
228 corrupted as described above, and is returned at the location pointed to by `z2Ptr'.)
230 -------------------------------------------------------------------------------
233 shift128ExtraRightJamming(
244 int8 negCount = ( - count ) & 63;
254 z1 = ( a0<<negCount ) | ( a1>>count );
266 z1 = a0>>( count & 63 );
269 z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
284 -------------------------------------------------------------------------------
285 Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
286 number of bits given in `count'. Any bits shifted off are lost. The value
287 of `count' must be less than 64. The result is broken into two 64-bit
288 pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
289 -------------------------------------------------------------------------------
293 bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
298 ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
303 -------------------------------------------------------------------------------
304 Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
305 by the number of bits given in `count'. Any bits shifted off are lost.
306 The value of `count' must be less than 64. The result is broken into three
307 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
308 `z1Ptr', and `z2Ptr'.
309 -------------------------------------------------------------------------------
329 negCount = ( ( - count ) & 63 );
340 -------------------------------------------------------------------------------
341 Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
342 value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so
343 any carry out is lost. The result is broken into two 64-bit pieces which
344 are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
345 -------------------------------------------------------------------------------
349 bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
355 *z0Ptr = a0 + b0 + ( z1 < a1 );
360 -------------------------------------------------------------------------------
361 Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
362 192-bit value formed by concatenating `b0', `b1', and `b2'. Addition is
363 modulo 2^192, so any carry out is lost. The result is broken into three
364 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
365 `z1Ptr', and `z2Ptr'.
366 -------------------------------------------------------------------------------
385 carry1 = ( z2 < a2 );
387 carry0 = ( z1 < a1 );
390 z0 += ( z1 < carry1 );
399 -------------------------------------------------------------------------------
400 Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
401 128-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo
402 2^128, so any borrow out (carry out) is lost. The result is broken into two
403 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
405 -------------------------------------------------------------------------------
409 bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
413 *z0Ptr = a0 - b0 - ( a1 < b1 );
418 -------------------------------------------------------------------------------
419 Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
420 from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
421 Subtraction is modulo 2^192, so any borrow out (carry out) is lost. The
422 result is broken into three 64-bit pieces which are stored at the locations
423 pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
424 -------------------------------------------------------------------------------
440 int8 borrow0, borrow1;
443 borrow1 = ( a2 < b2 );
445 borrow0 = ( a1 < b1 );
447 z0 -= ( z1 < borrow1 );
457 -------------------------------------------------------------------------------
458 Multiplies `a' by `b' to obtain a 128-bit product. The product is broken
459 into two 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
461 -------------------------------------------------------------------------------
463 INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr )
465 bits32 aHigh, aLow, bHigh, bLow;
466 bits64 z0, zMiddleA, zMiddleB, z1;
472 z1 = ( (bits64) aLow ) * bLow;
473 zMiddleA = ( (bits64) aLow ) * bHigh;
474 zMiddleB = ( (bits64) aHigh ) * bLow;
475 z0 = ( (bits64) aHigh ) * bHigh;
476 zMiddleA += zMiddleB;
477 z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
480 z0 += ( z1 < zMiddleA );
487 -------------------------------------------------------------------------------
488 Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
489 `b' to obtain a 192-bit product. The product is broken into three 64-bit
490 pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
492 -------------------------------------------------------------------------------
504 bits64 z0, z1, z2, more1;
506 mul64To128( a1, b, &z1, &z2 );
507 mul64To128( a0, b, &z0, &more1 );
508 add128( z0, more1, 0, z1, &z0, &z1 );
516 -------------------------------------------------------------------------------
517 Multiplies the 128-bit value formed by concatenating `a0' and `a1' by the
518 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
519 product. The product is broken into four 64-bit pieces which are stored at
520 the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
521 -------------------------------------------------------------------------------
535 bits64 z0, z1, z2, z3;
538 mul64To128( a1, b1, &z2, &z3 );
539 mul64To128( a1, b0, &z1, &more2 );
540 add128( z1, more2, 0, z2, &z1, &z2 );
541 mul64To128( a0, b0, &z0, &more1 );
542 add128( z0, more1, 0, z1, &z0, &z1 );
543 mul64To128( a0, b1, &more1, &more2 );
544 add128( more1, more2, 0, z2, &more1, &z2 );
545 add128( z0, z1, 0, more1, &z0, &z1 );
554 -------------------------------------------------------------------------------
555 Returns an approximation to the 64-bit integer quotient obtained by dividing
556 `b' into the 128-bit value formed by concatenating `a0' and `a1'. The
557 divisor `b' must be at least 2^63. If q is the exact quotient truncated
558 toward zero, the approximation returned lies between q and q + 2 inclusive.
559 If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
560 unsigned integer is returned.
561 -------------------------------------------------------------------------------
563 static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b )
566 bits64 rem0, rem1, term0, term1;
569 if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
571 z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32;
572 mul64To128( b, z, &term0, &term1 );
573 sub128( a0, a1, term0, term1, &rem0, &rem1 );
574 while ( ( (sbits64) rem0 ) < 0 ) {
575 z -= LIT64( 0x100000000 );
577 add128( rem0, rem1, b0, b1, &rem0, &rem1 );
579 rem0 = ( rem0<<32 ) | ( rem1>>32 );
580 z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
585 #if !defined(SOFTFLOAT_FOR_GCC) || defined(FLOATX80) || defined(FLOAT128)
587 -------------------------------------------------------------------------------
588 Returns an approximation to the square root of the 32-bit significand given
589 by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of
590 `aExp' (the least significant bit) is 1, the integer returned approximates
591 2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp'
592 is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either
593 case, the approximation returned lies strictly within +/-2 of the exact value.
595 -------------------------------------------------------------------------------
597 static bits32 estimateSqrt32( int16 aExp, bits32 a )
599 static const bits16 sqrtOddAdjustments[] = {
600 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
601 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
603 static const bits16 sqrtEvenAdjustments[] = {
604 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
605 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
610 idx = ( a>>27 ) & 15;
612 z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ idx ];
613 z = ( ( a / z )<<14 ) + ( z<<15 );
617 z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ idx ];
619 z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
620 if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );
622 return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 );
628 -------------------------------------------------------------------------------
629 Returns the number of leading 0 bits before the most-significant 1 bit of
630 `a'. If `a' is zero, 32 is returned.
631 -------------------------------------------------------------------------------
633 static int8 countLeadingZeros32( bits32 a )
635 static const int8 countLeadingZerosHigh[] = {
636 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
637 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
638 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
639 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
640 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
641 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
642 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
643 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
644 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
645 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
646 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
647 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
648 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
649 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
650 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
651 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
660 if ( a < 0x1000000 ) {
664 shiftCount += countLeadingZerosHigh[ a>>24 ];
670 -------------------------------------------------------------------------------
671 Returns the number of leading 0 bits before the most-significant 1 bit of
672 `a'. If `a' is zero, 64 is returned.
673 -------------------------------------------------------------------------------
675 static int8 countLeadingZeros64( bits64 a )
680 if ( a < ( (bits64) 1 )<<32 ) {
686 shiftCount += countLeadingZeros32( a );
692 -------------------------------------------------------------------------------
693 Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
694 is equal to the 128-bit value formed by concatenating `b0' and `b1'.
695 Otherwise, returns 0.
696 -------------------------------------------------------------------------------
698 INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
701 return ( a0 == b0 ) && ( a1 == b1 );
706 -------------------------------------------------------------------------------
707 Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
708 than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
709 Otherwise, returns 0.
710 -------------------------------------------------------------------------------
712 INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
715 return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
720 -------------------------------------------------------------------------------
721 Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
722 than the 128-bit value formed by concatenating `b0' and `b1'. Otherwise, returns 0.
724 -------------------------------------------------------------------------------
726 INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
729 return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
734 -------------------------------------------------------------------------------
735 Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
736 not equal to the 128-bit value formed by concatenating `b0' and `b1'.
737 Otherwise, returns 0.
738 -------------------------------------------------------------------------------
740 INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
743 return ( a0 != b0 ) || ( a1 != b1 );