1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/APSInt.h"
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/FoldingSet.h"
19 #include "llvm/ADT/Hashing.h"
20 #include "llvm/ADT/StringExtras.h"
21 #include "llvm/ADT/StringRef.h"
22 #include "llvm/Support/Debug.h"
23 #include "llvm/Support/ErrorHandling.h"
24 #include "llvm/Support/MathExtras.h"
25 #include "llvm/Support/raw_ostream.h"
31 /// A macro used to combine two fcCategory enums into one key which can be used
32 /// in a switch statement to classify how the interaction of two APFloat's
33 /// categories affects an operation.
35 /// TODO: If clang source code is ever allowed to use constexpr in its own
36 /// codebase, change this into a static inline function.
37 #define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
39 /* Assumed in hexadecimal significand parsing, and conversion to
40 hexadecimal strings. */
41 static_assert(integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
44 /* Represents floating point arithmetic semantics. */
46 /* The largest E such that 2^E is representable; this matches the
47 definition of IEEE 754. */
48 APFloatBase::ExponentType maxExponent;
50 /* The smallest E such that 2^E is a normalized number; this
51 matches the definition of IEEE 754. */
52 APFloatBase::ExponentType minExponent;
54 /* Number of bits in the significand. This includes the integer
56 unsigned int precision;
58 /* Number of bits actually used in the semantics. */
59 unsigned int sizeInBits;
62 static const fltSemantics semIEEEhalf = {15, -14, 11, 16};
63 static const fltSemantics semIEEEsingle = {127, -126, 24, 32};
64 static const fltSemantics semIEEEdouble = {1023, -1022, 53, 64};
65 static const fltSemantics semIEEEquad = {16383, -16382, 113, 128};
66 static const fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
67 static const fltSemantics semBogus = {0, 0, 0, 0};
69 /* The PowerPC format consists of two doubles. It does not map cleanly
70 onto the usual format above. It is approximated using twice the
71 mantissa bits. Note that for exponents near the double minimum,
72 we no longer can represent the full 106 mantissa bits, so those
73 will be treated as denormal numbers.
75 FIXME: While this approximation is equivalent to what GCC uses for
76 compile-time arithmetic on PPC double-double numbers, it is not able
77 to represent all possible values held by a PPC double-double number,
78 for example: (long double) 1.0 + (long double) 0x1p-106
79 Should this be replaced by a full emulation of PPC double-double?
81 Note: we need to make the value different from semBogus as otherwise
82 an unsafe optimization may collapse both values to a single address,
83 and we heavily rely on them having distinct addresses. */
84 static const fltSemantics semPPCDoubleDouble = {-1, 0, 0, 0};
86 /* There are temporary semantics for the real PPCDoubleDouble implementation.
87 Currently, APFloat of PPCDoubleDouble holds one PPCDoubleDoubleImpl as the
88 high part of double double, and one IEEEdouble as the low part, so that
89 the old operations operate on PPCDoubleDoubleImpl, while the newly added
90 operations also populate the IEEEdouble.
92 TODO: Once all functions support DoubleAPFloat mode, we'll change all
93 PPCDoubleDoubleImpl to IEEEdouble and remove PPCDoubleDoubleImpl. */
94 static const fltSemantics semPPCDoubleDoubleImpl = {1023, -1022 + 53, 53 + 53,
97 const fltSemantics &APFloatBase::IEEEhalf() {
100 const fltSemantics &APFloatBase::IEEEsingle() {
101 return semIEEEsingle;
103 const fltSemantics &APFloatBase::IEEEdouble() {
104 return semIEEEdouble;
106 const fltSemantics &APFloatBase::IEEEquad() {
109 const fltSemantics &APFloatBase::x87DoubleExtended() {
110 return semX87DoubleExtended;
112 const fltSemantics &APFloatBase::Bogus() {
115 const fltSemantics &APFloatBase::PPCDoubleDouble() {
116 return semPPCDoubleDouble;
119 /* A tight upper bound on number of parts required to hold the value
122 power * 815 / (351 * integerPartWidth) + 1
124 However, whilst the result may require only this many parts,
125 because we are multiplying two values to get it, the
126 multiplication may require an extra part with the excess part
127 being zero (consider the trivial case of 1 * 1, tcFullMultiply
128 requires two parts to hold the single-part result). So we add an
129 extra one to guarantee enough space whilst multiplying. */
130 const unsigned int maxExponent = 16383;
131 const unsigned int maxPrecision = 113;
132 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
133 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815)
134 / (351 * integerPartWidth));
136 unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {
137 return semantics.precision;
139 APFloatBase::ExponentType
140 APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) {
141 return semantics.maxExponent;
143 APFloatBase::ExponentType
144 APFloatBase::semanticsMinExponent(const fltSemantics &semantics) {
145 return semantics.minExponent;
147 unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {
148 return semantics.sizeInBits;
151 unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) {
152 return Sem.sizeInBits;
155 /* A bunch of private, handy routines. */
157 static inline unsigned int
158 partCountForBits(unsigned int bits)
160 return ((bits) + integerPartWidth - 1) / integerPartWidth;
163 /* Returns 0U-9U. Return values >= 10U are not digits. */
164 static inline unsigned int
165 decDigitValue(unsigned int c)
170 /* Return the value of a decimal exponent of the form
173 If the exponent overflows, returns a large exponent with the
176 readExponent(StringRef::iterator begin, StringRef::iterator end)
179 unsigned int absExponent;
180 const unsigned int overlargeExponent = 24000; /* FIXME. */
181 StringRef::iterator p = begin;
183 assert(p != end && "Exponent has no digits");
185 isNegative = (*p == '-');
186 if (*p == '-' || *p == '+') {
188 assert(p != end && "Exponent has no digits");
191 absExponent = decDigitValue(*p++);
192 assert(absExponent < 10U && "Invalid character in exponent");
194 for (; p != end; ++p) {
197 value = decDigitValue(*p);
198 assert(value < 10U && "Invalid character in exponent");
200 value += absExponent * 10;
201 if (absExponent >= overlargeExponent) {
202 absExponent = overlargeExponent;
203 p = end; /* outwit assert below */
209 assert(p == end && "Invalid exponent in exponent");
212 return -(int) absExponent;
214 return (int) absExponent;
217 /* This is ugly and needs cleaning up, but I don't immediately see
218 how whilst remaining safe. */
220 totalExponent(StringRef::iterator p, StringRef::iterator end,
221 int exponentAdjustment)
223 int unsignedExponent;
224 bool negative, overflow;
227 assert(p != end && "Exponent has no digits");
229 negative = *p == '-';
230 if (*p == '-' || *p == '+') {
232 assert(p != end && "Exponent has no digits");
235 unsignedExponent = 0;
237 for (; p != end; ++p) {
240 value = decDigitValue(*p);
241 assert(value < 10U && "Invalid character in exponent");
243 unsignedExponent = unsignedExponent * 10 + value;
244 if (unsignedExponent > 32767) {
250 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
254 exponent = unsignedExponent;
256 exponent = -exponent;
257 exponent += exponentAdjustment;
258 if (exponent > 32767 || exponent < -32768)
263 exponent = negative ? -32768: 32767;
268 static StringRef::iterator
269 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
270 StringRef::iterator *dot)
272 StringRef::iterator p = begin;
274 while (p != end && *p == '0')
277 if (p != end && *p == '.') {
280 assert(end - begin != 1 && "Significand has no digits");
282 while (p != end && *p == '0')
289 /* Given a normal decimal floating point number of the form
293 where the decimal point and exponent are optional, fill out the
294 structure D. Exponent is appropriate if the significand is
295 treated as an integer, and normalizedExponent if the significand
296 is taken to have the decimal point after a single leading
299 If the value is zero, V->firstSigDigit points to a non-digit, and
300 the return exponent is zero.
303 const char *firstSigDigit;
304 const char *lastSigDigit;
306 int normalizedExponent;
310 interpretDecimal(StringRef::iterator begin, StringRef::iterator end,
313 StringRef::iterator dot = end;
314 StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot);
316 D->firstSigDigit = p;
318 D->normalizedExponent = 0;
320 for (; p != end; ++p) {
322 assert(dot == end && "String contains multiple dots");
327 if (decDigitValue(*p) >= 10U)
332 assert((*p == 'e' || *p == 'E') && "Invalid character in significand");
333 assert(p != begin && "Significand has no digits");
334 assert((dot == end || p - begin != 1) && "Significand has no digits");
336 /* p points to the first non-digit in the string */
337 D->exponent = readExponent(p + 1, end);
339 /* Implied decimal point? */
344 /* If number is all zeroes accept any exponent. */
345 if (p != D->firstSigDigit) {
346 /* Drop insignificant trailing zeroes. */
351 while (p != begin && *p == '0');
352 while (p != begin && *p == '.');
355 /* Adjust the exponents for any decimal point. */
356 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
357 D->normalizedExponent = (D->exponent +
358 static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
359 - (dot > D->firstSigDigit && dot < p)));
365 /* Return the trailing fraction of a hexadecimal number.
366 DIGITVALUE is the first hex digit of the fraction, P points to
369 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
370 unsigned int digitValue)
372 unsigned int hexDigit;
374 /* If the first trailing digit isn't 0 or 8 we can work out the
375 fraction immediately. */
377 return lfMoreThanHalf;
378 else if (digitValue < 8 && digitValue > 0)
379 return lfLessThanHalf;
381 // Otherwise we need to find the first non-zero digit.
382 while (p != end && (*p == '0' || *p == '.'))
385 assert(p != end && "Invalid trailing hexadecimal fraction!");
387 hexDigit = hexDigitValue(*p);
389 /* If we ran off the end it is exactly zero or one-half, otherwise
392 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
394 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
397 /* Return the fraction lost were a bignum truncated losing the least
398 significant BITS bits. */
400 lostFractionThroughTruncation(const integerPart *parts,
401 unsigned int partCount,
406 lsb = APInt::tcLSB(parts, partCount);
408 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
410 return lfExactlyZero;
412 return lfExactlyHalf;
413 if (bits <= partCount * integerPartWidth &&
414 APInt::tcExtractBit(parts, bits - 1))
415 return lfMoreThanHalf;
417 return lfLessThanHalf;
420 /* Shift DST right BITS bits noting lost fraction. */
422 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
424 lostFraction lost_fraction;
426 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
428 APInt::tcShiftRight(dst, parts, bits);
430 return lost_fraction;
433 /* Combine the effect of two lost fractions. */
435 combineLostFractions(lostFraction moreSignificant,
436 lostFraction lessSignificant)
438 if (lessSignificant != lfExactlyZero) {
439 if (moreSignificant == lfExactlyZero)
440 moreSignificant = lfLessThanHalf;
441 else if (moreSignificant == lfExactlyHalf)
442 moreSignificant = lfMoreThanHalf;
445 return moreSignificant;
448 /* The error from the true value, in half-ulps, on multiplying two
449 floating point numbers, which differ from the value they
450 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
451 than the returned value.
453 See "How to Read Floating Point Numbers Accurately" by William D
456 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
458 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
460 if (HUerr1 + HUerr2 == 0)
461 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
463 return inexactMultiply + 2 * (HUerr1 + HUerr2);
466 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
467 when the least significant BITS are truncated. BITS cannot be
470 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
472 unsigned int count, partBits;
473 integerPart part, boundary;
478 count = bits / integerPartWidth;
479 partBits = bits % integerPartWidth + 1;
481 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
484 boundary = (integerPart) 1 << (partBits - 1);
489 if (part - boundary <= boundary - part)
490 return part - boundary;
492 return boundary - part;
495 if (part == boundary) {
498 return ~(integerPart) 0; /* A lot. */
501 } else if (part == boundary - 1) {
504 return ~(integerPart) 0; /* A lot. */
509 return ~(integerPart) 0; /* A lot. */
512 /* Place pow(5, power) in DST, and return the number of parts used.
513 DST must be at least one part larger than size of the answer. */
515 powerOf5(integerPart *dst, unsigned int power)
517 static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
519 integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
520 pow5s[0] = 78125 * 5;
522 unsigned int partsCount[16] = { 1 };
523 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
525 assert(power <= maxExponent);
530 *p1 = firstEightPowers[power & 7];
536 for (unsigned int n = 0; power; power >>= 1, n++) {
541 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
543 pc = partsCount[n - 1];
544 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
546 if (pow5[pc - 1] == 0)
554 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
556 if (p2[result - 1] == 0)
559 /* Now result is in p1 with partsCount parts and p2 is scratch
570 APInt::tcAssign(dst, p1, result);
575 /* Zero at the end to avoid modular arithmetic when adding one; used
576 when rounding up during hexadecimal output. */
577 static const char hexDigitsLower[] = "0123456789abcdef0";
578 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
579 static const char infinityL[] = "infinity";
580 static const char infinityU[] = "INFINITY";
581 static const char NaNL[] = "nan";
582 static const char NaNU[] = "NAN";
584 /* Write out an integerPart in hexadecimal, starting with the most
585 significant nibble. Write out exactly COUNT hexdigits, return
588 partAsHex (char *dst, integerPart part, unsigned int count,
589 const char *hexDigitChars)
591 unsigned int result = count;
593 assert(count != 0 && count <= integerPartWidth / 4);
595 part >>= (integerPartWidth - 4 * count);
597 dst[count] = hexDigitChars[part & 0xf];
604 /* Write out an unsigned decimal integer. */
606 writeUnsignedDecimal (char *dst, unsigned int n)
622 /* Write out a signed decimal integer. */
624 writeSignedDecimal (char *dst, int value)
628 dst = writeUnsignedDecimal(dst, -(unsigned) value);
630 dst = writeUnsignedDecimal(dst, value);
637 void IEEEFloat::initialize(const fltSemantics *ourSemantics) {
640 semantics = ourSemantics;
643 significand.parts = new integerPart[count];
646 void IEEEFloat::freeSignificand() {
648 delete [] significand.parts;
651 void IEEEFloat::assign(const IEEEFloat &rhs) {
652 assert(semantics == rhs.semantics);
655 category = rhs.category;
656 exponent = rhs.exponent;
657 if (isFiniteNonZero() || category == fcNaN)
658 copySignificand(rhs);
661 void IEEEFloat::copySignificand(const IEEEFloat &rhs) {
662 assert(isFiniteNonZero() || category == fcNaN);
663 assert(rhs.partCount() >= partCount());
665 APInt::tcAssign(significandParts(), rhs.significandParts(),
669 /* Make this number a NaN, with an arbitrary but deterministic value
670 for the significand. If double or longer, this is a signalling NaN,
671 which may not be ideal. If float, this is QNaN(0). */
672 void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
676 integerPart *significand = significandParts();
677 unsigned numParts = partCount();
679 // Set the significand bits to the fill.
680 if (!fill || fill->getNumWords() < numParts)
681 APInt::tcSet(significand, 0, numParts);
683 APInt::tcAssign(significand, fill->getRawData(),
684 std::min(fill->getNumWords(), numParts));
686 // Zero out the excess bits of the significand.
687 unsigned bitsToPreserve = semantics->precision - 1;
688 unsigned part = bitsToPreserve / 64;
689 bitsToPreserve %= 64;
690 significand[part] &= ((1ULL << bitsToPreserve) - 1);
691 for (part++; part != numParts; ++part)
692 significand[part] = 0;
695 unsigned QNaNBit = semantics->precision - 2;
698 // We always have to clear the QNaN bit to make it an SNaN.
699 APInt::tcClearBit(significand, QNaNBit);
701 // If there are no bits set in the payload, we have to set
702 // *something* to make it a NaN instead of an infinity;
703 // conventionally, this is the next bit down from the QNaN bit.
704 if (APInt::tcIsZero(significand, numParts))
705 APInt::tcSetBit(significand, QNaNBit - 1);
707 // We always have to set the QNaN bit to make it a QNaN.
708 APInt::tcSetBit(significand, QNaNBit);
711 // For x87 extended precision, we want to make a NaN, not a
712 // pseudo-NaN. Maybe we should expose the ability to make
714 if (semantics == &semX87DoubleExtended)
715 APInt::tcSetBit(significand, QNaNBit + 1);
718 IEEEFloat &IEEEFloat::operator=(const IEEEFloat &rhs) {
720 if (semantics != rhs.semantics) {
722 initialize(rhs.semantics);
730 IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) {
733 semantics = rhs.semantics;
734 significand = rhs.significand;
735 exponent = rhs.exponent;
736 category = rhs.category;
739 rhs.semantics = &semBogus;
743 bool IEEEFloat::isDenormal() const {
744 return isFiniteNonZero() && (exponent == semantics->minExponent) &&
745 (APInt::tcExtractBit(significandParts(),
746 semantics->precision - 1) == 0);
749 bool IEEEFloat::isSmallest() const {
750 // The smallest number by magnitude in our format will be the smallest
751 // denormal, i.e. the floating point number with exponent being minimum
752 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
753 return isFiniteNonZero() && exponent == semantics->minExponent &&
754 significandMSB() == 0;
757 bool IEEEFloat::isSignificandAllOnes() const {
758 // Test if the significand excluding the integral bit is all ones. This allows
759 // us to test for binade boundaries.
760 const integerPart *Parts = significandParts();
761 const unsigned PartCount = partCount();
762 for (unsigned i = 0; i < PartCount - 1; i++)
766 // Set the unused high bits to all ones when we compare.
767 const unsigned NumHighBits =
768 PartCount*integerPartWidth - semantics->precision + 1;
769 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to "
770 "fill than integerPartWidth");
771 const integerPart HighBitFill =
772 ~integerPart(0) << (integerPartWidth - NumHighBits);
773 if (~(Parts[PartCount - 1] | HighBitFill))
779 bool IEEEFloat::isSignificandAllZeros() const {
780 // Test if the significand excluding the integral bit is all zeros. This
781 // allows us to test for binade boundaries.
782 const integerPart *Parts = significandParts();
783 const unsigned PartCount = partCount();
785 for (unsigned i = 0; i < PartCount - 1; i++)
789 const unsigned NumHighBits =
790 PartCount*integerPartWidth - semantics->precision + 1;
791 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to "
792 "clear than integerPartWidth");
793 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
795 if (Parts[PartCount - 1] & HighBitMask)
801 bool IEEEFloat::isLargest() const {
802 // The largest number by magnitude in our format will be the floating point
803 // number with maximum exponent and with significand that is all ones.
804 return isFiniteNonZero() && exponent == semantics->maxExponent
805 && isSignificandAllOnes();
808 bool IEEEFloat::isInteger() const {
809 // This could be made more efficient; I'm going for obviously correct.
810 if (!isFinite()) return false;
811 IEEEFloat truncated = *this;
812 truncated.roundToIntegral(rmTowardZero);
813 return compare(truncated) == cmpEqual;
816 bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const {
819 if (semantics != rhs.semantics ||
820 category != rhs.category ||
823 if (category==fcZero || category==fcInfinity)
826 if (isFiniteNonZero() && exponent != rhs.exponent)
829 return std::equal(significandParts(), significandParts() + partCount(),
830 rhs.significandParts());
833 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) {
834 initialize(&ourSemantics);
838 exponent = ourSemantics.precision - 1;
839 significandParts()[0] = value;
840 normalize(rmNearestTiesToEven, lfExactlyZero);
843 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) {
844 initialize(&ourSemantics);
849 // Delegate to the previous constructor, because later copy constructor may
850 // actually inspects category, which can't be garbage.
851 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, uninitializedTag tag)
852 : IEEEFloat(ourSemantics) {}
854 IEEEFloat::IEEEFloat(const IEEEFloat &rhs) {
855 initialize(rhs.semantics);
859 IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) {
860 *this = std::move(rhs);
863 IEEEFloat::~IEEEFloat() { freeSignificand(); }
865 // Profile - This method 'profiles' an APFloat for use with FoldingSet.
866 void IEEEFloat::Profile(FoldingSetNodeID &ID) const {
867 ID.Add(bitcastToAPInt());
870 unsigned int IEEEFloat::partCount() const {
871 return partCountForBits(semantics->precision + 1);
874 const integerPart *IEEEFloat::significandParts() const {
875 return const_cast<IEEEFloat *>(this)->significandParts();
878 integerPart *IEEEFloat::significandParts() {
880 return significand.parts;
882 return &significand.part;
885 void IEEEFloat::zeroSignificand() {
886 APInt::tcSet(significandParts(), 0, partCount());
889 /* Increment an fcNormal floating point number's significand. */
890 void IEEEFloat::incrementSignificand() {
893 carry = APInt::tcIncrement(significandParts(), partCount());
895 /* Our callers should never cause us to overflow. */
900 /* Add the significand of the RHS. Returns the carry flag. */
901 integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) {
904 parts = significandParts();
906 assert(semantics == rhs.semantics);
907 assert(exponent == rhs.exponent);
909 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
912 /* Subtract the significand of the RHS with a borrow flag. Returns
914 integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
915 integerPart borrow) {
918 parts = significandParts();
920 assert(semantics == rhs.semantics);
921 assert(exponent == rhs.exponent);
923 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
927 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
928 on to the full-precision result of the multiplication. Returns the
930 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
931 const IEEEFloat *addend) {
932 unsigned int omsb; // One, not zero, based MSB.
933 unsigned int partsCount, newPartsCount, precision;
934 integerPart *lhsSignificand;
935 integerPart scratch[4];
936 integerPart *fullSignificand;
937 lostFraction lost_fraction;
940 assert(semantics == rhs.semantics);
942 precision = semantics->precision;
944 // Allocate space for twice as many bits as the original significand, plus one
945 // extra bit for the addition to overflow into.
946 newPartsCount = partCountForBits(precision * 2 + 1);
948 if (newPartsCount > 4)
949 fullSignificand = new integerPart[newPartsCount];
951 fullSignificand = scratch;
953 lhsSignificand = significandParts();
954 partsCount = partCount();
956 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
957 rhs.significandParts(), partsCount, partsCount);
959 lost_fraction = lfExactlyZero;
960 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
961 exponent += rhs.exponent;
963 // Assume the operands involved in the multiplication are single-precision
964 // FP, and the two multiplicants are:
965 // *this = a23 . a22 ... a0 * 2^e1
966 // rhs = b23 . b22 ... b0 * 2^e2
967 // the result of multiplication is:
968 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
969 // Note that there are three significant bits at the left-hand side of the
970 // radix point: two for the multiplication, and an overflow bit for the
971 // addition (that will always be zero at this point). Move the radix point
972 // toward left by two bits, and adjust exponent accordingly.
975 if (addend && addend->isNonZero()) {
976 // The intermediate result of the multiplication has "2 * precision"
977 // signicant bit; adjust the addend to be consistent with mul result.
979 Significand savedSignificand = significand;
980 const fltSemantics *savedSemantics = semantics;
981 fltSemantics extendedSemantics;
983 unsigned int extendedPrecision;
985 // Normalize our MSB to one below the top bit to allow for overflow.
986 extendedPrecision = 2 * precision + 1;
987 if (omsb != extendedPrecision - 1) {
988 assert(extendedPrecision > omsb);
989 APInt::tcShiftLeft(fullSignificand, newPartsCount,
990 (extendedPrecision - 1) - omsb);
991 exponent -= (extendedPrecision - 1) - omsb;
994 /* Create new semantics. */
995 extendedSemantics = *semantics;
996 extendedSemantics.precision = extendedPrecision;
998 if (newPartsCount == 1)
999 significand.part = fullSignificand[0];
1001 significand.parts = fullSignificand;
1002 semantics = &extendedSemantics;
1004 IEEEFloat extendedAddend(*addend);
1005 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
1006 assert(status == opOK);
1009 // Shift the significand of the addend right by one bit. This guarantees
1010 // that the high bit of the significand is zero (same as fullSignificand),
1011 // so the addition will overflow (if it does overflow at all) into the top bit.
1012 lost_fraction = extendedAddend.shiftSignificandRight(1);
1013 assert(lost_fraction == lfExactlyZero &&
1014 "Lost precision while shifting addend for fused-multiply-add.");
1016 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
1018 /* Restore our state. */
1019 if (newPartsCount == 1)
1020 fullSignificand[0] = significand.part;
1021 significand = savedSignificand;
1022 semantics = savedSemantics;
1024 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1027 // Convert the result having "2 * precision" significant-bits back to the one
1028 // having "precision" significant-bits. First, move the radix point from
1029 // poision "2*precision - 1" to "precision - 1". The exponent need to be
1030 // adjusted by "2*precision - 1" - "precision - 1" = "precision".
1031 exponent -= precision + 1;
1033 // In case MSB resides at the left-hand side of radix point, shift the
1034 // mantissa right by some amount to make sure the MSB reside right before
1035 // the radix point (i.e. "MSB . rest-significant-bits").
1037 // Note that the result is not normalized when "omsb < precision". So, the
1038 // caller needs to call IEEEFloat::normalize() if normalized value is
1040 if (omsb > precision) {
1041 unsigned int bits, significantParts;
1044 bits = omsb - precision;
1045 significantParts = partCountForBits(omsb);
1046 lf = shiftRight(fullSignificand, significantParts, bits);
1047 lost_fraction = combineLostFractions(lf, lost_fraction);
1051 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
1053 if (newPartsCount > 4)
1054 delete [] fullSignificand;
1056 return lost_fraction;
1059 /* Multiply the significands of LHS and RHS to DST. */
1060 lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
1061 unsigned int bit, i, partsCount;
1062 const integerPart *rhsSignificand;
1063 integerPart *lhsSignificand, *dividend, *divisor;
1064 integerPart scratch[4];
1065 lostFraction lost_fraction;
1067 assert(semantics == rhs.semantics);
1069 lhsSignificand = significandParts();
1070 rhsSignificand = rhs.significandParts();
1071 partsCount = partCount();
1074 dividend = new integerPart[partsCount * 2];
1078 divisor = dividend + partsCount;
1080 /* Copy the dividend and divisor as they will be modified in-place. */
1081 for (i = 0; i < partsCount; i++) {
1082 dividend[i] = lhsSignificand[i];
1083 divisor[i] = rhsSignificand[i];
1084 lhsSignificand[i] = 0;
1087 exponent -= rhs.exponent;
1089 unsigned int precision = semantics->precision;
1091 /* Normalize the divisor. */
1092 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1095 APInt::tcShiftLeft(divisor, partsCount, bit);
1098 /* Normalize the dividend. */
1099 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1102 APInt::tcShiftLeft(dividend, partsCount, bit);
1105 /* Ensure the dividend >= divisor initially for the loop below.
1106 Incidentally, this means that the division loop below is
1107 guaranteed to set the integer bit to one. */
1108 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1110 APInt::tcShiftLeft(dividend, partsCount, 1);
1111 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1114 /* Long division. */
1115 for (bit = precision; bit; bit -= 1) {
1116 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1117 APInt::tcSubtract(dividend, divisor, 0, partsCount);
1118 APInt::tcSetBit(lhsSignificand, bit - 1);
1121 APInt::tcShiftLeft(dividend, partsCount, 1);
1124 /* Figure out the lost fraction. */
1125 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1128 lost_fraction = lfMoreThanHalf;
1130 lost_fraction = lfExactlyHalf;
1131 else if (APInt::tcIsZero(dividend, partsCount))
1132 lost_fraction = lfExactlyZero;
1134 lost_fraction = lfLessThanHalf;
1139 return lost_fraction;
1142 unsigned int IEEEFloat::significandMSB() const {
1143 return APInt::tcMSB(significandParts(), partCount());
1146 unsigned int IEEEFloat::significandLSB() const {
1147 return APInt::tcLSB(significandParts(), partCount());
1150 /* Note that a zero result is NOT normalized to fcZero. */
1151 lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
1152 /* Our exponent should not overflow. */
1153 assert((ExponentType) (exponent + bits) >= exponent);
1157 return shiftRight(significandParts(), partCount(), bits);
1160 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
1161 void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
1162 assert(bits < semantics->precision);
1165 unsigned int partsCount = partCount();
1167 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1170 assert(!APInt::tcIsZero(significandParts(), partsCount));
1174 IEEEFloat::cmpResult
1175 IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const {
1178 assert(semantics == rhs.semantics);
1179 assert(isFiniteNonZero());
1180 assert(rhs.isFiniteNonZero());
1182 compare = exponent - rhs.exponent;
1184 /* If exponents are equal, do an unsigned bignum comparison of the
1187 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1191 return cmpGreaterThan;
1192 else if (compare < 0)
1198 /* Handle overflow. Sign is preserved. We either become infinity or
1199 the largest finite number. */
1200 IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
1202 if (rounding_mode == rmNearestTiesToEven ||
1203 rounding_mode == rmNearestTiesToAway ||
1204 (rounding_mode == rmTowardPositive && !sign) ||
1205 (rounding_mode == rmTowardNegative && sign)) {
1206 category = fcInfinity;
1207 return (opStatus) (opOverflow | opInexact);
1210 /* Otherwise we become the largest finite number. */
1211 category = fcNormal;
1212 exponent = semantics->maxExponent;
1213 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1214 semantics->precision);
1219 /* Returns TRUE if, when truncating the current number, with BIT the
1220 new LSB, with the given lost fraction and rounding mode, the result
1221 would need to be rounded away from zero (i.e., by increasing the
1222 signficand). This routine must work for fcZero of both signs, and
1223 fcNormal numbers. */
1224 bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,
1225 lostFraction lost_fraction,
1226 unsigned int bit) const {
1227 /* NaNs and infinities should not have lost fractions. */
1228 assert(isFiniteNonZero() || category == fcZero);
1230 /* Current callers never pass this so we don't handle it. */
1231 assert(lost_fraction != lfExactlyZero);
1233 switch (rounding_mode) {
1234 case rmNearestTiesToAway:
1235 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1237 case rmNearestTiesToEven:
1238 if (lost_fraction == lfMoreThanHalf)
1241 /* Our zeroes don't have a significand to test. */
1242 if (lost_fraction == lfExactlyHalf && category != fcZero)
1243 return APInt::tcExtractBit(significandParts(), bit);
1250 case rmTowardPositive:
1253 case rmTowardNegative:
1256 llvm_unreachable("Invalid rounding mode found");
1259 IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
1260 lostFraction lost_fraction) {
1261 unsigned int omsb; /* One, not zero, based MSB. */
1264 if (!isFiniteNonZero())
1267 /* Before rounding normalize the exponent of fcNormal numbers. */
1268 omsb = significandMSB() + 1;
1271 /* OMSB is numbered from 1. We want to place it in the integer
1272 bit numbered PRECISION if possible, with a compensating change in
1274 exponentChange = omsb - semantics->precision;
1276 /* If the resulting exponent is too high, overflow according to
1277 the rounding mode. */
1278 if (exponent + exponentChange > semantics->maxExponent)
1279 return handleOverflow(rounding_mode);
1281 /* Subnormal numbers have exponent minExponent, and their MSB
1282 is forced based on that. */
1283 if (exponent + exponentChange < semantics->minExponent)
1284 exponentChange = semantics->minExponent - exponent;
1286 /* Shifting left is easy as we don't lose precision. */
1287 if (exponentChange < 0) {
1288 assert(lost_fraction == lfExactlyZero);
1290 shiftSignificandLeft(-exponentChange);
1295 if (exponentChange > 0) {
1298 /* Shift right and capture any new lost fraction. */
1299 lf = shiftSignificandRight(exponentChange);
1301 lost_fraction = combineLostFractions(lf, lost_fraction);
1303 /* Keep OMSB up-to-date. */
1304 if (omsb > (unsigned) exponentChange)
1305 omsb -= exponentChange;
1311 /* Now round the number according to rounding_mode given the lost
1314 /* As specified in IEEE 754, since we do not trap we do not report
1315 underflow for exact results. */
1316 if (lost_fraction == lfExactlyZero) {
1317 /* Canonicalize zeroes. */
1324 /* Increment the significand if we're rounding away from zero. */
1325 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1327 exponent = semantics->minExponent;
1329 incrementSignificand();
1330 omsb = significandMSB() + 1;
1332 /* Did the significand increment overflow? */
1333 if (omsb == (unsigned) semantics->precision + 1) {
1334 /* Renormalize by incrementing the exponent and shifting our
1335 significand right one. However if we already have the
1336 maximum exponent we overflow to infinity. */
1337 if (exponent == semantics->maxExponent) {
1338 category = fcInfinity;
1340 return (opStatus) (opOverflow | opInexact);
1343 shiftSignificandRight(1);
1349 /* The normal case - we were and are not denormal, and any
1350 significand increment above didn't overflow. */
1351 if (omsb == semantics->precision)
1354 /* We have a non-zero denormal. */
1355 assert(omsb < semantics->precision);
1357 /* Canonicalize zeroes. */
1361 /* The fcZero case is a denormal that underflowed to zero. */
1362 return (opStatus) (opUnderflow | opInexact);
1365 IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
1367 switch (PackCategoriesIntoKey(category, rhs.category)) {
1369 llvm_unreachable(nullptr);
1371 case PackCategoriesIntoKey(fcNaN, fcZero):
1372 case PackCategoriesIntoKey(fcNaN, fcNormal):
1373 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1374 case PackCategoriesIntoKey(fcNaN, fcNaN):
1375 case PackCategoriesIntoKey(fcNormal, fcZero):
1376 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1377 case PackCategoriesIntoKey(fcInfinity, fcZero):
1380 case PackCategoriesIntoKey(fcZero, fcNaN):
1381 case PackCategoriesIntoKey(fcNormal, fcNaN):
1382 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1383 // We need to be sure to flip the sign here for subtraction because we
1384 // don't have a separate negate operation so -NaN becomes 0 - NaN here.
1385 sign = rhs.sign ^ subtract;
1387 copySignificand(rhs);
1390 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1391 case PackCategoriesIntoKey(fcZero, fcInfinity):
1392 category = fcInfinity;
1393 sign = rhs.sign ^ subtract;
1396 case PackCategoriesIntoKey(fcZero, fcNormal):
1398 sign = rhs.sign ^ subtract;
1401 case PackCategoriesIntoKey(fcZero, fcZero):
1402 /* Sign depends on rounding mode; handled by caller. */
1405 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1406 /* Differently signed infinities can only be validly
1408 if (((sign ^ rhs.sign)!=0) != subtract) {
1415 case PackCategoriesIntoKey(fcNormal, fcNormal):
1420 /* Add or subtract two normal numbers. */
1421 lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
1424 lostFraction lost_fraction;
1427 /* Determine if the operation on the absolute values is effectively
1428 an addition or subtraction. */
1429 subtract ^= static_cast<bool>(sign ^ rhs.sign);
1431 /* Are we bigger exponent-wise than the RHS? */
1432 bits = exponent - rhs.exponent;
1434 /* Subtraction is more subtle than one might naively expect. */
1436 IEEEFloat temp_rhs(rhs);
1440 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
1441 lost_fraction = lfExactlyZero;
1442 } else if (bits > 0) {
1443 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1444 shiftSignificandLeft(1);
1447 lost_fraction = shiftSignificandRight(-bits - 1);
1448 temp_rhs.shiftSignificandLeft(1);
1453 carry = temp_rhs.subtractSignificand
1454 (*this, lost_fraction != lfExactlyZero);
1455 copySignificand(temp_rhs);
1458 carry = subtractSignificand
1459 (temp_rhs, lost_fraction != lfExactlyZero);
1462 /* Invert the lost fraction - it was on the RHS and
1464 if (lost_fraction == lfLessThanHalf)
1465 lost_fraction = lfMoreThanHalf;
1466 else if (lost_fraction == lfMoreThanHalf)
1467 lost_fraction = lfLessThanHalf;
1469 /* The code above is intended to ensure that no borrow is
1475 IEEEFloat temp_rhs(rhs);
1477 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1478 carry = addSignificand(temp_rhs);
1480 lost_fraction = shiftSignificandRight(-bits);
1481 carry = addSignificand(rhs);
1484 /* We have a guard bit; generating a carry cannot happen. */
1489 return lost_fraction;
1492 IEEEFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
1493 switch (PackCategoriesIntoKey(category, rhs.category)) {
1495 llvm_unreachable(nullptr);
1497 case PackCategoriesIntoKey(fcNaN, fcZero):
1498 case PackCategoriesIntoKey(fcNaN, fcNormal):
1499 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1500 case PackCategoriesIntoKey(fcNaN, fcNaN):
1504 case PackCategoriesIntoKey(fcZero, fcNaN):
1505 case PackCategoriesIntoKey(fcNormal, fcNaN):
1506 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1509 copySignificand(rhs);
1512 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1513 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1514 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1515 category = fcInfinity;
1518 case PackCategoriesIntoKey(fcZero, fcNormal):
1519 case PackCategoriesIntoKey(fcNormal, fcZero):
1520 case PackCategoriesIntoKey(fcZero, fcZero):
1524 case PackCategoriesIntoKey(fcZero, fcInfinity):
1525 case PackCategoriesIntoKey(fcInfinity, fcZero):
1529 case PackCategoriesIntoKey(fcNormal, fcNormal):
1534 IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
1535 switch (PackCategoriesIntoKey(category, rhs.category)) {
1537 llvm_unreachable(nullptr);
1539 case PackCategoriesIntoKey(fcZero, fcNaN):
1540 case PackCategoriesIntoKey(fcNormal, fcNaN):
1541 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1543 copySignificand(rhs);
1544 case PackCategoriesIntoKey(fcNaN, fcZero):
1545 case PackCategoriesIntoKey(fcNaN, fcNormal):
1546 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1547 case PackCategoriesIntoKey(fcNaN, fcNaN):
1549 case PackCategoriesIntoKey(fcInfinity, fcZero):
1550 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1551 case PackCategoriesIntoKey(fcZero, fcInfinity):
1552 case PackCategoriesIntoKey(fcZero, fcNormal):
1555 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1559 case PackCategoriesIntoKey(fcNormal, fcZero):
1560 category = fcInfinity;
1563 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1564 case PackCategoriesIntoKey(fcZero, fcZero):
1568 case PackCategoriesIntoKey(fcNormal, fcNormal):
1573 IEEEFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
1574 switch (PackCategoriesIntoKey(category, rhs.category)) {
1576 llvm_unreachable(nullptr);
1578 case PackCategoriesIntoKey(fcNaN, fcZero):
1579 case PackCategoriesIntoKey(fcNaN, fcNormal):
1580 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1581 case PackCategoriesIntoKey(fcNaN, fcNaN):
1582 case PackCategoriesIntoKey(fcZero, fcInfinity):
1583 case PackCategoriesIntoKey(fcZero, fcNormal):
1584 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1587 case PackCategoriesIntoKey(fcZero, fcNaN):
1588 case PackCategoriesIntoKey(fcNormal, fcNaN):
1589 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1592 copySignificand(rhs);
1595 case PackCategoriesIntoKey(fcNormal, fcZero):
1596 case PackCategoriesIntoKey(fcInfinity, fcZero):
1597 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1598 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1599 case PackCategoriesIntoKey(fcZero, fcZero):
1603 case PackCategoriesIntoKey(fcNormal, fcNormal):
1609 void IEEEFloat::changeSign() {
1610 /* Look mummy, this one's easy. */
1614 void IEEEFloat::clearSign() {
1615 /* So is this one. */
1619 void IEEEFloat::copySign(const IEEEFloat &rhs) {
1624 /* Normalized addition or subtraction. */
1625 IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
1626 roundingMode rounding_mode,
1630 fs = addOrSubtractSpecials(rhs, subtract);
1632 /* This return code means it was not a simple case. */
1633 if (fs == opDivByZero) {
1634 lostFraction lost_fraction;
1636 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1637 fs = normalize(rounding_mode, lost_fraction);
1639 /* Can only be zero if we lost no fraction. */
1640 assert(category != fcZero || lost_fraction == lfExactlyZero);
1643 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1644 positive zero unless rounding to minus infinity, except that
1645 adding two like-signed zeroes gives that zero. */
1646 if (category == fcZero) {
1647 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
1648 sign = (rounding_mode == rmTowardNegative);
1654 /* Normalized addition. */
1655 IEEEFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs,
1656 roundingMode rounding_mode) {
1657 return addOrSubtract(rhs, rounding_mode, false);
1660 /* Normalized subtraction. */
1661 IEEEFloat::opStatus IEEEFloat::subtract(const IEEEFloat &rhs,
1662 roundingMode rounding_mode) {
1663 return addOrSubtract(rhs, rounding_mode, true);
1666 /* Normalized multiply. */
1667 IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs,
1668 roundingMode rounding_mode) {
1672 fs = multiplySpecials(rhs);
1674 if (isFiniteNonZero()) {
1675 lostFraction lost_fraction = multiplySignificand(rhs, nullptr);
1676 fs = normalize(rounding_mode, lost_fraction);
1677 if (lost_fraction != lfExactlyZero)
1678 fs = (opStatus) (fs | opInexact);
1684 /* Normalized divide. */
1685 IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs,
1686 roundingMode rounding_mode) {
1690 fs = divideSpecials(rhs);
1692 if (isFiniteNonZero()) {
1693 lostFraction lost_fraction = divideSignificand(rhs);
1694 fs = normalize(rounding_mode, lost_fraction);
1695 if (lost_fraction != lfExactlyZero)
1696 fs = (opStatus) (fs | opInexact);
1702 /* Normalized remainder. This is not currently correct in all cases. */
1703 IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) {
1705 IEEEFloat V = *this;
1706 unsigned int origSign = sign;
1708 fs = V.divide(rhs, rmNearestTiesToEven);
1709 if (fs == opDivByZero)
1712 int parts = partCount();
1713 integerPart *x = new integerPart[parts];
1715 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1716 rmNearestTiesToEven, &ignored);
1717 if (fs==opInvalidOp) {
1722 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1723 rmNearestTiesToEven);
1724 assert(fs==opOK); // should always work
1726 fs = V.multiply(rhs, rmNearestTiesToEven);
1727 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1729 fs = subtract(V, rmNearestTiesToEven);
1730 assert(fs==opOK || fs==opInexact); // likewise
1733 sign = origSign; // IEEE754 requires this
1738 /* Normalized llvm frem (C fmod).
1739 This is not currently correct in all cases. */
1740 IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) {
1742 fs = modSpecials(rhs);
1744 if (isFiniteNonZero() && rhs.isFiniteNonZero()) {
1745 IEEEFloat V = *this;
1746 unsigned int origSign = sign;
1748 fs = V.divide(rhs, rmNearestTiesToEven);
1749 if (fs == opDivByZero)
1752 int parts = partCount();
1753 integerPart *x = new integerPart[parts];
1755 fs = V.convertToInteger(x, parts * integerPartWidth, true,
1756 rmTowardZero, &ignored);
1757 if (fs==opInvalidOp) {
1762 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1763 rmNearestTiesToEven);
1764 assert(fs==opOK); // should always work
1766 fs = V.multiply(rhs, rmNearestTiesToEven);
1767 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1769 fs = subtract(V, rmNearestTiesToEven);
1770 assert(fs==opOK || fs==opInexact); // likewise
1773 sign = origSign; // IEEE754 requires this
1779 /* Normalized fused-multiply-add. */
1780 IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand,
1781 const IEEEFloat &addend,
1782 roundingMode rounding_mode) {
1785 /* Post-multiplication sign, before addition. */
1786 sign ^= multiplicand.sign;
1788 /* If and only if all arguments are normal do we need to do an
1789 extended-precision calculation. */
1790 if (isFiniteNonZero() &&
1791 multiplicand.isFiniteNonZero() &&
1792 addend.isFinite()) {
1793 lostFraction lost_fraction;
1795 lost_fraction = multiplySignificand(multiplicand, &addend);
1796 fs = normalize(rounding_mode, lost_fraction);
1797 if (lost_fraction != lfExactlyZero)
1798 fs = (opStatus) (fs | opInexact);
1800 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1801 positive zero unless rounding to minus infinity, except that
1802 adding two like-signed zeroes gives that zero. */
1803 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign)
1804 sign = (rounding_mode == rmTowardNegative);
1806 fs = multiplySpecials(multiplicand);
1808 /* FS can only be opOK or opInvalidOp. There is no more work
1809 to do in the latter case. The IEEE-754R standard says it is
1810 implementation-defined in this case whether, if ADDEND is a
1811 quiet NaN, we raise invalid op; this implementation does so.
1813 If we need to do the addition we can do so with normal
1816 fs = addOrSubtract(addend, rounding_mode, false);
1822 /* Rounding-mode corrrect round to integral value. */
1823 IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) {
1826 // If the exponent is large enough, we know that this value is already
1827 // integral, and the arithmetic below would potentially cause it to saturate
1828 // to +/-Inf. Bail out early instead.
1829 if (isFiniteNonZero() && exponent+1 >= (int)semanticsPrecision(*semantics))
1832 // The algorithm here is quite simple: we add 2^(p-1), where p is the
1833 // precision of our format, and then subtract it back off again. The choice
1834 // of rounding modes for the addition/subtraction determines the rounding mode
1835 // for our integral rounding as well.
1836 // NOTE: When the input value is negative, we do subtraction followed by
1837 // addition instead.
1838 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
1839 IntegerConstant <<= semanticsPrecision(*semantics)-1;
1840 IEEEFloat MagicConstant(*semantics);
1841 fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
1842 rmNearestTiesToEven);
1843 MagicConstant.copySign(*this);
1848 // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly.
1849 bool inputSign = isNegative();
1851 fs = add(MagicConstant, rounding_mode);
1852 if (fs != opOK && fs != opInexact)
1855 fs = subtract(MagicConstant, rounding_mode);
1857 // Restore the input sign.
1858 if (inputSign != isNegative())
1865 /* Comparison requires normalized numbers. */
1866 IEEEFloat::cmpResult IEEEFloat::compare(const IEEEFloat &rhs) const {
1869 assert(semantics == rhs.semantics);
1871 switch (PackCategoriesIntoKey(category, rhs.category)) {
1873 llvm_unreachable(nullptr);
1875 case PackCategoriesIntoKey(fcNaN, fcZero):
1876 case PackCategoriesIntoKey(fcNaN, fcNormal):
1877 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1878 case PackCategoriesIntoKey(fcNaN, fcNaN):
1879 case PackCategoriesIntoKey(fcZero, fcNaN):
1880 case PackCategoriesIntoKey(fcNormal, fcNaN):
1881 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1882 return cmpUnordered;
1884 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1885 case PackCategoriesIntoKey(fcInfinity, fcZero):
1886 case PackCategoriesIntoKey(fcNormal, fcZero):
1890 return cmpGreaterThan;
1892 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1893 case PackCategoriesIntoKey(fcZero, fcInfinity):
1894 case PackCategoriesIntoKey(fcZero, fcNormal):
1896 return cmpGreaterThan;
1900 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1901 if (sign == rhs.sign)
1906 return cmpGreaterThan;
1908 case PackCategoriesIntoKey(fcZero, fcZero):
1911 case PackCategoriesIntoKey(fcNormal, fcNormal):
1915 /* Two normal numbers. Do they have the same sign? */
1916 if (sign != rhs.sign) {
1918 result = cmpLessThan;
1920 result = cmpGreaterThan;
1922 /* Compare absolute values; invert result if negative. */
1923 result = compareAbsoluteValue(rhs);
1926 if (result == cmpLessThan)
1927 result = cmpGreaterThan;
1928 else if (result == cmpGreaterThan)
1929 result = cmpLessThan;
1936 /// IEEEFloat::convert - convert a value of one floating point type to another.
1937 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
1938 /// records whether the transformation lost information, i.e. whether
1939 /// converting the result back to the original type will produce the
1940 /// original value (this is almost the same as return value==fsOK, but there
1941 /// are edge cases where this is not so).
1943 IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
1944 roundingMode rounding_mode,
1946 lostFraction lostFraction;
1947 unsigned int newPartCount, oldPartCount;
1950 const fltSemantics &fromSemantics = *semantics;
1952 lostFraction = lfExactlyZero;
1953 newPartCount = partCountForBits(toSemantics.precision + 1);
1954 oldPartCount = partCount();
1955 shift = toSemantics.precision - fromSemantics.precision;
1957 bool X86SpecialNan = false;
1958 if (&fromSemantics == &semX87DoubleExtended &&
1959 &toSemantics != &semX87DoubleExtended && category == fcNaN &&
1960 (!(*significandParts() & 0x8000000000000000ULL) ||
1961 !(*significandParts() & 0x4000000000000000ULL))) {
1962 // x86 has some unusual NaNs which cannot be represented in any other
1963 // format; note them here.
1964 X86SpecialNan = true;
1967 // If this is a truncation of a denormal number, and the target semantics
1968 // has larger exponent range than the source semantics (this can happen
1969 // when truncating from PowerPC double-double to double format), the
1970 // right shift could lose result mantissa bits. Adjust exponent instead
1971 // of performing excessive shift.
1972 if (shift < 0 && isFiniteNonZero()) {
1973 int exponentChange = significandMSB() + 1 - fromSemantics.precision;
1974 if (exponent + exponentChange < toSemantics.minExponent)
1975 exponentChange = toSemantics.minExponent - exponent;
1976 if (exponentChange < shift)
1977 exponentChange = shift;
1978 if (exponentChange < 0) {
1979 shift -= exponentChange;
1980 exponent += exponentChange;
1984 // If this is a truncation, perform the shift before we narrow the storage.
1985 if (shift < 0 && (isFiniteNonZero() || category==fcNaN))
1986 lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
1988 // Fix the storage so it can hold to new value.
1989 if (newPartCount > oldPartCount) {
1990 // The new type requires more storage; make it available.
1991 integerPart *newParts;
1992 newParts = new integerPart[newPartCount];
1993 APInt::tcSet(newParts, 0, newPartCount);
1994 if (isFiniteNonZero() || category==fcNaN)
1995 APInt::tcAssign(newParts, significandParts(), oldPartCount);
1997 significand.parts = newParts;
1998 } else if (newPartCount == 1 && oldPartCount != 1) {
1999 // Switch to built-in storage for a single part.
2000 integerPart newPart = 0;
2001 if (isFiniteNonZero() || category==fcNaN)
2002 newPart = significandParts()[0];
2004 significand.part = newPart;
2007 // Now that we have the right storage, switch the semantics.
2008 semantics = &toSemantics;
2010 // If this is an extension, perform the shift now that the storage is
2012 if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
2013 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
2015 if (isFiniteNonZero()) {
2016 fs = normalize(rounding_mode, lostFraction);
2017 *losesInfo = (fs != opOK);
2018 } else if (category == fcNaN) {
2019 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2021 // For x87 extended precision, we want to make a NaN, not a special NaN if
2022 // the input wasn't special either.
2023 if (!X86SpecialNan && semantics == &semX87DoubleExtended)
2024 APInt::tcSetBit(significandParts(), semantics->precision - 1);
2026 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
2027 // does not give you back the same bits. This is dubious, and we
2028 // don't currently do it. You're really supposed to get
2029 // an invalid operation signal at runtime, but nobody does that.
2039 /* Convert a floating point number to an integer according to the
2040 rounding mode. If the rounded integer value is out of range this
2041 returns an invalid operation exception and the contents of the
2042 destination parts are unspecified. If the rounded value is in
2043 range but the floating point number is not the exact integer, the C
2044 standard doesn't require an inexact exception to be raised. IEEE
2045 854 does require it so we do that.
2047 Note that for conversions to integer type the C standard requires
2048 round-to-zero to always be used. */
2049 IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
2050 integerPart *parts, unsigned int width, bool isSigned,
2051 roundingMode rounding_mode, bool *isExact) const {
2052 lostFraction lost_fraction;
2053 const integerPart *src;
2054 unsigned int dstPartsCount, truncatedBits;
2058 /* Handle the three special cases first. */
2059 if (category == fcInfinity || category == fcNaN)
2062 dstPartsCount = partCountForBits(width);
2064 if (category == fcZero) {
2065 APInt::tcSet(parts, 0, dstPartsCount);
2066 // Negative zero can't be represented as an int.
2071 src = significandParts();
2073 /* Step 1: place our absolute value, with any fraction truncated, in
2076 /* Our absolute value is less than one; truncate everything. */
2077 APInt::tcSet(parts, 0, dstPartsCount);
2078 /* For exponent -1 the integer bit represents .5, look at that.
2079 For smaller exponents leftmost truncated bit is 0. */
2080 truncatedBits = semantics->precision -1U - exponent;
2082 /* We want the most significant (exponent + 1) bits; the rest are
2084 unsigned int bits = exponent + 1U;
2086 /* Hopelessly large in magnitude? */
2090 if (bits < semantics->precision) {
2091 /* We truncate (semantics->precision - bits) bits. */
2092 truncatedBits = semantics->precision - bits;
2093 APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits);
2095 /* We want at least as many bits as are available. */
2096 APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0);
2097 APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision);
2102 /* Step 2: work out any lost fraction, and increment the absolute
2103 value if we would round away from zero. */
2104 if (truncatedBits) {
2105 lost_fraction = lostFractionThroughTruncation(src, partCount(),
2107 if (lost_fraction != lfExactlyZero &&
2108 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2109 if (APInt::tcIncrement(parts, dstPartsCount))
2110 return opInvalidOp; /* Overflow. */
2113 lost_fraction = lfExactlyZero;
2116 /* Step 3: check if we fit in the destination. */
2117 unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1;
2121 /* Negative numbers cannot be represented as unsigned. */
2125 /* It takes omsb bits to represent the unsigned integer value.
2126 We lose a bit for the sign, but care is needed as the
2127 maximally negative integer is a special case. */
2128 if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb)
2131 /* This case can happen because of rounding. */
2136 APInt::tcNegate (parts, dstPartsCount);
2138 if (omsb >= width + !isSigned)
2142 if (lost_fraction == lfExactlyZero) {
2149 /* Same as convertToSignExtendedInteger, except we provide
2150 deterministic values in case of an invalid operation exception,
2151 namely zero for NaNs and the minimal or maximal value respectively
2152 for underflow or overflow.
2153 The *isExact output tells whether the result is exact, in the sense
2154 that converting it back to the original floating point type produces
2155 the original value. This is almost equivalent to result==opOK,
2156 except for negative zeroes.
2158 IEEEFloat::opStatus IEEEFloat::convertToInteger(integerPart *parts,
2161 roundingMode rounding_mode,
2162 bool *isExact) const {
2165 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2168 if (fs == opInvalidOp) {
2169 unsigned int bits, dstPartsCount;
2171 dstPartsCount = partCountForBits(width);
2173 if (category == fcNaN)
2178 bits = width - isSigned;
2180 APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits);
2181 if (sign && isSigned)
2182 APInt::tcShiftLeft(parts, dstPartsCount, width - 1);
2188 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
2189 an APSInt, whose initial bit-width and signed-ness are used to determine the
2190 precision of the conversion.
2192 IEEEFloat::opStatus IEEEFloat::convertToInteger(APSInt &result,
2193 roundingMode rounding_mode,
2194 bool *isExact) const {
2195 unsigned bitWidth = result.getBitWidth();
2196 SmallVector<uint64_t, 4> parts(result.getNumWords());
2197 opStatus status = convertToInteger(
2198 parts.data(), bitWidth, result.isSigned(), rounding_mode, isExact);
2199 // Keeps the original signed-ness.
2200 result = APInt(bitWidth, parts);
2204 /* Convert an unsigned integer SRC to a floating point number,
2205 rounding according to ROUNDING_MODE. The sign of the floating
2206 point number is not modified. */
2207 IEEEFloat::opStatus IEEEFloat::convertFromUnsignedParts(
2208 const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) {
2209 unsigned int omsb, precision, dstCount;
2211 lostFraction lost_fraction;
2213 category = fcNormal;
2214 omsb = APInt::tcMSB(src, srcCount) + 1;
2215 dst = significandParts();
2216 dstCount = partCount();
2217 precision = semantics->precision;
2219 /* We want the most significant PRECISION bits of SRC. There may not
2220 be that many; extract what we can. */
2221 if (precision <= omsb) {
2222 exponent = omsb - 1;
2223 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2225 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2227 exponent = precision - 1;
2228 lost_fraction = lfExactlyZero;
2229 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2232 return normalize(rounding_mode, lost_fraction);
2235 IEEEFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned,
2236 roundingMode rounding_mode) {
2237 unsigned int partCount = Val.getNumWords();
2241 if (isSigned && api.isNegative()) {
2246 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2249 /* Convert a two's complement integer SRC to a floating point number,
2250 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2251 integer is signed, in which case it must be sign-extended. */
2253 IEEEFloat::convertFromSignExtendedInteger(const integerPart *src,
2254 unsigned int srcCount, bool isSigned,
2255 roundingMode rounding_mode) {
2259 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2262 /* If we're signed and negative negate a copy. */
2264 copy = new integerPart[srcCount];
2265 APInt::tcAssign(copy, src, srcCount);
2266 APInt::tcNegate(copy, srcCount);
2267 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2271 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2277 /* FIXME: should this just take a const APInt reference? */
2279 IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2280 unsigned int width, bool isSigned,
2281 roundingMode rounding_mode) {
2282 unsigned int partCount = partCountForBits(width);
2283 APInt api = APInt(width, makeArrayRef(parts, partCount));
2286 if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2291 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2295 IEEEFloat::convertFromHexadecimalString(StringRef s,
2296 roundingMode rounding_mode) {
2297 lostFraction lost_fraction = lfExactlyZero;
2299 category = fcNormal;
2303 integerPart *significand = significandParts();
2304 unsigned partsCount = partCount();
2305 unsigned bitPos = partsCount * integerPartWidth;
2306 bool computedTrailingFraction = false;
2308 // Skip leading zeroes and any (hexa)decimal point.
2309 StringRef::iterator begin = s.begin();
2310 StringRef::iterator end = s.end();
2311 StringRef::iterator dot;
2312 StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2313 StringRef::iterator firstSignificantDigit = p;
2316 integerPart hex_value;
2319 assert(dot == end && "String contains multiple dots");
2324 hex_value = hexDigitValue(*p);
2325 if (hex_value == -1U)
2330 // Store the number while we have space.
2333 hex_value <<= bitPos % integerPartWidth;
2334 significand[bitPos / integerPartWidth] |= hex_value;
2335 } else if (!computedTrailingFraction) {
2336 lost_fraction = trailingHexadecimalFraction(p, end, hex_value);
2337 computedTrailingFraction = true;
2341 /* Hex floats require an exponent but not a hexadecimal point. */
2342 assert(p != end && "Hex strings require an exponent");
2343 assert((*p == 'p' || *p == 'P') && "Invalid character in significand");
2344 assert(p != begin && "Significand has no digits");
2345 assert((dot == end || p - begin != 1) && "Significand has no digits");
2347 /* Ignore the exponent if we are zero. */
2348 if (p != firstSignificantDigit) {
2351 /* Implicit hexadecimal point? */
2355 /* Calculate the exponent adjustment implicit in the number of
2356 significant digits. */
2357 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2358 if (expAdjustment < 0)
2360 expAdjustment = expAdjustment * 4 - 1;
2362 /* Adjust for writing the significand starting at the most
2363 significant nibble. */
2364 expAdjustment += semantics->precision;
2365 expAdjustment -= partsCount * integerPartWidth;
2367 /* Adjust for the given exponent. */
2368 exponent = totalExponent(p + 1, end, expAdjustment);
2371 return normalize(rounding_mode, lost_fraction);
2375 IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2376 unsigned sigPartCount, int exp,
2377 roundingMode rounding_mode) {
2378 unsigned int parts, pow5PartCount;
2379 fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
2380 integerPart pow5Parts[maxPowerOfFiveParts];
2383 isNearest = (rounding_mode == rmNearestTiesToEven ||
2384 rounding_mode == rmNearestTiesToAway);
2386 parts = partCountForBits(semantics->precision + 11);
2388 /* Calculate pow(5, abs(exp)). */
2389 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2391 for (;; parts *= 2) {
2392 opStatus sigStatus, powStatus;
2393 unsigned int excessPrecision, truncatedBits;
2395 calcSemantics.precision = parts * integerPartWidth - 1;
2396 excessPrecision = calcSemantics.precision - semantics->precision;
2397 truncatedBits = excessPrecision;
2399 IEEEFloat decSig(calcSemantics, uninitialized);
2400 decSig.makeZero(sign);
2401 IEEEFloat pow5(calcSemantics);
2403 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2404 rmNearestTiesToEven);
2405 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2406 rmNearestTiesToEven);
2407 /* Add exp, as 10^n = 5^n * 2^n. */
2408 decSig.exponent += exp;
2410 lostFraction calcLostFraction;
2411 integerPart HUerr, HUdistance;
2412 unsigned int powHUerr;
2415 /* multiplySignificand leaves the precision-th bit set to 1. */
2416 calcLostFraction = decSig.multiplySignificand(pow5, nullptr);
2417 powHUerr = powStatus != opOK;
2419 calcLostFraction = decSig.divideSignificand(pow5);
2420 /* Denormal numbers have less precision. */
2421 if (decSig.exponent < semantics->minExponent) {
2422 excessPrecision += (semantics->minExponent - decSig.exponent);
2423 truncatedBits = excessPrecision;
2424 if (excessPrecision > calcSemantics.precision)
2425 excessPrecision = calcSemantics.precision;
2427 /* Extra half-ulp lost in reciprocal of exponent. */
2428 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2431 /* Both multiplySignificand and divideSignificand return the
2432 result with the integer bit set. */
2433 assert(APInt::tcExtractBit
2434 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2436 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2438 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2439 excessPrecision, isNearest);
2441 /* Are we guaranteed to round correctly if we truncate? */
2442 if (HUdistance >= HUerr) {
2443 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2444 calcSemantics.precision - excessPrecision,
2446 /* Take the exponent of decSig. If we tcExtract-ed less bits
2447 above we must adjust our exponent to compensate for the
2448 implicit right shift. */
2449 exponent = (decSig.exponent + semantics->precision
2450 - (calcSemantics.precision - excessPrecision));
2451 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2454 return normalize(rounding_mode, calcLostFraction);
2460 IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
2464 /* Scan the text. */
2465 StringRef::iterator p = str.begin();
2466 interpretDecimal(p, str.end(), &D);
2468 /* Handle the quick cases. First the case of no significant digits,
2469 i.e. zero, and then exponents that are obviously too large or too
2470 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2471 definitely overflows if
2473 (exp - 1) * L >= maxExponent
2475 and definitely underflows to zero where
2477 (exp + 1) * L <= minExponent - precision
2479 With integer arithmetic the tightest bounds for L are
2481 93/28 < L < 196/59 [ numerator <= 256 ]
2482 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2485 // Test if we have a zero number allowing for strings with no null terminators
2486 // and zero decimals with non-zero exponents.
2488 // We computed firstSigDigit by ignoring all zeros and dots. Thus if
2489 // D->firstSigDigit equals str.end(), every digit must be a zero and there can
2490 // be at most one dot. On the other hand, if we have a zero with a non-zero
2491 // exponent, then we know that D.firstSigDigit will be non-numeric.
2492 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
2496 /* Check whether the normalized exponent is high enough to overflow
2497 max during the log-rebasing in the max-exponent check below. */
2498 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2499 fs = handleOverflow(rounding_mode);
2501 /* If it wasn't, then it also wasn't high enough to overflow max
2502 during the log-rebasing in the min-exponent check. Check that it
2503 won't overflow min in either check, then perform the min-exponent
2505 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
2506 (D.normalizedExponent + 1) * 28738 <=
2507 8651 * (semantics->minExponent - (int) semantics->precision)) {
2508 /* Underflow to zero and round. */
2509 category = fcNormal;
2511 fs = normalize(rounding_mode, lfLessThanHalf);
2513 /* We can finally safely perform the max-exponent check. */
2514 } else if ((D.normalizedExponent - 1) * 42039
2515 >= 12655 * semantics->maxExponent) {
2516 /* Overflow and round. */
2517 fs = handleOverflow(rounding_mode);
2519 integerPart *decSignificand;
2520 unsigned int partCount;
2522 /* A tight upper bound on number of bits required to hold an
2523 N-digit decimal integer is N * 196 / 59. Allocate enough space
2524 to hold the full significand, and an extra part required by
2526 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2527 partCount = partCountForBits(1 + 196 * partCount / 59);
2528 decSignificand = new integerPart[partCount + 1];
2531 /* Convert to binary efficiently - we do almost all multiplication
2532 in an integerPart. When this would overflow do we do a single
2533 bignum multiplication, and then revert again to multiplication
2534 in an integerPart. */
2536 integerPart decValue, val, multiplier;
2544 if (p == str.end()) {
2548 decValue = decDigitValue(*p++);
2549 assert(decValue < 10U && "Invalid character in significand");
2551 val = val * 10 + decValue;
2552 /* The maximum number that can be multiplied by ten with any
2553 digit added without overflowing an integerPart. */
2554 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2556 /* Multiply out the current part. */
2557 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2558 partCount, partCount + 1, false);
2560 /* If we used another part (likely but not guaranteed), increase
2562 if (decSignificand[partCount])
2564 } while (p <= D.lastSigDigit);
2566 category = fcNormal;
2567 fs = roundSignificandWithExponent(decSignificand, partCount,
2568 D.exponent, rounding_mode);
2570 delete [] decSignificand;
2576 bool IEEEFloat::convertFromStringSpecials(StringRef str) {
2577 if (str.equals("inf") || str.equals("INFINITY")) {
2582 if (str.equals("-inf") || str.equals("-INFINITY")) {
2587 if (str.equals("nan") || str.equals("NaN")) {
2588 makeNaN(false, false);
2592 if (str.equals("-nan") || str.equals("-NaN")) {
2593 makeNaN(false, true);
2600 IEEEFloat::opStatus IEEEFloat::convertFromString(StringRef str,
2601 roundingMode rounding_mode) {
2602 assert(!str.empty() && "Invalid string length");
2604 // Handle special cases.
2605 if (convertFromStringSpecials(str))
2608 /* Handle a leading minus sign. */
2609 StringRef::iterator p = str.begin();
2610 size_t slen = str.size();
2611 sign = *p == '-' ? 1 : 0;
2612 if (*p == '-' || *p == '+') {
2615 assert(slen && "String has no digits");
2618 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2619 assert(slen - 2 && "Invalid string");
2620 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
2624 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
2627 /* Write out a hexadecimal representation of the floating point value
2628 to DST, which must be of sufficient size, in the C99 form
2629 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2630 excluding the terminating NUL.
2632 If UPPERCASE, the output is in upper case, otherwise in lower case.
2634 HEXDIGITS digits appear altogether, rounding the value if
2635 necessary. If HEXDIGITS is 0, the minimal precision to display the
2636 number precisely is used instead. If nothing would appear after
2637 the decimal point it is suppressed.
2639 The decimal exponent is always printed and has at least one digit.
2640 Zero values display an exponent of zero. Infinities and NaNs
2641 appear as "infinity" or "nan" respectively.
2643 The above rules are as specified by C99. There is ambiguity about
2644 what the leading hexadecimal digit should be. This implementation
2645 uses whatever is necessary so that the exponent is displayed as
2646 stored. This implies the exponent will fall within the IEEE format
2647 range, and the leading hexadecimal digit will be 0 (for denormals),
2648 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2649 any other digits zero).
2651 unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits,
2653 roundingMode rounding_mode) const {
2662 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2663 dst += sizeof infinityL - 1;
2667 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2668 dst += sizeof NaNU - 1;
2673 *dst++ = upperCase ? 'X': 'x';
2675 if (hexDigits > 1) {
2677 memset (dst, '0', hexDigits - 1);
2678 dst += hexDigits - 1;
2680 *dst++ = upperCase ? 'P': 'p';
2685 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2691 return static_cast<unsigned int>(dst - p);
2694 /* Does the hard work of outputting the correctly rounded hexadecimal
2695 form of a normal floating point number with the specified number of
2696 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2697 digits necessary to print the value precisely is output. */
2698 char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2700 roundingMode rounding_mode) const {
2701 unsigned int count, valueBits, shift, partsCount, outputDigits;
2702 const char *hexDigitChars;
2703 const integerPart *significand;
2708 *dst++ = upperCase ? 'X': 'x';
2711 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2713 significand = significandParts();
2714 partsCount = partCount();
2716 /* +3 because the first digit only uses the single integer bit, so
2717 we have 3 virtual zero most-significant-bits. */
2718 valueBits = semantics->precision + 3;
2719 shift = integerPartWidth - valueBits % integerPartWidth;
2721 /* The natural number of digits required ignoring trailing
2722 insignificant zeroes. */
2723 outputDigits = (valueBits - significandLSB () + 3) / 4;
2725 /* hexDigits of zero means use the required number for the
2726 precision. Otherwise, see if we are truncating. If we are,
2727 find out if we need to round away from zero. */
2729 if (hexDigits < outputDigits) {
2730 /* We are dropping non-zero bits, so need to check how to round.
2731 "bits" is the number of dropped bits. */
2733 lostFraction fraction;
2735 bits = valueBits - hexDigits * 4;
2736 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2737 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2739 outputDigits = hexDigits;
2742 /* Write the digits consecutively, and start writing in the location
2743 of the hexadecimal point. We move the most significant digit
2744 left and add the hexadecimal point later. */
2747 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2749 while (outputDigits && count) {
2752 /* Put the most significant integerPartWidth bits in "part". */
2753 if (--count == partsCount)
2754 part = 0; /* An imaginary higher zero part. */
2756 part = significand[count] << shift;
2759 part |= significand[count - 1] >> (integerPartWidth - shift);
2761 /* Convert as much of "part" to hexdigits as we can. */
2762 unsigned int curDigits = integerPartWidth / 4;
2764 if (curDigits > outputDigits)
2765 curDigits = outputDigits;
2766 dst += partAsHex (dst, part, curDigits, hexDigitChars);
2767 outputDigits -= curDigits;
2773 /* Note that hexDigitChars has a trailing '0'. */
2776 *q = hexDigitChars[hexDigitValue (*q) + 1];
2777 } while (*q == '0');
2780 /* Add trailing zeroes. */
2781 memset (dst, '0', outputDigits);
2782 dst += outputDigits;
2785 /* Move the most significant digit to before the point, and if there
2786 is something after the decimal point add it. This must come
2787 after rounding above. */
2794 /* Finally output the exponent. */
2795 *dst++ = upperCase ? 'P': 'p';
2797 return writeSignedDecimal (dst, exponent);
2800 hash_code hash_value(const IEEEFloat &Arg) {
2801 if (!Arg.isFiniteNonZero())
2802 return hash_combine((uint8_t)Arg.category,
2803 // NaN has no sign, fix it at zero.
2804 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
2805 Arg.semantics->precision);
2807 // Normal floats need their exponent and significand hashed.
2808 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
2809 Arg.semantics->precision, Arg.exponent,
2811 Arg.significandParts(),
2812 Arg.significandParts() + Arg.partCount()));
2815 // Conversion from APFloat to/from host float/double. It may eventually be
2816 // possible to eliminate these and have everybody deal with APFloats, but that
2817 // will take a while. This approach will not easily extend to long double.
2818 // Current implementation requires integerPartWidth==64, which is correct at
2819 // the moment but could be made more general.
2821 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2822 // the actual IEEE respresentations. We compensate for that here.
2824 APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
2825 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended);
2826 assert(partCount()==2);
2828 uint64_t myexponent, mysignificand;
2830 if (isFiniteNonZero()) {
2831 myexponent = exponent+16383; //bias
2832 mysignificand = significandParts()[0];
2833 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2834 myexponent = 0; // denormal
2835 } else if (category==fcZero) {
2838 } else if (category==fcInfinity) {
2839 myexponent = 0x7fff;
2840 mysignificand = 0x8000000000000000ULL;
2842 assert(category == fcNaN && "Unknown category");
2843 myexponent = 0x7fff;
2844 mysignificand = significandParts()[0];
2848 words[0] = mysignificand;
2849 words[1] = ((uint64_t)(sign & 1) << 15) |
2850 (myexponent & 0x7fffLL);
2851 return APInt(80, words);
2854 APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const {
2855 assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleImpl);
2856 assert(partCount()==2);
2862 // Convert number to double. To avoid spurious underflows, we re-
2863 // normalize against the "double" minExponent first, and only *then*
2864 // truncate the mantissa. The result of that second conversion
2865 // may be inexact, but should never underflow.
2866 // Declare fltSemantics before APFloat that uses it (and
2867 // saves pointer to it) to ensure correct destruction order.
2868 fltSemantics extendedSemantics = *semantics;
2869 extendedSemantics.minExponent = semIEEEdouble.minExponent;
2870 IEEEFloat extended(*this);
2871 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2872 assert(fs == opOK && !losesInfo);
2875 IEEEFloat u(extended);
2876 fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
2877 assert(fs == opOK || fs == opInexact);
2879 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
2881 // If conversion was exact or resulted in a special case, we're done;
2882 // just set the second double to zero. Otherwise, re-convert back to
2883 // the extended format and compute the difference. This now should
2884 // convert exactly to double.
2885 if (u.isFiniteNonZero() && losesInfo) {
2886 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2887 assert(fs == opOK && !losesInfo);
2890 IEEEFloat v(extended);
2891 v.subtract(u, rmNearestTiesToEven);
2892 fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
2893 assert(fs == opOK && !losesInfo);
2895 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
2900 return APInt(128, words);
2903 APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
2904 assert(semantics == (const llvm::fltSemantics*)&semIEEEquad);
2905 assert(partCount()==2);
2907 uint64_t myexponent, mysignificand, mysignificand2;
2909 if (isFiniteNonZero()) {
2910 myexponent = exponent+16383; //bias
2911 mysignificand = significandParts()[0];
2912 mysignificand2 = significandParts()[1];
2913 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
2914 myexponent = 0; // denormal
2915 } else if (category==fcZero) {
2917 mysignificand = mysignificand2 = 0;
2918 } else if (category==fcInfinity) {
2919 myexponent = 0x7fff;
2920 mysignificand = mysignificand2 = 0;
2922 assert(category == fcNaN && "Unknown category!");
2923 myexponent = 0x7fff;
2924 mysignificand = significandParts()[0];
2925 mysignificand2 = significandParts()[1];
2929 words[0] = mysignificand;
2930 words[1] = ((uint64_t)(sign & 1) << 63) |
2931 ((myexponent & 0x7fff) << 48) |
2932 (mysignificand2 & 0xffffffffffffLL);
2934 return APInt(128, words);
2937 APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
2938 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble);
2939 assert(partCount()==1);
2941 uint64_t myexponent, mysignificand;
2943 if (isFiniteNonZero()) {
2944 myexponent = exponent+1023; //bias
2945 mysignificand = *significandParts();
2946 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2947 myexponent = 0; // denormal
2948 } else if (category==fcZero) {
2951 } else if (category==fcInfinity) {
2955 assert(category == fcNaN && "Unknown category!");
2957 mysignificand = *significandParts();
2960 return APInt(64, ((((uint64_t)(sign & 1) << 63) |
2961 ((myexponent & 0x7ff) << 52) |
2962 (mysignificand & 0xfffffffffffffLL))));
2965 APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
2966 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle);
2967 assert(partCount()==1);
2969 uint32_t myexponent, mysignificand;
2971 if (isFiniteNonZero()) {
2972 myexponent = exponent+127; //bias
2973 mysignificand = (uint32_t)*significandParts();
2974 if (myexponent == 1 && !(mysignificand & 0x800000))
2975 myexponent = 0; // denormal
2976 } else if (category==fcZero) {
2979 } else if (category==fcInfinity) {
2983 assert(category == fcNaN && "Unknown category!");
2985 mysignificand = (uint32_t)*significandParts();
2988 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
2989 (mysignificand & 0x7fffff)));
2992 APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
2993 assert(semantics == (const llvm::fltSemantics*)&semIEEEhalf);
2994 assert(partCount()==1);
2996 uint32_t myexponent, mysignificand;
2998 if (isFiniteNonZero()) {
2999 myexponent = exponent+15; //bias
3000 mysignificand = (uint32_t)*significandParts();
3001 if (myexponent == 1 && !(mysignificand & 0x400))
3002 myexponent = 0; // denormal
3003 } else if (category==fcZero) {
3006 } else if (category==fcInfinity) {
3010 assert(category == fcNaN && "Unknown category!");
3012 mysignificand = (uint32_t)*significandParts();
3015 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |
3016 (mysignificand & 0x3ff)));
3019 // This function creates an APInt that is just a bit map of the floating
3020 // point constant as it would appear in memory. It is not a conversion,
3021 // and treating the result as a normal integer is unlikely to be useful.
3023 APInt IEEEFloat::bitcastToAPInt() const {
3024 if (semantics == (const llvm::fltSemantics*)&semIEEEhalf)
3025 return convertHalfAPFloatToAPInt();
3027 if (semantics == (const llvm::fltSemantics*)&semIEEEsingle)
3028 return convertFloatAPFloatToAPInt();
3030 if (semantics == (const llvm::fltSemantics*)&semIEEEdouble)
3031 return convertDoubleAPFloatToAPInt();
3033 if (semantics == (const llvm::fltSemantics*)&semIEEEquad)
3034 return convertQuadrupleAPFloatToAPInt();
3036 if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleImpl)
3037 return convertPPCDoubleDoubleAPFloatToAPInt();
3039 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended &&
3041 return convertF80LongDoubleAPFloatToAPInt();
3044 float IEEEFloat::convertToFloat() const {
3045 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle &&
3046 "Float semantics are not IEEEsingle");
3047 APInt api = bitcastToAPInt();
3048 return api.bitsToFloat();
3051 double IEEEFloat::convertToDouble() const {
3052 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble &&
3053 "Float semantics are not IEEEdouble");
3054 APInt api = bitcastToAPInt();
3055 return api.bitsToDouble();
3058 /// Integer bit is explicit in this format. Intel hardware (387 and later)
3059 /// does not support these bit patterns:
3060 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3061 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3062 /// exponent = 0, integer bit 1 ("pseudodenormal")
3063 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3064 /// At the moment, the first two are treated as NaNs, the second two as Normal.
3065 void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
3066 assert(api.getBitWidth()==80);
3067 uint64_t i1 = api.getRawData()[0];
3068 uint64_t i2 = api.getRawData()[1];
3069 uint64_t myexponent = (i2 & 0x7fff);
3070 uint64_t mysignificand = i1;
3072 initialize(&semX87DoubleExtended);
3073 assert(partCount()==2);
3075 sign = static_cast<unsigned int>(i2>>15);
3076 if (myexponent==0 && mysignificand==0) {
3077 // exponent, significand meaningless
3079 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3080 // exponent, significand meaningless
3081 category = fcInfinity;
3082 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
3083 // exponent meaningless
3085 significandParts()[0] = mysignificand;
3086 significandParts()[1] = 0;
3088 category = fcNormal;
3089 exponent = myexponent - 16383;
3090 significandParts()[0] = mysignificand;
3091 significandParts()[1] = 0;
3092 if (myexponent==0) // denormal
3097 void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) {
3098 assert(api.getBitWidth()==128);
3099 uint64_t i1 = api.getRawData()[0];
3100 uint64_t i2 = api.getRawData()[1];
3104 // Get the first double and convert to our format.
3105 initFromDoubleAPInt(APInt(64, i1));
3106 fs = convert(semPPCDoubleDoubleImpl, rmNearestTiesToEven, &losesInfo);
3107 assert(fs == opOK && !losesInfo);
3110 // Unless we have a special case, add in second double.
3111 if (isFiniteNonZero()) {
3112 IEEEFloat v(semIEEEdouble, APInt(64, i2));
3113 fs = v.convert(semPPCDoubleDoubleImpl, rmNearestTiesToEven, &losesInfo);
3114 assert(fs == opOK && !losesInfo);
3117 add(v, rmNearestTiesToEven);
3121 void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
3122 assert(api.getBitWidth()==128);
3123 uint64_t i1 = api.getRawData()[0];
3124 uint64_t i2 = api.getRawData()[1];
3125 uint64_t myexponent = (i2 >> 48) & 0x7fff;
3126 uint64_t mysignificand = i1;
3127 uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
3129 initialize(&semIEEEquad);
3130 assert(partCount()==2);
3132 sign = static_cast<unsigned int>(i2>>63);
3133 if (myexponent==0 &&
3134 (mysignificand==0 && mysignificand2==0)) {
3135 // exponent, significand meaningless
3137 } else if (myexponent==0x7fff &&
3138 (mysignificand==0 && mysignificand2==0)) {
3139 // exponent, significand meaningless
3140 category = fcInfinity;
3141 } else if (myexponent==0x7fff &&
3142 (mysignificand!=0 || mysignificand2 !=0)) {
3143 // exponent meaningless
3145 significandParts()[0] = mysignificand;
3146 significandParts()[1] = mysignificand2;
3148 category = fcNormal;
3149 exponent = myexponent - 16383;
3150 significandParts()[0] = mysignificand;
3151 significandParts()[1] = mysignificand2;
3152 if (myexponent==0) // denormal
3155 significandParts()[1] |= 0x1000000000000LL; // integer bit
3159 void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
3160 assert(api.getBitWidth()==64);
3161 uint64_t i = *api.getRawData();
3162 uint64_t myexponent = (i >> 52) & 0x7ff;
3163 uint64_t mysignificand = i & 0xfffffffffffffLL;
3165 initialize(&semIEEEdouble);
3166 assert(partCount()==1);
3168 sign = static_cast<unsigned int>(i>>63);
3169 if (myexponent==0 && mysignificand==0) {
3170 // exponent, significand meaningless
3172 } else if (myexponent==0x7ff && mysignificand==0) {
3173 // exponent, significand meaningless
3174 category = fcInfinity;
3175 } else if (myexponent==0x7ff && mysignificand!=0) {
3176 // exponent meaningless
3178 *significandParts() = mysignificand;
3180 category = fcNormal;
3181 exponent = myexponent - 1023;
3182 *significandParts() = mysignificand;
3183 if (myexponent==0) // denormal
3186 *significandParts() |= 0x10000000000000LL; // integer bit
3190 void IEEEFloat::initFromFloatAPInt(const APInt &api) {
3191 assert(api.getBitWidth()==32);
3192 uint32_t i = (uint32_t)*api.getRawData();
3193 uint32_t myexponent = (i >> 23) & 0xff;
3194 uint32_t mysignificand = i & 0x7fffff;
3196 initialize(&semIEEEsingle);
3197 assert(partCount()==1);
3200 if (myexponent==0 && mysignificand==0) {
3201 // exponent, significand meaningless
3203 } else if (myexponent==0xff && mysignificand==0) {
3204 // exponent, significand meaningless
3205 category = fcInfinity;
3206 } else if (myexponent==0xff && mysignificand!=0) {
3207 // sign, exponent, significand meaningless
3209 *significandParts() = mysignificand;
3211 category = fcNormal;
3212 exponent = myexponent - 127; //bias
3213 *significandParts() = mysignificand;
3214 if (myexponent==0) // denormal
3217 *significandParts() |= 0x800000; // integer bit
3221 void IEEEFloat::initFromHalfAPInt(const APInt &api) {
3222 assert(api.getBitWidth()==16);
3223 uint32_t i = (uint32_t)*api.getRawData();
3224 uint32_t myexponent = (i >> 10) & 0x1f;
3225 uint32_t mysignificand = i & 0x3ff;
3227 initialize(&semIEEEhalf);
3228 assert(partCount()==1);
3231 if (myexponent==0 && mysignificand==0) {
3232 // exponent, significand meaningless
3234 } else if (myexponent==0x1f && mysignificand==0) {
3235 // exponent, significand meaningless
3236 category = fcInfinity;
3237 } else if (myexponent==0x1f && mysignificand!=0) {
3238 // sign, exponent, significand meaningless
3240 *significandParts() = mysignificand;
3242 category = fcNormal;
3243 exponent = myexponent - 15; //bias
3244 *significandParts() = mysignificand;
3245 if (myexponent==0) // denormal
3248 *significandParts() |= 0x400; // integer bit
3252 /// Treat api as containing the bits of a floating point number. Currently
3253 /// we infer the floating point type from the size of the APInt. The
3254 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
3255 /// when the size is anything else).
3256 void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
3257 if (Sem == &semIEEEhalf)
3258 return initFromHalfAPInt(api);
3259 if (Sem == &semIEEEsingle)
3260 return initFromFloatAPInt(api);
3261 if (Sem == &semIEEEdouble)
3262 return initFromDoubleAPInt(api);
3263 if (Sem == &semX87DoubleExtended)
3264 return initFromF80LongDoubleAPInt(api);
3265 if (Sem == &semIEEEquad)
3266 return initFromQuadrupleAPInt(api);
3267 if (Sem == &semPPCDoubleDoubleImpl)
3268 return initFromPPCDoubleDoubleAPInt(api);
3270 llvm_unreachable(nullptr);
3273 /// Make this number the largest magnitude normal number in the given
3275 void IEEEFloat::makeLargest(bool Negative) {
3276 // We want (in interchange format):
3277 // sign = {Negative}
3279 // significand = 1..1
3280 category = fcNormal;
3282 exponent = semantics->maxExponent;
3284 // Use memset to set all but the highest integerPart to all ones.
3285 integerPart *significand = significandParts();
3286 unsigned PartCount = partCount();
3287 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
3289 // Set the high integerPart especially setting all unused top bits for
3290 // internal consistency.
3291 const unsigned NumUnusedHighBits =
3292 PartCount*integerPartWidth - semantics->precision;
3293 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
3294 ? (~integerPart(0) >> NumUnusedHighBits)
3298 /// Make this number the smallest magnitude denormal number in the given
3300 void IEEEFloat::makeSmallest(bool Negative) {
3301 // We want (in interchange format):
3302 // sign = {Negative}
3304 // significand = 0..01
3305 category = fcNormal;
3307 exponent = semantics->minExponent;
3308 APInt::tcSet(significandParts(), 1, partCount());
3311 void IEEEFloat::makeSmallestNormalized(bool Negative) {
3312 // We want (in interchange format):
3313 // sign = {Negative}
3315 // significand = 10..0
3317 category = fcNormal;
3320 exponent = semantics->minExponent;
3321 significandParts()[partCountForBits(semantics->precision) - 1] |=
3322 (((integerPart)1) << ((semantics->precision - 1) % integerPartWidth));
3325 IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
3326 initFromAPInt(&Sem, API);
3329 IEEEFloat::IEEEFloat(float f) {
3330 initFromAPInt(&semIEEEsingle, APInt::floatToBits(f));
3333 IEEEFloat::IEEEFloat(double d) {
3334 initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d));
3338 void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
3339 Buffer.append(Str.begin(), Str.end());
3342 /// Removes data from the given significand until it is no more
3343 /// precise than is required for the desired precision.
3344 void AdjustToPrecision(APInt &significand,
3345 int &exp, unsigned FormatPrecision) {
3346 unsigned bits = significand.getActiveBits();
3348 // 196/59 is a very slight overestimate of lg_2(10).
3349 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
3351 if (bits <= bitsRequired) return;
3353 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3354 if (!tensRemovable) return;
3356 exp += tensRemovable;
3358 APInt divisor(significand.getBitWidth(), 1);
3359 APInt powten(significand.getBitWidth(), 10);
3361 if (tensRemovable & 1)
3363 tensRemovable >>= 1;
3364 if (!tensRemovable) break;
3368 significand = significand.udiv(divisor);
3370 // Truncate the significand down to its active bit count.
3371 significand = significand.trunc(significand.getActiveBits());
3375 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
3376 int &exp, unsigned FormatPrecision) {
3377 unsigned N = buffer.size();
3378 if (N <= FormatPrecision) return;
3380 // The most significant figures are the last ones in the buffer.
3381 unsigned FirstSignificant = N - FormatPrecision;
3384 // FIXME: this probably shouldn't use 'round half up'.
3386 // Rounding down is just a truncation, except we also want to drop
3387 // trailing zeros from the new result.
3388 if (buffer[FirstSignificant - 1] < '5') {
3389 while (FirstSignificant < N && buffer[FirstSignificant] == '0')
3392 exp += FirstSignificant;
3393 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3397 // Rounding up requires a decimal add-with-carry. If we continue
3398 // the carry, the newly-introduced zeros will just be truncated.
3399 for (unsigned I = FirstSignificant; I != N; ++I) {
3400 if (buffer[I] == '9') {
3408 // If we carried through, we have exactly one digit of precision.
3409 if (FirstSignificant == N) {
3410 exp += FirstSignificant;
3412 buffer.push_back('1');
3416 exp += FirstSignificant;
3417 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3421 void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
3422 unsigned FormatMaxPadding) const {
3426 return append(Str, "-Inf");
3428 return append(Str, "+Inf");
3430 case fcNaN: return append(Str, "NaN");
3436 if (!FormatMaxPadding)
3437 append(Str, "0.0E+0");
3449 // Decompose the number into an APInt and an exponent.
3450 int exp = exponent - ((int) semantics->precision - 1);
3451 APInt significand(semantics->precision,
3452 makeArrayRef(significandParts(),
3453 partCountForBits(semantics->precision)));
3455 // Set FormatPrecision if zero. We want to do this before we
3456 // truncate trailing zeros, as those are part of the precision.
3457 if (!FormatPrecision) {
3458 // We use enough digits so the number can be round-tripped back to an
3459 // APFloat. The formula comes from "How to Print Floating-Point Numbers
3460 // Accurately" by Steele and White.
3461 // FIXME: Using a formula based purely on the precision is conservative;
3462 // we can print fewer digits depending on the actual value being printed.
3464 // FormatPrecision = 2 + floor(significandBits / lg_2(10))
3465 FormatPrecision = 2 + semantics->precision * 59 / 196;
3468 // Ignore trailing binary zeros.
3469 int trailingZeros = significand.countTrailingZeros();
3470 exp += trailingZeros;
3471 significand = significand.lshr(trailingZeros);
3473 // Change the exponent from 2^e to 10^e.
3476 } else if (exp > 0) {
3478 significand = significand.zext(semantics->precision + exp);
3479 significand <<= exp;
3481 } else { /* exp < 0 */
3484 // We transform this using the identity:
3485 // (N)(2^-e) == (N)(5^e)(10^-e)
3486 // This means we have to multiply N (the significand) by 5^e.
3487 // To avoid overflow, we have to operate on numbers large
3488 // enough to store N * 5^e:
3489 // log2(N * 5^e) == log2(N) + e * log2(5)
3490 // <= semantics->precision + e * 137 / 59
3491 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
3493 unsigned precision = semantics->precision + (137 * texp + 136) / 59;
3495 // Multiply significand by 5^e.
3496 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
3497 significand = significand.zext(precision);
3498 APInt five_to_the_i(precision, 5);
3500 if (texp & 1) significand *= five_to_the_i;
3504 five_to_the_i *= five_to_the_i;
3508 AdjustToPrecision(significand, exp, FormatPrecision);
3510 SmallVector<char, 256> buffer;
3513 unsigned precision = significand.getBitWidth();
3514 APInt ten(precision, 10);
3515 APInt digit(precision, 0);
3517 bool inTrail = true;
3518 while (significand != 0) {
3519 // digit <- significand % 10
3520 // significand <- significand / 10
3521 APInt::udivrem(significand, ten, significand, digit);
3523 unsigned d = digit.getZExtValue();
3525 // Drop trailing zeros.
3526 if (inTrail && !d) exp++;
3528 buffer.push_back((char) ('0' + d));
3533 assert(!buffer.empty() && "no characters in buffer!");
3535 // Drop down to FormatPrecision.
3536 // TODO: don't do more precise calculations above than are required.
3537 AdjustToPrecision(buffer, exp, FormatPrecision);
3539 unsigned NDigits = buffer.size();
3541 // Check whether we should use scientific notation.
3542 bool FormatScientific;
3543 if (!FormatMaxPadding)
3544 FormatScientific = true;
3549 // But we shouldn't make the number look more precise than it is.
3550 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
3551 NDigits + (unsigned) exp > FormatPrecision);
3553 // Power of the most significant digit.
3554 int MSD = exp + (int) (NDigits - 1);
3557 FormatScientific = false;
3559 // 765e-5 == 0.00765
3561 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
3566 // Scientific formatting is pretty straightforward.
3567 if (FormatScientific) {
3568 exp += (NDigits - 1);
3570 Str.push_back(buffer[NDigits-1]);
3575 for (unsigned I = 1; I != NDigits; ++I)
3576 Str.push_back(buffer[NDigits-1-I]);
3579 Str.push_back(exp >= 0 ? '+' : '-');
3580 if (exp < 0) exp = -exp;
3581 SmallVector<char, 6> expbuf;
3583 expbuf.push_back((char) ('0' + (exp % 10)));
3586 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
3587 Str.push_back(expbuf[E-1-I]);
3591 // Non-scientific, positive exponents.
3593 for (unsigned I = 0; I != NDigits; ++I)
3594 Str.push_back(buffer[NDigits-1-I]);
3595 for (unsigned I = 0; I != (unsigned) exp; ++I)
3600 // Non-scientific, negative exponents.
3602 // The number of digits to the left of the decimal point.
3603 int NWholeDigits = exp + (int) NDigits;
3606 if (NWholeDigits > 0) {
3607 for (; I != (unsigned) NWholeDigits; ++I)
3608 Str.push_back(buffer[NDigits-I-1]);
3611 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
3615 for (unsigned Z = 1; Z != NZeros; ++Z)
3619 for (; I != NDigits; ++I)
3620 Str.push_back(buffer[NDigits-I-1]);
3623 bool IEEEFloat::getExactInverse(IEEEFloat *inv) const {
3624 // Special floats and denormals have no exact inverse.
3625 if (!isFiniteNonZero())
3628 // Check that the number is a power of two by making sure that only the
3629 // integer bit is set in the significand.
3630 if (significandLSB() != semantics->precision - 1)
3634 IEEEFloat reciprocal(*semantics, 1ULL);
3635 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
3638 // Avoid multiplication with a denormal, it is not safe on all platforms and
3639 // may be slower than a normal division.
3640 if (reciprocal.isDenormal())
3643 assert(reciprocal.isFiniteNonZero() &&
3644 reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
3652 bool IEEEFloat::isSignaling() const {
3656 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
3657 // first bit of the trailing significand being 0.
3658 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
3661 /// IEEE-754R 2008 5.3.1: nextUp/nextDown.
3663 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
3664 /// appropriate sign switching before/after the computation.
3665 IEEEFloat::opStatus IEEEFloat::next(bool nextDown) {
3666 // If we are performing nextDown, swap sign so we have -x.
3670 // Compute nextUp(x)
3671 opStatus result = opOK;
3673 // Handle each float category separately.
3676 // nextUp(+inf) = +inf
3679 // nextUp(-inf) = -getLargest()
3683 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
3684 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
3685 // change the payload.
3686 if (isSignaling()) {
3687 result = opInvalidOp;
3688 // For consistency, propagate the sign of the sNaN to the qNaN.
3689 makeNaN(false, isNegative(), nullptr);
3693 // nextUp(pm 0) = +getSmallest()
3694 makeSmallest(false);
3697 // nextUp(-getSmallest()) = -0
3698 if (isSmallest() && isNegative()) {
3699 APInt::tcSet(significandParts(), 0, partCount());
3705 // nextUp(getLargest()) == INFINITY
3706 if (isLargest() && !isNegative()) {
3707 APInt::tcSet(significandParts(), 0, partCount());
3708 category = fcInfinity;
3709 exponent = semantics->maxExponent + 1;
3713 // nextUp(normal) == normal + inc.
3715 // If we are negative, we need to decrement the significand.
3717 // We only cross a binade boundary that requires adjusting the exponent
3719 // 1. exponent != semantics->minExponent. This implies we are not in the
3720 // smallest binade or are dealing with denormals.
3721 // 2. Our significand excluding the integral bit is all zeros.
3722 bool WillCrossBinadeBoundary =
3723 exponent != semantics->minExponent && isSignificandAllZeros();
3725 // Decrement the significand.
3727 // We always do this since:
3728 // 1. If we are dealing with a non-binade decrement, by definition we
3729 // just decrement the significand.
3730 // 2. If we are dealing with a normal -> normal binade decrement, since
3731 // we have an explicit integral bit the fact that all bits but the
3732 // integral bit are zero implies that subtracting one will yield a
3733 // significand with 0 integral bit and 1 in all other spots. Thus we
3734 // must just adjust the exponent and set the integral bit to 1.
3735 // 3. If we are dealing with a normal -> denormal binade decrement,
3736 // since we set the integral bit to 0 when we represent denormals, we
3737 // just decrement the significand.
3738 integerPart *Parts = significandParts();
3739 APInt::tcDecrement(Parts, partCount());
3741 if (WillCrossBinadeBoundary) {
3742 // Our result is a normal number. Do the following:
3743 // 1. Set the integral bit to 1.
3744 // 2. Decrement the exponent.
3745 APInt::tcSetBit(Parts, semantics->precision - 1);
3749 // If we are positive, we need to increment the significand.
3751 // We only cross a binade boundary that requires adjusting the exponent if
3752 // the input is not a denormal and all of said input's significand bits
3753 // are set. If all of said conditions are true: clear the significand, set
3754 // the integral bit to 1, and increment the exponent. If we have a
3755 // denormal always increment since moving denormals and the numbers in the
3756 // smallest normal binade have the same exponent in our representation.
3757 bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes();
3759 if (WillCrossBinadeBoundary) {
3760 integerPart *Parts = significandParts();
3761 APInt::tcSet(Parts, 0, partCount());
3762 APInt::tcSetBit(Parts, semantics->precision - 1);
3763 assert(exponent != semantics->maxExponent &&
3764 "We can not increment an exponent beyond the maxExponent allowed"
3765 " by the given floating point semantics.");
3768 incrementSignificand();
3774 // If we are performing nextDown, swap sign so we have -nextUp(-x)
3781 void IEEEFloat::makeInf(bool Negative) {
3782 category = fcInfinity;
3784 exponent = semantics->maxExponent + 1;
3785 APInt::tcSet(significandParts(), 0, partCount());
3788 void IEEEFloat::makeZero(bool Negative) {
3791 exponent = semantics->minExponent-1;
3792 APInt::tcSet(significandParts(), 0, partCount());
3795 void IEEEFloat::makeQuiet() {
3797 APInt::tcSetBit(significandParts(), semantics->precision - 2);
3800 int ilogb(const IEEEFloat &Arg) {
3802 return IEEEFloat::IEK_NaN;
3804 return IEEEFloat::IEK_Zero;
3805 if (Arg.isInfinity())
3806 return IEEEFloat::IEK_Inf;
3807 if (!Arg.isDenormal())
3808 return Arg.exponent;
3810 IEEEFloat Normalized(Arg);
3811 int SignificandBits = Arg.getSemantics().precision - 1;
3813 Normalized.exponent += SignificandBits;
3814 Normalized.normalize(IEEEFloat::rmNearestTiesToEven, lfExactlyZero);
3815 return Normalized.exponent - SignificandBits;
3818 IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode RoundingMode) {
3819 auto MaxExp = X.getSemantics().maxExponent;
3820 auto MinExp = X.getSemantics().minExponent;
3822 // If Exp is wildly out-of-scale, simply adding it to X.exponent will
3823 // overflow; clamp it to a safe range before adding, but ensure that the range
3824 // is large enough that the clamp does not change the result. The range we
3825 // need to support is the difference between the largest possible exponent and
3826 // the normalized exponent of half the smallest denormal.
3828 int SignificandBits = X.getSemantics().precision - 1;
3829 int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;
3831 // Clamp to one past the range ends to let normalize handle overlflow.
3832 X.exponent += std::min(std::max(Exp, -MaxIncrement - 1), MaxIncrement);
3833 X.normalize(RoundingMode, lfExactlyZero);
3839 IEEEFloat frexp(const IEEEFloat &Val, int &Exp, IEEEFloat::roundingMode RM) {
3842 // Quiet signalling nans.
3843 if (Exp == IEEEFloat::IEK_NaN) {
3844 IEEEFloat Quiet(Val);
3849 if (Exp == IEEEFloat::IEK_Inf)
3852 // 1 is added because frexp is defined to return a normalized fraction in
3853 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
3854 Exp = Exp == IEEEFloat::IEK_Zero ? 0 : Exp + 1;
3855 return scalbn(Val, -Exp, RM);
3858 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S)
3859 : Semantics(&S), Floats(new APFloat[2]{APFloat(semPPCDoubleDoubleImpl),
3860 APFloat(semIEEEdouble)}) {
3861 assert(Semantics == &semPPCDoubleDouble);
3864 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag)
3866 Floats(new APFloat[2]{APFloat(semPPCDoubleDoubleImpl, uninitialized),
3867 APFloat(semIEEEdouble, uninitialized)}) {
3868 assert(Semantics == &semPPCDoubleDouble);
3871 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I)
3872 : Semantics(&S), Floats(new APFloat[2]{APFloat(semPPCDoubleDoubleImpl, I),
3873 APFloat(semIEEEdouble)}) {
3874 assert(Semantics == &semPPCDoubleDouble);
3877 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I)
3878 : Semantics(&S), Floats(new APFloat[2]{
3879 APFloat(semPPCDoubleDoubleImpl, I),
3880 APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
3881 assert(Semantics == &semPPCDoubleDouble);
3884 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First,
3887 Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
3888 assert(Semantics == &semPPCDoubleDouble);
3889 // TODO Check for First == &IEEEdouble once the transition is done.
3890 assert(&Floats[0].getSemantics() == &semPPCDoubleDoubleImpl ||
3891 &Floats[0].getSemantics() == &semIEEEdouble);
3892 assert(&Floats[1].getSemantics() == &semIEEEdouble);
3895 DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS)
3896 : Semantics(RHS.Semantics),
3897 Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]),
3898 APFloat(RHS.Floats[1])}
3900 assert(Semantics == &semPPCDoubleDouble);
3903 DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS)
3904 : Semantics(RHS.Semantics), Floats(std::move(RHS.Floats)) {
3905 RHS.Semantics = &semBogus;
3906 assert(Semantics == &semPPCDoubleDouble);
3909 DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) {
3910 if (Semantics == RHS.Semantics && RHS.Floats) {
3911 Floats[0] = RHS.Floats[0];
3912 Floats[1] = RHS.Floats[1];
3913 } else if (this != &RHS) {
3914 this->~DoubleAPFloat();
3915 new (this) DoubleAPFloat(RHS);
3920 // "Software for Doubled-Precision Floating-Point Computations",
3921 // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
3922 APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa,
3923 const APFloat &c, const APFloat &cc,
3927 Status |= z.add(c, RM);
3928 if (!z.isFinite()) {
3929 if (!z.isInfinity()) {
3930 Floats[0] = std::move(z);
3931 Floats[1].makeZero(false);
3932 return (opStatus)Status;
3935 auto AComparedToC = a.compareAbsoluteValue(c);
3937 Status |= z.add(aa, RM);
3938 if (AComparedToC == APFloat::cmpGreaterThan) {
3939 // z = cc + aa + c + a;
3940 Status |= z.add(c, RM);
3941 Status |= z.add(a, RM);
3943 // z = cc + aa + a + c;
3944 Status |= z.add(a, RM);
3945 Status |= z.add(c, RM);
3947 if (!z.isFinite()) {
3948 Floats[0] = std::move(z);
3949 Floats[1].makeZero(false);
3950 return (opStatus)Status;
3954 Status |= zz.add(cc, RM);
3955 if (AComparedToC == APFloat::cmpGreaterThan) {
3956 // Floats[1] = a - z + c + zz;
3958 Status |= Floats[1].subtract(z, RM);
3959 Status |= Floats[1].add(c, RM);
3960 Status |= Floats[1].add(zz, RM);
3962 // Floats[1] = c - z + a + zz;
3964 Status |= Floats[1].subtract(z, RM);
3965 Status |= Floats[1].add(a, RM);
3966 Status |= Floats[1].add(zz, RM);
3971 Status |= q.subtract(z, RM);
3973 // zz = q + c + (a - (q + z)) + aa + cc;
3974 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
3976 Status |= zz.add(c, RM);
3977 Status |= q.add(z, RM);
3978 Status |= q.subtract(a, RM);
3980 Status |= zz.add(q, RM);
3981 Status |= zz.add(aa, RM);
3982 Status |= zz.add(cc, RM);
3983 if (zz.isZero() && !zz.isNegative()) {
3984 Floats[0] = std::move(z);
3985 Floats[1].makeZero(false);
3989 Status |= Floats[0].add(zz, RM);
3990 if (!Floats[0].isFinite()) {
3991 Floats[1].makeZero(false);
3992 return (opStatus)Status;
3994 Floats[1] = std::move(z);
3995 Status |= Floats[1].subtract(Floats[0], RM);
3996 Status |= Floats[1].add(zz, RM);
3998 return (opStatus)Status;
4001 APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS,
4002 const DoubleAPFloat &RHS,
4005 if (LHS.getCategory() == fcNaN) {
4009 if (RHS.getCategory() == fcNaN) {
4013 if (LHS.getCategory() == fcZero) {
4017 if (RHS.getCategory() == fcZero) {
4021 if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity &&
4022 LHS.isNegative() != RHS.isNegative()) {
4023 Out.makeNaN(false, Out.isNegative(), nullptr);
4026 if (LHS.getCategory() == fcInfinity) {
4030 if (RHS.getCategory() == fcInfinity) {
4034 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal);
4036 // These conversions will go away once PPCDoubleDoubleImpl goes away.
4037 // (PPCDoubleDoubleImpl, IEEEDouble) -> (IEEEDouble, IEEEDouble)
4038 APFloat A(semIEEEdouble,
4039 APInt(64, LHS.Floats[0].bitcastToAPInt().getRawData()[0])),
4041 C(semIEEEdouble, APInt(64, RHS.Floats[0].bitcastToAPInt().getRawData()[0])),
4043 assert(&AA.getSemantics() == &semIEEEdouble);
4044 assert(&CC.getSemantics() == &semIEEEdouble);
4045 Out.Floats[0] = APFloat(semIEEEdouble);
4046 assert(&Out.Floats[1].getSemantics() == &semIEEEdouble);
4048 auto Ret = Out.addImpl(A, AA, C, CC, RM);
4050 // (IEEEDouble, IEEEDouble) -> (PPCDoubleDoubleImpl, IEEEDouble)
4051 uint64_t Buffer[] = {Out.Floats[0].bitcastToAPInt().getRawData()[0],
4052 Out.Floats[1].bitcastToAPInt().getRawData()[0]};
4053 Out.Floats[0] = APFloat(semPPCDoubleDoubleImpl, APInt(128, 2, Buffer));
4057 APFloat::opStatus DoubleAPFloat::add(const DoubleAPFloat &RHS,
4059 return addWithSpecial(*this, RHS, *this, RM);
4062 APFloat::opStatus DoubleAPFloat::subtract(const DoubleAPFloat &RHS,
4065 auto Ret = add(RHS, RM);
4070 void DoubleAPFloat::changeSign() {
4071 Floats[0].changeSign();
4072 Floats[1].changeSign();
4076 DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat &RHS) const {
4077 auto Result = Floats[0].compareAbsoluteValue(RHS.Floats[0]);
4078 if (Result != cmpEqual)
4080 Result = Floats[1].compareAbsoluteValue(RHS.Floats[1]);
4081 if (Result == cmpLessThan || Result == cmpGreaterThan) {
4082 auto Against = Floats[0].isNegative() ^ Floats[1].isNegative();
4083 auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative();
4084 if (Against && !RHSAgainst)
4086 if (!Against && RHSAgainst)
4087 return cmpGreaterThan;
4088 if (!Against && !RHSAgainst)
4090 if (Against && RHSAgainst)
4091 return (cmpResult)(cmpLessThan + cmpGreaterThan - Result);
4096 APFloat::fltCategory DoubleAPFloat::getCategory() const {
4097 return Floats[0].getCategory();
4100 bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); }
4102 void DoubleAPFloat::makeInf(bool Neg) {
4103 Floats[0].makeInf(Neg);
4104 Floats[1].makeZero(false);
4107 void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) {
4108 Floats[0].makeNaN(SNaN, Neg, fill);
4109 Floats[1].makeZero(false);
4112 } // End detail namespace
4114 APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
4115 if (usesLayout<IEEEFloat>(Semantics)) {
4116 new (&IEEE) IEEEFloat(std::move(F));
4119 if (usesLayout<DoubleAPFloat>(Semantics)) {
4121 DoubleAPFloat(Semantics, APFloat(std::move(F), F.getSemantics()),
4122 APFloat(semIEEEdouble));
4125 llvm_unreachable("Unexpected semantics");
4128 APFloat::opStatus APFloat::convertFromString(StringRef Str, roundingMode RM) {
4129 return getIEEE().convertFromString(Str, RM);
4132 hash_code hash_value(const APFloat &Arg) { return hash_value(Arg.getIEEE()); }
4134 APFloat::APFloat(const fltSemantics &Semantics, StringRef S)
4135 : APFloat(Semantics) {
4136 convertFromString(S, rmNearestTiesToEven);
4139 APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics,
4140 roundingMode RM, bool *losesInfo) {
4141 if (&getSemantics() == &ToSemantics)
4143 if (usesLayout<IEEEFloat>(getSemantics()) &&
4144 usesLayout<IEEEFloat>(ToSemantics))
4145 return U.IEEE.convert(ToSemantics, RM, losesInfo);
4146 if (usesLayout<IEEEFloat>(getSemantics()) &&
4147 usesLayout<DoubleAPFloat>(ToSemantics)) {
4148 assert(&ToSemantics == &semPPCDoubleDouble);
4149 auto Ret = U.IEEE.convert(semPPCDoubleDoubleImpl, RM, losesInfo);
4150 *this = APFloat(DoubleAPFloat(semPPCDoubleDouble, std::move(*this),
4151 APFloat(semIEEEdouble)),
4155 if (usesLayout<DoubleAPFloat>(getSemantics()) &&
4156 usesLayout<IEEEFloat>(ToSemantics)) {
4157 auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo);
4158 *this = APFloat(std::move(getIEEE()), ToSemantics);
4161 llvm_unreachable("Unexpected semantics");
4164 APFloat APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE) {
4168 return APFloat(semIEEEhalf, APInt::getAllOnesValue(BitWidth));
4170 return APFloat(semIEEEsingle, APInt::getAllOnesValue(BitWidth));
4172 return APFloat(semIEEEdouble, APInt::getAllOnesValue(BitWidth));
4174 return APFloat(semX87DoubleExtended, APInt::getAllOnesValue(BitWidth));
4176 return APFloat(semIEEEquad, APInt::getAllOnesValue(BitWidth));
4178 llvm_unreachable("Unknown floating bit width");
4181 assert(BitWidth == 128);
4182 return APFloat(semPPCDoubleDouble, APInt::getAllOnesValue(BitWidth));
4186 void APFloat::print(raw_ostream &OS) const {
4187 SmallVector<char, 16> Buffer;
4189 OS << Buffer << "\n";
4192 void APFloat::dump() const { print(dbgs()); }
4194 } // End llvm namespace