1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/APSInt.h"
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/FoldingSet.h"
19 #include "llvm/ADT/Hashing.h"
20 #include "llvm/ADT/StringExtras.h"
21 #include "llvm/ADT/StringRef.h"
22 #include "llvm/Support/Debug.h"
23 #include "llvm/Support/ErrorHandling.h"
24 #include "llvm/Support/MathExtras.h"
25 #include "llvm/Support/raw_ostream.h"
29 #define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL) \
31 if (usesLayout<IEEEFloat>(getSemantics())) \
32 return U.IEEE.METHOD_CALL; \
33 if (usesLayout<DoubleAPFloat>(getSemantics())) \
34 return U.Double.METHOD_CALL; \
35 llvm_unreachable("Unexpected semantics"); \
40 /// A macro used to combine two fcCategory enums into one key which can be used
41 /// in a switch statement to classify how the interaction of two APFloat's
42 /// categories affects an operation.
44 /// TODO: If clang source code is ever allowed to use constexpr in its own
45 /// codebase, change this into a static inline function.
46 #define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
48 /* Assumed in hexadecimal significand parsing, and conversion to
49 hexadecimal strings. */
50 static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
53 /* Represents floating point arithmetic semantics. */
55 /* The largest E such that 2^E is representable; this matches the
56 definition of IEEE 754. */
57 APFloatBase::ExponentType maxExponent;
59 /* The smallest E such that 2^E is a normalized number; this
60 matches the definition of IEEE 754. */
61 APFloatBase::ExponentType minExponent;
63 /* Number of bits in the significand. This includes the integer
65 unsigned int precision;
67 /* Number of bits actually used in the semantics. */
68 unsigned int sizeInBits;
71 static const fltSemantics semIEEEhalf = {15, -14, 11, 16};
72 static const fltSemantics semIEEEsingle = {127, -126, 24, 32};
73 static const fltSemantics semIEEEdouble = {1023, -1022, 53, 64};
74 static const fltSemantics semIEEEquad = {16383, -16382, 113, 128};
75 static const fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
76 static const fltSemantics semBogus = {0, 0, 0, 0};
78 /* The IBM double-double semantics. Such a number consists of a pair of IEEE
79 64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal,
80 (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo.
81 Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent
82 to each other, and two 11-bit exponents.
84 Note: we need to make the value different from semBogus as otherwise
85 an unsafe optimization may collapse both values to a single address,
86 and we heavily rely on them having distinct addresses. */
87 static const fltSemantics semPPCDoubleDouble = {-1, 0, 0, 0};
89 /* These are legacy semantics for the fallback, inaccrurate implementation of
90 IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the
91 operation. It's equivalent to having an IEEE number with consecutive 106
92 bits of mantissa and 11 bits of exponent.
94 It's not equivalent to IBM double-double. For example, a legit IBM
95 double-double, 1 + epsilon:
97 1 + epsilon = 1 + (1 >> 1076)
99 is not representable by a consecutive 106 bits of mantissa.
101 Currently, these semantics are used in the following way:
103 semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) ->
104 (64-bit APInt, 64-bit APInt) -> (128-bit APInt) ->
105 semPPCDoubleDoubleLegacy -> IEEE operations
107 We use bitcastToAPInt() to get the bit representation (in APInt) of the
108 underlying IEEEdouble, then use the APInt constructor to construct the
111 TODO: Implement all operations in semPPCDoubleDouble, and delete these
113 static const fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53,
116 const fltSemantics &APFloatBase::IEEEhalf() {
119 const fltSemantics &APFloatBase::IEEEsingle() {
120 return semIEEEsingle;
122 const fltSemantics &APFloatBase::IEEEdouble() {
123 return semIEEEdouble;
125 const fltSemantics &APFloatBase::IEEEquad() {
128 const fltSemantics &APFloatBase::x87DoubleExtended() {
129 return semX87DoubleExtended;
131 const fltSemantics &APFloatBase::Bogus() {
134 const fltSemantics &APFloatBase::PPCDoubleDouble() {
135 return semPPCDoubleDouble;
138 /* A tight upper bound on number of parts required to hold the value
141 power * 815 / (351 * integerPartWidth) + 1
143 However, whilst the result may require only this many parts,
144 because we are multiplying two values to get it, the
145 multiplication may require an extra part with the excess part
146 being zero (consider the trivial case of 1 * 1, tcFullMultiply
147 requires two parts to hold the single-part result). So we add an
148 extra one to guarantee enough space whilst multiplying. */
149 const unsigned int maxExponent = 16383;
150 const unsigned int maxPrecision = 113;
151 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
152 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth));
154 unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {
155 return semantics.precision;
157 APFloatBase::ExponentType
158 APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) {
159 return semantics.maxExponent;
161 APFloatBase::ExponentType
162 APFloatBase::semanticsMinExponent(const fltSemantics &semantics) {
163 return semantics.minExponent;
165 unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {
166 return semantics.sizeInBits;
169 unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) {
170 return Sem.sizeInBits;
173 /* A bunch of private, handy routines. */
175 static inline unsigned int
176 partCountForBits(unsigned int bits)
178 return ((bits) + APFloatBase::integerPartWidth - 1) / APFloatBase::integerPartWidth;
181 /* Returns 0U-9U. Return values >= 10U are not digits. */
182 static inline unsigned int
183 decDigitValue(unsigned int c)
188 /* Return the value of a decimal exponent of the form
191 If the exponent overflows, returns a large exponent with the
194 readExponent(StringRef::iterator begin, StringRef::iterator end)
197 unsigned int absExponent;
198 const unsigned int overlargeExponent = 24000; /* FIXME. */
199 StringRef::iterator p = begin;
201 assert(p != end && "Exponent has no digits");
203 isNegative = (*p == '-');
204 if (*p == '-' || *p == '+') {
206 assert(p != end && "Exponent has no digits");
209 absExponent = decDigitValue(*p++);
210 assert(absExponent < 10U && "Invalid character in exponent");
212 for (; p != end; ++p) {
215 value = decDigitValue(*p);
216 assert(value < 10U && "Invalid character in exponent");
218 value += absExponent * 10;
219 if (absExponent >= overlargeExponent) {
220 absExponent = overlargeExponent;
221 p = end; /* outwit assert below */
227 assert(p == end && "Invalid exponent in exponent");
230 return -(int) absExponent;
232 return (int) absExponent;
235 /* This is ugly and needs cleaning up, but I don't immediately see
236 how whilst remaining safe. */
238 totalExponent(StringRef::iterator p, StringRef::iterator end,
239 int exponentAdjustment)
241 int unsignedExponent;
242 bool negative, overflow;
245 assert(p != end && "Exponent has no digits");
247 negative = *p == '-';
248 if (*p == '-' || *p == '+') {
250 assert(p != end && "Exponent has no digits");
253 unsignedExponent = 0;
255 for (; p != end; ++p) {
258 value = decDigitValue(*p);
259 assert(value < 10U && "Invalid character in exponent");
261 unsignedExponent = unsignedExponent * 10 + value;
262 if (unsignedExponent > 32767) {
268 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
272 exponent = unsignedExponent;
274 exponent = -exponent;
275 exponent += exponentAdjustment;
276 if (exponent > 32767 || exponent < -32768)
281 exponent = negative ? -32768: 32767;
286 static StringRef::iterator
287 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
288 StringRef::iterator *dot)
290 StringRef::iterator p = begin;
292 while (p != end && *p == '0')
295 if (p != end && *p == '.') {
298 assert(end - begin != 1 && "Significand has no digits");
300 while (p != end && *p == '0')
307 /* Given a normal decimal floating point number of the form
311 where the decimal point and exponent are optional, fill out the
312 structure D. Exponent is appropriate if the significand is
313 treated as an integer, and normalizedExponent if the significand
314 is taken to have the decimal point after a single leading
317 If the value is zero, V->firstSigDigit points to a non-digit, and
318 the return exponent is zero.
321 const char *firstSigDigit;
322 const char *lastSigDigit;
324 int normalizedExponent;
328 interpretDecimal(StringRef::iterator begin, StringRef::iterator end,
331 StringRef::iterator dot = end;
332 StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot);
334 D->firstSigDigit = p;
336 D->normalizedExponent = 0;
338 for (; p != end; ++p) {
340 assert(dot == end && "String contains multiple dots");
345 if (decDigitValue(*p) >= 10U)
350 assert((*p == 'e' || *p == 'E') && "Invalid character in significand");
351 assert(p != begin && "Significand has no digits");
352 assert((dot == end || p - begin != 1) && "Significand has no digits");
354 /* p points to the first non-digit in the string */
355 D->exponent = readExponent(p + 1, end);
357 /* Implied decimal point? */
362 /* If number is all zeroes accept any exponent. */
363 if (p != D->firstSigDigit) {
364 /* Drop insignificant trailing zeroes. */
369 while (p != begin && *p == '0');
370 while (p != begin && *p == '.');
373 /* Adjust the exponents for any decimal point. */
374 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
375 D->normalizedExponent = (D->exponent +
376 static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
377 - (dot > D->firstSigDigit && dot < p)));
383 /* Return the trailing fraction of a hexadecimal number.
384 DIGITVALUE is the first hex digit of the fraction, P points to
387 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
388 unsigned int digitValue)
390 unsigned int hexDigit;
392 /* If the first trailing digit isn't 0 or 8 we can work out the
393 fraction immediately. */
395 return lfMoreThanHalf;
396 else if (digitValue < 8 && digitValue > 0)
397 return lfLessThanHalf;
399 // Otherwise we need to find the first non-zero digit.
400 while (p != end && (*p == '0' || *p == '.'))
403 assert(p != end && "Invalid trailing hexadecimal fraction!");
405 hexDigit = hexDigitValue(*p);
407 /* If we ran off the end it is exactly zero or one-half, otherwise
410 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
412 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
415 /* Return the fraction lost were a bignum truncated losing the least
416 significant BITS bits. */
418 lostFractionThroughTruncation(const APFloatBase::integerPart *parts,
419 unsigned int partCount,
424 lsb = APInt::tcLSB(parts, partCount);
426 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
428 return lfExactlyZero;
430 return lfExactlyHalf;
431 if (bits <= partCount * APFloatBase::integerPartWidth &&
432 APInt::tcExtractBit(parts, bits - 1))
433 return lfMoreThanHalf;
435 return lfLessThanHalf;
438 /* Shift DST right BITS bits noting lost fraction. */
440 shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
442 lostFraction lost_fraction;
444 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
446 APInt::tcShiftRight(dst, parts, bits);
448 return lost_fraction;
451 /* Combine the effect of two lost fractions. */
453 combineLostFractions(lostFraction moreSignificant,
454 lostFraction lessSignificant)
456 if (lessSignificant != lfExactlyZero) {
457 if (moreSignificant == lfExactlyZero)
458 moreSignificant = lfLessThanHalf;
459 else if (moreSignificant == lfExactlyHalf)
460 moreSignificant = lfMoreThanHalf;
463 return moreSignificant;
466 /* The error from the true value, in half-ulps, on multiplying two
467 floating point numbers, which differ from the value they
468 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
469 than the returned value.
471 See "How to Read Floating Point Numbers Accurately" by William D
474 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
476 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
478 if (HUerr1 + HUerr2 == 0)
479 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
481 return inexactMultiply + 2 * (HUerr1 + HUerr2);
484 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
485 when the least significant BITS are truncated. BITS cannot be
487 static APFloatBase::integerPart
488 ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits,
490 unsigned int count, partBits;
491 APFloatBase::integerPart part, boundary;
496 count = bits / APFloatBase::integerPartWidth;
497 partBits = bits % APFloatBase::integerPartWidth + 1;
499 part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits));
502 boundary = (APFloatBase::integerPart) 1 << (partBits - 1);
507 if (part - boundary <= boundary - part)
508 return part - boundary;
510 return boundary - part;
513 if (part == boundary) {
516 return ~(APFloatBase::integerPart) 0; /* A lot. */
519 } else if (part == boundary - 1) {
522 return ~(APFloatBase::integerPart) 0; /* A lot. */
527 return ~(APFloatBase::integerPart) 0; /* A lot. */
530 /* Place pow(5, power) in DST, and return the number of parts used.
531 DST must be at least one part larger than size of the answer. */
533 powerOf5(APFloatBase::integerPart *dst, unsigned int power) {
534 static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
535 APFloatBase::integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
536 pow5s[0] = 78125 * 5;
538 unsigned int partsCount[16] = { 1 };
539 APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
541 assert(power <= maxExponent);
546 *p1 = firstEightPowers[power & 7];
552 for (unsigned int n = 0; power; power >>= 1, n++) {
557 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
559 pc = partsCount[n - 1];
560 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
562 if (pow5[pc - 1] == 0)
568 APFloatBase::integerPart *tmp;
570 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
572 if (p2[result - 1] == 0)
575 /* Now result is in p1 with partsCount parts and p2 is scratch
586 APInt::tcAssign(dst, p1, result);
591 /* Zero at the end to avoid modular arithmetic when adding one; used
592 when rounding up during hexadecimal output. */
593 static const char hexDigitsLower[] = "0123456789abcdef0";
594 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
595 static const char infinityL[] = "infinity";
596 static const char infinityU[] = "INFINITY";
597 static const char NaNL[] = "nan";
598 static const char NaNU[] = "NAN";
600 /* Write out an integerPart in hexadecimal, starting with the most
601 significant nibble. Write out exactly COUNT hexdigits, return
604 partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count,
605 const char *hexDigitChars)
607 unsigned int result = count;
609 assert(count != 0 && count <= APFloatBase::integerPartWidth / 4);
611 part >>= (APFloatBase::integerPartWidth - 4 * count);
613 dst[count] = hexDigitChars[part & 0xf];
620 /* Write out an unsigned decimal integer. */
622 writeUnsignedDecimal (char *dst, unsigned int n)
638 /* Write out a signed decimal integer. */
640 writeSignedDecimal (char *dst, int value)
644 dst = writeUnsignedDecimal(dst, -(unsigned) value);
646 dst = writeUnsignedDecimal(dst, value);
653 void IEEEFloat::initialize(const fltSemantics *ourSemantics) {
656 semantics = ourSemantics;
659 significand.parts = new integerPart[count];
662 void IEEEFloat::freeSignificand() {
664 delete [] significand.parts;
667 void IEEEFloat::assign(const IEEEFloat &rhs) {
668 assert(semantics == rhs.semantics);
671 category = rhs.category;
672 exponent = rhs.exponent;
673 if (isFiniteNonZero() || category == fcNaN)
674 copySignificand(rhs);
677 void IEEEFloat::copySignificand(const IEEEFloat &rhs) {
678 assert(isFiniteNonZero() || category == fcNaN);
679 assert(rhs.partCount() >= partCount());
681 APInt::tcAssign(significandParts(), rhs.significandParts(),
685 /* Make this number a NaN, with an arbitrary but deterministic value
686 for the significand. If double or longer, this is a signalling NaN,
687 which may not be ideal. If float, this is QNaN(0). */
688 void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
692 integerPart *significand = significandParts();
693 unsigned numParts = partCount();
695 // Set the significand bits to the fill.
696 if (!fill || fill->getNumWords() < numParts)
697 APInt::tcSet(significand, 0, numParts);
699 APInt::tcAssign(significand, fill->getRawData(),
700 std::min(fill->getNumWords(), numParts));
702 // Zero out the excess bits of the significand.
703 unsigned bitsToPreserve = semantics->precision - 1;
704 unsigned part = bitsToPreserve / 64;
705 bitsToPreserve %= 64;
706 significand[part] &= ((1ULL << bitsToPreserve) - 1);
707 for (part++; part != numParts; ++part)
708 significand[part] = 0;
711 unsigned QNaNBit = semantics->precision - 2;
714 // We always have to clear the QNaN bit to make it an SNaN.
715 APInt::tcClearBit(significand, QNaNBit);
717 // If there are no bits set in the payload, we have to set
718 // *something* to make it a NaN instead of an infinity;
719 // conventionally, this is the next bit down from the QNaN bit.
720 if (APInt::tcIsZero(significand, numParts))
721 APInt::tcSetBit(significand, QNaNBit - 1);
723 // We always have to set the QNaN bit to make it a QNaN.
724 APInt::tcSetBit(significand, QNaNBit);
727 // For x87 extended precision, we want to make a NaN, not a
728 // pseudo-NaN. Maybe we should expose the ability to make
730 if (semantics == &semX87DoubleExtended)
731 APInt::tcSetBit(significand, QNaNBit + 1);
734 IEEEFloat &IEEEFloat::operator=(const IEEEFloat &rhs) {
736 if (semantics != rhs.semantics) {
738 initialize(rhs.semantics);
746 IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) {
749 semantics = rhs.semantics;
750 significand = rhs.significand;
751 exponent = rhs.exponent;
752 category = rhs.category;
755 rhs.semantics = &semBogus;
759 bool IEEEFloat::isDenormal() const {
760 return isFiniteNonZero() && (exponent == semantics->minExponent) &&
761 (APInt::tcExtractBit(significandParts(),
762 semantics->precision - 1) == 0);
765 bool IEEEFloat::isSmallest() const {
766 // The smallest number by magnitude in our format will be the smallest
767 // denormal, i.e. the floating point number with exponent being minimum
768 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
769 return isFiniteNonZero() && exponent == semantics->minExponent &&
770 significandMSB() == 0;
773 bool IEEEFloat::isSignificandAllOnes() const {
774 // Test if the significand excluding the integral bit is all ones. This allows
775 // us to test for binade boundaries.
776 const integerPart *Parts = significandParts();
777 const unsigned PartCount = partCount();
778 for (unsigned i = 0; i < PartCount - 1; i++)
782 // Set the unused high bits to all ones when we compare.
783 const unsigned NumHighBits =
784 PartCount*integerPartWidth - semantics->precision + 1;
785 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to "
786 "fill than integerPartWidth");
787 const integerPart HighBitFill =
788 ~integerPart(0) << (integerPartWidth - NumHighBits);
789 if (~(Parts[PartCount - 1] | HighBitFill))
795 bool IEEEFloat::isSignificandAllZeros() const {
796 // Test if the significand excluding the integral bit is all zeros. This
797 // allows us to test for binade boundaries.
798 const integerPart *Parts = significandParts();
799 const unsigned PartCount = partCount();
801 for (unsigned i = 0; i < PartCount - 1; i++)
805 const unsigned NumHighBits =
806 PartCount*integerPartWidth - semantics->precision + 1;
807 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to "
808 "clear than integerPartWidth");
809 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
811 if (Parts[PartCount - 1] & HighBitMask)
817 bool IEEEFloat::isLargest() const {
818 // The largest number by magnitude in our format will be the floating point
819 // number with maximum exponent and with significand that is all ones.
820 return isFiniteNonZero() && exponent == semantics->maxExponent
821 && isSignificandAllOnes();
824 bool IEEEFloat::isInteger() const {
825 // This could be made more efficient; I'm going for obviously correct.
826 if (!isFinite()) return false;
827 IEEEFloat truncated = *this;
828 truncated.roundToIntegral(rmTowardZero);
829 return compare(truncated) == cmpEqual;
832 bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const {
835 if (semantics != rhs.semantics ||
836 category != rhs.category ||
839 if (category==fcZero || category==fcInfinity)
842 if (isFiniteNonZero() && exponent != rhs.exponent)
845 return std::equal(significandParts(), significandParts() + partCount(),
846 rhs.significandParts());
849 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) {
850 initialize(&ourSemantics);
854 exponent = ourSemantics.precision - 1;
855 significandParts()[0] = value;
856 normalize(rmNearestTiesToEven, lfExactlyZero);
859 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) {
860 initialize(&ourSemantics);
865 // Delegate to the previous constructor, because later copy constructor may
866 // actually inspects category, which can't be garbage.
867 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, uninitializedTag tag)
868 : IEEEFloat(ourSemantics) {}
870 IEEEFloat::IEEEFloat(const IEEEFloat &rhs) {
871 initialize(rhs.semantics);
875 IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) {
876 *this = std::move(rhs);
879 IEEEFloat::~IEEEFloat() { freeSignificand(); }
881 unsigned int IEEEFloat::partCount() const {
882 return partCountForBits(semantics->precision + 1);
885 const IEEEFloat::integerPart *IEEEFloat::significandParts() const {
886 return const_cast<IEEEFloat *>(this)->significandParts();
889 IEEEFloat::integerPart *IEEEFloat::significandParts() {
891 return significand.parts;
893 return &significand.part;
896 void IEEEFloat::zeroSignificand() {
897 APInt::tcSet(significandParts(), 0, partCount());
900 /* Increment an fcNormal floating point number's significand. */
901 void IEEEFloat::incrementSignificand() {
904 carry = APInt::tcIncrement(significandParts(), partCount());
906 /* Our callers should never cause us to overflow. */
911 /* Add the significand of the RHS. Returns the carry flag. */
912 IEEEFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) {
915 parts = significandParts();
917 assert(semantics == rhs.semantics);
918 assert(exponent == rhs.exponent);
920 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
923 /* Subtract the significand of the RHS with a borrow flag. Returns
925 IEEEFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
926 integerPart borrow) {
929 parts = significandParts();
931 assert(semantics == rhs.semantics);
932 assert(exponent == rhs.exponent);
934 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
938 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
939 on to the full-precision result of the multiplication. Returns the
941 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
942 const IEEEFloat *addend) {
943 unsigned int omsb; // One, not zero, based MSB.
944 unsigned int partsCount, newPartsCount, precision;
945 integerPart *lhsSignificand;
946 integerPart scratch[4];
947 integerPart *fullSignificand;
948 lostFraction lost_fraction;
951 assert(semantics == rhs.semantics);
953 precision = semantics->precision;
955 // Allocate space for twice as many bits as the original significand, plus one
956 // extra bit for the addition to overflow into.
957 newPartsCount = partCountForBits(precision * 2 + 1);
959 if (newPartsCount > 4)
960 fullSignificand = new integerPart[newPartsCount];
962 fullSignificand = scratch;
964 lhsSignificand = significandParts();
965 partsCount = partCount();
967 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
968 rhs.significandParts(), partsCount, partsCount);
970 lost_fraction = lfExactlyZero;
971 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
972 exponent += rhs.exponent;
974 // Assume the operands involved in the multiplication are single-precision
975 // FP, and the two multiplicants are:
976 // *this = a23 . a22 ... a0 * 2^e1
977 // rhs = b23 . b22 ... b0 * 2^e2
978 // the result of multiplication is:
979 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
980 // Note that there are three significant bits at the left-hand side of the
981 // radix point: two for the multiplication, and an overflow bit for the
982 // addition (that will always be zero at this point). Move the radix point
983 // toward left by two bits, and adjust exponent accordingly.
986 if (addend && addend->isNonZero()) {
987 // The intermediate result of the multiplication has "2 * precision"
988 // signicant bit; adjust the addend to be consistent with mul result.
990 Significand savedSignificand = significand;
991 const fltSemantics *savedSemantics = semantics;
992 fltSemantics extendedSemantics;
994 unsigned int extendedPrecision;
996 // Normalize our MSB to one below the top bit to allow for overflow.
997 extendedPrecision = 2 * precision + 1;
998 if (omsb != extendedPrecision - 1) {
999 assert(extendedPrecision > omsb);
1000 APInt::tcShiftLeft(fullSignificand, newPartsCount,
1001 (extendedPrecision - 1) - omsb);
1002 exponent -= (extendedPrecision - 1) - omsb;
1005 /* Create new semantics. */
1006 extendedSemantics = *semantics;
1007 extendedSemantics.precision = extendedPrecision;
1009 if (newPartsCount == 1)
1010 significand.part = fullSignificand[0];
1012 significand.parts = fullSignificand;
1013 semantics = &extendedSemantics;
1015 IEEEFloat extendedAddend(*addend);
1016 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
1017 assert(status == opOK);
1020 // Shift the significand of the addend right by one bit. This guarantees
1021 // that the high bit of the significand is zero (same as fullSignificand),
1022 // so the addition will overflow (if it does overflow at all) into the top bit.
1023 lost_fraction = extendedAddend.shiftSignificandRight(1);
1024 assert(lost_fraction == lfExactlyZero &&
1025 "Lost precision while shifting addend for fused-multiply-add.");
1027 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
1029 /* Restore our state. */
1030 if (newPartsCount == 1)
1031 fullSignificand[0] = significand.part;
1032 significand = savedSignificand;
1033 semantics = savedSemantics;
1035 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1038 // Convert the result having "2 * precision" significant-bits back to the one
1039 // having "precision" significant-bits. First, move the radix point from
1040 // poision "2*precision - 1" to "precision - 1". The exponent need to be
1041 // adjusted by "2*precision - 1" - "precision - 1" = "precision".
1042 exponent -= precision + 1;
1044 // In case MSB resides at the left-hand side of radix point, shift the
1045 // mantissa right by some amount to make sure the MSB reside right before
1046 // the radix point (i.e. "MSB . rest-significant-bits").
1048 // Note that the result is not normalized when "omsb < precision". So, the
1049 // caller needs to call IEEEFloat::normalize() if normalized value is
1051 if (omsb > precision) {
1052 unsigned int bits, significantParts;
1055 bits = omsb - precision;
1056 significantParts = partCountForBits(omsb);
1057 lf = shiftRight(fullSignificand, significantParts, bits);
1058 lost_fraction = combineLostFractions(lf, lost_fraction);
1062 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
1064 if (newPartsCount > 4)
1065 delete [] fullSignificand;
1067 return lost_fraction;
1070 /* Multiply the significands of LHS and RHS to DST. */
1071 lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
1072 unsigned int bit, i, partsCount;
1073 const integerPart *rhsSignificand;
1074 integerPart *lhsSignificand, *dividend, *divisor;
1075 integerPart scratch[4];
1076 lostFraction lost_fraction;
1078 assert(semantics == rhs.semantics);
1080 lhsSignificand = significandParts();
1081 rhsSignificand = rhs.significandParts();
1082 partsCount = partCount();
1085 dividend = new integerPart[partsCount * 2];
1089 divisor = dividend + partsCount;
1091 /* Copy the dividend and divisor as they will be modified in-place. */
1092 for (i = 0; i < partsCount; i++) {
1093 dividend[i] = lhsSignificand[i];
1094 divisor[i] = rhsSignificand[i];
1095 lhsSignificand[i] = 0;
1098 exponent -= rhs.exponent;
1100 unsigned int precision = semantics->precision;
1102 /* Normalize the divisor. */
1103 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1106 APInt::tcShiftLeft(divisor, partsCount, bit);
1109 /* Normalize the dividend. */
1110 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1113 APInt::tcShiftLeft(dividend, partsCount, bit);
1116 /* Ensure the dividend >= divisor initially for the loop below.
1117 Incidentally, this means that the division loop below is
1118 guaranteed to set the integer bit to one. */
1119 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1121 APInt::tcShiftLeft(dividend, partsCount, 1);
1122 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1125 /* Long division. */
1126 for (bit = precision; bit; bit -= 1) {
1127 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1128 APInt::tcSubtract(dividend, divisor, 0, partsCount);
1129 APInt::tcSetBit(lhsSignificand, bit - 1);
1132 APInt::tcShiftLeft(dividend, partsCount, 1);
1135 /* Figure out the lost fraction. */
1136 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1139 lost_fraction = lfMoreThanHalf;
1141 lost_fraction = lfExactlyHalf;
1142 else if (APInt::tcIsZero(dividend, partsCount))
1143 lost_fraction = lfExactlyZero;
1145 lost_fraction = lfLessThanHalf;
1150 return lost_fraction;
1153 unsigned int IEEEFloat::significandMSB() const {
1154 return APInt::tcMSB(significandParts(), partCount());
1157 unsigned int IEEEFloat::significandLSB() const {
1158 return APInt::tcLSB(significandParts(), partCount());
1161 /* Note that a zero result is NOT normalized to fcZero. */
1162 lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
1163 /* Our exponent should not overflow. */
1164 assert((ExponentType) (exponent + bits) >= exponent);
1168 return shiftRight(significandParts(), partCount(), bits);
1171 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
1172 void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
1173 assert(bits < semantics->precision);
1176 unsigned int partsCount = partCount();
1178 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1181 assert(!APInt::tcIsZero(significandParts(), partsCount));
1185 IEEEFloat::cmpResult
1186 IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const {
1189 assert(semantics == rhs.semantics);
1190 assert(isFiniteNonZero());
1191 assert(rhs.isFiniteNonZero());
1193 compare = exponent - rhs.exponent;
1195 /* If exponents are equal, do an unsigned bignum comparison of the
1198 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1202 return cmpGreaterThan;
1203 else if (compare < 0)
1209 /* Handle overflow. Sign is preserved. We either become infinity or
1210 the largest finite number. */
1211 IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
1213 if (rounding_mode == rmNearestTiesToEven ||
1214 rounding_mode == rmNearestTiesToAway ||
1215 (rounding_mode == rmTowardPositive && !sign) ||
1216 (rounding_mode == rmTowardNegative && sign)) {
1217 category = fcInfinity;
1218 return (opStatus) (opOverflow | opInexact);
1221 /* Otherwise we become the largest finite number. */
1222 category = fcNormal;
1223 exponent = semantics->maxExponent;
1224 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1225 semantics->precision);
1230 /* Returns TRUE if, when truncating the current number, with BIT the
1231 new LSB, with the given lost fraction and rounding mode, the result
1232 would need to be rounded away from zero (i.e., by increasing the
1233 signficand). This routine must work for fcZero of both signs, and
1234 fcNormal numbers. */
1235 bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,
1236 lostFraction lost_fraction,
1237 unsigned int bit) const {
1238 /* NaNs and infinities should not have lost fractions. */
1239 assert(isFiniteNonZero() || category == fcZero);
1241 /* Current callers never pass this so we don't handle it. */
1242 assert(lost_fraction != lfExactlyZero);
1244 switch (rounding_mode) {
1245 case rmNearestTiesToAway:
1246 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1248 case rmNearestTiesToEven:
1249 if (lost_fraction == lfMoreThanHalf)
1252 /* Our zeroes don't have a significand to test. */
1253 if (lost_fraction == lfExactlyHalf && category != fcZero)
1254 return APInt::tcExtractBit(significandParts(), bit);
1261 case rmTowardPositive:
1264 case rmTowardNegative:
1267 llvm_unreachable("Invalid rounding mode found");
1270 IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
1271 lostFraction lost_fraction) {
1272 unsigned int omsb; /* One, not zero, based MSB. */
1275 if (!isFiniteNonZero())
1278 /* Before rounding normalize the exponent of fcNormal numbers. */
1279 omsb = significandMSB() + 1;
1282 /* OMSB is numbered from 1. We want to place it in the integer
1283 bit numbered PRECISION if possible, with a compensating change in
1285 exponentChange = omsb - semantics->precision;
1287 /* If the resulting exponent is too high, overflow according to
1288 the rounding mode. */
1289 if (exponent + exponentChange > semantics->maxExponent)
1290 return handleOverflow(rounding_mode);
1292 /* Subnormal numbers have exponent minExponent, and their MSB
1293 is forced based on that. */
1294 if (exponent + exponentChange < semantics->minExponent)
1295 exponentChange = semantics->minExponent - exponent;
1297 /* Shifting left is easy as we don't lose precision. */
1298 if (exponentChange < 0) {
1299 assert(lost_fraction == lfExactlyZero);
1301 shiftSignificandLeft(-exponentChange);
1306 if (exponentChange > 0) {
1309 /* Shift right and capture any new lost fraction. */
1310 lf = shiftSignificandRight(exponentChange);
1312 lost_fraction = combineLostFractions(lf, lost_fraction);
1314 /* Keep OMSB up-to-date. */
1315 if (omsb > (unsigned) exponentChange)
1316 omsb -= exponentChange;
1322 /* Now round the number according to rounding_mode given the lost
1325 /* As specified in IEEE 754, since we do not trap we do not report
1326 underflow for exact results. */
1327 if (lost_fraction == lfExactlyZero) {
1328 /* Canonicalize zeroes. */
1335 /* Increment the significand if we're rounding away from zero. */
1336 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1338 exponent = semantics->minExponent;
1340 incrementSignificand();
1341 omsb = significandMSB() + 1;
1343 /* Did the significand increment overflow? */
1344 if (omsb == (unsigned) semantics->precision + 1) {
1345 /* Renormalize by incrementing the exponent and shifting our
1346 significand right one. However if we already have the
1347 maximum exponent we overflow to infinity. */
1348 if (exponent == semantics->maxExponent) {
1349 category = fcInfinity;
1351 return (opStatus) (opOverflow | opInexact);
1354 shiftSignificandRight(1);
1360 /* The normal case - we were and are not denormal, and any
1361 significand increment above didn't overflow. */
1362 if (omsb == semantics->precision)
1365 /* We have a non-zero denormal. */
1366 assert(omsb < semantics->precision);
1368 /* Canonicalize zeroes. */
1372 /* The fcZero case is a denormal that underflowed to zero. */
1373 return (opStatus) (opUnderflow | opInexact);
1376 IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
1378 switch (PackCategoriesIntoKey(category, rhs.category)) {
1380 llvm_unreachable(nullptr);
1382 case PackCategoriesIntoKey(fcNaN, fcZero):
1383 case PackCategoriesIntoKey(fcNaN, fcNormal):
1384 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1385 case PackCategoriesIntoKey(fcNaN, fcNaN):
1386 case PackCategoriesIntoKey(fcNormal, fcZero):
1387 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1388 case PackCategoriesIntoKey(fcInfinity, fcZero):
1391 case PackCategoriesIntoKey(fcZero, fcNaN):
1392 case PackCategoriesIntoKey(fcNormal, fcNaN):
1393 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1394 // We need to be sure to flip the sign here for subtraction because we
1395 // don't have a separate negate operation so -NaN becomes 0 - NaN here.
1396 sign = rhs.sign ^ subtract;
1398 copySignificand(rhs);
1401 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1402 case PackCategoriesIntoKey(fcZero, fcInfinity):
1403 category = fcInfinity;
1404 sign = rhs.sign ^ subtract;
1407 case PackCategoriesIntoKey(fcZero, fcNormal):
1409 sign = rhs.sign ^ subtract;
1412 case PackCategoriesIntoKey(fcZero, fcZero):
1413 /* Sign depends on rounding mode; handled by caller. */
1416 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1417 /* Differently signed infinities can only be validly
1419 if (((sign ^ rhs.sign)!=0) != subtract) {
1426 case PackCategoriesIntoKey(fcNormal, fcNormal):
1431 /* Add or subtract two normal numbers. */
1432 lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
1435 lostFraction lost_fraction;
1438 /* Determine if the operation on the absolute values is effectively
1439 an addition or subtraction. */
1440 subtract ^= static_cast<bool>(sign ^ rhs.sign);
1442 /* Are we bigger exponent-wise than the RHS? */
1443 bits = exponent - rhs.exponent;
1445 /* Subtraction is more subtle than one might naively expect. */
1447 IEEEFloat temp_rhs(rhs);
1451 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
1452 lost_fraction = lfExactlyZero;
1453 } else if (bits > 0) {
1454 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1455 shiftSignificandLeft(1);
1458 lost_fraction = shiftSignificandRight(-bits - 1);
1459 temp_rhs.shiftSignificandLeft(1);
1464 carry = temp_rhs.subtractSignificand
1465 (*this, lost_fraction != lfExactlyZero);
1466 copySignificand(temp_rhs);
1469 carry = subtractSignificand
1470 (temp_rhs, lost_fraction != lfExactlyZero);
1473 /* Invert the lost fraction - it was on the RHS and
1475 if (lost_fraction == lfLessThanHalf)
1476 lost_fraction = lfMoreThanHalf;
1477 else if (lost_fraction == lfMoreThanHalf)
1478 lost_fraction = lfLessThanHalf;
1480 /* The code above is intended to ensure that no borrow is
1486 IEEEFloat temp_rhs(rhs);
1488 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1489 carry = addSignificand(temp_rhs);
1491 lost_fraction = shiftSignificandRight(-bits);
1492 carry = addSignificand(rhs);
1495 /* We have a guard bit; generating a carry cannot happen. */
1500 return lost_fraction;
1503 IEEEFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
1504 switch (PackCategoriesIntoKey(category, rhs.category)) {
1506 llvm_unreachable(nullptr);
1508 case PackCategoriesIntoKey(fcNaN, fcZero):
1509 case PackCategoriesIntoKey(fcNaN, fcNormal):
1510 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1511 case PackCategoriesIntoKey(fcNaN, fcNaN):
1515 case PackCategoriesIntoKey(fcZero, fcNaN):
1516 case PackCategoriesIntoKey(fcNormal, fcNaN):
1517 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1520 copySignificand(rhs);
1523 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1524 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1525 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1526 category = fcInfinity;
1529 case PackCategoriesIntoKey(fcZero, fcNormal):
1530 case PackCategoriesIntoKey(fcNormal, fcZero):
1531 case PackCategoriesIntoKey(fcZero, fcZero):
1535 case PackCategoriesIntoKey(fcZero, fcInfinity):
1536 case PackCategoriesIntoKey(fcInfinity, fcZero):
1540 case PackCategoriesIntoKey(fcNormal, fcNormal):
1545 IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
1546 switch (PackCategoriesIntoKey(category, rhs.category)) {
1548 llvm_unreachable(nullptr);
1550 case PackCategoriesIntoKey(fcZero, fcNaN):
1551 case PackCategoriesIntoKey(fcNormal, fcNaN):
1552 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1554 copySignificand(rhs);
1556 case PackCategoriesIntoKey(fcNaN, fcZero):
1557 case PackCategoriesIntoKey(fcNaN, fcNormal):
1558 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1559 case PackCategoriesIntoKey(fcNaN, fcNaN):
1562 case PackCategoriesIntoKey(fcInfinity, fcZero):
1563 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1564 case PackCategoriesIntoKey(fcZero, fcInfinity):
1565 case PackCategoriesIntoKey(fcZero, fcNormal):
1568 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1572 case PackCategoriesIntoKey(fcNormal, fcZero):
1573 category = fcInfinity;
1576 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1577 case PackCategoriesIntoKey(fcZero, fcZero):
1581 case PackCategoriesIntoKey(fcNormal, fcNormal):
1586 IEEEFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
1587 switch (PackCategoriesIntoKey(category, rhs.category)) {
1589 llvm_unreachable(nullptr);
1591 case PackCategoriesIntoKey(fcNaN, fcZero):
1592 case PackCategoriesIntoKey(fcNaN, fcNormal):
1593 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1594 case PackCategoriesIntoKey(fcNaN, fcNaN):
1595 case PackCategoriesIntoKey(fcZero, fcInfinity):
1596 case PackCategoriesIntoKey(fcZero, fcNormal):
1597 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1600 case PackCategoriesIntoKey(fcZero, fcNaN):
1601 case PackCategoriesIntoKey(fcNormal, fcNaN):
1602 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1605 copySignificand(rhs);
1608 case PackCategoriesIntoKey(fcNormal, fcZero):
1609 case PackCategoriesIntoKey(fcInfinity, fcZero):
1610 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1611 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1612 case PackCategoriesIntoKey(fcZero, fcZero):
1616 case PackCategoriesIntoKey(fcNormal, fcNormal):
1622 void IEEEFloat::changeSign() {
1623 /* Look mummy, this one's easy. */
1627 /* Normalized addition or subtraction. */
1628 IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
1629 roundingMode rounding_mode,
1633 fs = addOrSubtractSpecials(rhs, subtract);
1635 /* This return code means it was not a simple case. */
1636 if (fs == opDivByZero) {
1637 lostFraction lost_fraction;
1639 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1640 fs = normalize(rounding_mode, lost_fraction);
1642 /* Can only be zero if we lost no fraction. */
1643 assert(category != fcZero || lost_fraction == lfExactlyZero);
1646 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1647 positive zero unless rounding to minus infinity, except that
1648 adding two like-signed zeroes gives that zero. */
1649 if (category == fcZero) {
1650 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
1651 sign = (rounding_mode == rmTowardNegative);
1657 /* Normalized addition. */
1658 IEEEFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs,
1659 roundingMode rounding_mode) {
1660 return addOrSubtract(rhs, rounding_mode, false);
1663 /* Normalized subtraction. */
1664 IEEEFloat::opStatus IEEEFloat::subtract(const IEEEFloat &rhs,
1665 roundingMode rounding_mode) {
1666 return addOrSubtract(rhs, rounding_mode, true);
1669 /* Normalized multiply. */
1670 IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs,
1671 roundingMode rounding_mode) {
1675 fs = multiplySpecials(rhs);
1677 if (isFiniteNonZero()) {
1678 lostFraction lost_fraction = multiplySignificand(rhs, nullptr);
1679 fs = normalize(rounding_mode, lost_fraction);
1680 if (lost_fraction != lfExactlyZero)
1681 fs = (opStatus) (fs | opInexact);
1687 /* Normalized divide. */
1688 IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs,
1689 roundingMode rounding_mode) {
1693 fs = divideSpecials(rhs);
1695 if (isFiniteNonZero()) {
1696 lostFraction lost_fraction = divideSignificand(rhs);
1697 fs = normalize(rounding_mode, lost_fraction);
1698 if (lost_fraction != lfExactlyZero)
1699 fs = (opStatus) (fs | opInexact);
1705 /* Normalized remainder. This is not currently correct in all cases. */
1706 IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) {
1708 IEEEFloat V = *this;
1709 unsigned int origSign = sign;
1711 fs = V.divide(rhs, rmNearestTiesToEven);
1712 if (fs == opDivByZero)
1715 int parts = partCount();
1716 integerPart *x = new integerPart[parts];
1718 fs = V.convertToInteger(makeMutableArrayRef(x, parts),
1719 parts * integerPartWidth, true, rmNearestTiesToEven,
1721 if (fs == opInvalidOp) {
1726 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1727 rmNearestTiesToEven);
1728 assert(fs==opOK); // should always work
1730 fs = V.multiply(rhs, rmNearestTiesToEven);
1731 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1733 fs = subtract(V, rmNearestTiesToEven);
1734 assert(fs==opOK || fs==opInexact); // likewise
1737 sign = origSign; // IEEE754 requires this
1742 /* Normalized llvm frem (C fmod). */
1743 IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) {
1745 fs = modSpecials(rhs);
1747 while (isFiniteNonZero() && rhs.isFiniteNonZero() &&
1748 compareAbsoluteValue(rhs) != cmpLessThan) {
1749 IEEEFloat V = scalbn(rhs, ilogb(*this) - ilogb(rhs), rmNearestTiesToEven);
1750 if (compareAbsoluteValue(V) == cmpLessThan)
1751 V = scalbn(V, -1, rmNearestTiesToEven);
1754 fs = subtract(V, rmNearestTiesToEven);
1760 /* Normalized fused-multiply-add. */
1761 IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand,
1762 const IEEEFloat &addend,
1763 roundingMode rounding_mode) {
1766 /* Post-multiplication sign, before addition. */
1767 sign ^= multiplicand.sign;
1769 /* If and only if all arguments are normal do we need to do an
1770 extended-precision calculation. */
1771 if (isFiniteNonZero() &&
1772 multiplicand.isFiniteNonZero() &&
1773 addend.isFinite()) {
1774 lostFraction lost_fraction;
1776 lost_fraction = multiplySignificand(multiplicand, &addend);
1777 fs = normalize(rounding_mode, lost_fraction);
1778 if (lost_fraction != lfExactlyZero)
1779 fs = (opStatus) (fs | opInexact);
1781 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1782 positive zero unless rounding to minus infinity, except that
1783 adding two like-signed zeroes gives that zero. */
1784 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign)
1785 sign = (rounding_mode == rmTowardNegative);
1787 fs = multiplySpecials(multiplicand);
1789 /* FS can only be opOK or opInvalidOp. There is no more work
1790 to do in the latter case. The IEEE-754R standard says it is
1791 implementation-defined in this case whether, if ADDEND is a
1792 quiet NaN, we raise invalid op; this implementation does so.
1794 If we need to do the addition we can do so with normal
1797 fs = addOrSubtract(addend, rounding_mode, false);
1803 /* Rounding-mode corrrect round to integral value. */
1804 IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) {
1807 // If the exponent is large enough, we know that this value is already
1808 // integral, and the arithmetic below would potentially cause it to saturate
1809 // to +/-Inf. Bail out early instead.
1810 if (isFiniteNonZero() && exponent+1 >= (int)semanticsPrecision(*semantics))
1813 // The algorithm here is quite simple: we add 2^(p-1), where p is the
1814 // precision of our format, and then subtract it back off again. The choice
1815 // of rounding modes for the addition/subtraction determines the rounding mode
1816 // for our integral rounding as well.
1817 // NOTE: When the input value is negative, we do subtraction followed by
1818 // addition instead.
1819 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
1820 IntegerConstant <<= semanticsPrecision(*semantics)-1;
1821 IEEEFloat MagicConstant(*semantics);
1822 fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
1823 rmNearestTiesToEven);
1824 MagicConstant.sign = sign;
1829 // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly.
1830 bool inputSign = isNegative();
1832 fs = add(MagicConstant, rounding_mode);
1833 if (fs != opOK && fs != opInexact)
1836 fs = subtract(MagicConstant, rounding_mode);
1838 // Restore the input sign.
1839 if (inputSign != isNegative())
1846 /* Comparison requires normalized numbers. */
1847 IEEEFloat::cmpResult IEEEFloat::compare(const IEEEFloat &rhs) const {
1850 assert(semantics == rhs.semantics);
1852 switch (PackCategoriesIntoKey(category, rhs.category)) {
1854 llvm_unreachable(nullptr);
1856 case PackCategoriesIntoKey(fcNaN, fcZero):
1857 case PackCategoriesIntoKey(fcNaN, fcNormal):
1858 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1859 case PackCategoriesIntoKey(fcNaN, fcNaN):
1860 case PackCategoriesIntoKey(fcZero, fcNaN):
1861 case PackCategoriesIntoKey(fcNormal, fcNaN):
1862 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1863 return cmpUnordered;
1865 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1866 case PackCategoriesIntoKey(fcInfinity, fcZero):
1867 case PackCategoriesIntoKey(fcNormal, fcZero):
1871 return cmpGreaterThan;
1873 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1874 case PackCategoriesIntoKey(fcZero, fcInfinity):
1875 case PackCategoriesIntoKey(fcZero, fcNormal):
1877 return cmpGreaterThan;
1881 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1882 if (sign == rhs.sign)
1887 return cmpGreaterThan;
1889 case PackCategoriesIntoKey(fcZero, fcZero):
1892 case PackCategoriesIntoKey(fcNormal, fcNormal):
1896 /* Two normal numbers. Do they have the same sign? */
1897 if (sign != rhs.sign) {
1899 result = cmpLessThan;
1901 result = cmpGreaterThan;
1903 /* Compare absolute values; invert result if negative. */
1904 result = compareAbsoluteValue(rhs);
1907 if (result == cmpLessThan)
1908 result = cmpGreaterThan;
1909 else if (result == cmpGreaterThan)
1910 result = cmpLessThan;
1917 /// IEEEFloat::convert - convert a value of one floating point type to another.
1918 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
1919 /// records whether the transformation lost information, i.e. whether
1920 /// converting the result back to the original type will produce the
1921 /// original value (this is almost the same as return value==fsOK, but there
1922 /// are edge cases where this is not so).
1924 IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
1925 roundingMode rounding_mode,
1927 lostFraction lostFraction;
1928 unsigned int newPartCount, oldPartCount;
1931 const fltSemantics &fromSemantics = *semantics;
1933 lostFraction = lfExactlyZero;
1934 newPartCount = partCountForBits(toSemantics.precision + 1);
1935 oldPartCount = partCount();
1936 shift = toSemantics.precision - fromSemantics.precision;
1938 bool X86SpecialNan = false;
1939 if (&fromSemantics == &semX87DoubleExtended &&
1940 &toSemantics != &semX87DoubleExtended && category == fcNaN &&
1941 (!(*significandParts() & 0x8000000000000000ULL) ||
1942 !(*significandParts() & 0x4000000000000000ULL))) {
1943 // x86 has some unusual NaNs which cannot be represented in any other
1944 // format; note them here.
1945 X86SpecialNan = true;
1948 // If this is a truncation of a denormal number, and the target semantics
1949 // has larger exponent range than the source semantics (this can happen
1950 // when truncating from PowerPC double-double to double format), the
1951 // right shift could lose result mantissa bits. Adjust exponent instead
1952 // of performing excessive shift.
1953 if (shift < 0 && isFiniteNonZero()) {
1954 int exponentChange = significandMSB() + 1 - fromSemantics.precision;
1955 if (exponent + exponentChange < toSemantics.minExponent)
1956 exponentChange = toSemantics.minExponent - exponent;
1957 if (exponentChange < shift)
1958 exponentChange = shift;
1959 if (exponentChange < 0) {
1960 shift -= exponentChange;
1961 exponent += exponentChange;
1965 // If this is a truncation, perform the shift before we narrow the storage.
1966 if (shift < 0 && (isFiniteNonZero() || category==fcNaN))
1967 lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
1969 // Fix the storage so it can hold to new value.
1970 if (newPartCount > oldPartCount) {
1971 // The new type requires more storage; make it available.
1972 integerPart *newParts;
1973 newParts = new integerPart[newPartCount];
1974 APInt::tcSet(newParts, 0, newPartCount);
1975 if (isFiniteNonZero() || category==fcNaN)
1976 APInt::tcAssign(newParts, significandParts(), oldPartCount);
1978 significand.parts = newParts;
1979 } else if (newPartCount == 1 && oldPartCount != 1) {
1980 // Switch to built-in storage for a single part.
1981 integerPart newPart = 0;
1982 if (isFiniteNonZero() || category==fcNaN)
1983 newPart = significandParts()[0];
1985 significand.part = newPart;
1988 // Now that we have the right storage, switch the semantics.
1989 semantics = &toSemantics;
1991 // If this is an extension, perform the shift now that the storage is
1993 if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
1994 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
1996 if (isFiniteNonZero()) {
1997 fs = normalize(rounding_mode, lostFraction);
1998 *losesInfo = (fs != opOK);
1999 } else if (category == fcNaN) {
2000 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2002 // For x87 extended precision, we want to make a NaN, not a special NaN if
2003 // the input wasn't special either.
2004 if (!X86SpecialNan && semantics == &semX87DoubleExtended)
2005 APInt::tcSetBit(significandParts(), semantics->precision - 1);
2007 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
2008 // does not give you back the same bits. This is dubious, and we
2009 // don't currently do it. You're really supposed to get
2010 // an invalid operation signal at runtime, but nobody does that.
2020 /* Convert a floating point number to an integer according to the
2021 rounding mode. If the rounded integer value is out of range this
2022 returns an invalid operation exception and the contents of the
2023 destination parts are unspecified. If the rounded value is in
2024 range but the floating point number is not the exact integer, the C
2025 standard doesn't require an inexact exception to be raised. IEEE
2026 854 does require it so we do that.
2028 Note that for conversions to integer type the C standard requires
2029 round-to-zero to always be used. */
2030 IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
2031 MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned,
2032 roundingMode rounding_mode, bool *isExact) const {
2033 lostFraction lost_fraction;
2034 const integerPart *src;
2035 unsigned int dstPartsCount, truncatedBits;
2039 /* Handle the three special cases first. */
2040 if (category == fcInfinity || category == fcNaN)
2043 dstPartsCount = partCountForBits(width);
2044 assert(dstPartsCount <= parts.size() && "Integer too big");
2046 if (category == fcZero) {
2047 APInt::tcSet(parts.data(), 0, dstPartsCount);
2048 // Negative zero can't be represented as an int.
2053 src = significandParts();
2055 /* Step 1: place our absolute value, with any fraction truncated, in
2058 /* Our absolute value is less than one; truncate everything. */
2059 APInt::tcSet(parts.data(), 0, dstPartsCount);
2060 /* For exponent -1 the integer bit represents .5, look at that.
2061 For smaller exponents leftmost truncated bit is 0. */
2062 truncatedBits = semantics->precision -1U - exponent;
2064 /* We want the most significant (exponent + 1) bits; the rest are
2066 unsigned int bits = exponent + 1U;
2068 /* Hopelessly large in magnitude? */
2072 if (bits < semantics->precision) {
2073 /* We truncate (semantics->precision - bits) bits. */
2074 truncatedBits = semantics->precision - bits;
2075 APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits);
2077 /* We want at least as many bits as are available. */
2078 APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision,
2080 APInt::tcShiftLeft(parts.data(), dstPartsCount,
2081 bits - semantics->precision);
2086 /* Step 2: work out any lost fraction, and increment the absolute
2087 value if we would round away from zero. */
2088 if (truncatedBits) {
2089 lost_fraction = lostFractionThroughTruncation(src, partCount(),
2091 if (lost_fraction != lfExactlyZero &&
2092 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2093 if (APInt::tcIncrement(parts.data(), dstPartsCount))
2094 return opInvalidOp; /* Overflow. */
2097 lost_fraction = lfExactlyZero;
2100 /* Step 3: check if we fit in the destination. */
2101 unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1;
2105 /* Negative numbers cannot be represented as unsigned. */
2109 /* It takes omsb bits to represent the unsigned integer value.
2110 We lose a bit for the sign, but care is needed as the
2111 maximally negative integer is a special case. */
2112 if (omsb == width &&
2113 APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb)
2116 /* This case can happen because of rounding. */
2121 APInt::tcNegate (parts.data(), dstPartsCount);
2123 if (omsb >= width + !isSigned)
2127 if (lost_fraction == lfExactlyZero) {
2134 /* Same as convertToSignExtendedInteger, except we provide
2135 deterministic values in case of an invalid operation exception,
2136 namely zero for NaNs and the minimal or maximal value respectively
2137 for underflow or overflow.
2138 The *isExact output tells whether the result is exact, in the sense
2139 that converting it back to the original floating point type produces
2140 the original value. This is almost equivalent to result==opOK,
2141 except for negative zeroes.
2144 IEEEFloat::convertToInteger(MutableArrayRef<integerPart> parts,
2145 unsigned int width, bool isSigned,
2146 roundingMode rounding_mode, bool *isExact) const {
2149 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2152 if (fs == opInvalidOp) {
2153 unsigned int bits, dstPartsCount;
2155 dstPartsCount = partCountForBits(width);
2156 assert(dstPartsCount <= parts.size() && "Integer too big");
2158 if (category == fcNaN)
2163 bits = width - isSigned;
2165 APInt::tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits);
2166 if (sign && isSigned)
2167 APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1);
2173 /* Convert an unsigned integer SRC to a floating point number,
2174 rounding according to ROUNDING_MODE. The sign of the floating
2175 point number is not modified. */
2176 IEEEFloat::opStatus IEEEFloat::convertFromUnsignedParts(
2177 const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) {
2178 unsigned int omsb, precision, dstCount;
2180 lostFraction lost_fraction;
2182 category = fcNormal;
2183 omsb = APInt::tcMSB(src, srcCount) + 1;
2184 dst = significandParts();
2185 dstCount = partCount();
2186 precision = semantics->precision;
2188 /* We want the most significant PRECISION bits of SRC. There may not
2189 be that many; extract what we can. */
2190 if (precision <= omsb) {
2191 exponent = omsb - 1;
2192 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2194 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2196 exponent = precision - 1;
2197 lost_fraction = lfExactlyZero;
2198 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2201 return normalize(rounding_mode, lost_fraction);
2204 IEEEFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned,
2205 roundingMode rounding_mode) {
2206 unsigned int partCount = Val.getNumWords();
2210 if (isSigned && api.isNegative()) {
2215 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2218 /* Convert a two's complement integer SRC to a floating point number,
2219 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2220 integer is signed, in which case it must be sign-extended. */
2222 IEEEFloat::convertFromSignExtendedInteger(const integerPart *src,
2223 unsigned int srcCount, bool isSigned,
2224 roundingMode rounding_mode) {
2228 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2231 /* If we're signed and negative negate a copy. */
2233 copy = new integerPart[srcCount];
2234 APInt::tcAssign(copy, src, srcCount);
2235 APInt::tcNegate(copy, srcCount);
2236 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2240 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2246 /* FIXME: should this just take a const APInt reference? */
2248 IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2249 unsigned int width, bool isSigned,
2250 roundingMode rounding_mode) {
2251 unsigned int partCount = partCountForBits(width);
2252 APInt api = APInt(width, makeArrayRef(parts, partCount));
2255 if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2260 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2264 IEEEFloat::convertFromHexadecimalString(StringRef s,
2265 roundingMode rounding_mode) {
2266 lostFraction lost_fraction = lfExactlyZero;
2268 category = fcNormal;
2272 integerPart *significand = significandParts();
2273 unsigned partsCount = partCount();
2274 unsigned bitPos = partsCount * integerPartWidth;
2275 bool computedTrailingFraction = false;
2277 // Skip leading zeroes and any (hexa)decimal point.
2278 StringRef::iterator begin = s.begin();
2279 StringRef::iterator end = s.end();
2280 StringRef::iterator dot;
2281 StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2282 StringRef::iterator firstSignificantDigit = p;
2285 integerPart hex_value;
2288 assert(dot == end && "String contains multiple dots");
2293 hex_value = hexDigitValue(*p);
2294 if (hex_value == -1U)
2299 // Store the number while we have space.
2302 hex_value <<= bitPos % integerPartWidth;
2303 significand[bitPos / integerPartWidth] |= hex_value;
2304 } else if (!computedTrailingFraction) {
2305 lost_fraction = trailingHexadecimalFraction(p, end, hex_value);
2306 computedTrailingFraction = true;
2310 /* Hex floats require an exponent but not a hexadecimal point. */
2311 assert(p != end && "Hex strings require an exponent");
2312 assert((*p == 'p' || *p == 'P') && "Invalid character in significand");
2313 assert(p != begin && "Significand has no digits");
2314 assert((dot == end || p - begin != 1) && "Significand has no digits");
2316 /* Ignore the exponent if we are zero. */
2317 if (p != firstSignificantDigit) {
2320 /* Implicit hexadecimal point? */
2324 /* Calculate the exponent adjustment implicit in the number of
2325 significant digits. */
2326 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2327 if (expAdjustment < 0)
2329 expAdjustment = expAdjustment * 4 - 1;
2331 /* Adjust for writing the significand starting at the most
2332 significant nibble. */
2333 expAdjustment += semantics->precision;
2334 expAdjustment -= partsCount * integerPartWidth;
2336 /* Adjust for the given exponent. */
2337 exponent = totalExponent(p + 1, end, expAdjustment);
2340 return normalize(rounding_mode, lost_fraction);
2344 IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2345 unsigned sigPartCount, int exp,
2346 roundingMode rounding_mode) {
2347 unsigned int parts, pow5PartCount;
2348 fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
2349 integerPart pow5Parts[maxPowerOfFiveParts];
2352 isNearest = (rounding_mode == rmNearestTiesToEven ||
2353 rounding_mode == rmNearestTiesToAway);
2355 parts = partCountForBits(semantics->precision + 11);
2357 /* Calculate pow(5, abs(exp)). */
2358 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2360 for (;; parts *= 2) {
2361 opStatus sigStatus, powStatus;
2362 unsigned int excessPrecision, truncatedBits;
2364 calcSemantics.precision = parts * integerPartWidth - 1;
2365 excessPrecision = calcSemantics.precision - semantics->precision;
2366 truncatedBits = excessPrecision;
2368 IEEEFloat decSig(calcSemantics, uninitialized);
2369 decSig.makeZero(sign);
2370 IEEEFloat pow5(calcSemantics);
2372 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2373 rmNearestTiesToEven);
2374 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2375 rmNearestTiesToEven);
2376 /* Add exp, as 10^n = 5^n * 2^n. */
2377 decSig.exponent += exp;
2379 lostFraction calcLostFraction;
2380 integerPart HUerr, HUdistance;
2381 unsigned int powHUerr;
2384 /* multiplySignificand leaves the precision-th bit set to 1. */
2385 calcLostFraction = decSig.multiplySignificand(pow5, nullptr);
2386 powHUerr = powStatus != opOK;
2388 calcLostFraction = decSig.divideSignificand(pow5);
2389 /* Denormal numbers have less precision. */
2390 if (decSig.exponent < semantics->minExponent) {
2391 excessPrecision += (semantics->minExponent - decSig.exponent);
2392 truncatedBits = excessPrecision;
2393 if (excessPrecision > calcSemantics.precision)
2394 excessPrecision = calcSemantics.precision;
2396 /* Extra half-ulp lost in reciprocal of exponent. */
2397 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2400 /* Both multiplySignificand and divideSignificand return the
2401 result with the integer bit set. */
2402 assert(APInt::tcExtractBit
2403 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2405 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2407 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2408 excessPrecision, isNearest);
2410 /* Are we guaranteed to round correctly if we truncate? */
2411 if (HUdistance >= HUerr) {
2412 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2413 calcSemantics.precision - excessPrecision,
2415 /* Take the exponent of decSig. If we tcExtract-ed less bits
2416 above we must adjust our exponent to compensate for the
2417 implicit right shift. */
2418 exponent = (decSig.exponent + semantics->precision
2419 - (calcSemantics.precision - excessPrecision));
2420 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2423 return normalize(rounding_mode, calcLostFraction);
2429 IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
2433 /* Scan the text. */
2434 StringRef::iterator p = str.begin();
2435 interpretDecimal(p, str.end(), &D);
2437 /* Handle the quick cases. First the case of no significant digits,
2438 i.e. zero, and then exponents that are obviously too large or too
2439 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2440 definitely overflows if
2442 (exp - 1) * L >= maxExponent
2444 and definitely underflows to zero where
2446 (exp + 1) * L <= minExponent - precision
2448 With integer arithmetic the tightest bounds for L are
2450 93/28 < L < 196/59 [ numerator <= 256 ]
2451 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2454 // Test if we have a zero number allowing for strings with no null terminators
2455 // and zero decimals with non-zero exponents.
2457 // We computed firstSigDigit by ignoring all zeros and dots. Thus if
2458 // D->firstSigDigit equals str.end(), every digit must be a zero and there can
2459 // be at most one dot. On the other hand, if we have a zero with a non-zero
2460 // exponent, then we know that D.firstSigDigit will be non-numeric.
2461 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
2465 /* Check whether the normalized exponent is high enough to overflow
2466 max during the log-rebasing in the max-exponent check below. */
2467 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2468 fs = handleOverflow(rounding_mode);
2470 /* If it wasn't, then it also wasn't high enough to overflow max
2471 during the log-rebasing in the min-exponent check. Check that it
2472 won't overflow min in either check, then perform the min-exponent
2474 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
2475 (D.normalizedExponent + 1) * 28738 <=
2476 8651 * (semantics->minExponent - (int) semantics->precision)) {
2477 /* Underflow to zero and round. */
2478 category = fcNormal;
2480 fs = normalize(rounding_mode, lfLessThanHalf);
2482 /* We can finally safely perform the max-exponent check. */
2483 } else if ((D.normalizedExponent - 1) * 42039
2484 >= 12655 * semantics->maxExponent) {
2485 /* Overflow and round. */
2486 fs = handleOverflow(rounding_mode);
2488 integerPart *decSignificand;
2489 unsigned int partCount;
2491 /* A tight upper bound on number of bits required to hold an
2492 N-digit decimal integer is N * 196 / 59. Allocate enough space
2493 to hold the full significand, and an extra part required by
2495 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2496 partCount = partCountForBits(1 + 196 * partCount / 59);
2497 decSignificand = new integerPart[partCount + 1];
2500 /* Convert to binary efficiently - we do almost all multiplication
2501 in an integerPart. When this would overflow do we do a single
2502 bignum multiplication, and then revert again to multiplication
2503 in an integerPart. */
2505 integerPart decValue, val, multiplier;
2513 if (p == str.end()) {
2517 decValue = decDigitValue(*p++);
2518 assert(decValue < 10U && "Invalid character in significand");
2520 val = val * 10 + decValue;
2521 /* The maximum number that can be multiplied by ten with any
2522 digit added without overflowing an integerPart. */
2523 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2525 /* Multiply out the current part. */
2526 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2527 partCount, partCount + 1, false);
2529 /* If we used another part (likely but not guaranteed), increase
2531 if (decSignificand[partCount])
2533 } while (p <= D.lastSigDigit);
2535 category = fcNormal;
2536 fs = roundSignificandWithExponent(decSignificand, partCount,
2537 D.exponent, rounding_mode);
2539 delete [] decSignificand;
2545 bool IEEEFloat::convertFromStringSpecials(StringRef str) {
2546 if (str.equals("inf") || str.equals("INFINITY")) {
2551 if (str.equals("-inf") || str.equals("-INFINITY")) {
2556 if (str.equals("nan") || str.equals("NaN")) {
2557 makeNaN(false, false);
2561 if (str.equals("-nan") || str.equals("-NaN")) {
2562 makeNaN(false, true);
2569 IEEEFloat::opStatus IEEEFloat::convertFromString(StringRef str,
2570 roundingMode rounding_mode) {
2571 assert(!str.empty() && "Invalid string length");
2573 // Handle special cases.
2574 if (convertFromStringSpecials(str))
2577 /* Handle a leading minus sign. */
2578 StringRef::iterator p = str.begin();
2579 size_t slen = str.size();
2580 sign = *p == '-' ? 1 : 0;
2581 if (*p == '-' || *p == '+') {
2584 assert(slen && "String has no digits");
2587 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2588 assert(slen - 2 && "Invalid string");
2589 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
2593 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
2596 /* Write out a hexadecimal representation of the floating point value
2597 to DST, which must be of sufficient size, in the C99 form
2598 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2599 excluding the terminating NUL.
2601 If UPPERCASE, the output is in upper case, otherwise in lower case.
2603 HEXDIGITS digits appear altogether, rounding the value if
2604 necessary. If HEXDIGITS is 0, the minimal precision to display the
2605 number precisely is used instead. If nothing would appear after
2606 the decimal point it is suppressed.
2608 The decimal exponent is always printed and has at least one digit.
2609 Zero values display an exponent of zero. Infinities and NaNs
2610 appear as "infinity" or "nan" respectively.
2612 The above rules are as specified by C99. There is ambiguity about
2613 what the leading hexadecimal digit should be. This implementation
2614 uses whatever is necessary so that the exponent is displayed as
2615 stored. This implies the exponent will fall within the IEEE format
2616 range, and the leading hexadecimal digit will be 0 (for denormals),
2617 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2618 any other digits zero).
2620 unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits,
2622 roundingMode rounding_mode) const {
2631 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2632 dst += sizeof infinityL - 1;
2636 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2637 dst += sizeof NaNU - 1;
2642 *dst++ = upperCase ? 'X': 'x';
2644 if (hexDigits > 1) {
2646 memset (dst, '0', hexDigits - 1);
2647 dst += hexDigits - 1;
2649 *dst++ = upperCase ? 'P': 'p';
2654 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2660 return static_cast<unsigned int>(dst - p);
2663 /* Does the hard work of outputting the correctly rounded hexadecimal
2664 form of a normal floating point number with the specified number of
2665 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2666 digits necessary to print the value precisely is output. */
2667 char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2669 roundingMode rounding_mode) const {
2670 unsigned int count, valueBits, shift, partsCount, outputDigits;
2671 const char *hexDigitChars;
2672 const integerPart *significand;
2677 *dst++ = upperCase ? 'X': 'x';
2680 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2682 significand = significandParts();
2683 partsCount = partCount();
2685 /* +3 because the first digit only uses the single integer bit, so
2686 we have 3 virtual zero most-significant-bits. */
2687 valueBits = semantics->precision + 3;
2688 shift = integerPartWidth - valueBits % integerPartWidth;
2690 /* The natural number of digits required ignoring trailing
2691 insignificant zeroes. */
2692 outputDigits = (valueBits - significandLSB () + 3) / 4;
2694 /* hexDigits of zero means use the required number for the
2695 precision. Otherwise, see if we are truncating. If we are,
2696 find out if we need to round away from zero. */
2698 if (hexDigits < outputDigits) {
2699 /* We are dropping non-zero bits, so need to check how to round.
2700 "bits" is the number of dropped bits. */
2702 lostFraction fraction;
2704 bits = valueBits - hexDigits * 4;
2705 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2706 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2708 outputDigits = hexDigits;
2711 /* Write the digits consecutively, and start writing in the location
2712 of the hexadecimal point. We move the most significant digit
2713 left and add the hexadecimal point later. */
2716 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2718 while (outputDigits && count) {
2721 /* Put the most significant integerPartWidth bits in "part". */
2722 if (--count == partsCount)
2723 part = 0; /* An imaginary higher zero part. */
2725 part = significand[count] << shift;
2728 part |= significand[count - 1] >> (integerPartWidth - shift);
2730 /* Convert as much of "part" to hexdigits as we can. */
2731 unsigned int curDigits = integerPartWidth / 4;
2733 if (curDigits > outputDigits)
2734 curDigits = outputDigits;
2735 dst += partAsHex (dst, part, curDigits, hexDigitChars);
2736 outputDigits -= curDigits;
2742 /* Note that hexDigitChars has a trailing '0'. */
2745 *q = hexDigitChars[hexDigitValue (*q) + 1];
2746 } while (*q == '0');
2749 /* Add trailing zeroes. */
2750 memset (dst, '0', outputDigits);
2751 dst += outputDigits;
2754 /* Move the most significant digit to before the point, and if there
2755 is something after the decimal point add it. This must come
2756 after rounding above. */
2763 /* Finally output the exponent. */
2764 *dst++ = upperCase ? 'P': 'p';
2766 return writeSignedDecimal (dst, exponent);
2769 hash_code hash_value(const IEEEFloat &Arg) {
2770 if (!Arg.isFiniteNonZero())
2771 return hash_combine((uint8_t)Arg.category,
2772 // NaN has no sign, fix it at zero.
2773 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
2774 Arg.semantics->precision);
2776 // Normal floats need their exponent and significand hashed.
2777 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
2778 Arg.semantics->precision, Arg.exponent,
2780 Arg.significandParts(),
2781 Arg.significandParts() + Arg.partCount()));
2784 // Conversion from APFloat to/from host float/double. It may eventually be
2785 // possible to eliminate these and have everybody deal with APFloats, but that
2786 // will take a while. This approach will not easily extend to long double.
2787 // Current implementation requires integerPartWidth==64, which is correct at
2788 // the moment but could be made more general.
2790 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2791 // the actual IEEE respresentations. We compensate for that here.
2793 APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
2794 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended);
2795 assert(partCount()==2);
2797 uint64_t myexponent, mysignificand;
2799 if (isFiniteNonZero()) {
2800 myexponent = exponent+16383; //bias
2801 mysignificand = significandParts()[0];
2802 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2803 myexponent = 0; // denormal
2804 } else if (category==fcZero) {
2807 } else if (category==fcInfinity) {
2808 myexponent = 0x7fff;
2809 mysignificand = 0x8000000000000000ULL;
2811 assert(category == fcNaN && "Unknown category");
2812 myexponent = 0x7fff;
2813 mysignificand = significandParts()[0];
2817 words[0] = mysignificand;
2818 words[1] = ((uint64_t)(sign & 1) << 15) |
2819 (myexponent & 0x7fffLL);
2820 return APInt(80, words);
2823 APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const {
2824 assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy);
2825 assert(partCount()==2);
2831 // Convert number to double. To avoid spurious underflows, we re-
2832 // normalize against the "double" minExponent first, and only *then*
2833 // truncate the mantissa. The result of that second conversion
2834 // may be inexact, but should never underflow.
2835 // Declare fltSemantics before APFloat that uses it (and
2836 // saves pointer to it) to ensure correct destruction order.
2837 fltSemantics extendedSemantics = *semantics;
2838 extendedSemantics.minExponent = semIEEEdouble.minExponent;
2839 IEEEFloat extended(*this);
2840 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2841 assert(fs == opOK && !losesInfo);
2844 IEEEFloat u(extended);
2845 fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
2846 assert(fs == opOK || fs == opInexact);
2848 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
2850 // If conversion was exact or resulted in a special case, we're done;
2851 // just set the second double to zero. Otherwise, re-convert back to
2852 // the extended format and compute the difference. This now should
2853 // convert exactly to double.
2854 if (u.isFiniteNonZero() && losesInfo) {
2855 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2856 assert(fs == opOK && !losesInfo);
2859 IEEEFloat v(extended);
2860 v.subtract(u, rmNearestTiesToEven);
2861 fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
2862 assert(fs == opOK && !losesInfo);
2864 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
2869 return APInt(128, words);
2872 APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
2873 assert(semantics == (const llvm::fltSemantics*)&semIEEEquad);
2874 assert(partCount()==2);
2876 uint64_t myexponent, mysignificand, mysignificand2;
2878 if (isFiniteNonZero()) {
2879 myexponent = exponent+16383; //bias
2880 mysignificand = significandParts()[0];
2881 mysignificand2 = significandParts()[1];
2882 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
2883 myexponent = 0; // denormal
2884 } else if (category==fcZero) {
2886 mysignificand = mysignificand2 = 0;
2887 } else if (category==fcInfinity) {
2888 myexponent = 0x7fff;
2889 mysignificand = mysignificand2 = 0;
2891 assert(category == fcNaN && "Unknown category!");
2892 myexponent = 0x7fff;
2893 mysignificand = significandParts()[0];
2894 mysignificand2 = significandParts()[1];
2898 words[0] = mysignificand;
2899 words[1] = ((uint64_t)(sign & 1) << 63) |
2900 ((myexponent & 0x7fff) << 48) |
2901 (mysignificand2 & 0xffffffffffffLL);
2903 return APInt(128, words);
2906 APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
2907 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble);
2908 assert(partCount()==1);
2910 uint64_t myexponent, mysignificand;
2912 if (isFiniteNonZero()) {
2913 myexponent = exponent+1023; //bias
2914 mysignificand = *significandParts();
2915 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2916 myexponent = 0; // denormal
2917 } else if (category==fcZero) {
2920 } else if (category==fcInfinity) {
2924 assert(category == fcNaN && "Unknown category!");
2926 mysignificand = *significandParts();
2929 return APInt(64, ((((uint64_t)(sign & 1) << 63) |
2930 ((myexponent & 0x7ff) << 52) |
2931 (mysignificand & 0xfffffffffffffLL))));
2934 APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
2935 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle);
2936 assert(partCount()==1);
2938 uint32_t myexponent, mysignificand;
2940 if (isFiniteNonZero()) {
2941 myexponent = exponent+127; //bias
2942 mysignificand = (uint32_t)*significandParts();
2943 if (myexponent == 1 && !(mysignificand & 0x800000))
2944 myexponent = 0; // denormal
2945 } else if (category==fcZero) {
2948 } else if (category==fcInfinity) {
2952 assert(category == fcNaN && "Unknown category!");
2954 mysignificand = (uint32_t)*significandParts();
2957 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
2958 (mysignificand & 0x7fffff)));
2961 APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
2962 assert(semantics == (const llvm::fltSemantics*)&semIEEEhalf);
2963 assert(partCount()==1);
2965 uint32_t myexponent, mysignificand;
2967 if (isFiniteNonZero()) {
2968 myexponent = exponent+15; //bias
2969 mysignificand = (uint32_t)*significandParts();
2970 if (myexponent == 1 && !(mysignificand & 0x400))
2971 myexponent = 0; // denormal
2972 } else if (category==fcZero) {
2975 } else if (category==fcInfinity) {
2979 assert(category == fcNaN && "Unknown category!");
2981 mysignificand = (uint32_t)*significandParts();
2984 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |
2985 (mysignificand & 0x3ff)));
2988 // This function creates an APInt that is just a bit map of the floating
2989 // point constant as it would appear in memory. It is not a conversion,
2990 // and treating the result as a normal integer is unlikely to be useful.
2992 APInt IEEEFloat::bitcastToAPInt() const {
2993 if (semantics == (const llvm::fltSemantics*)&semIEEEhalf)
2994 return convertHalfAPFloatToAPInt();
2996 if (semantics == (const llvm::fltSemantics*)&semIEEEsingle)
2997 return convertFloatAPFloatToAPInt();
2999 if (semantics == (const llvm::fltSemantics*)&semIEEEdouble)
3000 return convertDoubleAPFloatToAPInt();
3002 if (semantics == (const llvm::fltSemantics*)&semIEEEquad)
3003 return convertQuadrupleAPFloatToAPInt();
3005 if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy)
3006 return convertPPCDoubleDoubleAPFloatToAPInt();
3008 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended &&
3010 return convertF80LongDoubleAPFloatToAPInt();
3013 float IEEEFloat::convertToFloat() const {
3014 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle &&
3015 "Float semantics are not IEEEsingle");
3016 APInt api = bitcastToAPInt();
3017 return api.bitsToFloat();
3020 double IEEEFloat::convertToDouble() const {
3021 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble &&
3022 "Float semantics are not IEEEdouble");
3023 APInt api = bitcastToAPInt();
3024 return api.bitsToDouble();
3027 /// Integer bit is explicit in this format. Intel hardware (387 and later)
3028 /// does not support these bit patterns:
3029 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3030 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3031 /// exponent = 0, integer bit 1 ("pseudodenormal")
3032 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3033 /// At the moment, the first two are treated as NaNs, the second two as Normal.
3034 void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
3035 assert(api.getBitWidth()==80);
3036 uint64_t i1 = api.getRawData()[0];
3037 uint64_t i2 = api.getRawData()[1];
3038 uint64_t myexponent = (i2 & 0x7fff);
3039 uint64_t mysignificand = i1;
3041 initialize(&semX87DoubleExtended);
3042 assert(partCount()==2);
3044 sign = static_cast<unsigned int>(i2>>15);
3045 if (myexponent==0 && mysignificand==0) {
3046 // exponent, significand meaningless
3048 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3049 // exponent, significand meaningless
3050 category = fcInfinity;
3051 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
3052 // exponent meaningless
3054 significandParts()[0] = mysignificand;
3055 significandParts()[1] = 0;
3057 category = fcNormal;
3058 exponent = myexponent - 16383;
3059 significandParts()[0] = mysignificand;
3060 significandParts()[1] = 0;
3061 if (myexponent==0) // denormal
3066 void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) {
3067 assert(api.getBitWidth()==128);
3068 uint64_t i1 = api.getRawData()[0];
3069 uint64_t i2 = api.getRawData()[1];
3073 // Get the first double and convert to our format.
3074 initFromDoubleAPInt(APInt(64, i1));
3075 fs = convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3076 assert(fs == opOK && !losesInfo);
3079 // Unless we have a special case, add in second double.
3080 if (isFiniteNonZero()) {
3081 IEEEFloat v(semIEEEdouble, APInt(64, i2));
3082 fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3083 assert(fs == opOK && !losesInfo);
3086 add(v, rmNearestTiesToEven);
3090 void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
3091 assert(api.getBitWidth()==128);
3092 uint64_t i1 = api.getRawData()[0];
3093 uint64_t i2 = api.getRawData()[1];
3094 uint64_t myexponent = (i2 >> 48) & 0x7fff;
3095 uint64_t mysignificand = i1;
3096 uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
3098 initialize(&semIEEEquad);
3099 assert(partCount()==2);
3101 sign = static_cast<unsigned int>(i2>>63);
3102 if (myexponent==0 &&
3103 (mysignificand==0 && mysignificand2==0)) {
3104 // exponent, significand meaningless
3106 } else if (myexponent==0x7fff &&
3107 (mysignificand==0 && mysignificand2==0)) {
3108 // exponent, significand meaningless
3109 category = fcInfinity;
3110 } else if (myexponent==0x7fff &&
3111 (mysignificand!=0 || mysignificand2 !=0)) {
3112 // exponent meaningless
3114 significandParts()[0] = mysignificand;
3115 significandParts()[1] = mysignificand2;
3117 category = fcNormal;
3118 exponent = myexponent - 16383;
3119 significandParts()[0] = mysignificand;
3120 significandParts()[1] = mysignificand2;
3121 if (myexponent==0) // denormal
3124 significandParts()[1] |= 0x1000000000000LL; // integer bit
3128 void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
3129 assert(api.getBitWidth()==64);
3130 uint64_t i = *api.getRawData();
3131 uint64_t myexponent = (i >> 52) & 0x7ff;
3132 uint64_t mysignificand = i & 0xfffffffffffffLL;
3134 initialize(&semIEEEdouble);
3135 assert(partCount()==1);
3137 sign = static_cast<unsigned int>(i>>63);
3138 if (myexponent==0 && mysignificand==0) {
3139 // exponent, significand meaningless
3141 } else if (myexponent==0x7ff && mysignificand==0) {
3142 // exponent, significand meaningless
3143 category = fcInfinity;
3144 } else if (myexponent==0x7ff && mysignificand!=0) {
3145 // exponent meaningless
3147 *significandParts() = mysignificand;
3149 category = fcNormal;
3150 exponent = myexponent - 1023;
3151 *significandParts() = mysignificand;
3152 if (myexponent==0) // denormal
3155 *significandParts() |= 0x10000000000000LL; // integer bit
3159 void IEEEFloat::initFromFloatAPInt(const APInt &api) {
3160 assert(api.getBitWidth()==32);
3161 uint32_t i = (uint32_t)*api.getRawData();
3162 uint32_t myexponent = (i >> 23) & 0xff;
3163 uint32_t mysignificand = i & 0x7fffff;
3165 initialize(&semIEEEsingle);
3166 assert(partCount()==1);
3169 if (myexponent==0 && mysignificand==0) {
3170 // exponent, significand meaningless
3172 } else if (myexponent==0xff && mysignificand==0) {
3173 // exponent, significand meaningless
3174 category = fcInfinity;
3175 } else if (myexponent==0xff && mysignificand!=0) {
3176 // sign, exponent, significand meaningless
3178 *significandParts() = mysignificand;
3180 category = fcNormal;
3181 exponent = myexponent - 127; //bias
3182 *significandParts() = mysignificand;
3183 if (myexponent==0) // denormal
3186 *significandParts() |= 0x800000; // integer bit
3190 void IEEEFloat::initFromHalfAPInt(const APInt &api) {
3191 assert(api.getBitWidth()==16);
3192 uint32_t i = (uint32_t)*api.getRawData();
3193 uint32_t myexponent = (i >> 10) & 0x1f;
3194 uint32_t mysignificand = i & 0x3ff;
3196 initialize(&semIEEEhalf);
3197 assert(partCount()==1);
3200 if (myexponent==0 && mysignificand==0) {
3201 // exponent, significand meaningless
3203 } else if (myexponent==0x1f && mysignificand==0) {
3204 // exponent, significand meaningless
3205 category = fcInfinity;
3206 } else if (myexponent==0x1f && mysignificand!=0) {
3207 // sign, exponent, significand meaningless
3209 *significandParts() = mysignificand;
3211 category = fcNormal;
3212 exponent = myexponent - 15; //bias
3213 *significandParts() = mysignificand;
3214 if (myexponent==0) // denormal
3217 *significandParts() |= 0x400; // integer bit
3221 /// Treat api as containing the bits of a floating point number. Currently
3222 /// we infer the floating point type from the size of the APInt. The
3223 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
3224 /// when the size is anything else).
3225 void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
3226 if (Sem == &semIEEEhalf)
3227 return initFromHalfAPInt(api);
3228 if (Sem == &semIEEEsingle)
3229 return initFromFloatAPInt(api);
3230 if (Sem == &semIEEEdouble)
3231 return initFromDoubleAPInt(api);
3232 if (Sem == &semX87DoubleExtended)
3233 return initFromF80LongDoubleAPInt(api);
3234 if (Sem == &semIEEEquad)
3235 return initFromQuadrupleAPInt(api);
3236 if (Sem == &semPPCDoubleDoubleLegacy)
3237 return initFromPPCDoubleDoubleAPInt(api);
3239 llvm_unreachable(nullptr);
3242 /// Make this number the largest magnitude normal number in the given
3244 void IEEEFloat::makeLargest(bool Negative) {
3245 // We want (in interchange format):
3246 // sign = {Negative}
3248 // significand = 1..1
3249 category = fcNormal;
3251 exponent = semantics->maxExponent;
3253 // Use memset to set all but the highest integerPart to all ones.
3254 integerPart *significand = significandParts();
3255 unsigned PartCount = partCount();
3256 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
3258 // Set the high integerPart especially setting all unused top bits for
3259 // internal consistency.
3260 const unsigned NumUnusedHighBits =
3261 PartCount*integerPartWidth - semantics->precision;
3262 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
3263 ? (~integerPart(0) >> NumUnusedHighBits)
3267 /// Make this number the smallest magnitude denormal number in the given
3269 void IEEEFloat::makeSmallest(bool Negative) {
3270 // We want (in interchange format):
3271 // sign = {Negative}
3273 // significand = 0..01
3274 category = fcNormal;
3276 exponent = semantics->minExponent;
3277 APInt::tcSet(significandParts(), 1, partCount());
3280 void IEEEFloat::makeSmallestNormalized(bool Negative) {
3281 // We want (in interchange format):
3282 // sign = {Negative}
3284 // significand = 10..0
3286 category = fcNormal;
3289 exponent = semantics->minExponent;
3290 significandParts()[partCountForBits(semantics->precision) - 1] |=
3291 (((integerPart)1) << ((semantics->precision - 1) % integerPartWidth));
3294 IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
3295 initFromAPInt(&Sem, API);
3298 IEEEFloat::IEEEFloat(float f) {
3299 initFromAPInt(&semIEEEsingle, APInt::floatToBits(f));
3302 IEEEFloat::IEEEFloat(double d) {
3303 initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d));
3307 void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
3308 Buffer.append(Str.begin(), Str.end());
3311 /// Removes data from the given significand until it is no more
3312 /// precise than is required for the desired precision.
3313 void AdjustToPrecision(APInt &significand,
3314 int &exp, unsigned FormatPrecision) {
3315 unsigned bits = significand.getActiveBits();
3317 // 196/59 is a very slight overestimate of lg_2(10).
3318 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
3320 if (bits <= bitsRequired) return;
3322 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3323 if (!tensRemovable) return;
3325 exp += tensRemovable;
3327 APInt divisor(significand.getBitWidth(), 1);
3328 APInt powten(significand.getBitWidth(), 10);
3330 if (tensRemovable & 1)
3332 tensRemovable >>= 1;
3333 if (!tensRemovable) break;
3337 significand = significand.udiv(divisor);
3339 // Truncate the significand down to its active bit count.
3340 significand = significand.trunc(significand.getActiveBits());
3344 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
3345 int &exp, unsigned FormatPrecision) {
3346 unsigned N = buffer.size();
3347 if (N <= FormatPrecision) return;
3349 // The most significant figures are the last ones in the buffer.
3350 unsigned FirstSignificant = N - FormatPrecision;
3353 // FIXME: this probably shouldn't use 'round half up'.
3355 // Rounding down is just a truncation, except we also want to drop
3356 // trailing zeros from the new result.
3357 if (buffer[FirstSignificant - 1] < '5') {
3358 while (FirstSignificant < N && buffer[FirstSignificant] == '0')
3361 exp += FirstSignificant;
3362 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3366 // Rounding up requires a decimal add-with-carry. If we continue
3367 // the carry, the newly-introduced zeros will just be truncated.
3368 for (unsigned I = FirstSignificant; I != N; ++I) {
3369 if (buffer[I] == '9') {
3377 // If we carried through, we have exactly one digit of precision.
3378 if (FirstSignificant == N) {
3379 exp += FirstSignificant;
3381 buffer.push_back('1');
3385 exp += FirstSignificant;
3386 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3390 void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
3391 unsigned FormatMaxPadding, bool TruncateZero) const {
3395 return append(Str, "-Inf");
3397 return append(Str, "+Inf");
3399 case fcNaN: return append(Str, "NaN");
3405 if (!FormatMaxPadding) {
3407 append(Str, "0.0E+0");
3410 if (FormatPrecision > 1)
3411 Str.append(FormatPrecision - 1, '0');
3412 append(Str, "e+00");
3425 // Decompose the number into an APInt and an exponent.
3426 int exp = exponent - ((int) semantics->precision - 1);
3427 APInt significand(semantics->precision,
3428 makeArrayRef(significandParts(),
3429 partCountForBits(semantics->precision)));
3431 // Set FormatPrecision if zero. We want to do this before we
3432 // truncate trailing zeros, as those are part of the precision.
3433 if (!FormatPrecision) {
3434 // We use enough digits so the number can be round-tripped back to an
3435 // APFloat. The formula comes from "How to Print Floating-Point Numbers
3436 // Accurately" by Steele and White.
3437 // FIXME: Using a formula based purely on the precision is conservative;
3438 // we can print fewer digits depending on the actual value being printed.
3440 // FormatPrecision = 2 + floor(significandBits / lg_2(10))
3441 FormatPrecision = 2 + semantics->precision * 59 / 196;
3444 // Ignore trailing binary zeros.
3445 int trailingZeros = significand.countTrailingZeros();
3446 exp += trailingZeros;
3447 significand.lshrInPlace(trailingZeros);
3449 // Change the exponent from 2^e to 10^e.
3452 } else if (exp > 0) {
3454 significand = significand.zext(semantics->precision + exp);
3455 significand <<= exp;
3457 } else { /* exp < 0 */
3460 // We transform this using the identity:
3461 // (N)(2^-e) == (N)(5^e)(10^-e)
3462 // This means we have to multiply N (the significand) by 5^e.
3463 // To avoid overflow, we have to operate on numbers large
3464 // enough to store N * 5^e:
3465 // log2(N * 5^e) == log2(N) + e * log2(5)
3466 // <= semantics->precision + e * 137 / 59
3467 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
3469 unsigned precision = semantics->precision + (137 * texp + 136) / 59;
3471 // Multiply significand by 5^e.
3472 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
3473 significand = significand.zext(precision);
3474 APInt five_to_the_i(precision, 5);
3476 if (texp & 1) significand *= five_to_the_i;
3480 five_to_the_i *= five_to_the_i;
3484 AdjustToPrecision(significand, exp, FormatPrecision);
3486 SmallVector<char, 256> buffer;
3489 unsigned precision = significand.getBitWidth();
3490 APInt ten(precision, 10);
3491 APInt digit(precision, 0);
3493 bool inTrail = true;
3494 while (significand != 0) {
3495 // digit <- significand % 10
3496 // significand <- significand / 10
3497 APInt::udivrem(significand, ten, significand, digit);
3499 unsigned d = digit.getZExtValue();
3501 // Drop trailing zeros.
3502 if (inTrail && !d) exp++;
3504 buffer.push_back((char) ('0' + d));
3509 assert(!buffer.empty() && "no characters in buffer!");
3511 // Drop down to FormatPrecision.
3512 // TODO: don't do more precise calculations above than are required.
3513 AdjustToPrecision(buffer, exp, FormatPrecision);
3515 unsigned NDigits = buffer.size();
3517 // Check whether we should use scientific notation.
3518 bool FormatScientific;
3519 if (!FormatMaxPadding)
3520 FormatScientific = true;
3525 // But we shouldn't make the number look more precise than it is.
3526 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
3527 NDigits + (unsigned) exp > FormatPrecision);
3529 // Power of the most significant digit.
3530 int MSD = exp + (int) (NDigits - 1);
3533 FormatScientific = false;
3535 // 765e-5 == 0.00765
3537 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
3542 // Scientific formatting is pretty straightforward.
3543 if (FormatScientific) {
3544 exp += (NDigits - 1);
3546 Str.push_back(buffer[NDigits-1]);
3548 if (NDigits == 1 && TruncateZero)
3551 for (unsigned I = 1; I != NDigits; ++I)
3552 Str.push_back(buffer[NDigits-1-I]);
3553 // Fill with zeros up to FormatPrecision.
3554 if (!TruncateZero && FormatPrecision > NDigits - 1)
3555 Str.append(FormatPrecision - NDigits + 1, '0');
3556 // For !TruncateZero we use lower 'e'.
3557 Str.push_back(TruncateZero ? 'E' : 'e');
3559 Str.push_back(exp >= 0 ? '+' : '-');
3560 if (exp < 0) exp = -exp;
3561 SmallVector<char, 6> expbuf;
3563 expbuf.push_back((char) ('0' + (exp % 10)));
3566 // Exponent always at least two digits if we do not truncate zeros.
3567 if (!TruncateZero && expbuf.size() < 2)
3568 expbuf.push_back('0');
3569 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
3570 Str.push_back(expbuf[E-1-I]);
3574 // Non-scientific, positive exponents.
3576 for (unsigned I = 0; I != NDigits; ++I)
3577 Str.push_back(buffer[NDigits-1-I]);
3578 for (unsigned I = 0; I != (unsigned) exp; ++I)
3583 // Non-scientific, negative exponents.
3585 // The number of digits to the left of the decimal point.
3586 int NWholeDigits = exp + (int) NDigits;
3589 if (NWholeDigits > 0) {
3590 for (; I != (unsigned) NWholeDigits; ++I)
3591 Str.push_back(buffer[NDigits-I-1]);
3594 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
3598 for (unsigned Z = 1; Z != NZeros; ++Z)
3602 for (; I != NDigits; ++I)
3603 Str.push_back(buffer[NDigits-I-1]);
3606 bool IEEEFloat::getExactInverse(APFloat *inv) const {
3607 // Special floats and denormals have no exact inverse.
3608 if (!isFiniteNonZero())
3611 // Check that the number is a power of two by making sure that only the
3612 // integer bit is set in the significand.
3613 if (significandLSB() != semantics->precision - 1)
3617 IEEEFloat reciprocal(*semantics, 1ULL);
3618 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
3621 // Avoid multiplication with a denormal, it is not safe on all platforms and
3622 // may be slower than a normal division.
3623 if (reciprocal.isDenormal())
3626 assert(reciprocal.isFiniteNonZero() &&
3627 reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
3630 *inv = APFloat(reciprocal, *semantics);
3635 bool IEEEFloat::isSignaling() const {
3639 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
3640 // first bit of the trailing significand being 0.
3641 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
3644 /// IEEE-754R 2008 5.3.1: nextUp/nextDown.
3646 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
3647 /// appropriate sign switching before/after the computation.
3648 IEEEFloat::opStatus IEEEFloat::next(bool nextDown) {
3649 // If we are performing nextDown, swap sign so we have -x.
3653 // Compute nextUp(x)
3654 opStatus result = opOK;
3656 // Handle each float category separately.
3659 // nextUp(+inf) = +inf
3662 // nextUp(-inf) = -getLargest()
3666 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
3667 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
3668 // change the payload.
3669 if (isSignaling()) {
3670 result = opInvalidOp;
3671 // For consistency, propagate the sign of the sNaN to the qNaN.
3672 makeNaN(false, isNegative(), nullptr);
3676 // nextUp(pm 0) = +getSmallest()
3677 makeSmallest(false);
3680 // nextUp(-getSmallest()) = -0
3681 if (isSmallest() && isNegative()) {
3682 APInt::tcSet(significandParts(), 0, partCount());
3688 // nextUp(getLargest()) == INFINITY
3689 if (isLargest() && !isNegative()) {
3690 APInt::tcSet(significandParts(), 0, partCount());
3691 category = fcInfinity;
3692 exponent = semantics->maxExponent + 1;
3696 // nextUp(normal) == normal + inc.
3698 // If we are negative, we need to decrement the significand.
3700 // We only cross a binade boundary that requires adjusting the exponent
3702 // 1. exponent != semantics->minExponent. This implies we are not in the
3703 // smallest binade or are dealing with denormals.
3704 // 2. Our significand excluding the integral bit is all zeros.
3705 bool WillCrossBinadeBoundary =
3706 exponent != semantics->minExponent && isSignificandAllZeros();
3708 // Decrement the significand.
3710 // We always do this since:
3711 // 1. If we are dealing with a non-binade decrement, by definition we
3712 // just decrement the significand.
3713 // 2. If we are dealing with a normal -> normal binade decrement, since
3714 // we have an explicit integral bit the fact that all bits but the
3715 // integral bit are zero implies that subtracting one will yield a
3716 // significand with 0 integral bit and 1 in all other spots. Thus we
3717 // must just adjust the exponent and set the integral bit to 1.
3718 // 3. If we are dealing with a normal -> denormal binade decrement,
3719 // since we set the integral bit to 0 when we represent denormals, we
3720 // just decrement the significand.
3721 integerPart *Parts = significandParts();
3722 APInt::tcDecrement(Parts, partCount());
3724 if (WillCrossBinadeBoundary) {
3725 // Our result is a normal number. Do the following:
3726 // 1. Set the integral bit to 1.
3727 // 2. Decrement the exponent.
3728 APInt::tcSetBit(Parts, semantics->precision - 1);
3732 // If we are positive, we need to increment the significand.
3734 // We only cross a binade boundary that requires adjusting the exponent if
3735 // the input is not a denormal and all of said input's significand bits
3736 // are set. If all of said conditions are true: clear the significand, set
3737 // the integral bit to 1, and increment the exponent. If we have a
3738 // denormal always increment since moving denormals and the numbers in the
3739 // smallest normal binade have the same exponent in our representation.
3740 bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes();
3742 if (WillCrossBinadeBoundary) {
3743 integerPart *Parts = significandParts();
3744 APInt::tcSet(Parts, 0, partCount());
3745 APInt::tcSetBit(Parts, semantics->precision - 1);
3746 assert(exponent != semantics->maxExponent &&
3747 "We can not increment an exponent beyond the maxExponent allowed"
3748 " by the given floating point semantics.");
3751 incrementSignificand();
3757 // If we are performing nextDown, swap sign so we have -nextUp(-x)
3764 void IEEEFloat::makeInf(bool Negative) {
3765 category = fcInfinity;
3767 exponent = semantics->maxExponent + 1;
3768 APInt::tcSet(significandParts(), 0, partCount());
3771 void IEEEFloat::makeZero(bool Negative) {
3774 exponent = semantics->minExponent-1;
3775 APInt::tcSet(significandParts(), 0, partCount());
3778 void IEEEFloat::makeQuiet() {
3780 APInt::tcSetBit(significandParts(), semantics->precision - 2);
3783 int ilogb(const IEEEFloat &Arg) {
3785 return IEEEFloat::IEK_NaN;
3787 return IEEEFloat::IEK_Zero;
3788 if (Arg.isInfinity())
3789 return IEEEFloat::IEK_Inf;
3790 if (!Arg.isDenormal())
3791 return Arg.exponent;
3793 IEEEFloat Normalized(Arg);
3794 int SignificandBits = Arg.getSemantics().precision - 1;
3796 Normalized.exponent += SignificandBits;
3797 Normalized.normalize(IEEEFloat::rmNearestTiesToEven, lfExactlyZero);
3798 return Normalized.exponent - SignificandBits;
3801 IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode RoundingMode) {
3802 auto MaxExp = X.getSemantics().maxExponent;
3803 auto MinExp = X.getSemantics().minExponent;
3805 // If Exp is wildly out-of-scale, simply adding it to X.exponent will
3806 // overflow; clamp it to a safe range before adding, but ensure that the range
3807 // is large enough that the clamp does not change the result. The range we
3808 // need to support is the difference between the largest possible exponent and
3809 // the normalized exponent of half the smallest denormal.
3811 int SignificandBits = X.getSemantics().precision - 1;
3812 int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;
3814 // Clamp to one past the range ends to let normalize handle overlflow.
3815 X.exponent += std::min(std::max(Exp, -MaxIncrement - 1), MaxIncrement);
3816 X.normalize(RoundingMode, lfExactlyZero);
3822 IEEEFloat frexp(const IEEEFloat &Val, int &Exp, IEEEFloat::roundingMode RM) {
3825 // Quiet signalling nans.
3826 if (Exp == IEEEFloat::IEK_NaN) {
3827 IEEEFloat Quiet(Val);
3832 if (Exp == IEEEFloat::IEK_Inf)
3835 // 1 is added because frexp is defined to return a normalized fraction in
3836 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
3837 Exp = Exp == IEEEFloat::IEK_Zero ? 0 : Exp + 1;
3838 return scalbn(Val, -Exp, RM);
3841 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S)
3843 Floats(new APFloat[2]{APFloat(semIEEEdouble), APFloat(semIEEEdouble)}) {
3844 assert(Semantics == &semPPCDoubleDouble);
3847 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag)
3849 Floats(new APFloat[2]{APFloat(semIEEEdouble, uninitialized),
3850 APFloat(semIEEEdouble, uninitialized)}) {
3851 assert(Semantics == &semPPCDoubleDouble);
3854 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I)
3855 : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I),
3856 APFloat(semIEEEdouble)}) {
3857 assert(Semantics == &semPPCDoubleDouble);
3860 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I)
3862 Floats(new APFloat[2]{
3863 APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])),
3864 APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
3865 assert(Semantics == &semPPCDoubleDouble);
3868 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First,
3871 Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
3872 assert(Semantics == &semPPCDoubleDouble);
3873 assert(&Floats[0].getSemantics() == &semIEEEdouble);
3874 assert(&Floats[1].getSemantics() == &semIEEEdouble);
3877 DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS)
3878 : Semantics(RHS.Semantics),
3879 Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]),
3880 APFloat(RHS.Floats[1])}
3882 assert(Semantics == &semPPCDoubleDouble);
3885 DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS)
3886 : Semantics(RHS.Semantics), Floats(std::move(RHS.Floats)) {
3887 RHS.Semantics = &semBogus;
3888 assert(Semantics == &semPPCDoubleDouble);
3891 DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) {
3892 if (Semantics == RHS.Semantics && RHS.Floats) {
3893 Floats[0] = RHS.Floats[0];
3894 Floats[1] = RHS.Floats[1];
3895 } else if (this != &RHS) {
3896 this->~DoubleAPFloat();
3897 new (this) DoubleAPFloat(RHS);
3902 // Implement addition, subtraction, multiplication and division based on:
3903 // "Software for Doubled-Precision Floating-Point Computations",
3904 // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
3905 APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa,
3906 const APFloat &c, const APFloat &cc,
3910 Status |= z.add(c, RM);
3911 if (!z.isFinite()) {
3912 if (!z.isInfinity()) {
3913 Floats[0] = std::move(z);
3914 Floats[1].makeZero(/* Neg = */ false);
3915 return (opStatus)Status;
3918 auto AComparedToC = a.compareAbsoluteValue(c);
3920 Status |= z.add(aa, RM);
3921 if (AComparedToC == APFloat::cmpGreaterThan) {
3922 // z = cc + aa + c + a;
3923 Status |= z.add(c, RM);
3924 Status |= z.add(a, RM);
3926 // z = cc + aa + a + c;
3927 Status |= z.add(a, RM);
3928 Status |= z.add(c, RM);
3930 if (!z.isFinite()) {
3931 Floats[0] = std::move(z);
3932 Floats[1].makeZero(/* Neg = */ false);
3933 return (opStatus)Status;
3937 Status |= zz.add(cc, RM);
3938 if (AComparedToC == APFloat::cmpGreaterThan) {
3939 // Floats[1] = a - z + c + zz;
3941 Status |= Floats[1].subtract(z, RM);
3942 Status |= Floats[1].add(c, RM);
3943 Status |= Floats[1].add(zz, RM);
3945 // Floats[1] = c - z + a + zz;
3947 Status |= Floats[1].subtract(z, RM);
3948 Status |= Floats[1].add(a, RM);
3949 Status |= Floats[1].add(zz, RM);
3954 Status |= q.subtract(z, RM);
3956 // zz = q + c + (a - (q + z)) + aa + cc;
3957 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
3959 Status |= zz.add(c, RM);
3960 Status |= q.add(z, RM);
3961 Status |= q.subtract(a, RM);
3963 Status |= zz.add(q, RM);
3964 Status |= zz.add(aa, RM);
3965 Status |= zz.add(cc, RM);
3966 if (zz.isZero() && !zz.isNegative()) {
3967 Floats[0] = std::move(z);
3968 Floats[1].makeZero(/* Neg = */ false);
3972 Status |= Floats[0].add(zz, RM);
3973 if (!Floats[0].isFinite()) {
3974 Floats[1].makeZero(/* Neg = */ false);
3975 return (opStatus)Status;
3977 Floats[1] = std::move(z);
3978 Status |= Floats[1].subtract(Floats[0], RM);
3979 Status |= Floats[1].add(zz, RM);
3981 return (opStatus)Status;
3984 APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS,
3985 const DoubleAPFloat &RHS,
3988 if (LHS.getCategory() == fcNaN) {
3992 if (RHS.getCategory() == fcNaN) {
3996 if (LHS.getCategory() == fcZero) {
4000 if (RHS.getCategory() == fcZero) {
4004 if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity &&
4005 LHS.isNegative() != RHS.isNegative()) {
4006 Out.makeNaN(false, Out.isNegative(), nullptr);
4009 if (LHS.getCategory() == fcInfinity) {
4013 if (RHS.getCategory() == fcInfinity) {
4017 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal);
4019 APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]),
4021 assert(&A.getSemantics() == &semIEEEdouble);
4022 assert(&AA.getSemantics() == &semIEEEdouble);
4023 assert(&C.getSemantics() == &semIEEEdouble);
4024 assert(&CC.getSemantics() == &semIEEEdouble);
4025 assert(&Out.Floats[0].getSemantics() == &semIEEEdouble);
4026 assert(&Out.Floats[1].getSemantics() == &semIEEEdouble);
4027 return Out.addImpl(A, AA, C, CC, RM);
4030 APFloat::opStatus DoubleAPFloat::add(const DoubleAPFloat &RHS,
4032 return addWithSpecial(*this, RHS, *this, RM);
4035 APFloat::opStatus DoubleAPFloat::subtract(const DoubleAPFloat &RHS,
4038 auto Ret = add(RHS, RM);
4043 APFloat::opStatus DoubleAPFloat::multiply(const DoubleAPFloat &RHS,
4044 APFloat::roundingMode RM) {
4045 const auto &LHS = *this;
4047 /* Interesting observation: For special categories, finding the lowest
4048 common ancestor of the following layered graph gives the correct
4057 e.g. NaN * NaN = NaN
4059 Normal * Zero = Zero
4062 if (LHS.getCategory() == fcNaN) {
4066 if (RHS.getCategory() == fcNaN) {
4070 if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) ||
4071 (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) {
4072 Out.makeNaN(false, false, nullptr);
4075 if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) {
4079 if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) {
4083 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal &&
4084 "Special cases not handled exhaustively");
4087 APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1];
4090 Status |= T.multiply(C, RM);
4091 if (!T.isFiniteNonZero()) {
4093 Floats[1].makeZero(/* Neg = */ false);
4094 return (opStatus)Status;
4097 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
4100 Status |= Tau.fusedMultiplyAdd(C, T, RM);
4105 Status |= V.multiply(D, RM);
4108 Status |= W.multiply(C, RM);
4109 Status |= V.add(W, RM);
4111 Status |= Tau.add(V, RM);
4115 Status |= U.add(Tau, RM);
4118 if (!U.isFinite()) {
4119 Floats[1].makeZero(/* Neg = */ false);
4121 // Floats[1] = (t - u) + tau
4122 Status |= T.subtract(U, RM);
4123 Status |= T.add(Tau, RM);
4126 return (opStatus)Status;
4129 APFloat::opStatus DoubleAPFloat::divide(const DoubleAPFloat &RHS,
4130 APFloat::roundingMode RM) {
4131 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4132 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4134 Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM);
4135 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4139 APFloat::opStatus DoubleAPFloat::remainder(const DoubleAPFloat &RHS) {
4140 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4141 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4143 Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4144 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4148 APFloat::opStatus DoubleAPFloat::mod(const DoubleAPFloat &RHS) {
4149 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4150 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4151 auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4152 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4157 DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand,
4158 const DoubleAPFloat &Addend,
4159 APFloat::roundingMode RM) {
4160 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4161 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4162 auto Ret = Tmp.fusedMultiplyAdd(
4163 APFloat(semPPCDoubleDoubleLegacy, Multiplicand.bitcastToAPInt()),
4164 APFloat(semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()), RM);
4165 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4169 APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) {
4170 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4171 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4172 auto Ret = Tmp.roundToIntegral(RM);
4173 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4177 void DoubleAPFloat::changeSign() {
4178 Floats[0].changeSign();
4179 Floats[1].changeSign();
4183 DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat &RHS) const {
4184 auto Result = Floats[0].compareAbsoluteValue(RHS.Floats[0]);
4185 if (Result != cmpEqual)
4187 Result = Floats[1].compareAbsoluteValue(RHS.Floats[1]);
4188 if (Result == cmpLessThan || Result == cmpGreaterThan) {
4189 auto Against = Floats[0].isNegative() ^ Floats[1].isNegative();
4190 auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative();
4191 if (Against && !RHSAgainst)
4193 if (!Against && RHSAgainst)
4194 return cmpGreaterThan;
4195 if (!Against && !RHSAgainst)
4197 if (Against && RHSAgainst)
4198 return (cmpResult)(cmpLessThan + cmpGreaterThan - Result);
4203 APFloat::fltCategory DoubleAPFloat::getCategory() const {
4204 return Floats[0].getCategory();
4207 bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); }
4209 void DoubleAPFloat::makeInf(bool Neg) {
4210 Floats[0].makeInf(Neg);
4211 Floats[1].makeZero(/* Neg = */ false);
4214 void DoubleAPFloat::makeZero(bool Neg) {
4215 Floats[0].makeZero(Neg);
4216 Floats[1].makeZero(/* Neg = */ false);
4219 void DoubleAPFloat::makeLargest(bool Neg) {
4220 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4221 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
4222 Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
4227 void DoubleAPFloat::makeSmallest(bool Neg) {
4228 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4229 Floats[0].makeSmallest(Neg);
4230 Floats[1].makeZero(/* Neg = */ false);
4233 void DoubleAPFloat::makeSmallestNormalized(bool Neg) {
4234 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4235 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull));
4237 Floats[0].changeSign();
4238 Floats[1].makeZero(/* Neg = */ false);
4241 void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) {
4242 Floats[0].makeNaN(SNaN, Neg, fill);
4243 Floats[1].makeZero(/* Neg = */ false);
4246 APFloat::cmpResult DoubleAPFloat::compare(const DoubleAPFloat &RHS) const {
4247 auto Result = Floats[0].compare(RHS.Floats[0]);
4248 // |Float[0]| > |Float[1]|
4249 if (Result == APFloat::cmpEqual)
4250 return Floats[1].compare(RHS.Floats[1]);
4254 bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat &RHS) const {
4255 return Floats[0].bitwiseIsEqual(RHS.Floats[0]) &&
4256 Floats[1].bitwiseIsEqual(RHS.Floats[1]);
4259 hash_code hash_value(const DoubleAPFloat &Arg) {
4261 return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1]));
4262 return hash_combine(Arg.Semantics);
4265 APInt DoubleAPFloat::bitcastToAPInt() const {
4266 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4268 Floats[0].bitcastToAPInt().getRawData()[0],
4269 Floats[1].bitcastToAPInt().getRawData()[0],
4271 return APInt(128, 2, Data);
4274 APFloat::opStatus DoubleAPFloat::convertFromString(StringRef S,
4276 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4277 APFloat Tmp(semPPCDoubleDoubleLegacy);
4278 auto Ret = Tmp.convertFromString(S, RM);
4279 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4283 APFloat::opStatus DoubleAPFloat::next(bool nextDown) {
4284 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4285 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4286 auto Ret = Tmp.next(nextDown);
4287 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4292 DoubleAPFloat::convertToInteger(MutableArrayRef<integerPart> Input,
4293 unsigned int Width, bool IsSigned,
4294 roundingMode RM, bool *IsExact) const {
4295 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4296 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
4297 .convertToInteger(Input, Width, IsSigned, RM, IsExact);
4300 APFloat::opStatus DoubleAPFloat::convertFromAPInt(const APInt &Input,
4303 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4304 APFloat Tmp(semPPCDoubleDoubleLegacy);
4305 auto Ret = Tmp.convertFromAPInt(Input, IsSigned, RM);
4306 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4311 DoubleAPFloat::convertFromSignExtendedInteger(const integerPart *Input,
4312 unsigned int InputSize,
4313 bool IsSigned, roundingMode RM) {
4314 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4315 APFloat Tmp(semPPCDoubleDoubleLegacy);
4316 auto Ret = Tmp.convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM);
4317 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4322 DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart *Input,
4323 unsigned int InputSize,
4324 bool IsSigned, roundingMode RM) {
4325 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4326 APFloat Tmp(semPPCDoubleDoubleLegacy);
4327 auto Ret = Tmp.convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM);
4328 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4332 unsigned int DoubleAPFloat::convertToHexString(char *DST,
4333 unsigned int HexDigits,
4335 roundingMode RM) const {
4336 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4337 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
4338 .convertToHexString(DST, HexDigits, UpperCase, RM);
4341 bool DoubleAPFloat::isDenormal() const {
4342 return getCategory() == fcNormal &&
4343 (Floats[0].isDenormal() || Floats[1].isDenormal() ||
4344 // (double)(Hi + Lo) == Hi defines a normal number.
4345 Floats[0].compare(Floats[0] + Floats[1]) != cmpEqual);
4348 bool DoubleAPFloat::isSmallest() const {
4349 if (getCategory() != fcNormal)
4351 DoubleAPFloat Tmp(*this);
4352 Tmp.makeSmallest(this->isNegative());
4353 return Tmp.compare(*this) == cmpEqual;
4356 bool DoubleAPFloat::isLargest() const {
4357 if (getCategory() != fcNormal)
4359 DoubleAPFloat Tmp(*this);
4360 Tmp.makeLargest(this->isNegative());
4361 return Tmp.compare(*this) == cmpEqual;
4364 bool DoubleAPFloat::isInteger() const {
4365 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4366 APFloat Tmp(semPPCDoubleDoubleLegacy);
4367 (void)Tmp.add(Floats[0], rmNearestTiesToEven);
4368 (void)Tmp.add(Floats[1], rmNearestTiesToEven);
4369 return Tmp.isInteger();
4372 void DoubleAPFloat::toString(SmallVectorImpl<char> &Str,
4373 unsigned FormatPrecision,
4374 unsigned FormatMaxPadding,
4375 bool TruncateZero) const {
4376 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4377 APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
4378 .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero);
4381 bool DoubleAPFloat::getExactInverse(APFloat *inv) const {
4382 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4383 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4385 return Tmp.getExactInverse(nullptr);
4386 APFloat Inv(semPPCDoubleDoubleLegacy);
4387 auto Ret = Tmp.getExactInverse(&Inv);
4388 *inv = APFloat(semPPCDoubleDouble, Inv.bitcastToAPInt());
4392 DoubleAPFloat scalbn(DoubleAPFloat Arg, int Exp, APFloat::roundingMode RM) {
4393 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4394 return DoubleAPFloat(semPPCDoubleDouble, scalbn(Arg.Floats[0], Exp, RM),
4395 scalbn(Arg.Floats[1], Exp, RM));
4398 DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
4399 APFloat::roundingMode RM) {
4400 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4401 APFloat First = frexp(Arg.Floats[0], Exp, RM);
4402 APFloat Second = Arg.Floats[1];
4403 if (Arg.getCategory() == APFloat::fcNormal)
4404 Second = scalbn(Second, -Exp, RM);
4405 return DoubleAPFloat(semPPCDoubleDouble, std::move(First), std::move(Second));
4408 } // End detail namespace
4410 APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
4411 if (usesLayout<IEEEFloat>(Semantics)) {
4412 new (&IEEE) IEEEFloat(std::move(F));
4415 if (usesLayout<DoubleAPFloat>(Semantics)) {
4417 DoubleAPFloat(Semantics, APFloat(std::move(F), F.getSemantics()),
4418 APFloat(semIEEEdouble));
4421 llvm_unreachable("Unexpected semantics");
4424 APFloat::opStatus APFloat::convertFromString(StringRef Str, roundingMode RM) {
4425 APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str, RM));
4428 hash_code hash_value(const APFloat &Arg) {
4429 if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics()))
4430 return hash_value(Arg.U.IEEE);
4431 if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics()))
4432 return hash_value(Arg.U.Double);
4433 llvm_unreachable("Unexpected semantics");
4436 APFloat::APFloat(const fltSemantics &Semantics, StringRef S)
4437 : APFloat(Semantics) {
4438 convertFromString(S, rmNearestTiesToEven);
4441 APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics,
4442 roundingMode RM, bool *losesInfo) {
4443 if (&getSemantics() == &ToSemantics)
4445 if (usesLayout<IEEEFloat>(getSemantics()) &&
4446 usesLayout<IEEEFloat>(ToSemantics))
4447 return U.IEEE.convert(ToSemantics, RM, losesInfo);
4448 if (usesLayout<IEEEFloat>(getSemantics()) &&
4449 usesLayout<DoubleAPFloat>(ToSemantics)) {
4450 assert(&ToSemantics == &semPPCDoubleDouble);
4451 auto Ret = U.IEEE.convert(semPPCDoubleDoubleLegacy, RM, losesInfo);
4452 *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt());
4455 if (usesLayout<DoubleAPFloat>(getSemantics()) &&
4456 usesLayout<IEEEFloat>(ToSemantics)) {
4457 auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo);
4458 *this = APFloat(std::move(getIEEE()), ToSemantics);
4461 llvm_unreachable("Unexpected semantics");
4464 APFloat APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE) {
4468 return APFloat(semIEEEhalf, APInt::getAllOnesValue(BitWidth));
4470 return APFloat(semIEEEsingle, APInt::getAllOnesValue(BitWidth));
4472 return APFloat(semIEEEdouble, APInt::getAllOnesValue(BitWidth));
4474 return APFloat(semX87DoubleExtended, APInt::getAllOnesValue(BitWidth));
4476 return APFloat(semIEEEquad, APInt::getAllOnesValue(BitWidth));
4478 llvm_unreachable("Unknown floating bit width");
4481 assert(BitWidth == 128);
4482 return APFloat(semPPCDoubleDouble, APInt::getAllOnesValue(BitWidth));
4486 void APFloat::print(raw_ostream &OS) const {
4487 SmallVector<char, 16> Buffer;
4489 OS << Buffer << "\n";
4492 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4493 LLVM_DUMP_METHOD void APFloat::dump() const { print(dbgs()); }
4496 void APFloat::Profile(FoldingSetNodeID &NID) const {
4497 NID.Add(bitcastToAPInt());
4500 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
4501 an APSInt, whose initial bit-width and signed-ness are used to determine the
4502 precision of the conversion.
4504 APFloat::opStatus APFloat::convertToInteger(APSInt &result,
4505 roundingMode rounding_mode,
4506 bool *isExact) const {
4507 unsigned bitWidth = result.getBitWidth();
4508 SmallVector<uint64_t, 4> parts(result.getNumWords());
4509 opStatus status = convertToInteger(parts, bitWidth, result.isSigned(),
4510 rounding_mode, isExact);
4511 // Keeps the original signed-ness.
4512 result = APInt(bitWidth, parts);
4516 } // End llvm namespace
4518 #undef APFLOAT_DISPATCH_ON_SEMANTICS