1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/APSInt.h"
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/FoldingSet.h"
19 #include "llvm/ADT/Hashing.h"
20 #include "llvm/ADT/StringExtras.h"
21 #include "llvm/ADT/StringRef.h"
22 #include "llvm/Support/Debug.h"
23 #include "llvm/Support/ErrorHandling.h"
24 #include "llvm/Support/MathExtras.h"
25 #include "llvm/Support/raw_ostream.h"
29 #define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL) \
31 if (usesLayout<IEEEFloat>(getSemantics())) \
32 return U.IEEE.METHOD_CALL; \
33 if (usesLayout<DoubleAPFloat>(getSemantics())) \
34 return U.Double.METHOD_CALL; \
35 llvm_unreachable("Unexpected semantics"); \
40 /// A macro used to combine two fcCategory enums into one key which can be used
41 /// in a switch statement to classify how the interaction of two APFloat's
42 /// categories affects an operation.
44 /// TODO: If clang source code is ever allowed to use constexpr in its own
45 /// codebase, change this into a static inline function.
46 #define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
48 /* Assumed in hexadecimal significand parsing, and conversion to
49 hexadecimal strings. */
50 static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
53 /* Represents floating point arithmetic semantics. */
55 /* The largest E such that 2^E is representable; this matches the
56 definition of IEEE 754. */
57 APFloatBase::ExponentType maxExponent;
59 /* The smallest E such that 2^E is a normalized number; this
60 matches the definition of IEEE 754. */
61 APFloatBase::ExponentType minExponent;
63 /* Number of bits in the significand. This includes the integer
65 unsigned int precision;
67 /* Number of bits actually used in the semantics. */
68 unsigned int sizeInBits;
71 static const fltSemantics semIEEEhalf = {15, -14, 11, 16};
72 static const fltSemantics semIEEEsingle = {127, -126, 24, 32};
73 static const fltSemantics semIEEEdouble = {1023, -1022, 53, 64};
74 static const fltSemantics semIEEEquad = {16383, -16382, 113, 128};
75 static const fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
76 static const fltSemantics semBogus = {0, 0, 0, 0};
78 /* The IBM double-double semantics. Such a number consists of a pair of IEEE
79 64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal,
80 (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo.
81 Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent
82 to each other, and two 11-bit exponents.
84 Note: we need to make the value different from semBogus as otherwise
85 an unsafe optimization may collapse both values to a single address,
86 and we heavily rely on them having distinct addresses. */
87 static const fltSemantics semPPCDoubleDouble = {-1, 0, 0, 0};
89 /* These are legacy semantics for the fallback, inaccrurate implementation of
90 IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the
91 operation. It's equivalent to having an IEEE number with consecutive 106
92 bits of mantissa and 11 bits of exponent.
94 It's not equivalent to IBM double-double. For example, a legit IBM
95 double-double, 1 + epsilon:
97 1 + epsilon = 1 + (1 >> 1076)
99 is not representable by a consecutive 106 bits of mantissa.
101 Currently, these semantics are used in the following way:
103 semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) ->
104 (64-bit APInt, 64-bit APInt) -> (128-bit APInt) ->
105 semPPCDoubleDoubleLegacy -> IEEE operations
107 We use bitcastToAPInt() to get the bit representation (in APInt) of the
108 underlying IEEEdouble, then use the APInt constructor to construct the
111 TODO: Implement all operations in semPPCDoubleDouble, and delete these
113 static const fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53,
116 const fltSemantics &APFloatBase::IEEEhalf() {
119 const fltSemantics &APFloatBase::IEEEsingle() {
120 return semIEEEsingle;
122 const fltSemantics &APFloatBase::IEEEdouble() {
123 return semIEEEdouble;
125 const fltSemantics &APFloatBase::IEEEquad() {
128 const fltSemantics &APFloatBase::x87DoubleExtended() {
129 return semX87DoubleExtended;
131 const fltSemantics &APFloatBase::Bogus() {
134 const fltSemantics &APFloatBase::PPCDoubleDouble() {
135 return semPPCDoubleDouble;
138 /* A tight upper bound on number of parts required to hold the value
141 power * 815 / (351 * integerPartWidth) + 1
143 However, whilst the result may require only this many parts,
144 because we are multiplying two values to get it, the
145 multiplication may require an extra part with the excess part
146 being zero (consider the trivial case of 1 * 1, tcFullMultiply
147 requires two parts to hold the single-part result). So we add an
148 extra one to guarantee enough space whilst multiplying. */
149 const unsigned int maxExponent = 16383;
150 const unsigned int maxPrecision = 113;
151 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
152 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth));
154 unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {
155 return semantics.precision;
157 APFloatBase::ExponentType
158 APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) {
159 return semantics.maxExponent;
161 APFloatBase::ExponentType
162 APFloatBase::semanticsMinExponent(const fltSemantics &semantics) {
163 return semantics.minExponent;
165 unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {
166 return semantics.sizeInBits;
169 unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) {
170 return Sem.sizeInBits;
173 /* A bunch of private, handy routines. */
175 static inline unsigned int
176 partCountForBits(unsigned int bits)
178 return ((bits) + APFloatBase::integerPartWidth - 1) / APFloatBase::integerPartWidth;
181 /* Returns 0U-9U. Return values >= 10U are not digits. */
182 static inline unsigned int
183 decDigitValue(unsigned int c)
188 /* Return the value of a decimal exponent of the form
191 If the exponent overflows, returns a large exponent with the
194 readExponent(StringRef::iterator begin, StringRef::iterator end)
197 unsigned int absExponent;
198 const unsigned int overlargeExponent = 24000; /* FIXME. */
199 StringRef::iterator p = begin;
201 assert(p != end && "Exponent has no digits");
203 isNegative = (*p == '-');
204 if (*p == '-' || *p == '+') {
206 assert(p != end && "Exponent has no digits");
209 absExponent = decDigitValue(*p++);
210 assert(absExponent < 10U && "Invalid character in exponent");
212 for (; p != end; ++p) {
215 value = decDigitValue(*p);
216 assert(value < 10U && "Invalid character in exponent");
218 value += absExponent * 10;
219 if (absExponent >= overlargeExponent) {
220 absExponent = overlargeExponent;
221 p = end; /* outwit assert below */
227 assert(p == end && "Invalid exponent in exponent");
230 return -(int) absExponent;
232 return (int) absExponent;
235 /* This is ugly and needs cleaning up, but I don't immediately see
236 how whilst remaining safe. */
238 totalExponent(StringRef::iterator p, StringRef::iterator end,
239 int exponentAdjustment)
241 int unsignedExponent;
242 bool negative, overflow;
245 assert(p != end && "Exponent has no digits");
247 negative = *p == '-';
248 if (*p == '-' || *p == '+') {
250 assert(p != end && "Exponent has no digits");
253 unsignedExponent = 0;
255 for (; p != end; ++p) {
258 value = decDigitValue(*p);
259 assert(value < 10U && "Invalid character in exponent");
261 unsignedExponent = unsignedExponent * 10 + value;
262 if (unsignedExponent > 32767) {
268 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
272 exponent = unsignedExponent;
274 exponent = -exponent;
275 exponent += exponentAdjustment;
276 if (exponent > 32767 || exponent < -32768)
281 exponent = negative ? -32768: 32767;
286 static StringRef::iterator
287 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
288 StringRef::iterator *dot)
290 StringRef::iterator p = begin;
292 while (p != end && *p == '0')
295 if (p != end && *p == '.') {
298 assert(end - begin != 1 && "Significand has no digits");
300 while (p != end && *p == '0')
307 /* Given a normal decimal floating point number of the form
311 where the decimal point and exponent are optional, fill out the
312 structure D. Exponent is appropriate if the significand is
313 treated as an integer, and normalizedExponent if the significand
314 is taken to have the decimal point after a single leading
317 If the value is zero, V->firstSigDigit points to a non-digit, and
318 the return exponent is zero.
321 const char *firstSigDigit;
322 const char *lastSigDigit;
324 int normalizedExponent;
328 interpretDecimal(StringRef::iterator begin, StringRef::iterator end,
331 StringRef::iterator dot = end;
332 StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot);
334 D->firstSigDigit = p;
336 D->normalizedExponent = 0;
338 for (; p != end; ++p) {
340 assert(dot == end && "String contains multiple dots");
345 if (decDigitValue(*p) >= 10U)
350 assert((*p == 'e' || *p == 'E') && "Invalid character in significand");
351 assert(p != begin && "Significand has no digits");
352 assert((dot == end || p - begin != 1) && "Significand has no digits");
354 /* p points to the first non-digit in the string */
355 D->exponent = readExponent(p + 1, end);
357 /* Implied decimal point? */
362 /* If number is all zeroes accept any exponent. */
363 if (p != D->firstSigDigit) {
364 /* Drop insignificant trailing zeroes. */
369 while (p != begin && *p == '0');
370 while (p != begin && *p == '.');
373 /* Adjust the exponents for any decimal point. */
374 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
375 D->normalizedExponent = (D->exponent +
376 static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
377 - (dot > D->firstSigDigit && dot < p)));
383 /* Return the trailing fraction of a hexadecimal number.
384 DIGITVALUE is the first hex digit of the fraction, P points to
387 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
388 unsigned int digitValue)
390 unsigned int hexDigit;
392 /* If the first trailing digit isn't 0 or 8 we can work out the
393 fraction immediately. */
395 return lfMoreThanHalf;
396 else if (digitValue < 8 && digitValue > 0)
397 return lfLessThanHalf;
399 // Otherwise we need to find the first non-zero digit.
400 while (p != end && (*p == '0' || *p == '.'))
403 assert(p != end && "Invalid trailing hexadecimal fraction!");
405 hexDigit = hexDigitValue(*p);
407 /* If we ran off the end it is exactly zero or one-half, otherwise
410 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
412 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
415 /* Return the fraction lost were a bignum truncated losing the least
416 significant BITS bits. */
418 lostFractionThroughTruncation(const APFloatBase::integerPart *parts,
419 unsigned int partCount,
424 lsb = APInt::tcLSB(parts, partCount);
426 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
428 return lfExactlyZero;
430 return lfExactlyHalf;
431 if (bits <= partCount * APFloatBase::integerPartWidth &&
432 APInt::tcExtractBit(parts, bits - 1))
433 return lfMoreThanHalf;
435 return lfLessThanHalf;
438 /* Shift DST right BITS bits noting lost fraction. */
440 shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
442 lostFraction lost_fraction;
444 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
446 APInt::tcShiftRight(dst, parts, bits);
448 return lost_fraction;
451 /* Combine the effect of two lost fractions. */
453 combineLostFractions(lostFraction moreSignificant,
454 lostFraction lessSignificant)
456 if (lessSignificant != lfExactlyZero) {
457 if (moreSignificant == lfExactlyZero)
458 moreSignificant = lfLessThanHalf;
459 else if (moreSignificant == lfExactlyHalf)
460 moreSignificant = lfMoreThanHalf;
463 return moreSignificant;
466 /* The error from the true value, in half-ulps, on multiplying two
467 floating point numbers, which differ from the value they
468 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
469 than the returned value.
471 See "How to Read Floating Point Numbers Accurately" by William D
474 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
476 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
478 if (HUerr1 + HUerr2 == 0)
479 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
481 return inexactMultiply + 2 * (HUerr1 + HUerr2);
484 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
485 when the least significant BITS are truncated. BITS cannot be
487 static APFloatBase::integerPart
488 ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits,
490 unsigned int count, partBits;
491 APFloatBase::integerPart part, boundary;
496 count = bits / APFloatBase::integerPartWidth;
497 partBits = bits % APFloatBase::integerPartWidth + 1;
499 part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits));
502 boundary = (APFloatBase::integerPart) 1 << (partBits - 1);
507 if (part - boundary <= boundary - part)
508 return part - boundary;
510 return boundary - part;
513 if (part == boundary) {
516 return ~(APFloatBase::integerPart) 0; /* A lot. */
519 } else if (part == boundary - 1) {
522 return ~(APFloatBase::integerPart) 0; /* A lot. */
527 return ~(APFloatBase::integerPart) 0; /* A lot. */
530 /* Place pow(5, power) in DST, and return the number of parts used.
531 DST must be at least one part larger than size of the answer. */
533 powerOf5(APFloatBase::integerPart *dst, unsigned int power) {
534 static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
535 APFloatBase::integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
536 pow5s[0] = 78125 * 5;
538 unsigned int partsCount[16] = { 1 };
539 APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
541 assert(power <= maxExponent);
546 *p1 = firstEightPowers[power & 7];
552 for (unsigned int n = 0; power; power >>= 1, n++) {
557 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
559 pc = partsCount[n - 1];
560 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
562 if (pow5[pc - 1] == 0)
568 APFloatBase::integerPart *tmp;
570 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
572 if (p2[result - 1] == 0)
575 /* Now result is in p1 with partsCount parts and p2 is scratch
586 APInt::tcAssign(dst, p1, result);
591 /* Zero at the end to avoid modular arithmetic when adding one; used
592 when rounding up during hexadecimal output. */
593 static const char hexDigitsLower[] = "0123456789abcdef0";
594 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
595 static const char infinityL[] = "infinity";
596 static const char infinityU[] = "INFINITY";
597 static const char NaNL[] = "nan";
598 static const char NaNU[] = "NAN";
600 /* Write out an integerPart in hexadecimal, starting with the most
601 significant nibble. Write out exactly COUNT hexdigits, return
604 partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count,
605 const char *hexDigitChars)
607 unsigned int result = count;
609 assert(count != 0 && count <= APFloatBase::integerPartWidth / 4);
611 part >>= (APFloatBase::integerPartWidth - 4 * count);
613 dst[count] = hexDigitChars[part & 0xf];
620 /* Write out an unsigned decimal integer. */
622 writeUnsignedDecimal (char *dst, unsigned int n)
638 /* Write out a signed decimal integer. */
640 writeSignedDecimal (char *dst, int value)
644 dst = writeUnsignedDecimal(dst, -(unsigned) value);
646 dst = writeUnsignedDecimal(dst, value);
653 void IEEEFloat::initialize(const fltSemantics *ourSemantics) {
656 semantics = ourSemantics;
659 significand.parts = new integerPart[count];
662 void IEEEFloat::freeSignificand() {
664 delete [] significand.parts;
667 void IEEEFloat::assign(const IEEEFloat &rhs) {
668 assert(semantics == rhs.semantics);
671 category = rhs.category;
672 exponent = rhs.exponent;
673 if (isFiniteNonZero() || category == fcNaN)
674 copySignificand(rhs);
677 void IEEEFloat::copySignificand(const IEEEFloat &rhs) {
678 assert(isFiniteNonZero() || category == fcNaN);
679 assert(rhs.partCount() >= partCount());
681 APInt::tcAssign(significandParts(), rhs.significandParts(),
685 /* Make this number a NaN, with an arbitrary but deterministic value
686 for the significand. If double or longer, this is a signalling NaN,
687 which may not be ideal. If float, this is QNaN(0). */
688 void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
692 integerPart *significand = significandParts();
693 unsigned numParts = partCount();
695 // Set the significand bits to the fill.
696 if (!fill || fill->getNumWords() < numParts)
697 APInt::tcSet(significand, 0, numParts);
699 APInt::tcAssign(significand, fill->getRawData(),
700 std::min(fill->getNumWords(), numParts));
702 // Zero out the excess bits of the significand.
703 unsigned bitsToPreserve = semantics->precision - 1;
704 unsigned part = bitsToPreserve / 64;
705 bitsToPreserve %= 64;
706 significand[part] &= ((1ULL << bitsToPreserve) - 1);
707 for (part++; part != numParts; ++part)
708 significand[part] = 0;
711 unsigned QNaNBit = semantics->precision - 2;
714 // We always have to clear the QNaN bit to make it an SNaN.
715 APInt::tcClearBit(significand, QNaNBit);
717 // If there are no bits set in the payload, we have to set
718 // *something* to make it a NaN instead of an infinity;
719 // conventionally, this is the next bit down from the QNaN bit.
720 if (APInt::tcIsZero(significand, numParts))
721 APInt::tcSetBit(significand, QNaNBit - 1);
723 // We always have to set the QNaN bit to make it a QNaN.
724 APInt::tcSetBit(significand, QNaNBit);
727 // For x87 extended precision, we want to make a NaN, not a
728 // pseudo-NaN. Maybe we should expose the ability to make
730 if (semantics == &semX87DoubleExtended)
731 APInt::tcSetBit(significand, QNaNBit + 1);
734 IEEEFloat &IEEEFloat::operator=(const IEEEFloat &rhs) {
736 if (semantics != rhs.semantics) {
738 initialize(rhs.semantics);
746 IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) {
749 semantics = rhs.semantics;
750 significand = rhs.significand;
751 exponent = rhs.exponent;
752 category = rhs.category;
755 rhs.semantics = &semBogus;
759 bool IEEEFloat::isDenormal() const {
760 return isFiniteNonZero() && (exponent == semantics->minExponent) &&
761 (APInt::tcExtractBit(significandParts(),
762 semantics->precision - 1) == 0);
765 bool IEEEFloat::isSmallest() const {
766 // The smallest number by magnitude in our format will be the smallest
767 // denormal, i.e. the floating point number with exponent being minimum
768 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
769 return isFiniteNonZero() && exponent == semantics->minExponent &&
770 significandMSB() == 0;
773 bool IEEEFloat::isSignificandAllOnes() const {
774 // Test if the significand excluding the integral bit is all ones. This allows
775 // us to test for binade boundaries.
776 const integerPart *Parts = significandParts();
777 const unsigned PartCount = partCount();
778 for (unsigned i = 0; i < PartCount - 1; i++)
782 // Set the unused high bits to all ones when we compare.
783 const unsigned NumHighBits =
784 PartCount*integerPartWidth - semantics->precision + 1;
785 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to "
786 "fill than integerPartWidth");
787 const integerPart HighBitFill =
788 ~integerPart(0) << (integerPartWidth - NumHighBits);
789 if (~(Parts[PartCount - 1] | HighBitFill))
795 bool IEEEFloat::isSignificandAllZeros() const {
796 // Test if the significand excluding the integral bit is all zeros. This
797 // allows us to test for binade boundaries.
798 const integerPart *Parts = significandParts();
799 const unsigned PartCount = partCount();
801 for (unsigned i = 0; i < PartCount - 1; i++)
805 const unsigned NumHighBits =
806 PartCount*integerPartWidth - semantics->precision + 1;
807 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to "
808 "clear than integerPartWidth");
809 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
811 if (Parts[PartCount - 1] & HighBitMask)
817 bool IEEEFloat::isLargest() const {
818 // The largest number by magnitude in our format will be the floating point
819 // number with maximum exponent and with significand that is all ones.
820 return isFiniteNonZero() && exponent == semantics->maxExponent
821 && isSignificandAllOnes();
824 bool IEEEFloat::isInteger() const {
825 // This could be made more efficient; I'm going for obviously correct.
826 if (!isFinite()) return false;
827 IEEEFloat truncated = *this;
828 truncated.roundToIntegral(rmTowardZero);
829 return compare(truncated) == cmpEqual;
832 bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const {
835 if (semantics != rhs.semantics ||
836 category != rhs.category ||
839 if (category==fcZero || category==fcInfinity)
842 if (isFiniteNonZero() && exponent != rhs.exponent)
845 return std::equal(significandParts(), significandParts() + partCount(),
846 rhs.significandParts());
849 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) {
850 initialize(&ourSemantics);
854 exponent = ourSemantics.precision - 1;
855 significandParts()[0] = value;
856 normalize(rmNearestTiesToEven, lfExactlyZero);
859 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) {
860 initialize(&ourSemantics);
865 // Delegate to the previous constructor, because later copy constructor may
866 // actually inspects category, which can't be garbage.
867 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, uninitializedTag tag)
868 : IEEEFloat(ourSemantics) {}
870 IEEEFloat::IEEEFloat(const IEEEFloat &rhs) {
871 initialize(rhs.semantics);
875 IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) {
876 *this = std::move(rhs);
879 IEEEFloat::~IEEEFloat() { freeSignificand(); }
881 unsigned int IEEEFloat::partCount() const {
882 return partCountForBits(semantics->precision + 1);
885 const IEEEFloat::integerPart *IEEEFloat::significandParts() const {
886 return const_cast<IEEEFloat *>(this)->significandParts();
889 IEEEFloat::integerPart *IEEEFloat::significandParts() {
891 return significand.parts;
893 return &significand.part;
896 void IEEEFloat::zeroSignificand() {
897 APInt::tcSet(significandParts(), 0, partCount());
900 /* Increment an fcNormal floating point number's significand. */
901 void IEEEFloat::incrementSignificand() {
904 carry = APInt::tcIncrement(significandParts(), partCount());
906 /* Our callers should never cause us to overflow. */
911 /* Add the significand of the RHS. Returns the carry flag. */
912 IEEEFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) {
915 parts = significandParts();
917 assert(semantics == rhs.semantics);
918 assert(exponent == rhs.exponent);
920 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
923 /* Subtract the significand of the RHS with a borrow flag. Returns
925 IEEEFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
926 integerPart borrow) {
929 parts = significandParts();
931 assert(semantics == rhs.semantics);
932 assert(exponent == rhs.exponent);
934 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
938 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
939 on to the full-precision result of the multiplication. Returns the
941 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
942 const IEEEFloat *addend) {
943 unsigned int omsb; // One, not zero, based MSB.
944 unsigned int partsCount, newPartsCount, precision;
945 integerPart *lhsSignificand;
946 integerPart scratch[4];
947 integerPart *fullSignificand;
948 lostFraction lost_fraction;
951 assert(semantics == rhs.semantics);
953 precision = semantics->precision;
955 // Allocate space for twice as many bits as the original significand, plus one
956 // extra bit for the addition to overflow into.
957 newPartsCount = partCountForBits(precision * 2 + 1);
959 if (newPartsCount > 4)
960 fullSignificand = new integerPart[newPartsCount];
962 fullSignificand = scratch;
964 lhsSignificand = significandParts();
965 partsCount = partCount();
967 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
968 rhs.significandParts(), partsCount, partsCount);
970 lost_fraction = lfExactlyZero;
971 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
972 exponent += rhs.exponent;
974 // Assume the operands involved in the multiplication are single-precision
975 // FP, and the two multiplicants are:
976 // *this = a23 . a22 ... a0 * 2^e1
977 // rhs = b23 . b22 ... b0 * 2^e2
978 // the result of multiplication is:
979 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
980 // Note that there are three significant bits at the left-hand side of the
981 // radix point: two for the multiplication, and an overflow bit for the
982 // addition (that will always be zero at this point). Move the radix point
983 // toward left by two bits, and adjust exponent accordingly.
986 if (addend && addend->isNonZero()) {
987 // The intermediate result of the multiplication has "2 * precision"
988 // signicant bit; adjust the addend to be consistent with mul result.
990 Significand savedSignificand = significand;
991 const fltSemantics *savedSemantics = semantics;
992 fltSemantics extendedSemantics;
994 unsigned int extendedPrecision;
996 // Normalize our MSB to one below the top bit to allow for overflow.
997 extendedPrecision = 2 * precision + 1;
998 if (omsb != extendedPrecision - 1) {
999 assert(extendedPrecision > omsb);
1000 APInt::tcShiftLeft(fullSignificand, newPartsCount,
1001 (extendedPrecision - 1) - omsb);
1002 exponent -= (extendedPrecision - 1) - omsb;
1005 /* Create new semantics. */
1006 extendedSemantics = *semantics;
1007 extendedSemantics.precision = extendedPrecision;
1009 if (newPartsCount == 1)
1010 significand.part = fullSignificand[0];
1012 significand.parts = fullSignificand;
1013 semantics = &extendedSemantics;
1015 IEEEFloat extendedAddend(*addend);
1016 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
1017 assert(status == opOK);
1020 // Shift the significand of the addend right by one bit. This guarantees
1021 // that the high bit of the significand is zero (same as fullSignificand),
1022 // so the addition will overflow (if it does overflow at all) into the top bit.
1023 lost_fraction = extendedAddend.shiftSignificandRight(1);
1024 assert(lost_fraction == lfExactlyZero &&
1025 "Lost precision while shifting addend for fused-multiply-add.");
1027 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
1029 /* Restore our state. */
1030 if (newPartsCount == 1)
1031 fullSignificand[0] = significand.part;
1032 significand = savedSignificand;
1033 semantics = savedSemantics;
1035 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1038 // Convert the result having "2 * precision" significant-bits back to the one
1039 // having "precision" significant-bits. First, move the radix point from
1040 // poision "2*precision - 1" to "precision - 1". The exponent need to be
1041 // adjusted by "2*precision - 1" - "precision - 1" = "precision".
1042 exponent -= precision + 1;
1044 // In case MSB resides at the left-hand side of radix point, shift the
1045 // mantissa right by some amount to make sure the MSB reside right before
1046 // the radix point (i.e. "MSB . rest-significant-bits").
1048 // Note that the result is not normalized when "omsb < precision". So, the
1049 // caller needs to call IEEEFloat::normalize() if normalized value is
1051 if (omsb > precision) {
1052 unsigned int bits, significantParts;
1055 bits = omsb - precision;
1056 significantParts = partCountForBits(omsb);
1057 lf = shiftRight(fullSignificand, significantParts, bits);
1058 lost_fraction = combineLostFractions(lf, lost_fraction);
1062 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
1064 if (newPartsCount > 4)
1065 delete [] fullSignificand;
1067 return lost_fraction;
1070 /* Multiply the significands of LHS and RHS to DST. */
1071 lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
1072 unsigned int bit, i, partsCount;
1073 const integerPart *rhsSignificand;
1074 integerPart *lhsSignificand, *dividend, *divisor;
1075 integerPart scratch[4];
1076 lostFraction lost_fraction;
1078 assert(semantics == rhs.semantics);
1080 lhsSignificand = significandParts();
1081 rhsSignificand = rhs.significandParts();
1082 partsCount = partCount();
1085 dividend = new integerPart[partsCount * 2];
1089 divisor = dividend + partsCount;
1091 /* Copy the dividend and divisor as they will be modified in-place. */
1092 for (i = 0; i < partsCount; i++) {
1093 dividend[i] = lhsSignificand[i];
1094 divisor[i] = rhsSignificand[i];
1095 lhsSignificand[i] = 0;
1098 exponent -= rhs.exponent;
1100 unsigned int precision = semantics->precision;
1102 /* Normalize the divisor. */
1103 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1106 APInt::tcShiftLeft(divisor, partsCount, bit);
1109 /* Normalize the dividend. */
1110 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1113 APInt::tcShiftLeft(dividend, partsCount, bit);
1116 /* Ensure the dividend >= divisor initially for the loop below.
1117 Incidentally, this means that the division loop below is
1118 guaranteed to set the integer bit to one. */
1119 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1121 APInt::tcShiftLeft(dividend, partsCount, 1);
1122 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1125 /* Long division. */
1126 for (bit = precision; bit; bit -= 1) {
1127 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1128 APInt::tcSubtract(dividend, divisor, 0, partsCount);
1129 APInt::tcSetBit(lhsSignificand, bit - 1);
1132 APInt::tcShiftLeft(dividend, partsCount, 1);
1135 /* Figure out the lost fraction. */
1136 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1139 lost_fraction = lfMoreThanHalf;
1141 lost_fraction = lfExactlyHalf;
1142 else if (APInt::tcIsZero(dividend, partsCount))
1143 lost_fraction = lfExactlyZero;
1145 lost_fraction = lfLessThanHalf;
1150 return lost_fraction;
1153 unsigned int IEEEFloat::significandMSB() const {
1154 return APInt::tcMSB(significandParts(), partCount());
1157 unsigned int IEEEFloat::significandLSB() const {
1158 return APInt::tcLSB(significandParts(), partCount());
1161 /* Note that a zero result is NOT normalized to fcZero. */
1162 lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
1163 /* Our exponent should not overflow. */
1164 assert((ExponentType) (exponent + bits) >= exponent);
1168 return shiftRight(significandParts(), partCount(), bits);
1171 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
1172 void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
1173 assert(bits < semantics->precision);
1176 unsigned int partsCount = partCount();
1178 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1181 assert(!APInt::tcIsZero(significandParts(), partsCount));
1185 IEEEFloat::cmpResult
1186 IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const {
1189 assert(semantics == rhs.semantics);
1190 assert(isFiniteNonZero());
1191 assert(rhs.isFiniteNonZero());
1193 compare = exponent - rhs.exponent;
1195 /* If exponents are equal, do an unsigned bignum comparison of the
1198 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1202 return cmpGreaterThan;
1203 else if (compare < 0)
1209 /* Handle overflow. Sign is preserved. We either become infinity or
1210 the largest finite number. */
1211 IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
1213 if (rounding_mode == rmNearestTiesToEven ||
1214 rounding_mode == rmNearestTiesToAway ||
1215 (rounding_mode == rmTowardPositive && !sign) ||
1216 (rounding_mode == rmTowardNegative && sign)) {
1217 category = fcInfinity;
1218 return (opStatus) (opOverflow | opInexact);
1221 /* Otherwise we become the largest finite number. */
1222 category = fcNormal;
1223 exponent = semantics->maxExponent;
1224 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1225 semantics->precision);
1230 /* Returns TRUE if, when truncating the current number, with BIT the
1231 new LSB, with the given lost fraction and rounding mode, the result
1232 would need to be rounded away from zero (i.e., by increasing the
1233 signficand). This routine must work for fcZero of both signs, and
1234 fcNormal numbers. */
1235 bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,
1236 lostFraction lost_fraction,
1237 unsigned int bit) const {
1238 /* NaNs and infinities should not have lost fractions. */
1239 assert(isFiniteNonZero() || category == fcZero);
1241 /* Current callers never pass this so we don't handle it. */
1242 assert(lost_fraction != lfExactlyZero);
1244 switch (rounding_mode) {
1245 case rmNearestTiesToAway:
1246 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1248 case rmNearestTiesToEven:
1249 if (lost_fraction == lfMoreThanHalf)
1252 /* Our zeroes don't have a significand to test. */
1253 if (lost_fraction == lfExactlyHalf && category != fcZero)
1254 return APInt::tcExtractBit(significandParts(), bit);
1261 case rmTowardPositive:
1264 case rmTowardNegative:
1267 llvm_unreachable("Invalid rounding mode found");
1270 IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
1271 lostFraction lost_fraction) {
1272 unsigned int omsb; /* One, not zero, based MSB. */
1275 if (!isFiniteNonZero())
1278 /* Before rounding normalize the exponent of fcNormal numbers. */
1279 omsb = significandMSB() + 1;
1282 /* OMSB is numbered from 1. We want to place it in the integer
1283 bit numbered PRECISION if possible, with a compensating change in
1285 exponentChange = omsb - semantics->precision;
1287 /* If the resulting exponent is too high, overflow according to
1288 the rounding mode. */
1289 if (exponent + exponentChange > semantics->maxExponent)
1290 return handleOverflow(rounding_mode);
1292 /* Subnormal numbers have exponent minExponent, and their MSB
1293 is forced based on that. */
1294 if (exponent + exponentChange < semantics->minExponent)
1295 exponentChange = semantics->minExponent - exponent;
1297 /* Shifting left is easy as we don't lose precision. */
1298 if (exponentChange < 0) {
1299 assert(lost_fraction == lfExactlyZero);
1301 shiftSignificandLeft(-exponentChange);
1306 if (exponentChange > 0) {
1309 /* Shift right and capture any new lost fraction. */
1310 lf = shiftSignificandRight(exponentChange);
1312 lost_fraction = combineLostFractions(lf, lost_fraction);
1314 /* Keep OMSB up-to-date. */
1315 if (omsb > (unsigned) exponentChange)
1316 omsb -= exponentChange;
1322 /* Now round the number according to rounding_mode given the lost
1325 /* As specified in IEEE 754, since we do not trap we do not report
1326 underflow for exact results. */
1327 if (lost_fraction == lfExactlyZero) {
1328 /* Canonicalize zeroes. */
1335 /* Increment the significand if we're rounding away from zero. */
1336 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1338 exponent = semantics->minExponent;
1340 incrementSignificand();
1341 omsb = significandMSB() + 1;
1343 /* Did the significand increment overflow? */
1344 if (omsb == (unsigned) semantics->precision + 1) {
1345 /* Renormalize by incrementing the exponent and shifting our
1346 significand right one. However if we already have the
1347 maximum exponent we overflow to infinity. */
1348 if (exponent == semantics->maxExponent) {
1349 category = fcInfinity;
1351 return (opStatus) (opOverflow | opInexact);
1354 shiftSignificandRight(1);
1360 /* The normal case - we were and are not denormal, and any
1361 significand increment above didn't overflow. */
1362 if (omsb == semantics->precision)
1365 /* We have a non-zero denormal. */
1366 assert(omsb < semantics->precision);
1368 /* Canonicalize zeroes. */
1372 /* The fcZero case is a denormal that underflowed to zero. */
1373 return (opStatus) (opUnderflow | opInexact);
1376 IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
1378 switch (PackCategoriesIntoKey(category, rhs.category)) {
1380 llvm_unreachable(nullptr);
1382 case PackCategoriesIntoKey(fcNaN, fcZero):
1383 case PackCategoriesIntoKey(fcNaN, fcNormal):
1384 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1385 case PackCategoriesIntoKey(fcNaN, fcNaN):
1386 case PackCategoriesIntoKey(fcNormal, fcZero):
1387 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1388 case PackCategoriesIntoKey(fcInfinity, fcZero):
1391 case PackCategoriesIntoKey(fcZero, fcNaN):
1392 case PackCategoriesIntoKey(fcNormal, fcNaN):
1393 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1394 // We need to be sure to flip the sign here for subtraction because we
1395 // don't have a separate negate operation so -NaN becomes 0 - NaN here.
1396 sign = rhs.sign ^ subtract;
1398 copySignificand(rhs);
1401 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1402 case PackCategoriesIntoKey(fcZero, fcInfinity):
1403 category = fcInfinity;
1404 sign = rhs.sign ^ subtract;
1407 case PackCategoriesIntoKey(fcZero, fcNormal):
1409 sign = rhs.sign ^ subtract;
1412 case PackCategoriesIntoKey(fcZero, fcZero):
1413 /* Sign depends on rounding mode; handled by caller. */
1416 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1417 /* Differently signed infinities can only be validly
1419 if (((sign ^ rhs.sign)!=0) != subtract) {
1426 case PackCategoriesIntoKey(fcNormal, fcNormal):
1431 /* Add or subtract two normal numbers. */
1432 lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
1435 lostFraction lost_fraction;
1438 /* Determine if the operation on the absolute values is effectively
1439 an addition or subtraction. */
1440 subtract ^= static_cast<bool>(sign ^ rhs.sign);
1442 /* Are we bigger exponent-wise than the RHS? */
1443 bits = exponent - rhs.exponent;
1445 /* Subtraction is more subtle than one might naively expect. */
1447 IEEEFloat temp_rhs(rhs);
1451 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
1452 lost_fraction = lfExactlyZero;
1453 } else if (bits > 0) {
1454 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1455 shiftSignificandLeft(1);
1458 lost_fraction = shiftSignificandRight(-bits - 1);
1459 temp_rhs.shiftSignificandLeft(1);
1464 carry = temp_rhs.subtractSignificand
1465 (*this, lost_fraction != lfExactlyZero);
1466 copySignificand(temp_rhs);
1469 carry = subtractSignificand
1470 (temp_rhs, lost_fraction != lfExactlyZero);
1473 /* Invert the lost fraction - it was on the RHS and
1475 if (lost_fraction == lfLessThanHalf)
1476 lost_fraction = lfMoreThanHalf;
1477 else if (lost_fraction == lfMoreThanHalf)
1478 lost_fraction = lfLessThanHalf;
1480 /* The code above is intended to ensure that no borrow is
1486 IEEEFloat temp_rhs(rhs);
1488 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1489 carry = addSignificand(temp_rhs);
1491 lost_fraction = shiftSignificandRight(-bits);
1492 carry = addSignificand(rhs);
1495 /* We have a guard bit; generating a carry cannot happen. */
1500 return lost_fraction;
1503 IEEEFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
1504 switch (PackCategoriesIntoKey(category, rhs.category)) {
1506 llvm_unreachable(nullptr);
1508 case PackCategoriesIntoKey(fcNaN, fcZero):
1509 case PackCategoriesIntoKey(fcNaN, fcNormal):
1510 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1511 case PackCategoriesIntoKey(fcNaN, fcNaN):
1515 case PackCategoriesIntoKey(fcZero, fcNaN):
1516 case PackCategoriesIntoKey(fcNormal, fcNaN):
1517 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1520 copySignificand(rhs);
1523 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1524 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1525 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1526 category = fcInfinity;
1529 case PackCategoriesIntoKey(fcZero, fcNormal):
1530 case PackCategoriesIntoKey(fcNormal, fcZero):
1531 case PackCategoriesIntoKey(fcZero, fcZero):
1535 case PackCategoriesIntoKey(fcZero, fcInfinity):
1536 case PackCategoriesIntoKey(fcInfinity, fcZero):
1540 case PackCategoriesIntoKey(fcNormal, fcNormal):
1545 IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
1546 switch (PackCategoriesIntoKey(category, rhs.category)) {
1548 llvm_unreachable(nullptr);
1550 case PackCategoriesIntoKey(fcZero, fcNaN):
1551 case PackCategoriesIntoKey(fcNormal, fcNaN):
1552 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1554 copySignificand(rhs);
1556 case PackCategoriesIntoKey(fcNaN, fcZero):
1557 case PackCategoriesIntoKey(fcNaN, fcNormal):
1558 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1559 case PackCategoriesIntoKey(fcNaN, fcNaN):
1562 case PackCategoriesIntoKey(fcInfinity, fcZero):
1563 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1564 case PackCategoriesIntoKey(fcZero, fcInfinity):
1565 case PackCategoriesIntoKey(fcZero, fcNormal):
1568 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1572 case PackCategoriesIntoKey(fcNormal, fcZero):
1573 category = fcInfinity;
1576 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1577 case PackCategoriesIntoKey(fcZero, fcZero):
1581 case PackCategoriesIntoKey(fcNormal, fcNormal):
1586 IEEEFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
1587 switch (PackCategoriesIntoKey(category, rhs.category)) {
1589 llvm_unreachable(nullptr);
1591 case PackCategoriesIntoKey(fcNaN, fcZero):
1592 case PackCategoriesIntoKey(fcNaN, fcNormal):
1593 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1594 case PackCategoriesIntoKey(fcNaN, fcNaN):
1595 case PackCategoriesIntoKey(fcZero, fcInfinity):
1596 case PackCategoriesIntoKey(fcZero, fcNormal):
1597 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1600 case PackCategoriesIntoKey(fcZero, fcNaN):
1601 case PackCategoriesIntoKey(fcNormal, fcNaN):
1602 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1605 copySignificand(rhs);
1608 case PackCategoriesIntoKey(fcNormal, fcZero):
1609 case PackCategoriesIntoKey(fcInfinity, fcZero):
1610 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1611 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1612 case PackCategoriesIntoKey(fcZero, fcZero):
1616 case PackCategoriesIntoKey(fcNormal, fcNormal):
1622 void IEEEFloat::changeSign() {
1623 /* Look mummy, this one's easy. */
1627 /* Normalized addition or subtraction. */
1628 IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
1629 roundingMode rounding_mode,
1633 fs = addOrSubtractSpecials(rhs, subtract);
1635 /* This return code means it was not a simple case. */
1636 if (fs == opDivByZero) {
1637 lostFraction lost_fraction;
1639 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1640 fs = normalize(rounding_mode, lost_fraction);
1642 /* Can only be zero if we lost no fraction. */
1643 assert(category != fcZero || lost_fraction == lfExactlyZero);
1646 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1647 positive zero unless rounding to minus infinity, except that
1648 adding two like-signed zeroes gives that zero. */
1649 if (category == fcZero) {
1650 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
1651 sign = (rounding_mode == rmTowardNegative);
1657 /* Normalized addition. */
1658 IEEEFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs,
1659 roundingMode rounding_mode) {
1660 return addOrSubtract(rhs, rounding_mode, false);
1663 /* Normalized subtraction. */
1664 IEEEFloat::opStatus IEEEFloat::subtract(const IEEEFloat &rhs,
1665 roundingMode rounding_mode) {
1666 return addOrSubtract(rhs, rounding_mode, true);
1669 /* Normalized multiply. */
1670 IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs,
1671 roundingMode rounding_mode) {
1675 fs = multiplySpecials(rhs);
1677 if (isFiniteNonZero()) {
1678 lostFraction lost_fraction = multiplySignificand(rhs, nullptr);
1679 fs = normalize(rounding_mode, lost_fraction);
1680 if (lost_fraction != lfExactlyZero)
1681 fs = (opStatus) (fs | opInexact);
1687 /* Normalized divide. */
1688 IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs,
1689 roundingMode rounding_mode) {
1693 fs = divideSpecials(rhs);
1695 if (isFiniteNonZero()) {
1696 lostFraction lost_fraction = divideSignificand(rhs);
1697 fs = normalize(rounding_mode, lost_fraction);
1698 if (lost_fraction != lfExactlyZero)
1699 fs = (opStatus) (fs | opInexact);
1705 /* Normalized remainder. This is not currently correct in all cases. */
1706 IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) {
1708 IEEEFloat V = *this;
1709 unsigned int origSign = sign;
1711 fs = V.divide(rhs, rmNearestTiesToEven);
1712 if (fs == opDivByZero)
1715 int parts = partCount();
1716 integerPart *x = new integerPart[parts];
1718 fs = V.convertToInteger(makeMutableArrayRef(x, parts),
1719 parts * integerPartWidth, true, rmNearestTiesToEven,
1721 if (fs == opInvalidOp) {
1726 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1727 rmNearestTiesToEven);
1728 assert(fs==opOK); // should always work
1730 fs = V.multiply(rhs, rmNearestTiesToEven);
1731 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1733 fs = subtract(V, rmNearestTiesToEven);
1734 assert(fs==opOK || fs==opInexact); // likewise
1737 sign = origSign; // IEEE754 requires this
1742 /* Normalized llvm frem (C fmod). */
1743 IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) {
1745 fs = modSpecials(rhs);
1746 unsigned int origSign = sign;
1748 while (isFiniteNonZero() && rhs.isFiniteNonZero() &&
1749 compareAbsoluteValue(rhs) != cmpLessThan) {
1750 IEEEFloat V = scalbn(rhs, ilogb(*this) - ilogb(rhs), rmNearestTiesToEven);
1751 if (compareAbsoluteValue(V) == cmpLessThan)
1752 V = scalbn(V, -1, rmNearestTiesToEven);
1755 fs = subtract(V, rmNearestTiesToEven);
1759 sign = origSign; // fmod requires this
1763 /* Normalized fused-multiply-add. */
1764 IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand,
1765 const IEEEFloat &addend,
1766 roundingMode rounding_mode) {
1769 /* Post-multiplication sign, before addition. */
1770 sign ^= multiplicand.sign;
1772 /* If and only if all arguments are normal do we need to do an
1773 extended-precision calculation. */
1774 if (isFiniteNonZero() &&
1775 multiplicand.isFiniteNonZero() &&
1776 addend.isFinite()) {
1777 lostFraction lost_fraction;
1779 lost_fraction = multiplySignificand(multiplicand, &addend);
1780 fs = normalize(rounding_mode, lost_fraction);
1781 if (lost_fraction != lfExactlyZero)
1782 fs = (opStatus) (fs | opInexact);
1784 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1785 positive zero unless rounding to minus infinity, except that
1786 adding two like-signed zeroes gives that zero. */
1787 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign)
1788 sign = (rounding_mode == rmTowardNegative);
1790 fs = multiplySpecials(multiplicand);
1792 /* FS can only be opOK or opInvalidOp. There is no more work
1793 to do in the latter case. The IEEE-754R standard says it is
1794 implementation-defined in this case whether, if ADDEND is a
1795 quiet NaN, we raise invalid op; this implementation does so.
1797 If we need to do the addition we can do so with normal
1800 fs = addOrSubtract(addend, rounding_mode, false);
1806 /* Rounding-mode corrrect round to integral value. */
1807 IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) {
1810 // If the exponent is large enough, we know that this value is already
1811 // integral, and the arithmetic below would potentially cause it to saturate
1812 // to +/-Inf. Bail out early instead.
1813 if (isFiniteNonZero() && exponent+1 >= (int)semanticsPrecision(*semantics))
1816 // The algorithm here is quite simple: we add 2^(p-1), where p is the
1817 // precision of our format, and then subtract it back off again. The choice
1818 // of rounding modes for the addition/subtraction determines the rounding mode
1819 // for our integral rounding as well.
1820 // NOTE: When the input value is negative, we do subtraction followed by
1821 // addition instead.
1822 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
1823 IntegerConstant <<= semanticsPrecision(*semantics)-1;
1824 IEEEFloat MagicConstant(*semantics);
1825 fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
1826 rmNearestTiesToEven);
1827 MagicConstant.sign = sign;
1832 // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly.
1833 bool inputSign = isNegative();
1835 fs = add(MagicConstant, rounding_mode);
1836 if (fs != opOK && fs != opInexact)
1839 fs = subtract(MagicConstant, rounding_mode);
1841 // Restore the input sign.
1842 if (inputSign != isNegative())
1849 /* Comparison requires normalized numbers. */
1850 IEEEFloat::cmpResult IEEEFloat::compare(const IEEEFloat &rhs) const {
1853 assert(semantics == rhs.semantics);
1855 switch (PackCategoriesIntoKey(category, rhs.category)) {
1857 llvm_unreachable(nullptr);
1859 case PackCategoriesIntoKey(fcNaN, fcZero):
1860 case PackCategoriesIntoKey(fcNaN, fcNormal):
1861 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1862 case PackCategoriesIntoKey(fcNaN, fcNaN):
1863 case PackCategoriesIntoKey(fcZero, fcNaN):
1864 case PackCategoriesIntoKey(fcNormal, fcNaN):
1865 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1866 return cmpUnordered;
1868 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1869 case PackCategoriesIntoKey(fcInfinity, fcZero):
1870 case PackCategoriesIntoKey(fcNormal, fcZero):
1874 return cmpGreaterThan;
1876 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1877 case PackCategoriesIntoKey(fcZero, fcInfinity):
1878 case PackCategoriesIntoKey(fcZero, fcNormal):
1880 return cmpGreaterThan;
1884 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1885 if (sign == rhs.sign)
1890 return cmpGreaterThan;
1892 case PackCategoriesIntoKey(fcZero, fcZero):
1895 case PackCategoriesIntoKey(fcNormal, fcNormal):
1899 /* Two normal numbers. Do they have the same sign? */
1900 if (sign != rhs.sign) {
1902 result = cmpLessThan;
1904 result = cmpGreaterThan;
1906 /* Compare absolute values; invert result if negative. */
1907 result = compareAbsoluteValue(rhs);
1910 if (result == cmpLessThan)
1911 result = cmpGreaterThan;
1912 else if (result == cmpGreaterThan)
1913 result = cmpLessThan;
1920 /// IEEEFloat::convert - convert a value of one floating point type to another.
1921 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
1922 /// records whether the transformation lost information, i.e. whether
1923 /// converting the result back to the original type will produce the
1924 /// original value (this is almost the same as return value==fsOK, but there
1925 /// are edge cases where this is not so).
1927 IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
1928 roundingMode rounding_mode,
1930 lostFraction lostFraction;
1931 unsigned int newPartCount, oldPartCount;
1934 const fltSemantics &fromSemantics = *semantics;
1936 lostFraction = lfExactlyZero;
1937 newPartCount = partCountForBits(toSemantics.precision + 1);
1938 oldPartCount = partCount();
1939 shift = toSemantics.precision - fromSemantics.precision;
1941 bool X86SpecialNan = false;
1942 if (&fromSemantics == &semX87DoubleExtended &&
1943 &toSemantics != &semX87DoubleExtended && category == fcNaN &&
1944 (!(*significandParts() & 0x8000000000000000ULL) ||
1945 !(*significandParts() & 0x4000000000000000ULL))) {
1946 // x86 has some unusual NaNs which cannot be represented in any other
1947 // format; note them here.
1948 X86SpecialNan = true;
1951 // If this is a truncation of a denormal number, and the target semantics
1952 // has larger exponent range than the source semantics (this can happen
1953 // when truncating from PowerPC double-double to double format), the
1954 // right shift could lose result mantissa bits. Adjust exponent instead
1955 // of performing excessive shift.
1956 if (shift < 0 && isFiniteNonZero()) {
1957 int exponentChange = significandMSB() + 1 - fromSemantics.precision;
1958 if (exponent + exponentChange < toSemantics.minExponent)
1959 exponentChange = toSemantics.minExponent - exponent;
1960 if (exponentChange < shift)
1961 exponentChange = shift;
1962 if (exponentChange < 0) {
1963 shift -= exponentChange;
1964 exponent += exponentChange;
1968 // If this is a truncation, perform the shift before we narrow the storage.
1969 if (shift < 0 && (isFiniteNonZero() || category==fcNaN))
1970 lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
1972 // Fix the storage so it can hold to new value.
1973 if (newPartCount > oldPartCount) {
1974 // The new type requires more storage; make it available.
1975 integerPart *newParts;
1976 newParts = new integerPart[newPartCount];
1977 APInt::tcSet(newParts, 0, newPartCount);
1978 if (isFiniteNonZero() || category==fcNaN)
1979 APInt::tcAssign(newParts, significandParts(), oldPartCount);
1981 significand.parts = newParts;
1982 } else if (newPartCount == 1 && oldPartCount != 1) {
1983 // Switch to built-in storage for a single part.
1984 integerPart newPart = 0;
1985 if (isFiniteNonZero() || category==fcNaN)
1986 newPart = significandParts()[0];
1988 significand.part = newPart;
1991 // Now that we have the right storage, switch the semantics.
1992 semantics = &toSemantics;
1994 // If this is an extension, perform the shift now that the storage is
1996 if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
1997 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
1999 if (isFiniteNonZero()) {
2000 fs = normalize(rounding_mode, lostFraction);
2001 *losesInfo = (fs != opOK);
2002 } else if (category == fcNaN) {
2003 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2005 // For x87 extended precision, we want to make a NaN, not a special NaN if
2006 // the input wasn't special either.
2007 if (!X86SpecialNan && semantics == &semX87DoubleExtended)
2008 APInt::tcSetBit(significandParts(), semantics->precision - 1);
2010 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
2011 // does not give you back the same bits. This is dubious, and we
2012 // don't currently do it. You're really supposed to get
2013 // an invalid operation signal at runtime, but nobody does that.
2023 /* Convert a floating point number to an integer according to the
2024 rounding mode. If the rounded integer value is out of range this
2025 returns an invalid operation exception and the contents of the
2026 destination parts are unspecified. If the rounded value is in
2027 range but the floating point number is not the exact integer, the C
2028 standard doesn't require an inexact exception to be raised. IEEE
2029 854 does require it so we do that.
2031 Note that for conversions to integer type the C standard requires
2032 round-to-zero to always be used. */
2033 IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
2034 MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned,
2035 roundingMode rounding_mode, bool *isExact) const {
2036 lostFraction lost_fraction;
2037 const integerPart *src;
2038 unsigned int dstPartsCount, truncatedBits;
2042 /* Handle the three special cases first. */
2043 if (category == fcInfinity || category == fcNaN)
2046 dstPartsCount = partCountForBits(width);
2047 assert(dstPartsCount <= parts.size() && "Integer too big");
2049 if (category == fcZero) {
2050 APInt::tcSet(parts.data(), 0, dstPartsCount);
2051 // Negative zero can't be represented as an int.
2056 src = significandParts();
2058 /* Step 1: place our absolute value, with any fraction truncated, in
2061 /* Our absolute value is less than one; truncate everything. */
2062 APInt::tcSet(parts.data(), 0, dstPartsCount);
2063 /* For exponent -1 the integer bit represents .5, look at that.
2064 For smaller exponents leftmost truncated bit is 0. */
2065 truncatedBits = semantics->precision -1U - exponent;
2067 /* We want the most significant (exponent + 1) bits; the rest are
2069 unsigned int bits = exponent + 1U;
2071 /* Hopelessly large in magnitude? */
2075 if (bits < semantics->precision) {
2076 /* We truncate (semantics->precision - bits) bits. */
2077 truncatedBits = semantics->precision - bits;
2078 APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits);
2080 /* We want at least as many bits as are available. */
2081 APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision,
2083 APInt::tcShiftLeft(parts.data(), dstPartsCount,
2084 bits - semantics->precision);
2089 /* Step 2: work out any lost fraction, and increment the absolute
2090 value if we would round away from zero. */
2091 if (truncatedBits) {
2092 lost_fraction = lostFractionThroughTruncation(src, partCount(),
2094 if (lost_fraction != lfExactlyZero &&
2095 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2096 if (APInt::tcIncrement(parts.data(), dstPartsCount))
2097 return opInvalidOp; /* Overflow. */
2100 lost_fraction = lfExactlyZero;
2103 /* Step 3: check if we fit in the destination. */
2104 unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1;
2108 /* Negative numbers cannot be represented as unsigned. */
2112 /* It takes omsb bits to represent the unsigned integer value.
2113 We lose a bit for the sign, but care is needed as the
2114 maximally negative integer is a special case. */
2115 if (omsb == width &&
2116 APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb)
2119 /* This case can happen because of rounding. */
2124 APInt::tcNegate (parts.data(), dstPartsCount);
2126 if (omsb >= width + !isSigned)
2130 if (lost_fraction == lfExactlyZero) {
2137 /* Same as convertToSignExtendedInteger, except we provide
2138 deterministic values in case of an invalid operation exception,
2139 namely zero for NaNs and the minimal or maximal value respectively
2140 for underflow or overflow.
2141 The *isExact output tells whether the result is exact, in the sense
2142 that converting it back to the original floating point type produces
2143 the original value. This is almost equivalent to result==opOK,
2144 except for negative zeroes.
2147 IEEEFloat::convertToInteger(MutableArrayRef<integerPart> parts,
2148 unsigned int width, bool isSigned,
2149 roundingMode rounding_mode, bool *isExact) const {
2152 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2155 if (fs == opInvalidOp) {
2156 unsigned int bits, dstPartsCount;
2158 dstPartsCount = partCountForBits(width);
2159 assert(dstPartsCount <= parts.size() && "Integer too big");
2161 if (category == fcNaN)
2166 bits = width - isSigned;
2168 APInt::tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits);
2169 if (sign && isSigned)
2170 APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1);
2176 /* Convert an unsigned integer SRC to a floating point number,
2177 rounding according to ROUNDING_MODE. The sign of the floating
2178 point number is not modified. */
2179 IEEEFloat::opStatus IEEEFloat::convertFromUnsignedParts(
2180 const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) {
2181 unsigned int omsb, precision, dstCount;
2183 lostFraction lost_fraction;
2185 category = fcNormal;
2186 omsb = APInt::tcMSB(src, srcCount) + 1;
2187 dst = significandParts();
2188 dstCount = partCount();
2189 precision = semantics->precision;
2191 /* We want the most significant PRECISION bits of SRC. There may not
2192 be that many; extract what we can. */
2193 if (precision <= omsb) {
2194 exponent = omsb - 1;
2195 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2197 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2199 exponent = precision - 1;
2200 lost_fraction = lfExactlyZero;
2201 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2204 return normalize(rounding_mode, lost_fraction);
2207 IEEEFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned,
2208 roundingMode rounding_mode) {
2209 unsigned int partCount = Val.getNumWords();
2213 if (isSigned && api.isNegative()) {
2218 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2221 /* Convert a two's complement integer SRC to a floating point number,
2222 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2223 integer is signed, in which case it must be sign-extended. */
2225 IEEEFloat::convertFromSignExtendedInteger(const integerPart *src,
2226 unsigned int srcCount, bool isSigned,
2227 roundingMode rounding_mode) {
2231 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2234 /* If we're signed and negative negate a copy. */
2236 copy = new integerPart[srcCount];
2237 APInt::tcAssign(copy, src, srcCount);
2238 APInt::tcNegate(copy, srcCount);
2239 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2243 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2249 /* FIXME: should this just take a const APInt reference? */
2251 IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2252 unsigned int width, bool isSigned,
2253 roundingMode rounding_mode) {
2254 unsigned int partCount = partCountForBits(width);
2255 APInt api = APInt(width, makeArrayRef(parts, partCount));
2258 if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2263 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2267 IEEEFloat::convertFromHexadecimalString(StringRef s,
2268 roundingMode rounding_mode) {
2269 lostFraction lost_fraction = lfExactlyZero;
2271 category = fcNormal;
2275 integerPart *significand = significandParts();
2276 unsigned partsCount = partCount();
2277 unsigned bitPos = partsCount * integerPartWidth;
2278 bool computedTrailingFraction = false;
2280 // Skip leading zeroes and any (hexa)decimal point.
2281 StringRef::iterator begin = s.begin();
2282 StringRef::iterator end = s.end();
2283 StringRef::iterator dot;
2284 StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2285 StringRef::iterator firstSignificantDigit = p;
2288 integerPart hex_value;
2291 assert(dot == end && "String contains multiple dots");
2296 hex_value = hexDigitValue(*p);
2297 if (hex_value == -1U)
2302 // Store the number while we have space.
2305 hex_value <<= bitPos % integerPartWidth;
2306 significand[bitPos / integerPartWidth] |= hex_value;
2307 } else if (!computedTrailingFraction) {
2308 lost_fraction = trailingHexadecimalFraction(p, end, hex_value);
2309 computedTrailingFraction = true;
2313 /* Hex floats require an exponent but not a hexadecimal point. */
2314 assert(p != end && "Hex strings require an exponent");
2315 assert((*p == 'p' || *p == 'P') && "Invalid character in significand");
2316 assert(p != begin && "Significand has no digits");
2317 assert((dot == end || p - begin != 1) && "Significand has no digits");
2319 /* Ignore the exponent if we are zero. */
2320 if (p != firstSignificantDigit) {
2323 /* Implicit hexadecimal point? */
2327 /* Calculate the exponent adjustment implicit in the number of
2328 significant digits. */
2329 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2330 if (expAdjustment < 0)
2332 expAdjustment = expAdjustment * 4 - 1;
2334 /* Adjust for writing the significand starting at the most
2335 significant nibble. */
2336 expAdjustment += semantics->precision;
2337 expAdjustment -= partsCount * integerPartWidth;
2339 /* Adjust for the given exponent. */
2340 exponent = totalExponent(p + 1, end, expAdjustment);
2343 return normalize(rounding_mode, lost_fraction);
2347 IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2348 unsigned sigPartCount, int exp,
2349 roundingMode rounding_mode) {
2350 unsigned int parts, pow5PartCount;
2351 fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
2352 integerPart pow5Parts[maxPowerOfFiveParts];
2355 isNearest = (rounding_mode == rmNearestTiesToEven ||
2356 rounding_mode == rmNearestTiesToAway);
2358 parts = partCountForBits(semantics->precision + 11);
2360 /* Calculate pow(5, abs(exp)). */
2361 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2363 for (;; parts *= 2) {
2364 opStatus sigStatus, powStatus;
2365 unsigned int excessPrecision, truncatedBits;
2367 calcSemantics.precision = parts * integerPartWidth - 1;
2368 excessPrecision = calcSemantics.precision - semantics->precision;
2369 truncatedBits = excessPrecision;
2371 IEEEFloat decSig(calcSemantics, uninitialized);
2372 decSig.makeZero(sign);
2373 IEEEFloat pow5(calcSemantics);
2375 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2376 rmNearestTiesToEven);
2377 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2378 rmNearestTiesToEven);
2379 /* Add exp, as 10^n = 5^n * 2^n. */
2380 decSig.exponent += exp;
2382 lostFraction calcLostFraction;
2383 integerPart HUerr, HUdistance;
2384 unsigned int powHUerr;
2387 /* multiplySignificand leaves the precision-th bit set to 1. */
2388 calcLostFraction = decSig.multiplySignificand(pow5, nullptr);
2389 powHUerr = powStatus != opOK;
2391 calcLostFraction = decSig.divideSignificand(pow5);
2392 /* Denormal numbers have less precision. */
2393 if (decSig.exponent < semantics->minExponent) {
2394 excessPrecision += (semantics->minExponent - decSig.exponent);
2395 truncatedBits = excessPrecision;
2396 if (excessPrecision > calcSemantics.precision)
2397 excessPrecision = calcSemantics.precision;
2399 /* Extra half-ulp lost in reciprocal of exponent. */
2400 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2403 /* Both multiplySignificand and divideSignificand return the
2404 result with the integer bit set. */
2405 assert(APInt::tcExtractBit
2406 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2408 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2410 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2411 excessPrecision, isNearest);
2413 /* Are we guaranteed to round correctly if we truncate? */
2414 if (HUdistance >= HUerr) {
2415 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2416 calcSemantics.precision - excessPrecision,
2418 /* Take the exponent of decSig. If we tcExtract-ed less bits
2419 above we must adjust our exponent to compensate for the
2420 implicit right shift. */
2421 exponent = (decSig.exponent + semantics->precision
2422 - (calcSemantics.precision - excessPrecision));
2423 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2426 return normalize(rounding_mode, calcLostFraction);
2432 IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
2436 /* Scan the text. */
2437 StringRef::iterator p = str.begin();
2438 interpretDecimal(p, str.end(), &D);
2440 /* Handle the quick cases. First the case of no significant digits,
2441 i.e. zero, and then exponents that are obviously too large or too
2442 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2443 definitely overflows if
2445 (exp - 1) * L >= maxExponent
2447 and definitely underflows to zero where
2449 (exp + 1) * L <= minExponent - precision
2451 With integer arithmetic the tightest bounds for L are
2453 93/28 < L < 196/59 [ numerator <= 256 ]
2454 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2457 // Test if we have a zero number allowing for strings with no null terminators
2458 // and zero decimals with non-zero exponents.
2460 // We computed firstSigDigit by ignoring all zeros and dots. Thus if
2461 // D->firstSigDigit equals str.end(), every digit must be a zero and there can
2462 // be at most one dot. On the other hand, if we have a zero with a non-zero
2463 // exponent, then we know that D.firstSigDigit will be non-numeric.
2464 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
2468 /* Check whether the normalized exponent is high enough to overflow
2469 max during the log-rebasing in the max-exponent check below. */
2470 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2471 fs = handleOverflow(rounding_mode);
2473 /* If it wasn't, then it also wasn't high enough to overflow max
2474 during the log-rebasing in the min-exponent check. Check that it
2475 won't overflow min in either check, then perform the min-exponent
2477 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
2478 (D.normalizedExponent + 1) * 28738 <=
2479 8651 * (semantics->minExponent - (int) semantics->precision)) {
2480 /* Underflow to zero and round. */
2481 category = fcNormal;
2483 fs = normalize(rounding_mode, lfLessThanHalf);
2485 /* We can finally safely perform the max-exponent check. */
2486 } else if ((D.normalizedExponent - 1) * 42039
2487 >= 12655 * semantics->maxExponent) {
2488 /* Overflow and round. */
2489 fs = handleOverflow(rounding_mode);
2491 integerPart *decSignificand;
2492 unsigned int partCount;
2494 /* A tight upper bound on number of bits required to hold an
2495 N-digit decimal integer is N * 196 / 59. Allocate enough space
2496 to hold the full significand, and an extra part required by
2498 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2499 partCount = partCountForBits(1 + 196 * partCount / 59);
2500 decSignificand = new integerPart[partCount + 1];
2503 /* Convert to binary efficiently - we do almost all multiplication
2504 in an integerPart. When this would overflow do we do a single
2505 bignum multiplication, and then revert again to multiplication
2506 in an integerPart. */
2508 integerPart decValue, val, multiplier;
2516 if (p == str.end()) {
2520 decValue = decDigitValue(*p++);
2521 assert(decValue < 10U && "Invalid character in significand");
2523 val = val * 10 + decValue;
2524 /* The maximum number that can be multiplied by ten with any
2525 digit added without overflowing an integerPart. */
2526 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2528 /* Multiply out the current part. */
2529 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2530 partCount, partCount + 1, false);
2532 /* If we used another part (likely but not guaranteed), increase
2534 if (decSignificand[partCount])
2536 } while (p <= D.lastSigDigit);
2538 category = fcNormal;
2539 fs = roundSignificandWithExponent(decSignificand, partCount,
2540 D.exponent, rounding_mode);
2542 delete [] decSignificand;
2548 bool IEEEFloat::convertFromStringSpecials(StringRef str) {
2549 if (str.equals("inf") || str.equals("INFINITY") || str.equals("+Inf")) {
2554 if (str.equals("-inf") || str.equals("-INFINITY") || str.equals("-Inf")) {
2559 if (str.equals("nan") || str.equals("NaN")) {
2560 makeNaN(false, false);
2564 if (str.equals("-nan") || str.equals("-NaN")) {
2565 makeNaN(false, true);
2572 IEEEFloat::opStatus IEEEFloat::convertFromString(StringRef str,
2573 roundingMode rounding_mode) {
2574 assert(!str.empty() && "Invalid string length");
2576 // Handle special cases.
2577 if (convertFromStringSpecials(str))
2580 /* Handle a leading minus sign. */
2581 StringRef::iterator p = str.begin();
2582 size_t slen = str.size();
2583 sign = *p == '-' ? 1 : 0;
2584 if (*p == '-' || *p == '+') {
2587 assert(slen && "String has no digits");
2590 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2591 assert(slen - 2 && "Invalid string");
2592 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
2596 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
2599 /* Write out a hexadecimal representation of the floating point value
2600 to DST, which must be of sufficient size, in the C99 form
2601 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2602 excluding the terminating NUL.
2604 If UPPERCASE, the output is in upper case, otherwise in lower case.
2606 HEXDIGITS digits appear altogether, rounding the value if
2607 necessary. If HEXDIGITS is 0, the minimal precision to display the
2608 number precisely is used instead. If nothing would appear after
2609 the decimal point it is suppressed.
2611 The decimal exponent is always printed and has at least one digit.
2612 Zero values display an exponent of zero. Infinities and NaNs
2613 appear as "infinity" or "nan" respectively.
2615 The above rules are as specified by C99. There is ambiguity about
2616 what the leading hexadecimal digit should be. This implementation
2617 uses whatever is necessary so that the exponent is displayed as
2618 stored. This implies the exponent will fall within the IEEE format
2619 range, and the leading hexadecimal digit will be 0 (for denormals),
2620 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2621 any other digits zero).
2623 unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits,
2625 roundingMode rounding_mode) const {
2634 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2635 dst += sizeof infinityL - 1;
2639 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2640 dst += sizeof NaNU - 1;
2645 *dst++ = upperCase ? 'X': 'x';
2647 if (hexDigits > 1) {
2649 memset (dst, '0', hexDigits - 1);
2650 dst += hexDigits - 1;
2652 *dst++ = upperCase ? 'P': 'p';
2657 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2663 return static_cast<unsigned int>(dst - p);
2666 /* Does the hard work of outputting the correctly rounded hexadecimal
2667 form of a normal floating point number with the specified number of
2668 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2669 digits necessary to print the value precisely is output. */
2670 char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2672 roundingMode rounding_mode) const {
2673 unsigned int count, valueBits, shift, partsCount, outputDigits;
2674 const char *hexDigitChars;
2675 const integerPart *significand;
2680 *dst++ = upperCase ? 'X': 'x';
2683 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2685 significand = significandParts();
2686 partsCount = partCount();
2688 /* +3 because the first digit only uses the single integer bit, so
2689 we have 3 virtual zero most-significant-bits. */
2690 valueBits = semantics->precision + 3;
2691 shift = integerPartWidth - valueBits % integerPartWidth;
2693 /* The natural number of digits required ignoring trailing
2694 insignificant zeroes. */
2695 outputDigits = (valueBits - significandLSB () + 3) / 4;
2697 /* hexDigits of zero means use the required number for the
2698 precision. Otherwise, see if we are truncating. If we are,
2699 find out if we need to round away from zero. */
2701 if (hexDigits < outputDigits) {
2702 /* We are dropping non-zero bits, so need to check how to round.
2703 "bits" is the number of dropped bits. */
2705 lostFraction fraction;
2707 bits = valueBits - hexDigits * 4;
2708 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2709 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2711 outputDigits = hexDigits;
2714 /* Write the digits consecutively, and start writing in the location
2715 of the hexadecimal point. We move the most significant digit
2716 left and add the hexadecimal point later. */
2719 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2721 while (outputDigits && count) {
2724 /* Put the most significant integerPartWidth bits in "part". */
2725 if (--count == partsCount)
2726 part = 0; /* An imaginary higher zero part. */
2728 part = significand[count] << shift;
2731 part |= significand[count - 1] >> (integerPartWidth - shift);
2733 /* Convert as much of "part" to hexdigits as we can. */
2734 unsigned int curDigits = integerPartWidth / 4;
2736 if (curDigits > outputDigits)
2737 curDigits = outputDigits;
2738 dst += partAsHex (dst, part, curDigits, hexDigitChars);
2739 outputDigits -= curDigits;
2745 /* Note that hexDigitChars has a trailing '0'. */
2748 *q = hexDigitChars[hexDigitValue (*q) + 1];
2749 } while (*q == '0');
2752 /* Add trailing zeroes. */
2753 memset (dst, '0', outputDigits);
2754 dst += outputDigits;
2757 /* Move the most significant digit to before the point, and if there
2758 is something after the decimal point add it. This must come
2759 after rounding above. */
2766 /* Finally output the exponent. */
2767 *dst++ = upperCase ? 'P': 'p';
2769 return writeSignedDecimal (dst, exponent);
2772 hash_code hash_value(const IEEEFloat &Arg) {
2773 if (!Arg.isFiniteNonZero())
2774 return hash_combine((uint8_t)Arg.category,
2775 // NaN has no sign, fix it at zero.
2776 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
2777 Arg.semantics->precision);
2779 // Normal floats need their exponent and significand hashed.
2780 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
2781 Arg.semantics->precision, Arg.exponent,
2783 Arg.significandParts(),
2784 Arg.significandParts() + Arg.partCount()));
2787 // Conversion from APFloat to/from host float/double. It may eventually be
2788 // possible to eliminate these and have everybody deal with APFloats, but that
2789 // will take a while. This approach will not easily extend to long double.
2790 // Current implementation requires integerPartWidth==64, which is correct at
2791 // the moment but could be made more general.
2793 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2794 // the actual IEEE respresentations. We compensate for that here.
2796 APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
2797 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended);
2798 assert(partCount()==2);
2800 uint64_t myexponent, mysignificand;
2802 if (isFiniteNonZero()) {
2803 myexponent = exponent+16383; //bias
2804 mysignificand = significandParts()[0];
2805 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2806 myexponent = 0; // denormal
2807 } else if (category==fcZero) {
2810 } else if (category==fcInfinity) {
2811 myexponent = 0x7fff;
2812 mysignificand = 0x8000000000000000ULL;
2814 assert(category == fcNaN && "Unknown category");
2815 myexponent = 0x7fff;
2816 mysignificand = significandParts()[0];
2820 words[0] = mysignificand;
2821 words[1] = ((uint64_t)(sign & 1) << 15) |
2822 (myexponent & 0x7fffLL);
2823 return APInt(80, words);
2826 APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const {
2827 assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy);
2828 assert(partCount()==2);
2834 // Convert number to double. To avoid spurious underflows, we re-
2835 // normalize against the "double" minExponent first, and only *then*
2836 // truncate the mantissa. The result of that second conversion
2837 // may be inexact, but should never underflow.
2838 // Declare fltSemantics before APFloat that uses it (and
2839 // saves pointer to it) to ensure correct destruction order.
2840 fltSemantics extendedSemantics = *semantics;
2841 extendedSemantics.minExponent = semIEEEdouble.minExponent;
2842 IEEEFloat extended(*this);
2843 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2844 assert(fs == opOK && !losesInfo);
2847 IEEEFloat u(extended);
2848 fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
2849 assert(fs == opOK || fs == opInexact);
2851 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
2853 // If conversion was exact or resulted in a special case, we're done;
2854 // just set the second double to zero. Otherwise, re-convert back to
2855 // the extended format and compute the difference. This now should
2856 // convert exactly to double.
2857 if (u.isFiniteNonZero() && losesInfo) {
2858 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2859 assert(fs == opOK && !losesInfo);
2862 IEEEFloat v(extended);
2863 v.subtract(u, rmNearestTiesToEven);
2864 fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
2865 assert(fs == opOK && !losesInfo);
2867 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
2872 return APInt(128, words);
2875 APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
2876 assert(semantics == (const llvm::fltSemantics*)&semIEEEquad);
2877 assert(partCount()==2);
2879 uint64_t myexponent, mysignificand, mysignificand2;
2881 if (isFiniteNonZero()) {
2882 myexponent = exponent+16383; //bias
2883 mysignificand = significandParts()[0];
2884 mysignificand2 = significandParts()[1];
2885 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
2886 myexponent = 0; // denormal
2887 } else if (category==fcZero) {
2889 mysignificand = mysignificand2 = 0;
2890 } else if (category==fcInfinity) {
2891 myexponent = 0x7fff;
2892 mysignificand = mysignificand2 = 0;
2894 assert(category == fcNaN && "Unknown category!");
2895 myexponent = 0x7fff;
2896 mysignificand = significandParts()[0];
2897 mysignificand2 = significandParts()[1];
2901 words[0] = mysignificand;
2902 words[1] = ((uint64_t)(sign & 1) << 63) |
2903 ((myexponent & 0x7fff) << 48) |
2904 (mysignificand2 & 0xffffffffffffLL);
2906 return APInt(128, words);
2909 APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
2910 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble);
2911 assert(partCount()==1);
2913 uint64_t myexponent, mysignificand;
2915 if (isFiniteNonZero()) {
2916 myexponent = exponent+1023; //bias
2917 mysignificand = *significandParts();
2918 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2919 myexponent = 0; // denormal
2920 } else if (category==fcZero) {
2923 } else if (category==fcInfinity) {
2927 assert(category == fcNaN && "Unknown category!");
2929 mysignificand = *significandParts();
2932 return APInt(64, ((((uint64_t)(sign & 1) << 63) |
2933 ((myexponent & 0x7ff) << 52) |
2934 (mysignificand & 0xfffffffffffffLL))));
2937 APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
2938 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle);
2939 assert(partCount()==1);
2941 uint32_t myexponent, mysignificand;
2943 if (isFiniteNonZero()) {
2944 myexponent = exponent+127; //bias
2945 mysignificand = (uint32_t)*significandParts();
2946 if (myexponent == 1 && !(mysignificand & 0x800000))
2947 myexponent = 0; // denormal
2948 } else if (category==fcZero) {
2951 } else if (category==fcInfinity) {
2955 assert(category == fcNaN && "Unknown category!");
2957 mysignificand = (uint32_t)*significandParts();
2960 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
2961 (mysignificand & 0x7fffff)));
2964 APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
2965 assert(semantics == (const llvm::fltSemantics*)&semIEEEhalf);
2966 assert(partCount()==1);
2968 uint32_t myexponent, mysignificand;
2970 if (isFiniteNonZero()) {
2971 myexponent = exponent+15; //bias
2972 mysignificand = (uint32_t)*significandParts();
2973 if (myexponent == 1 && !(mysignificand & 0x400))
2974 myexponent = 0; // denormal
2975 } else if (category==fcZero) {
2978 } else if (category==fcInfinity) {
2982 assert(category == fcNaN && "Unknown category!");
2984 mysignificand = (uint32_t)*significandParts();
2987 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |
2988 (mysignificand & 0x3ff)));
2991 // This function creates an APInt that is just a bit map of the floating
2992 // point constant as it would appear in memory. It is not a conversion,
2993 // and treating the result as a normal integer is unlikely to be useful.
2995 APInt IEEEFloat::bitcastToAPInt() const {
2996 if (semantics == (const llvm::fltSemantics*)&semIEEEhalf)
2997 return convertHalfAPFloatToAPInt();
2999 if (semantics == (const llvm::fltSemantics*)&semIEEEsingle)
3000 return convertFloatAPFloatToAPInt();
3002 if (semantics == (const llvm::fltSemantics*)&semIEEEdouble)
3003 return convertDoubleAPFloatToAPInt();
3005 if (semantics == (const llvm::fltSemantics*)&semIEEEquad)
3006 return convertQuadrupleAPFloatToAPInt();
3008 if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy)
3009 return convertPPCDoubleDoubleAPFloatToAPInt();
3011 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended &&
3013 return convertF80LongDoubleAPFloatToAPInt();
3016 float IEEEFloat::convertToFloat() const {
3017 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle &&
3018 "Float semantics are not IEEEsingle");
3019 APInt api = bitcastToAPInt();
3020 return api.bitsToFloat();
3023 double IEEEFloat::convertToDouble() const {
3024 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble &&
3025 "Float semantics are not IEEEdouble");
3026 APInt api = bitcastToAPInt();
3027 return api.bitsToDouble();
3030 /// Integer bit is explicit in this format. Intel hardware (387 and later)
3031 /// does not support these bit patterns:
3032 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3033 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3034 /// exponent = 0, integer bit 1 ("pseudodenormal")
3035 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3036 /// At the moment, the first two are treated as NaNs, the second two as Normal.
3037 void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
3038 assert(api.getBitWidth()==80);
3039 uint64_t i1 = api.getRawData()[0];
3040 uint64_t i2 = api.getRawData()[1];
3041 uint64_t myexponent = (i2 & 0x7fff);
3042 uint64_t mysignificand = i1;
3044 initialize(&semX87DoubleExtended);
3045 assert(partCount()==2);
3047 sign = static_cast<unsigned int>(i2>>15);
3048 if (myexponent==0 && mysignificand==0) {
3049 // exponent, significand meaningless
3051 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3052 // exponent, significand meaningless
3053 category = fcInfinity;
3054 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
3055 // exponent meaningless
3057 significandParts()[0] = mysignificand;
3058 significandParts()[1] = 0;
3060 category = fcNormal;
3061 exponent = myexponent - 16383;
3062 significandParts()[0] = mysignificand;
3063 significandParts()[1] = 0;
3064 if (myexponent==0) // denormal
3069 void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) {
3070 assert(api.getBitWidth()==128);
3071 uint64_t i1 = api.getRawData()[0];
3072 uint64_t i2 = api.getRawData()[1];
3076 // Get the first double and convert to our format.
3077 initFromDoubleAPInt(APInt(64, i1));
3078 fs = convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3079 assert(fs == opOK && !losesInfo);
3082 // Unless we have a special case, add in second double.
3083 if (isFiniteNonZero()) {
3084 IEEEFloat v(semIEEEdouble, APInt(64, i2));
3085 fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3086 assert(fs == opOK && !losesInfo);
3089 add(v, rmNearestTiesToEven);
3093 void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
3094 assert(api.getBitWidth()==128);
3095 uint64_t i1 = api.getRawData()[0];
3096 uint64_t i2 = api.getRawData()[1];
3097 uint64_t myexponent = (i2 >> 48) & 0x7fff;
3098 uint64_t mysignificand = i1;
3099 uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
3101 initialize(&semIEEEquad);
3102 assert(partCount()==2);
3104 sign = static_cast<unsigned int>(i2>>63);
3105 if (myexponent==0 &&
3106 (mysignificand==0 && mysignificand2==0)) {
3107 // exponent, significand meaningless
3109 } else if (myexponent==0x7fff &&
3110 (mysignificand==0 && mysignificand2==0)) {
3111 // exponent, significand meaningless
3112 category = fcInfinity;
3113 } else if (myexponent==0x7fff &&
3114 (mysignificand!=0 || mysignificand2 !=0)) {
3115 // exponent meaningless
3117 significandParts()[0] = mysignificand;
3118 significandParts()[1] = mysignificand2;
3120 category = fcNormal;
3121 exponent = myexponent - 16383;
3122 significandParts()[0] = mysignificand;
3123 significandParts()[1] = mysignificand2;
3124 if (myexponent==0) // denormal
3127 significandParts()[1] |= 0x1000000000000LL; // integer bit
3131 void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
3132 assert(api.getBitWidth()==64);
3133 uint64_t i = *api.getRawData();
3134 uint64_t myexponent = (i >> 52) & 0x7ff;
3135 uint64_t mysignificand = i & 0xfffffffffffffLL;
3137 initialize(&semIEEEdouble);
3138 assert(partCount()==1);
3140 sign = static_cast<unsigned int>(i>>63);
3141 if (myexponent==0 && mysignificand==0) {
3142 // exponent, significand meaningless
3144 } else if (myexponent==0x7ff && mysignificand==0) {
3145 // exponent, significand meaningless
3146 category = fcInfinity;
3147 } else if (myexponent==0x7ff && mysignificand!=0) {
3148 // exponent meaningless
3150 *significandParts() = mysignificand;
3152 category = fcNormal;
3153 exponent = myexponent - 1023;
3154 *significandParts() = mysignificand;
3155 if (myexponent==0) // denormal
3158 *significandParts() |= 0x10000000000000LL; // integer bit
3162 void IEEEFloat::initFromFloatAPInt(const APInt &api) {
3163 assert(api.getBitWidth()==32);
3164 uint32_t i = (uint32_t)*api.getRawData();
3165 uint32_t myexponent = (i >> 23) & 0xff;
3166 uint32_t mysignificand = i & 0x7fffff;
3168 initialize(&semIEEEsingle);
3169 assert(partCount()==1);
3172 if (myexponent==0 && mysignificand==0) {
3173 // exponent, significand meaningless
3175 } else if (myexponent==0xff && mysignificand==0) {
3176 // exponent, significand meaningless
3177 category = fcInfinity;
3178 } else if (myexponent==0xff && mysignificand!=0) {
3179 // sign, exponent, significand meaningless
3181 *significandParts() = mysignificand;
3183 category = fcNormal;
3184 exponent = myexponent - 127; //bias
3185 *significandParts() = mysignificand;
3186 if (myexponent==0) // denormal
3189 *significandParts() |= 0x800000; // integer bit
3193 void IEEEFloat::initFromHalfAPInt(const APInt &api) {
3194 assert(api.getBitWidth()==16);
3195 uint32_t i = (uint32_t)*api.getRawData();
3196 uint32_t myexponent = (i >> 10) & 0x1f;
3197 uint32_t mysignificand = i & 0x3ff;
3199 initialize(&semIEEEhalf);
3200 assert(partCount()==1);
3203 if (myexponent==0 && mysignificand==0) {
3204 // exponent, significand meaningless
3206 } else if (myexponent==0x1f && mysignificand==0) {
3207 // exponent, significand meaningless
3208 category = fcInfinity;
3209 } else if (myexponent==0x1f && mysignificand!=0) {
3210 // sign, exponent, significand meaningless
3212 *significandParts() = mysignificand;
3214 category = fcNormal;
3215 exponent = myexponent - 15; //bias
3216 *significandParts() = mysignificand;
3217 if (myexponent==0) // denormal
3220 *significandParts() |= 0x400; // integer bit
3224 /// Treat api as containing the bits of a floating point number. Currently
3225 /// we infer the floating point type from the size of the APInt. The
3226 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
3227 /// when the size is anything else).
3228 void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
3229 if (Sem == &semIEEEhalf)
3230 return initFromHalfAPInt(api);
3231 if (Sem == &semIEEEsingle)
3232 return initFromFloatAPInt(api);
3233 if (Sem == &semIEEEdouble)
3234 return initFromDoubleAPInt(api);
3235 if (Sem == &semX87DoubleExtended)
3236 return initFromF80LongDoubleAPInt(api);
3237 if (Sem == &semIEEEquad)
3238 return initFromQuadrupleAPInt(api);
3239 if (Sem == &semPPCDoubleDoubleLegacy)
3240 return initFromPPCDoubleDoubleAPInt(api);
3242 llvm_unreachable(nullptr);
3245 /// Make this number the largest magnitude normal number in the given
3247 void IEEEFloat::makeLargest(bool Negative) {
3248 // We want (in interchange format):
3249 // sign = {Negative}
3251 // significand = 1..1
3252 category = fcNormal;
3254 exponent = semantics->maxExponent;
3256 // Use memset to set all but the highest integerPart to all ones.
3257 integerPart *significand = significandParts();
3258 unsigned PartCount = partCount();
3259 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
3261 // Set the high integerPart especially setting all unused top bits for
3262 // internal consistency.
3263 const unsigned NumUnusedHighBits =
3264 PartCount*integerPartWidth - semantics->precision;
3265 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
3266 ? (~integerPart(0) >> NumUnusedHighBits)
3270 /// Make this number the smallest magnitude denormal number in the given
3272 void IEEEFloat::makeSmallest(bool Negative) {
3273 // We want (in interchange format):
3274 // sign = {Negative}
3276 // significand = 0..01
3277 category = fcNormal;
3279 exponent = semantics->minExponent;
3280 APInt::tcSet(significandParts(), 1, partCount());
3283 void IEEEFloat::makeSmallestNormalized(bool Negative) {
3284 // We want (in interchange format):
3285 // sign = {Negative}
3287 // significand = 10..0
3289 category = fcNormal;
3292 exponent = semantics->minExponent;
3293 significandParts()[partCountForBits(semantics->precision) - 1] |=
3294 (((integerPart)1) << ((semantics->precision - 1) % integerPartWidth));
3297 IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
3298 initFromAPInt(&Sem, API);
3301 IEEEFloat::IEEEFloat(float f) {
3302 initFromAPInt(&semIEEEsingle, APInt::floatToBits(f));
3305 IEEEFloat::IEEEFloat(double d) {
3306 initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d));
3310 void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
3311 Buffer.append(Str.begin(), Str.end());
3314 /// Removes data from the given significand until it is no more
3315 /// precise than is required for the desired precision.
3316 void AdjustToPrecision(APInt &significand,
3317 int &exp, unsigned FormatPrecision) {
3318 unsigned bits = significand.getActiveBits();
3320 // 196/59 is a very slight overestimate of lg_2(10).
3321 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
3323 if (bits <= bitsRequired) return;
3325 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3326 if (!tensRemovable) return;
3328 exp += tensRemovable;
3330 APInt divisor(significand.getBitWidth(), 1);
3331 APInt powten(significand.getBitWidth(), 10);
3333 if (tensRemovable & 1)
3335 tensRemovable >>= 1;
3336 if (!tensRemovable) break;
3340 significand = significand.udiv(divisor);
3342 // Truncate the significand down to its active bit count.
3343 significand = significand.trunc(significand.getActiveBits());
3347 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
3348 int &exp, unsigned FormatPrecision) {
3349 unsigned N = buffer.size();
3350 if (N <= FormatPrecision) return;
3352 // The most significant figures are the last ones in the buffer.
3353 unsigned FirstSignificant = N - FormatPrecision;
3356 // FIXME: this probably shouldn't use 'round half up'.
3358 // Rounding down is just a truncation, except we also want to drop
3359 // trailing zeros from the new result.
3360 if (buffer[FirstSignificant - 1] < '5') {
3361 while (FirstSignificant < N && buffer[FirstSignificant] == '0')
3364 exp += FirstSignificant;
3365 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3369 // Rounding up requires a decimal add-with-carry. If we continue
3370 // the carry, the newly-introduced zeros will just be truncated.
3371 for (unsigned I = FirstSignificant; I != N; ++I) {
3372 if (buffer[I] == '9') {
3380 // If we carried through, we have exactly one digit of precision.
3381 if (FirstSignificant == N) {
3382 exp += FirstSignificant;
3384 buffer.push_back('1');
3388 exp += FirstSignificant;
3389 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3393 void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
3394 unsigned FormatMaxPadding, bool TruncateZero) const {
3398 return append(Str, "-Inf");
3400 return append(Str, "+Inf");
3402 case fcNaN: return append(Str, "NaN");
3408 if (!FormatMaxPadding) {
3410 append(Str, "0.0E+0");
3413 if (FormatPrecision > 1)
3414 Str.append(FormatPrecision - 1, '0');
3415 append(Str, "e+00");
3428 // Decompose the number into an APInt and an exponent.
3429 int exp = exponent - ((int) semantics->precision - 1);
3430 APInt significand(semantics->precision,
3431 makeArrayRef(significandParts(),
3432 partCountForBits(semantics->precision)));
3434 // Set FormatPrecision if zero. We want to do this before we
3435 // truncate trailing zeros, as those are part of the precision.
3436 if (!FormatPrecision) {
3437 // We use enough digits so the number can be round-tripped back to an
3438 // APFloat. The formula comes from "How to Print Floating-Point Numbers
3439 // Accurately" by Steele and White.
3440 // FIXME: Using a formula based purely on the precision is conservative;
3441 // we can print fewer digits depending on the actual value being printed.
3443 // FormatPrecision = 2 + floor(significandBits / lg_2(10))
3444 FormatPrecision = 2 + semantics->precision * 59 / 196;
3447 // Ignore trailing binary zeros.
3448 int trailingZeros = significand.countTrailingZeros();
3449 exp += trailingZeros;
3450 significand.lshrInPlace(trailingZeros);
3452 // Change the exponent from 2^e to 10^e.
3455 } else if (exp > 0) {
3457 significand = significand.zext(semantics->precision + exp);
3458 significand <<= exp;
3460 } else { /* exp < 0 */
3463 // We transform this using the identity:
3464 // (N)(2^-e) == (N)(5^e)(10^-e)
3465 // This means we have to multiply N (the significand) by 5^e.
3466 // To avoid overflow, we have to operate on numbers large
3467 // enough to store N * 5^e:
3468 // log2(N * 5^e) == log2(N) + e * log2(5)
3469 // <= semantics->precision + e * 137 / 59
3470 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
3472 unsigned precision = semantics->precision + (137 * texp + 136) / 59;
3474 // Multiply significand by 5^e.
3475 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
3476 significand = significand.zext(precision);
3477 APInt five_to_the_i(precision, 5);
3479 if (texp & 1) significand *= five_to_the_i;
3483 five_to_the_i *= five_to_the_i;
3487 AdjustToPrecision(significand, exp, FormatPrecision);
3489 SmallVector<char, 256> buffer;
3492 unsigned precision = significand.getBitWidth();
3493 APInt ten(precision, 10);
3494 APInt digit(precision, 0);
3496 bool inTrail = true;
3497 while (significand != 0) {
3498 // digit <- significand % 10
3499 // significand <- significand / 10
3500 APInt::udivrem(significand, ten, significand, digit);
3502 unsigned d = digit.getZExtValue();
3504 // Drop trailing zeros.
3505 if (inTrail && !d) exp++;
3507 buffer.push_back((char) ('0' + d));
3512 assert(!buffer.empty() && "no characters in buffer!");
3514 // Drop down to FormatPrecision.
3515 // TODO: don't do more precise calculations above than are required.
3516 AdjustToPrecision(buffer, exp, FormatPrecision);
3518 unsigned NDigits = buffer.size();
3520 // Check whether we should use scientific notation.
3521 bool FormatScientific;
3522 if (!FormatMaxPadding)
3523 FormatScientific = true;
3528 // But we shouldn't make the number look more precise than it is.
3529 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
3530 NDigits + (unsigned) exp > FormatPrecision);
3532 // Power of the most significant digit.
3533 int MSD = exp + (int) (NDigits - 1);
3536 FormatScientific = false;
3538 // 765e-5 == 0.00765
3540 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
3545 // Scientific formatting is pretty straightforward.
3546 if (FormatScientific) {
3547 exp += (NDigits - 1);
3549 Str.push_back(buffer[NDigits-1]);
3551 if (NDigits == 1 && TruncateZero)
3554 for (unsigned I = 1; I != NDigits; ++I)
3555 Str.push_back(buffer[NDigits-1-I]);
3556 // Fill with zeros up to FormatPrecision.
3557 if (!TruncateZero && FormatPrecision > NDigits - 1)
3558 Str.append(FormatPrecision - NDigits + 1, '0');
3559 // For !TruncateZero we use lower 'e'.
3560 Str.push_back(TruncateZero ? 'E' : 'e');
3562 Str.push_back(exp >= 0 ? '+' : '-');
3563 if (exp < 0) exp = -exp;
3564 SmallVector<char, 6> expbuf;
3566 expbuf.push_back((char) ('0' + (exp % 10)));
3569 // Exponent always at least two digits if we do not truncate zeros.
3570 if (!TruncateZero && expbuf.size() < 2)
3571 expbuf.push_back('0');
3572 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
3573 Str.push_back(expbuf[E-1-I]);
3577 // Non-scientific, positive exponents.
3579 for (unsigned I = 0; I != NDigits; ++I)
3580 Str.push_back(buffer[NDigits-1-I]);
3581 for (unsigned I = 0; I != (unsigned) exp; ++I)
3586 // Non-scientific, negative exponents.
3588 // The number of digits to the left of the decimal point.
3589 int NWholeDigits = exp + (int) NDigits;
3592 if (NWholeDigits > 0) {
3593 for (; I != (unsigned) NWholeDigits; ++I)
3594 Str.push_back(buffer[NDigits-I-1]);
3597 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
3601 for (unsigned Z = 1; Z != NZeros; ++Z)
3605 for (; I != NDigits; ++I)
3606 Str.push_back(buffer[NDigits-I-1]);
3609 bool IEEEFloat::getExactInverse(APFloat *inv) const {
3610 // Special floats and denormals have no exact inverse.
3611 if (!isFiniteNonZero())
3614 // Check that the number is a power of two by making sure that only the
3615 // integer bit is set in the significand.
3616 if (significandLSB() != semantics->precision - 1)
3620 IEEEFloat reciprocal(*semantics, 1ULL);
3621 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
3624 // Avoid multiplication with a denormal, it is not safe on all platforms and
3625 // may be slower than a normal division.
3626 if (reciprocal.isDenormal())
3629 assert(reciprocal.isFiniteNonZero() &&
3630 reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
3633 *inv = APFloat(reciprocal, *semantics);
3638 bool IEEEFloat::isSignaling() const {
3642 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
3643 // first bit of the trailing significand being 0.
3644 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
3647 /// IEEE-754R 2008 5.3.1: nextUp/nextDown.
3649 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
3650 /// appropriate sign switching before/after the computation.
3651 IEEEFloat::opStatus IEEEFloat::next(bool nextDown) {
3652 // If we are performing nextDown, swap sign so we have -x.
3656 // Compute nextUp(x)
3657 opStatus result = opOK;
3659 // Handle each float category separately.
3662 // nextUp(+inf) = +inf
3665 // nextUp(-inf) = -getLargest()
3669 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
3670 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
3671 // change the payload.
3672 if (isSignaling()) {
3673 result = opInvalidOp;
3674 // For consistency, propagate the sign of the sNaN to the qNaN.
3675 makeNaN(false, isNegative(), nullptr);
3679 // nextUp(pm 0) = +getSmallest()
3680 makeSmallest(false);
3683 // nextUp(-getSmallest()) = -0
3684 if (isSmallest() && isNegative()) {
3685 APInt::tcSet(significandParts(), 0, partCount());
3691 // nextUp(getLargest()) == INFINITY
3692 if (isLargest() && !isNegative()) {
3693 APInt::tcSet(significandParts(), 0, partCount());
3694 category = fcInfinity;
3695 exponent = semantics->maxExponent + 1;
3699 // nextUp(normal) == normal + inc.
3701 // If we are negative, we need to decrement the significand.
3703 // We only cross a binade boundary that requires adjusting the exponent
3705 // 1. exponent != semantics->minExponent. This implies we are not in the
3706 // smallest binade or are dealing with denormals.
3707 // 2. Our significand excluding the integral bit is all zeros.
3708 bool WillCrossBinadeBoundary =
3709 exponent != semantics->minExponent && isSignificandAllZeros();
3711 // Decrement the significand.
3713 // We always do this since:
3714 // 1. If we are dealing with a non-binade decrement, by definition we
3715 // just decrement the significand.
3716 // 2. If we are dealing with a normal -> normal binade decrement, since
3717 // we have an explicit integral bit the fact that all bits but the
3718 // integral bit are zero implies that subtracting one will yield a
3719 // significand with 0 integral bit and 1 in all other spots. Thus we
3720 // must just adjust the exponent and set the integral bit to 1.
3721 // 3. If we are dealing with a normal -> denormal binade decrement,
3722 // since we set the integral bit to 0 when we represent denormals, we
3723 // just decrement the significand.
3724 integerPart *Parts = significandParts();
3725 APInt::tcDecrement(Parts, partCount());
3727 if (WillCrossBinadeBoundary) {
3728 // Our result is a normal number. Do the following:
3729 // 1. Set the integral bit to 1.
3730 // 2. Decrement the exponent.
3731 APInt::tcSetBit(Parts, semantics->precision - 1);
3735 // If we are positive, we need to increment the significand.
3737 // We only cross a binade boundary that requires adjusting the exponent if
3738 // the input is not a denormal and all of said input's significand bits
3739 // are set. If all of said conditions are true: clear the significand, set
3740 // the integral bit to 1, and increment the exponent. If we have a
3741 // denormal always increment since moving denormals and the numbers in the
3742 // smallest normal binade have the same exponent in our representation.
3743 bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes();
3745 if (WillCrossBinadeBoundary) {
3746 integerPart *Parts = significandParts();
3747 APInt::tcSet(Parts, 0, partCount());
3748 APInt::tcSetBit(Parts, semantics->precision - 1);
3749 assert(exponent != semantics->maxExponent &&
3750 "We can not increment an exponent beyond the maxExponent allowed"
3751 " by the given floating point semantics.");
3754 incrementSignificand();
3760 // If we are performing nextDown, swap sign so we have -nextUp(-x)
3767 void IEEEFloat::makeInf(bool Negative) {
3768 category = fcInfinity;
3770 exponent = semantics->maxExponent + 1;
3771 APInt::tcSet(significandParts(), 0, partCount());
3774 void IEEEFloat::makeZero(bool Negative) {
3777 exponent = semantics->minExponent-1;
3778 APInt::tcSet(significandParts(), 0, partCount());
3781 void IEEEFloat::makeQuiet() {
3783 APInt::tcSetBit(significandParts(), semantics->precision - 2);
3786 int ilogb(const IEEEFloat &Arg) {
3788 return IEEEFloat::IEK_NaN;
3790 return IEEEFloat::IEK_Zero;
3791 if (Arg.isInfinity())
3792 return IEEEFloat::IEK_Inf;
3793 if (!Arg.isDenormal())
3794 return Arg.exponent;
3796 IEEEFloat Normalized(Arg);
3797 int SignificandBits = Arg.getSemantics().precision - 1;
3799 Normalized.exponent += SignificandBits;
3800 Normalized.normalize(IEEEFloat::rmNearestTiesToEven, lfExactlyZero);
3801 return Normalized.exponent - SignificandBits;
3804 IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode RoundingMode) {
3805 auto MaxExp = X.getSemantics().maxExponent;
3806 auto MinExp = X.getSemantics().minExponent;
3808 // If Exp is wildly out-of-scale, simply adding it to X.exponent will
3809 // overflow; clamp it to a safe range before adding, but ensure that the range
3810 // is large enough that the clamp does not change the result. The range we
3811 // need to support is the difference between the largest possible exponent and
3812 // the normalized exponent of half the smallest denormal.
3814 int SignificandBits = X.getSemantics().precision - 1;
3815 int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;
3817 // Clamp to one past the range ends to let normalize handle overlflow.
3818 X.exponent += std::min(std::max(Exp, -MaxIncrement - 1), MaxIncrement);
3819 X.normalize(RoundingMode, lfExactlyZero);
3825 IEEEFloat frexp(const IEEEFloat &Val, int &Exp, IEEEFloat::roundingMode RM) {
3828 // Quiet signalling nans.
3829 if (Exp == IEEEFloat::IEK_NaN) {
3830 IEEEFloat Quiet(Val);
3835 if (Exp == IEEEFloat::IEK_Inf)
3838 // 1 is added because frexp is defined to return a normalized fraction in
3839 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
3840 Exp = Exp == IEEEFloat::IEK_Zero ? 0 : Exp + 1;
3841 return scalbn(Val, -Exp, RM);
3844 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S)
3846 Floats(new APFloat[2]{APFloat(semIEEEdouble), APFloat(semIEEEdouble)}) {
3847 assert(Semantics == &semPPCDoubleDouble);
3850 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag)
3852 Floats(new APFloat[2]{APFloat(semIEEEdouble, uninitialized),
3853 APFloat(semIEEEdouble, uninitialized)}) {
3854 assert(Semantics == &semPPCDoubleDouble);
3857 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I)
3858 : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I),
3859 APFloat(semIEEEdouble)}) {
3860 assert(Semantics == &semPPCDoubleDouble);
3863 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I)
3865 Floats(new APFloat[2]{
3866 APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])),
3867 APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
3868 assert(Semantics == &semPPCDoubleDouble);
3871 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First,
3874 Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
3875 assert(Semantics == &semPPCDoubleDouble);
3876 assert(&Floats[0].getSemantics() == &semIEEEdouble);
3877 assert(&Floats[1].getSemantics() == &semIEEEdouble);
3880 DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS)
3881 : Semantics(RHS.Semantics),
3882 Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]),
3883 APFloat(RHS.Floats[1])}
3885 assert(Semantics == &semPPCDoubleDouble);
3888 DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS)
3889 : Semantics(RHS.Semantics), Floats(std::move(RHS.Floats)) {
3890 RHS.Semantics = &semBogus;
3891 assert(Semantics == &semPPCDoubleDouble);
3894 DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) {
3895 if (Semantics == RHS.Semantics && RHS.Floats) {
3896 Floats[0] = RHS.Floats[0];
3897 Floats[1] = RHS.Floats[1];
3898 } else if (this != &RHS) {
3899 this->~DoubleAPFloat();
3900 new (this) DoubleAPFloat(RHS);
3905 // Implement addition, subtraction, multiplication and division based on:
3906 // "Software for Doubled-Precision Floating-Point Computations",
3907 // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
3908 APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa,
3909 const APFloat &c, const APFloat &cc,
3913 Status |= z.add(c, RM);
3914 if (!z.isFinite()) {
3915 if (!z.isInfinity()) {
3916 Floats[0] = std::move(z);
3917 Floats[1].makeZero(/* Neg = */ false);
3918 return (opStatus)Status;
3921 auto AComparedToC = a.compareAbsoluteValue(c);
3923 Status |= z.add(aa, RM);
3924 if (AComparedToC == APFloat::cmpGreaterThan) {
3925 // z = cc + aa + c + a;
3926 Status |= z.add(c, RM);
3927 Status |= z.add(a, RM);
3929 // z = cc + aa + a + c;
3930 Status |= z.add(a, RM);
3931 Status |= z.add(c, RM);
3933 if (!z.isFinite()) {
3934 Floats[0] = std::move(z);
3935 Floats[1].makeZero(/* Neg = */ false);
3936 return (opStatus)Status;
3940 Status |= zz.add(cc, RM);
3941 if (AComparedToC == APFloat::cmpGreaterThan) {
3942 // Floats[1] = a - z + c + zz;
3944 Status |= Floats[1].subtract(z, RM);
3945 Status |= Floats[1].add(c, RM);
3946 Status |= Floats[1].add(zz, RM);
3948 // Floats[1] = c - z + a + zz;
3950 Status |= Floats[1].subtract(z, RM);
3951 Status |= Floats[1].add(a, RM);
3952 Status |= Floats[1].add(zz, RM);
3957 Status |= q.subtract(z, RM);
3959 // zz = q + c + (a - (q + z)) + aa + cc;
3960 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
3962 Status |= zz.add(c, RM);
3963 Status |= q.add(z, RM);
3964 Status |= q.subtract(a, RM);
3966 Status |= zz.add(q, RM);
3967 Status |= zz.add(aa, RM);
3968 Status |= zz.add(cc, RM);
3969 if (zz.isZero() && !zz.isNegative()) {
3970 Floats[0] = std::move(z);
3971 Floats[1].makeZero(/* Neg = */ false);
3975 Status |= Floats[0].add(zz, RM);
3976 if (!Floats[0].isFinite()) {
3977 Floats[1].makeZero(/* Neg = */ false);
3978 return (opStatus)Status;
3980 Floats[1] = std::move(z);
3981 Status |= Floats[1].subtract(Floats[0], RM);
3982 Status |= Floats[1].add(zz, RM);
3984 return (opStatus)Status;
3987 APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS,
3988 const DoubleAPFloat &RHS,
3991 if (LHS.getCategory() == fcNaN) {
3995 if (RHS.getCategory() == fcNaN) {
3999 if (LHS.getCategory() == fcZero) {
4003 if (RHS.getCategory() == fcZero) {
4007 if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity &&
4008 LHS.isNegative() != RHS.isNegative()) {
4009 Out.makeNaN(false, Out.isNegative(), nullptr);
4012 if (LHS.getCategory() == fcInfinity) {
4016 if (RHS.getCategory() == fcInfinity) {
4020 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal);
4022 APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]),
4024 assert(&A.getSemantics() == &semIEEEdouble);
4025 assert(&AA.getSemantics() == &semIEEEdouble);
4026 assert(&C.getSemantics() == &semIEEEdouble);
4027 assert(&CC.getSemantics() == &semIEEEdouble);
4028 assert(&Out.Floats[0].getSemantics() == &semIEEEdouble);
4029 assert(&Out.Floats[1].getSemantics() == &semIEEEdouble);
4030 return Out.addImpl(A, AA, C, CC, RM);
4033 APFloat::opStatus DoubleAPFloat::add(const DoubleAPFloat &RHS,
4035 return addWithSpecial(*this, RHS, *this, RM);
4038 APFloat::opStatus DoubleAPFloat::subtract(const DoubleAPFloat &RHS,
4041 auto Ret = add(RHS, RM);
4046 APFloat::opStatus DoubleAPFloat::multiply(const DoubleAPFloat &RHS,
4047 APFloat::roundingMode RM) {
4048 const auto &LHS = *this;
4050 /* Interesting observation: For special categories, finding the lowest
4051 common ancestor of the following layered graph gives the correct
4060 e.g. NaN * NaN = NaN
4062 Normal * Zero = Zero
4065 if (LHS.getCategory() == fcNaN) {
4069 if (RHS.getCategory() == fcNaN) {
4073 if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) ||
4074 (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) {
4075 Out.makeNaN(false, false, nullptr);
4078 if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) {
4082 if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) {
4086 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal &&
4087 "Special cases not handled exhaustively");
4090 APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1];
4093 Status |= T.multiply(C, RM);
4094 if (!T.isFiniteNonZero()) {
4096 Floats[1].makeZero(/* Neg = */ false);
4097 return (opStatus)Status;
4100 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
4103 Status |= Tau.fusedMultiplyAdd(C, T, RM);
4108 Status |= V.multiply(D, RM);
4111 Status |= W.multiply(C, RM);
4112 Status |= V.add(W, RM);
4114 Status |= Tau.add(V, RM);
4118 Status |= U.add(Tau, RM);
4121 if (!U.isFinite()) {
4122 Floats[1].makeZero(/* Neg = */ false);
4124 // Floats[1] = (t - u) + tau
4125 Status |= T.subtract(U, RM);
4126 Status |= T.add(Tau, RM);
4129 return (opStatus)Status;
4132 APFloat::opStatus DoubleAPFloat::divide(const DoubleAPFloat &RHS,
4133 APFloat::roundingMode RM) {
4134 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4135 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4137 Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM);
4138 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4142 APFloat::opStatus DoubleAPFloat::remainder(const DoubleAPFloat &RHS) {
4143 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4144 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4146 Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4147 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4151 APFloat::opStatus DoubleAPFloat::mod(const DoubleAPFloat &RHS) {
4152 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4153 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4154 auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4155 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4160 DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand,
4161 const DoubleAPFloat &Addend,
4162 APFloat::roundingMode RM) {
4163 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4164 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4165 auto Ret = Tmp.fusedMultiplyAdd(
4166 APFloat(semPPCDoubleDoubleLegacy, Multiplicand.bitcastToAPInt()),
4167 APFloat(semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()), RM);
4168 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4172 APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) {
4173 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4174 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4175 auto Ret = Tmp.roundToIntegral(RM);
4176 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4180 void DoubleAPFloat::changeSign() {
4181 Floats[0].changeSign();
4182 Floats[1].changeSign();
4186 DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat &RHS) const {
4187 auto Result = Floats[0].compareAbsoluteValue(RHS.Floats[0]);
4188 if (Result != cmpEqual)
4190 Result = Floats[1].compareAbsoluteValue(RHS.Floats[1]);
4191 if (Result == cmpLessThan || Result == cmpGreaterThan) {
4192 auto Against = Floats[0].isNegative() ^ Floats[1].isNegative();
4193 auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative();
4194 if (Against && !RHSAgainst)
4196 if (!Against && RHSAgainst)
4197 return cmpGreaterThan;
4198 if (!Against && !RHSAgainst)
4200 if (Against && RHSAgainst)
4201 return (cmpResult)(cmpLessThan + cmpGreaterThan - Result);
4206 APFloat::fltCategory DoubleAPFloat::getCategory() const {
4207 return Floats[0].getCategory();
4210 bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); }
4212 void DoubleAPFloat::makeInf(bool Neg) {
4213 Floats[0].makeInf(Neg);
4214 Floats[1].makeZero(/* Neg = */ false);
4217 void DoubleAPFloat::makeZero(bool Neg) {
4218 Floats[0].makeZero(Neg);
4219 Floats[1].makeZero(/* Neg = */ false);
4222 void DoubleAPFloat::makeLargest(bool Neg) {
4223 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4224 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
4225 Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
4230 void DoubleAPFloat::makeSmallest(bool Neg) {
4231 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4232 Floats[0].makeSmallest(Neg);
4233 Floats[1].makeZero(/* Neg = */ false);
4236 void DoubleAPFloat::makeSmallestNormalized(bool Neg) {
4237 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4238 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull));
4240 Floats[0].changeSign();
4241 Floats[1].makeZero(/* Neg = */ false);
4244 void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) {
4245 Floats[0].makeNaN(SNaN, Neg, fill);
4246 Floats[1].makeZero(/* Neg = */ false);
4249 APFloat::cmpResult DoubleAPFloat::compare(const DoubleAPFloat &RHS) const {
4250 auto Result = Floats[0].compare(RHS.Floats[0]);
4251 // |Float[0]| > |Float[1]|
4252 if (Result == APFloat::cmpEqual)
4253 return Floats[1].compare(RHS.Floats[1]);
4257 bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat &RHS) const {
4258 return Floats[0].bitwiseIsEqual(RHS.Floats[0]) &&
4259 Floats[1].bitwiseIsEqual(RHS.Floats[1]);
4262 hash_code hash_value(const DoubleAPFloat &Arg) {
4264 return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1]));
4265 return hash_combine(Arg.Semantics);
4268 APInt DoubleAPFloat::bitcastToAPInt() const {
4269 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4271 Floats[0].bitcastToAPInt().getRawData()[0],
4272 Floats[1].bitcastToAPInt().getRawData()[0],
4274 return APInt(128, 2, Data);
4277 APFloat::opStatus DoubleAPFloat::convertFromString(StringRef S,
4279 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4280 APFloat Tmp(semPPCDoubleDoubleLegacy);
4281 auto Ret = Tmp.convertFromString(S, RM);
4282 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4286 APFloat::opStatus DoubleAPFloat::next(bool nextDown) {
4287 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4288 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4289 auto Ret = Tmp.next(nextDown);
4290 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4295 DoubleAPFloat::convertToInteger(MutableArrayRef<integerPart> Input,
4296 unsigned int Width, bool IsSigned,
4297 roundingMode RM, bool *IsExact) const {
4298 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4299 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
4300 .convertToInteger(Input, Width, IsSigned, RM, IsExact);
4303 APFloat::opStatus DoubleAPFloat::convertFromAPInt(const APInt &Input,
4306 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4307 APFloat Tmp(semPPCDoubleDoubleLegacy);
4308 auto Ret = Tmp.convertFromAPInt(Input, IsSigned, RM);
4309 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4314 DoubleAPFloat::convertFromSignExtendedInteger(const integerPart *Input,
4315 unsigned int InputSize,
4316 bool IsSigned, roundingMode RM) {
4317 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4318 APFloat Tmp(semPPCDoubleDoubleLegacy);
4319 auto Ret = Tmp.convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM);
4320 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4325 DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart *Input,
4326 unsigned int InputSize,
4327 bool IsSigned, roundingMode RM) {
4328 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4329 APFloat Tmp(semPPCDoubleDoubleLegacy);
4330 auto Ret = Tmp.convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM);
4331 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4335 unsigned int DoubleAPFloat::convertToHexString(char *DST,
4336 unsigned int HexDigits,
4338 roundingMode RM) const {
4339 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4340 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
4341 .convertToHexString(DST, HexDigits, UpperCase, RM);
4344 bool DoubleAPFloat::isDenormal() const {
4345 return getCategory() == fcNormal &&
4346 (Floats[0].isDenormal() || Floats[1].isDenormal() ||
4347 // (double)(Hi + Lo) == Hi defines a normal number.
4348 Floats[0].compare(Floats[0] + Floats[1]) != cmpEqual);
4351 bool DoubleAPFloat::isSmallest() const {
4352 if (getCategory() != fcNormal)
4354 DoubleAPFloat Tmp(*this);
4355 Tmp.makeSmallest(this->isNegative());
4356 return Tmp.compare(*this) == cmpEqual;
4359 bool DoubleAPFloat::isLargest() const {
4360 if (getCategory() != fcNormal)
4362 DoubleAPFloat Tmp(*this);
4363 Tmp.makeLargest(this->isNegative());
4364 return Tmp.compare(*this) == cmpEqual;
4367 bool DoubleAPFloat::isInteger() const {
4368 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4369 return Floats[0].isInteger() && Floats[1].isInteger();
4372 void DoubleAPFloat::toString(SmallVectorImpl<char> &Str,
4373 unsigned FormatPrecision,
4374 unsigned FormatMaxPadding,
4375 bool TruncateZero) const {
4376 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4377 APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
4378 .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero);
4381 bool DoubleAPFloat::getExactInverse(APFloat *inv) const {
4382 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4383 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4385 return Tmp.getExactInverse(nullptr);
4386 APFloat Inv(semPPCDoubleDoubleLegacy);
4387 auto Ret = Tmp.getExactInverse(&Inv);
4388 *inv = APFloat(semPPCDoubleDouble, Inv.bitcastToAPInt());
4392 DoubleAPFloat scalbn(DoubleAPFloat Arg, int Exp, APFloat::roundingMode RM) {
4393 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4394 return DoubleAPFloat(semPPCDoubleDouble, scalbn(Arg.Floats[0], Exp, RM),
4395 scalbn(Arg.Floats[1], Exp, RM));
4398 DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
4399 APFloat::roundingMode RM) {
4400 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4401 APFloat First = frexp(Arg.Floats[0], Exp, RM);
4402 APFloat Second = Arg.Floats[1];
4403 if (Arg.getCategory() == APFloat::fcNormal)
4404 Second = scalbn(Second, -Exp, RM);
4405 return DoubleAPFloat(semPPCDoubleDouble, std::move(First), std::move(Second));
4408 } // End detail namespace
4410 APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
4411 if (usesLayout<IEEEFloat>(Semantics)) {
4412 new (&IEEE) IEEEFloat(std::move(F));
4415 if (usesLayout<DoubleAPFloat>(Semantics)) {
4417 DoubleAPFloat(Semantics, APFloat(std::move(F), F.getSemantics()),
4418 APFloat(semIEEEdouble));
4421 llvm_unreachable("Unexpected semantics");
4424 APFloat::opStatus APFloat::convertFromString(StringRef Str, roundingMode RM) {
4425 APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str, RM));
4428 hash_code hash_value(const APFloat &Arg) {
4429 if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics()))
4430 return hash_value(Arg.U.IEEE);
4431 if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics()))
4432 return hash_value(Arg.U.Double);
4433 llvm_unreachable("Unexpected semantics");
4436 APFloat::APFloat(const fltSemantics &Semantics, StringRef S)
4437 : APFloat(Semantics) {
4438 convertFromString(S, rmNearestTiesToEven);
4441 APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics,
4442 roundingMode RM, bool *losesInfo) {
4443 if (&getSemantics() == &ToSemantics)
4445 if (usesLayout<IEEEFloat>(getSemantics()) &&
4446 usesLayout<IEEEFloat>(ToSemantics))
4447 return U.IEEE.convert(ToSemantics, RM, losesInfo);
4448 if (usesLayout<IEEEFloat>(getSemantics()) &&
4449 usesLayout<DoubleAPFloat>(ToSemantics)) {
4450 assert(&ToSemantics == &semPPCDoubleDouble);
4451 auto Ret = U.IEEE.convert(semPPCDoubleDoubleLegacy, RM, losesInfo);
4452 *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt());
4455 if (usesLayout<DoubleAPFloat>(getSemantics()) &&
4456 usesLayout<IEEEFloat>(ToSemantics)) {
4457 auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo);
4458 *this = APFloat(std::move(getIEEE()), ToSemantics);
4461 llvm_unreachable("Unexpected semantics");
4464 APFloat APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE) {
4468 return APFloat(semIEEEhalf, APInt::getAllOnesValue(BitWidth));
4470 return APFloat(semIEEEsingle, APInt::getAllOnesValue(BitWidth));
4472 return APFloat(semIEEEdouble, APInt::getAllOnesValue(BitWidth));
4474 return APFloat(semX87DoubleExtended, APInt::getAllOnesValue(BitWidth));
4476 return APFloat(semIEEEquad, APInt::getAllOnesValue(BitWidth));
4478 llvm_unreachable("Unknown floating bit width");
4481 assert(BitWidth == 128);
4482 return APFloat(semPPCDoubleDouble, APInt::getAllOnesValue(BitWidth));
4486 void APFloat::print(raw_ostream &OS) const {
4487 SmallVector<char, 16> Buffer;
4489 OS << Buffer << "\n";
4492 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4493 LLVM_DUMP_METHOD void APFloat::dump() const { print(dbgs()); }
4496 void APFloat::Profile(FoldingSetNodeID &NID) const {
4497 NID.Add(bitcastToAPInt());
4500 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
4501 an APSInt, whose initial bit-width and signed-ness are used to determine the
4502 precision of the conversion.
4504 APFloat::opStatus APFloat::convertToInteger(APSInt &result,
4505 roundingMode rounding_mode,
4506 bool *isExact) const {
4507 unsigned bitWidth = result.getBitWidth();
4508 SmallVector<uint64_t, 4> parts(result.getNumWords());
4509 opStatus status = convertToInteger(parts, bitWidth, result.isSigned(),
4510 rounding_mode, isExact);
4511 // Keeps the original signed-ness.
4512 result = APInt(bitWidth, parts);
4516 } // End llvm namespace
4518 #undef APFLOAT_DISPATCH_ON_SEMANTICS