1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/APSInt.h"
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/FoldingSet.h"
19 #include "llvm/ADT/Hashing.h"
20 #include "llvm/ADT/StringExtras.h"
21 #include "llvm/ADT/StringRef.h"
22 #include "llvm/Support/Debug.h"
23 #include "llvm/Support/ErrorHandling.h"
24 #include "llvm/Support/MathExtras.h"
25 #include "llvm/Support/raw_ostream.h"
29 #define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL) \
31 if (usesLayout<IEEEFloat>(getSemantics())) \
32 return U.IEEE.METHOD_CALL; \
33 if (usesLayout<DoubleAPFloat>(getSemantics())) \
34 return U.Double.METHOD_CALL; \
35 llvm_unreachable("Unexpected semantics"); \
40 // TODO: Remove these and use APInt qualified types directly.
41 typedef APInt::WordType integerPart;
42 const unsigned int integerPartWidth = APInt::APINT_BITS_PER_WORD;
44 /// A macro used to combine two fcCategory enums into one key which can be used
45 /// in a switch statement to classify how the interaction of two APFloat's
46 /// categories affects an operation.
48 /// TODO: If clang source code is ever allowed to use constexpr in its own
49 /// codebase, change this into a static inline function.
50 #define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
52 /* Assumed in hexadecimal significand parsing, and conversion to
53 hexadecimal strings. */
54 static_assert(integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
57 /* Represents floating point arithmetic semantics. */
59 /* The largest E such that 2^E is representable; this matches the
60 definition of IEEE 754. */
61 APFloatBase::ExponentType maxExponent;
63 /* The smallest E such that 2^E is a normalized number; this
64 matches the definition of IEEE 754. */
65 APFloatBase::ExponentType minExponent;
67 /* Number of bits in the significand. This includes the integer
69 unsigned int precision;
71 /* Number of bits actually used in the semantics. */
72 unsigned int sizeInBits;
75 static const fltSemantics semIEEEhalf = {15, -14, 11, 16};
76 static const fltSemantics semIEEEsingle = {127, -126, 24, 32};
77 static const fltSemantics semIEEEdouble = {1023, -1022, 53, 64};
78 static const fltSemantics semIEEEquad = {16383, -16382, 113, 128};
79 static const fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
80 static const fltSemantics semBogus = {0, 0, 0, 0};
82 /* The IBM double-double semantics. Such a number consists of a pair of IEEE
83 64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal,
84 (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo.
85 Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent
86 to each other, and two 11-bit exponents.
88 Note: we need to make the value different from semBogus as otherwise
89 an unsafe optimization may collapse both values to a single address,
90 and we heavily rely on them having distinct addresses. */
91 static const fltSemantics semPPCDoubleDouble = {-1, 0, 0, 0};
93 /* These are legacy semantics for the fallback, inaccrurate implementation of
94 IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the
95 operation. It's equivalent to having an IEEE number with consecutive 106
96 bits of mantissa and 11 bits of exponent.
98 It's not equivalent to IBM double-double. For example, a legit IBM
99 double-double, 1 + epsilon:
101 1 + epsilon = 1 + (1 >> 1076)
103 is not representable by a consecutive 106 bits of mantissa.
105 Currently, these semantics are used in the following way:
107 semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) ->
108 (64-bit APInt, 64-bit APInt) -> (128-bit APInt) ->
109 semPPCDoubleDoubleLegacy -> IEEE operations
111 We use bitcastToAPInt() to get the bit representation (in APInt) of the
112 underlying IEEEdouble, then use the APInt constructor to construct the
115 TODO: Implement all operations in semPPCDoubleDouble, and delete these
117 static const fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53,
120 const fltSemantics &APFloatBase::IEEEhalf() {
123 const fltSemantics &APFloatBase::IEEEsingle() {
124 return semIEEEsingle;
126 const fltSemantics &APFloatBase::IEEEdouble() {
127 return semIEEEdouble;
129 const fltSemantics &APFloatBase::IEEEquad() {
132 const fltSemantics &APFloatBase::x87DoubleExtended() {
133 return semX87DoubleExtended;
135 const fltSemantics &APFloatBase::Bogus() {
138 const fltSemantics &APFloatBase::PPCDoubleDouble() {
139 return semPPCDoubleDouble;
142 /* A tight upper bound on number of parts required to hold the value
145 power * 815 / (351 * integerPartWidth) + 1
147 However, whilst the result may require only this many parts,
148 because we are multiplying two values to get it, the
149 multiplication may require an extra part with the excess part
150 being zero (consider the trivial case of 1 * 1, tcFullMultiply
151 requires two parts to hold the single-part result). So we add an
152 extra one to guarantee enough space whilst multiplying. */
153 const unsigned int maxExponent = 16383;
154 const unsigned int maxPrecision = 113;
155 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
156 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815)
157 / (351 * integerPartWidth));
159 unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {
160 return semantics.precision;
162 APFloatBase::ExponentType
163 APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) {
164 return semantics.maxExponent;
166 APFloatBase::ExponentType
167 APFloatBase::semanticsMinExponent(const fltSemantics &semantics) {
168 return semantics.minExponent;
170 unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {
171 return semantics.sizeInBits;
174 unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) {
175 return Sem.sizeInBits;
178 /* A bunch of private, handy routines. */
180 static inline unsigned int
181 partCountForBits(unsigned int bits)
183 return ((bits) + integerPartWidth - 1) / integerPartWidth;
186 /* Returns 0U-9U. Return values >= 10U are not digits. */
187 static inline unsigned int
188 decDigitValue(unsigned int c)
193 /* Return the value of a decimal exponent of the form
196 If the exponent overflows, returns a large exponent with the
199 readExponent(StringRef::iterator begin, StringRef::iterator end)
202 unsigned int absExponent;
203 const unsigned int overlargeExponent = 24000; /* FIXME. */
204 StringRef::iterator p = begin;
206 assert(p != end && "Exponent has no digits");
208 isNegative = (*p == '-');
209 if (*p == '-' || *p == '+') {
211 assert(p != end && "Exponent has no digits");
214 absExponent = decDigitValue(*p++);
215 assert(absExponent < 10U && "Invalid character in exponent");
217 for (; p != end; ++p) {
220 value = decDigitValue(*p);
221 assert(value < 10U && "Invalid character in exponent");
223 value += absExponent * 10;
224 if (absExponent >= overlargeExponent) {
225 absExponent = overlargeExponent;
226 p = end; /* outwit assert below */
232 assert(p == end && "Invalid exponent in exponent");
235 return -(int) absExponent;
237 return (int) absExponent;
240 /* This is ugly and needs cleaning up, but I don't immediately see
241 how whilst remaining safe. */
243 totalExponent(StringRef::iterator p, StringRef::iterator end,
244 int exponentAdjustment)
246 int unsignedExponent;
247 bool negative, overflow;
250 assert(p != end && "Exponent has no digits");
252 negative = *p == '-';
253 if (*p == '-' || *p == '+') {
255 assert(p != end && "Exponent has no digits");
258 unsignedExponent = 0;
260 for (; p != end; ++p) {
263 value = decDigitValue(*p);
264 assert(value < 10U && "Invalid character in exponent");
266 unsignedExponent = unsignedExponent * 10 + value;
267 if (unsignedExponent > 32767) {
273 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
277 exponent = unsignedExponent;
279 exponent = -exponent;
280 exponent += exponentAdjustment;
281 if (exponent > 32767 || exponent < -32768)
286 exponent = negative ? -32768: 32767;
291 static StringRef::iterator
292 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
293 StringRef::iterator *dot)
295 StringRef::iterator p = begin;
297 while (p != end && *p == '0')
300 if (p != end && *p == '.') {
303 assert(end - begin != 1 && "Significand has no digits");
305 while (p != end && *p == '0')
312 /* Given a normal decimal floating point number of the form
316 where the decimal point and exponent are optional, fill out the
317 structure D. Exponent is appropriate if the significand is
318 treated as an integer, and normalizedExponent if the significand
319 is taken to have the decimal point after a single leading
322 If the value is zero, V->firstSigDigit points to a non-digit, and
323 the return exponent is zero.
326 const char *firstSigDigit;
327 const char *lastSigDigit;
329 int normalizedExponent;
333 interpretDecimal(StringRef::iterator begin, StringRef::iterator end,
336 StringRef::iterator dot = end;
337 StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot);
339 D->firstSigDigit = p;
341 D->normalizedExponent = 0;
343 for (; p != end; ++p) {
345 assert(dot == end && "String contains multiple dots");
350 if (decDigitValue(*p) >= 10U)
355 assert((*p == 'e' || *p == 'E') && "Invalid character in significand");
356 assert(p != begin && "Significand has no digits");
357 assert((dot == end || p - begin != 1) && "Significand has no digits");
359 /* p points to the first non-digit in the string */
360 D->exponent = readExponent(p + 1, end);
362 /* Implied decimal point? */
367 /* If number is all zeroes accept any exponent. */
368 if (p != D->firstSigDigit) {
369 /* Drop insignificant trailing zeroes. */
374 while (p != begin && *p == '0');
375 while (p != begin && *p == '.');
378 /* Adjust the exponents for any decimal point. */
379 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
380 D->normalizedExponent = (D->exponent +
381 static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
382 - (dot > D->firstSigDigit && dot < p)));
388 /* Return the trailing fraction of a hexadecimal number.
389 DIGITVALUE is the first hex digit of the fraction, P points to
392 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
393 unsigned int digitValue)
395 unsigned int hexDigit;
397 /* If the first trailing digit isn't 0 or 8 we can work out the
398 fraction immediately. */
400 return lfMoreThanHalf;
401 else if (digitValue < 8 && digitValue > 0)
402 return lfLessThanHalf;
404 // Otherwise we need to find the first non-zero digit.
405 while (p != end && (*p == '0' || *p == '.'))
408 assert(p != end && "Invalid trailing hexadecimal fraction!");
410 hexDigit = hexDigitValue(*p);
412 /* If we ran off the end it is exactly zero or one-half, otherwise
415 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
417 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
420 /* Return the fraction lost were a bignum truncated losing the least
421 significant BITS bits. */
423 lostFractionThroughTruncation(const integerPart *parts,
424 unsigned int partCount,
429 lsb = APInt::tcLSB(parts, partCount);
431 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
433 return lfExactlyZero;
435 return lfExactlyHalf;
436 if (bits <= partCount * integerPartWidth &&
437 APInt::tcExtractBit(parts, bits - 1))
438 return lfMoreThanHalf;
440 return lfLessThanHalf;
443 /* Shift DST right BITS bits noting lost fraction. */
445 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
447 lostFraction lost_fraction;
449 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
451 APInt::tcShiftRight(dst, parts, bits);
453 return lost_fraction;
456 /* Combine the effect of two lost fractions. */
458 combineLostFractions(lostFraction moreSignificant,
459 lostFraction lessSignificant)
461 if (lessSignificant != lfExactlyZero) {
462 if (moreSignificant == lfExactlyZero)
463 moreSignificant = lfLessThanHalf;
464 else if (moreSignificant == lfExactlyHalf)
465 moreSignificant = lfMoreThanHalf;
468 return moreSignificant;
471 /* The error from the true value, in half-ulps, on multiplying two
472 floating point numbers, which differ from the value they
473 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
474 than the returned value.
476 See "How to Read Floating Point Numbers Accurately" by William D
479 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
481 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
483 if (HUerr1 + HUerr2 == 0)
484 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
486 return inexactMultiply + 2 * (HUerr1 + HUerr2);
489 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
490 when the least significant BITS are truncated. BITS cannot be
493 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
495 unsigned int count, partBits;
496 integerPart part, boundary;
501 count = bits / integerPartWidth;
502 partBits = bits % integerPartWidth + 1;
504 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
507 boundary = (integerPart) 1 << (partBits - 1);
512 if (part - boundary <= boundary - part)
513 return part - boundary;
515 return boundary - part;
518 if (part == boundary) {
521 return ~(integerPart) 0; /* A lot. */
524 } else if (part == boundary - 1) {
527 return ~(integerPart) 0; /* A lot. */
532 return ~(integerPart) 0; /* A lot. */
535 /* Place pow(5, power) in DST, and return the number of parts used.
536 DST must be at least one part larger than size of the answer. */
538 powerOf5(integerPart *dst, unsigned int power)
540 static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
542 integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
543 pow5s[0] = 78125 * 5;
545 unsigned int partsCount[16] = { 1 };
546 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
548 assert(power <= maxExponent);
553 *p1 = firstEightPowers[power & 7];
559 for (unsigned int n = 0; power; power >>= 1, n++) {
564 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
566 pc = partsCount[n - 1];
567 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
569 if (pow5[pc - 1] == 0)
577 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
579 if (p2[result - 1] == 0)
582 /* Now result is in p1 with partsCount parts and p2 is scratch
593 APInt::tcAssign(dst, p1, result);
598 /* Zero at the end to avoid modular arithmetic when adding one; used
599 when rounding up during hexadecimal output. */
600 static const char hexDigitsLower[] = "0123456789abcdef0";
601 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
602 static const char infinityL[] = "infinity";
603 static const char infinityU[] = "INFINITY";
604 static const char NaNL[] = "nan";
605 static const char NaNU[] = "NAN";
607 /* Write out an integerPart in hexadecimal, starting with the most
608 significant nibble. Write out exactly COUNT hexdigits, return
611 partAsHex (char *dst, integerPart part, unsigned int count,
612 const char *hexDigitChars)
614 unsigned int result = count;
616 assert(count != 0 && count <= integerPartWidth / 4);
618 part >>= (integerPartWidth - 4 * count);
620 dst[count] = hexDigitChars[part & 0xf];
627 /* Write out an unsigned decimal integer. */
629 writeUnsignedDecimal (char *dst, unsigned int n)
645 /* Write out a signed decimal integer. */
647 writeSignedDecimal (char *dst, int value)
651 dst = writeUnsignedDecimal(dst, -(unsigned) value);
653 dst = writeUnsignedDecimal(dst, value);
660 void IEEEFloat::initialize(const fltSemantics *ourSemantics) {
663 semantics = ourSemantics;
666 significand.parts = new integerPart[count];
669 void IEEEFloat::freeSignificand() {
671 delete [] significand.parts;
674 void IEEEFloat::assign(const IEEEFloat &rhs) {
675 assert(semantics == rhs.semantics);
678 category = rhs.category;
679 exponent = rhs.exponent;
680 if (isFiniteNonZero() || category == fcNaN)
681 copySignificand(rhs);
684 void IEEEFloat::copySignificand(const IEEEFloat &rhs) {
685 assert(isFiniteNonZero() || category == fcNaN);
686 assert(rhs.partCount() >= partCount());
688 APInt::tcAssign(significandParts(), rhs.significandParts(),
692 /* Make this number a NaN, with an arbitrary but deterministic value
693 for the significand. If double or longer, this is a signalling NaN,
694 which may not be ideal. If float, this is QNaN(0). */
695 void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
699 integerPart *significand = significandParts();
700 unsigned numParts = partCount();
702 // Set the significand bits to the fill.
703 if (!fill || fill->getNumWords() < numParts)
704 APInt::tcSet(significand, 0, numParts);
706 APInt::tcAssign(significand, fill->getRawData(),
707 std::min(fill->getNumWords(), numParts));
709 // Zero out the excess bits of the significand.
710 unsigned bitsToPreserve = semantics->precision - 1;
711 unsigned part = bitsToPreserve / 64;
712 bitsToPreserve %= 64;
713 significand[part] &= ((1ULL << bitsToPreserve) - 1);
714 for (part++; part != numParts; ++part)
715 significand[part] = 0;
718 unsigned QNaNBit = semantics->precision - 2;
721 // We always have to clear the QNaN bit to make it an SNaN.
722 APInt::tcClearBit(significand, QNaNBit);
724 // If there are no bits set in the payload, we have to set
725 // *something* to make it a NaN instead of an infinity;
726 // conventionally, this is the next bit down from the QNaN bit.
727 if (APInt::tcIsZero(significand, numParts))
728 APInt::tcSetBit(significand, QNaNBit - 1);
730 // We always have to set the QNaN bit to make it a QNaN.
731 APInt::tcSetBit(significand, QNaNBit);
734 // For x87 extended precision, we want to make a NaN, not a
735 // pseudo-NaN. Maybe we should expose the ability to make
737 if (semantics == &semX87DoubleExtended)
738 APInt::tcSetBit(significand, QNaNBit + 1);
741 IEEEFloat &IEEEFloat::operator=(const IEEEFloat &rhs) {
743 if (semantics != rhs.semantics) {
745 initialize(rhs.semantics);
753 IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) {
756 semantics = rhs.semantics;
757 significand = rhs.significand;
758 exponent = rhs.exponent;
759 category = rhs.category;
762 rhs.semantics = &semBogus;
766 bool IEEEFloat::isDenormal() const {
767 return isFiniteNonZero() && (exponent == semantics->minExponent) &&
768 (APInt::tcExtractBit(significandParts(),
769 semantics->precision - 1) == 0);
772 bool IEEEFloat::isSmallest() const {
773 // The smallest number by magnitude in our format will be the smallest
774 // denormal, i.e. the floating point number with exponent being minimum
775 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
776 return isFiniteNonZero() && exponent == semantics->minExponent &&
777 significandMSB() == 0;
780 bool IEEEFloat::isSignificandAllOnes() const {
781 // Test if the significand excluding the integral bit is all ones. This allows
782 // us to test for binade boundaries.
783 const integerPart *Parts = significandParts();
784 const unsigned PartCount = partCount();
785 for (unsigned i = 0; i < PartCount - 1; i++)
789 // Set the unused high bits to all ones when we compare.
790 const unsigned NumHighBits =
791 PartCount*integerPartWidth - semantics->precision + 1;
792 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to "
793 "fill than integerPartWidth");
794 const integerPart HighBitFill =
795 ~integerPart(0) << (integerPartWidth - NumHighBits);
796 if (~(Parts[PartCount - 1] | HighBitFill))
802 bool IEEEFloat::isSignificandAllZeros() const {
803 // Test if the significand excluding the integral bit is all zeros. This
804 // allows us to test for binade boundaries.
805 const integerPart *Parts = significandParts();
806 const unsigned PartCount = partCount();
808 for (unsigned i = 0; i < PartCount - 1; i++)
812 const unsigned NumHighBits =
813 PartCount*integerPartWidth - semantics->precision + 1;
814 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to "
815 "clear than integerPartWidth");
816 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
818 if (Parts[PartCount - 1] & HighBitMask)
824 bool IEEEFloat::isLargest() const {
825 // The largest number by magnitude in our format will be the floating point
826 // number with maximum exponent and with significand that is all ones.
827 return isFiniteNonZero() && exponent == semantics->maxExponent
828 && isSignificandAllOnes();
831 bool IEEEFloat::isInteger() const {
832 // This could be made more efficient; I'm going for obviously correct.
833 if (!isFinite()) return false;
834 IEEEFloat truncated = *this;
835 truncated.roundToIntegral(rmTowardZero);
836 return compare(truncated) == cmpEqual;
839 bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const {
842 if (semantics != rhs.semantics ||
843 category != rhs.category ||
846 if (category==fcZero || category==fcInfinity)
849 if (isFiniteNonZero() && exponent != rhs.exponent)
852 return std::equal(significandParts(), significandParts() + partCount(),
853 rhs.significandParts());
856 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) {
857 initialize(&ourSemantics);
861 exponent = ourSemantics.precision - 1;
862 significandParts()[0] = value;
863 normalize(rmNearestTiesToEven, lfExactlyZero);
866 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) {
867 initialize(&ourSemantics);
872 // Delegate to the previous constructor, because later copy constructor may
873 // actually inspects category, which can't be garbage.
874 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, uninitializedTag tag)
875 : IEEEFloat(ourSemantics) {}
877 IEEEFloat::IEEEFloat(const IEEEFloat &rhs) {
878 initialize(rhs.semantics);
882 IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) {
883 *this = std::move(rhs);
886 IEEEFloat::~IEEEFloat() { freeSignificand(); }
888 unsigned int IEEEFloat::partCount() const {
889 return partCountForBits(semantics->precision + 1);
892 const integerPart *IEEEFloat::significandParts() const {
893 return const_cast<IEEEFloat *>(this)->significandParts();
896 integerPart *IEEEFloat::significandParts() {
898 return significand.parts;
900 return &significand.part;
903 void IEEEFloat::zeroSignificand() {
904 APInt::tcSet(significandParts(), 0, partCount());
907 /* Increment an fcNormal floating point number's significand. */
908 void IEEEFloat::incrementSignificand() {
911 carry = APInt::tcIncrement(significandParts(), partCount());
913 /* Our callers should never cause us to overflow. */
918 /* Add the significand of the RHS. Returns the carry flag. */
919 integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) {
922 parts = significandParts();
924 assert(semantics == rhs.semantics);
925 assert(exponent == rhs.exponent);
927 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
930 /* Subtract the significand of the RHS with a borrow flag. Returns
932 integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
933 integerPart borrow) {
936 parts = significandParts();
938 assert(semantics == rhs.semantics);
939 assert(exponent == rhs.exponent);
941 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
945 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
946 on to the full-precision result of the multiplication. Returns the
948 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
949 const IEEEFloat *addend) {
950 unsigned int omsb; // One, not zero, based MSB.
951 unsigned int partsCount, newPartsCount, precision;
952 integerPart *lhsSignificand;
953 integerPart scratch[4];
954 integerPart *fullSignificand;
955 lostFraction lost_fraction;
958 assert(semantics == rhs.semantics);
960 precision = semantics->precision;
962 // Allocate space for twice as many bits as the original significand, plus one
963 // extra bit for the addition to overflow into.
964 newPartsCount = partCountForBits(precision * 2 + 1);
966 if (newPartsCount > 4)
967 fullSignificand = new integerPart[newPartsCount];
969 fullSignificand = scratch;
971 lhsSignificand = significandParts();
972 partsCount = partCount();
974 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
975 rhs.significandParts(), partsCount, partsCount);
977 lost_fraction = lfExactlyZero;
978 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
979 exponent += rhs.exponent;
981 // Assume the operands involved in the multiplication are single-precision
982 // FP, and the two multiplicants are:
983 // *this = a23 . a22 ... a0 * 2^e1
984 // rhs = b23 . b22 ... b0 * 2^e2
985 // the result of multiplication is:
986 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
987 // Note that there are three significant bits at the left-hand side of the
988 // radix point: two for the multiplication, and an overflow bit for the
989 // addition (that will always be zero at this point). Move the radix point
990 // toward left by two bits, and adjust exponent accordingly.
993 if (addend && addend->isNonZero()) {
994 // The intermediate result of the multiplication has "2 * precision"
995 // signicant bit; adjust the addend to be consistent with mul result.
997 Significand savedSignificand = significand;
998 const fltSemantics *savedSemantics = semantics;
999 fltSemantics extendedSemantics;
1001 unsigned int extendedPrecision;
1003 // Normalize our MSB to one below the top bit to allow for overflow.
1004 extendedPrecision = 2 * precision + 1;
1005 if (omsb != extendedPrecision - 1) {
1006 assert(extendedPrecision > omsb);
1007 APInt::tcShiftLeft(fullSignificand, newPartsCount,
1008 (extendedPrecision - 1) - omsb);
1009 exponent -= (extendedPrecision - 1) - omsb;
1012 /* Create new semantics. */
1013 extendedSemantics = *semantics;
1014 extendedSemantics.precision = extendedPrecision;
1016 if (newPartsCount == 1)
1017 significand.part = fullSignificand[0];
1019 significand.parts = fullSignificand;
1020 semantics = &extendedSemantics;
1022 IEEEFloat extendedAddend(*addend);
1023 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
1024 assert(status == opOK);
1027 // Shift the significand of the addend right by one bit. This guarantees
1028 // that the high bit of the significand is zero (same as fullSignificand),
1029 // so the addition will overflow (if it does overflow at all) into the top bit.
1030 lost_fraction = extendedAddend.shiftSignificandRight(1);
1031 assert(lost_fraction == lfExactlyZero &&
1032 "Lost precision while shifting addend for fused-multiply-add.");
1034 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
1036 /* Restore our state. */
1037 if (newPartsCount == 1)
1038 fullSignificand[0] = significand.part;
1039 significand = savedSignificand;
1040 semantics = savedSemantics;
1042 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1045 // Convert the result having "2 * precision" significant-bits back to the one
1046 // having "precision" significant-bits. First, move the radix point from
1047 // poision "2*precision - 1" to "precision - 1". The exponent need to be
1048 // adjusted by "2*precision - 1" - "precision - 1" = "precision".
1049 exponent -= precision + 1;
1051 // In case MSB resides at the left-hand side of radix point, shift the
1052 // mantissa right by some amount to make sure the MSB reside right before
1053 // the radix point (i.e. "MSB . rest-significant-bits").
1055 // Note that the result is not normalized when "omsb < precision". So, the
1056 // caller needs to call IEEEFloat::normalize() if normalized value is
1058 if (omsb > precision) {
1059 unsigned int bits, significantParts;
1062 bits = omsb - precision;
1063 significantParts = partCountForBits(omsb);
1064 lf = shiftRight(fullSignificand, significantParts, bits);
1065 lost_fraction = combineLostFractions(lf, lost_fraction);
1069 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
1071 if (newPartsCount > 4)
1072 delete [] fullSignificand;
1074 return lost_fraction;
1077 /* Multiply the significands of LHS and RHS to DST. */
1078 lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
1079 unsigned int bit, i, partsCount;
1080 const integerPart *rhsSignificand;
1081 integerPart *lhsSignificand, *dividend, *divisor;
1082 integerPart scratch[4];
1083 lostFraction lost_fraction;
1085 assert(semantics == rhs.semantics);
1087 lhsSignificand = significandParts();
1088 rhsSignificand = rhs.significandParts();
1089 partsCount = partCount();
1092 dividend = new integerPart[partsCount * 2];
1096 divisor = dividend + partsCount;
1098 /* Copy the dividend and divisor as they will be modified in-place. */
1099 for (i = 0; i < partsCount; i++) {
1100 dividend[i] = lhsSignificand[i];
1101 divisor[i] = rhsSignificand[i];
1102 lhsSignificand[i] = 0;
1105 exponent -= rhs.exponent;
1107 unsigned int precision = semantics->precision;
1109 /* Normalize the divisor. */
1110 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1113 APInt::tcShiftLeft(divisor, partsCount, bit);
1116 /* Normalize the dividend. */
1117 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1120 APInt::tcShiftLeft(dividend, partsCount, bit);
1123 /* Ensure the dividend >= divisor initially for the loop below.
1124 Incidentally, this means that the division loop below is
1125 guaranteed to set the integer bit to one. */
1126 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1128 APInt::tcShiftLeft(dividend, partsCount, 1);
1129 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1132 /* Long division. */
1133 for (bit = precision; bit; bit -= 1) {
1134 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1135 APInt::tcSubtract(dividend, divisor, 0, partsCount);
1136 APInt::tcSetBit(lhsSignificand, bit - 1);
1139 APInt::tcShiftLeft(dividend, partsCount, 1);
1142 /* Figure out the lost fraction. */
1143 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1146 lost_fraction = lfMoreThanHalf;
1148 lost_fraction = lfExactlyHalf;
1149 else if (APInt::tcIsZero(dividend, partsCount))
1150 lost_fraction = lfExactlyZero;
1152 lost_fraction = lfLessThanHalf;
1157 return lost_fraction;
1160 unsigned int IEEEFloat::significandMSB() const {
1161 return APInt::tcMSB(significandParts(), partCount());
1164 unsigned int IEEEFloat::significandLSB() const {
1165 return APInt::tcLSB(significandParts(), partCount());
1168 /* Note that a zero result is NOT normalized to fcZero. */
1169 lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
1170 /* Our exponent should not overflow. */
1171 assert((ExponentType) (exponent + bits) >= exponent);
1175 return shiftRight(significandParts(), partCount(), bits);
1178 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
1179 void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
1180 assert(bits < semantics->precision);
1183 unsigned int partsCount = partCount();
1185 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1188 assert(!APInt::tcIsZero(significandParts(), partsCount));
1192 IEEEFloat::cmpResult
1193 IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const {
1196 assert(semantics == rhs.semantics);
1197 assert(isFiniteNonZero());
1198 assert(rhs.isFiniteNonZero());
1200 compare = exponent - rhs.exponent;
1202 /* If exponents are equal, do an unsigned bignum comparison of the
1205 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1209 return cmpGreaterThan;
1210 else if (compare < 0)
1216 /* Handle overflow. Sign is preserved. We either become infinity or
1217 the largest finite number. */
1218 IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
1220 if (rounding_mode == rmNearestTiesToEven ||
1221 rounding_mode == rmNearestTiesToAway ||
1222 (rounding_mode == rmTowardPositive && !sign) ||
1223 (rounding_mode == rmTowardNegative && sign)) {
1224 category = fcInfinity;
1225 return (opStatus) (opOverflow | opInexact);
1228 /* Otherwise we become the largest finite number. */
1229 category = fcNormal;
1230 exponent = semantics->maxExponent;
1231 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1232 semantics->precision);
1237 /* Returns TRUE if, when truncating the current number, with BIT the
1238 new LSB, with the given lost fraction and rounding mode, the result
1239 would need to be rounded away from zero (i.e., by increasing the
1240 signficand). This routine must work for fcZero of both signs, and
1241 fcNormal numbers. */
1242 bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,
1243 lostFraction lost_fraction,
1244 unsigned int bit) const {
1245 /* NaNs and infinities should not have lost fractions. */
1246 assert(isFiniteNonZero() || category == fcZero);
1248 /* Current callers never pass this so we don't handle it. */
1249 assert(lost_fraction != lfExactlyZero);
1251 switch (rounding_mode) {
1252 case rmNearestTiesToAway:
1253 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1255 case rmNearestTiesToEven:
1256 if (lost_fraction == lfMoreThanHalf)
1259 /* Our zeroes don't have a significand to test. */
1260 if (lost_fraction == lfExactlyHalf && category != fcZero)
1261 return APInt::tcExtractBit(significandParts(), bit);
1268 case rmTowardPositive:
1271 case rmTowardNegative:
1274 llvm_unreachable("Invalid rounding mode found");
1277 IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
1278 lostFraction lost_fraction) {
1279 unsigned int omsb; /* One, not zero, based MSB. */
1282 if (!isFiniteNonZero())
1285 /* Before rounding normalize the exponent of fcNormal numbers. */
1286 omsb = significandMSB() + 1;
1289 /* OMSB is numbered from 1. We want to place it in the integer
1290 bit numbered PRECISION if possible, with a compensating change in
1292 exponentChange = omsb - semantics->precision;
1294 /* If the resulting exponent is too high, overflow according to
1295 the rounding mode. */
1296 if (exponent + exponentChange > semantics->maxExponent)
1297 return handleOverflow(rounding_mode);
1299 /* Subnormal numbers have exponent minExponent, and their MSB
1300 is forced based on that. */
1301 if (exponent + exponentChange < semantics->minExponent)
1302 exponentChange = semantics->minExponent - exponent;
1304 /* Shifting left is easy as we don't lose precision. */
1305 if (exponentChange < 0) {
1306 assert(lost_fraction == lfExactlyZero);
1308 shiftSignificandLeft(-exponentChange);
1313 if (exponentChange > 0) {
1316 /* Shift right and capture any new lost fraction. */
1317 lf = shiftSignificandRight(exponentChange);
1319 lost_fraction = combineLostFractions(lf, lost_fraction);
1321 /* Keep OMSB up-to-date. */
1322 if (omsb > (unsigned) exponentChange)
1323 omsb -= exponentChange;
1329 /* Now round the number according to rounding_mode given the lost
1332 /* As specified in IEEE 754, since we do not trap we do not report
1333 underflow for exact results. */
1334 if (lost_fraction == lfExactlyZero) {
1335 /* Canonicalize zeroes. */
1342 /* Increment the significand if we're rounding away from zero. */
1343 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1345 exponent = semantics->minExponent;
1347 incrementSignificand();
1348 omsb = significandMSB() + 1;
1350 /* Did the significand increment overflow? */
1351 if (omsb == (unsigned) semantics->precision + 1) {
1352 /* Renormalize by incrementing the exponent and shifting our
1353 significand right one. However if we already have the
1354 maximum exponent we overflow to infinity. */
1355 if (exponent == semantics->maxExponent) {
1356 category = fcInfinity;
1358 return (opStatus) (opOverflow | opInexact);
1361 shiftSignificandRight(1);
1367 /* The normal case - we were and are not denormal, and any
1368 significand increment above didn't overflow. */
1369 if (omsb == semantics->precision)
1372 /* We have a non-zero denormal. */
1373 assert(omsb < semantics->precision);
1375 /* Canonicalize zeroes. */
1379 /* The fcZero case is a denormal that underflowed to zero. */
1380 return (opStatus) (opUnderflow | opInexact);
1383 IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
1385 switch (PackCategoriesIntoKey(category, rhs.category)) {
1387 llvm_unreachable(nullptr);
1389 case PackCategoriesIntoKey(fcNaN, fcZero):
1390 case PackCategoriesIntoKey(fcNaN, fcNormal):
1391 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1392 case PackCategoriesIntoKey(fcNaN, fcNaN):
1393 case PackCategoriesIntoKey(fcNormal, fcZero):
1394 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1395 case PackCategoriesIntoKey(fcInfinity, fcZero):
1398 case PackCategoriesIntoKey(fcZero, fcNaN):
1399 case PackCategoriesIntoKey(fcNormal, fcNaN):
1400 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1401 // We need to be sure to flip the sign here for subtraction because we
1402 // don't have a separate negate operation so -NaN becomes 0 - NaN here.
1403 sign = rhs.sign ^ subtract;
1405 copySignificand(rhs);
1408 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1409 case PackCategoriesIntoKey(fcZero, fcInfinity):
1410 category = fcInfinity;
1411 sign = rhs.sign ^ subtract;
1414 case PackCategoriesIntoKey(fcZero, fcNormal):
1416 sign = rhs.sign ^ subtract;
1419 case PackCategoriesIntoKey(fcZero, fcZero):
1420 /* Sign depends on rounding mode; handled by caller. */
1423 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1424 /* Differently signed infinities can only be validly
1426 if (((sign ^ rhs.sign)!=0) != subtract) {
1433 case PackCategoriesIntoKey(fcNormal, fcNormal):
1438 /* Add or subtract two normal numbers. */
1439 lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
1442 lostFraction lost_fraction;
1445 /* Determine if the operation on the absolute values is effectively
1446 an addition or subtraction. */
1447 subtract ^= static_cast<bool>(sign ^ rhs.sign);
1449 /* Are we bigger exponent-wise than the RHS? */
1450 bits = exponent - rhs.exponent;
1452 /* Subtraction is more subtle than one might naively expect. */
1454 IEEEFloat temp_rhs(rhs);
1458 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
1459 lost_fraction = lfExactlyZero;
1460 } else if (bits > 0) {
1461 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1462 shiftSignificandLeft(1);
1465 lost_fraction = shiftSignificandRight(-bits - 1);
1466 temp_rhs.shiftSignificandLeft(1);
1471 carry = temp_rhs.subtractSignificand
1472 (*this, lost_fraction != lfExactlyZero);
1473 copySignificand(temp_rhs);
1476 carry = subtractSignificand
1477 (temp_rhs, lost_fraction != lfExactlyZero);
1480 /* Invert the lost fraction - it was on the RHS and
1482 if (lost_fraction == lfLessThanHalf)
1483 lost_fraction = lfMoreThanHalf;
1484 else if (lost_fraction == lfMoreThanHalf)
1485 lost_fraction = lfLessThanHalf;
1487 /* The code above is intended to ensure that no borrow is
1493 IEEEFloat temp_rhs(rhs);
1495 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1496 carry = addSignificand(temp_rhs);
1498 lost_fraction = shiftSignificandRight(-bits);
1499 carry = addSignificand(rhs);
1502 /* We have a guard bit; generating a carry cannot happen. */
1507 return lost_fraction;
1510 IEEEFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
1511 switch (PackCategoriesIntoKey(category, rhs.category)) {
1513 llvm_unreachable(nullptr);
1515 case PackCategoriesIntoKey(fcNaN, fcZero):
1516 case PackCategoriesIntoKey(fcNaN, fcNormal):
1517 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1518 case PackCategoriesIntoKey(fcNaN, fcNaN):
1522 case PackCategoriesIntoKey(fcZero, fcNaN):
1523 case PackCategoriesIntoKey(fcNormal, fcNaN):
1524 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1527 copySignificand(rhs);
1530 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1531 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1532 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1533 category = fcInfinity;
1536 case PackCategoriesIntoKey(fcZero, fcNormal):
1537 case PackCategoriesIntoKey(fcNormal, fcZero):
1538 case PackCategoriesIntoKey(fcZero, fcZero):
1542 case PackCategoriesIntoKey(fcZero, fcInfinity):
1543 case PackCategoriesIntoKey(fcInfinity, fcZero):
1547 case PackCategoriesIntoKey(fcNormal, fcNormal):
1552 IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
1553 switch (PackCategoriesIntoKey(category, rhs.category)) {
1555 llvm_unreachable(nullptr);
1557 case PackCategoriesIntoKey(fcZero, fcNaN):
1558 case PackCategoriesIntoKey(fcNormal, fcNaN):
1559 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1561 copySignificand(rhs);
1563 case PackCategoriesIntoKey(fcNaN, fcZero):
1564 case PackCategoriesIntoKey(fcNaN, fcNormal):
1565 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1566 case PackCategoriesIntoKey(fcNaN, fcNaN):
1569 case PackCategoriesIntoKey(fcInfinity, fcZero):
1570 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1571 case PackCategoriesIntoKey(fcZero, fcInfinity):
1572 case PackCategoriesIntoKey(fcZero, fcNormal):
1575 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1579 case PackCategoriesIntoKey(fcNormal, fcZero):
1580 category = fcInfinity;
1583 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1584 case PackCategoriesIntoKey(fcZero, fcZero):
1588 case PackCategoriesIntoKey(fcNormal, fcNormal):
1593 IEEEFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
1594 switch (PackCategoriesIntoKey(category, rhs.category)) {
1596 llvm_unreachable(nullptr);
1598 case PackCategoriesIntoKey(fcNaN, fcZero):
1599 case PackCategoriesIntoKey(fcNaN, fcNormal):
1600 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1601 case PackCategoriesIntoKey(fcNaN, fcNaN):
1602 case PackCategoriesIntoKey(fcZero, fcInfinity):
1603 case PackCategoriesIntoKey(fcZero, fcNormal):
1604 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1607 case PackCategoriesIntoKey(fcZero, fcNaN):
1608 case PackCategoriesIntoKey(fcNormal, fcNaN):
1609 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1612 copySignificand(rhs);
1615 case PackCategoriesIntoKey(fcNormal, fcZero):
1616 case PackCategoriesIntoKey(fcInfinity, fcZero):
1617 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1618 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1619 case PackCategoriesIntoKey(fcZero, fcZero):
1623 case PackCategoriesIntoKey(fcNormal, fcNormal):
1629 void IEEEFloat::changeSign() {
1630 /* Look mummy, this one's easy. */
1634 /* Normalized addition or subtraction. */
1635 IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
1636 roundingMode rounding_mode,
1640 fs = addOrSubtractSpecials(rhs, subtract);
1642 /* This return code means it was not a simple case. */
1643 if (fs == opDivByZero) {
1644 lostFraction lost_fraction;
1646 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1647 fs = normalize(rounding_mode, lost_fraction);
1649 /* Can only be zero if we lost no fraction. */
1650 assert(category != fcZero || lost_fraction == lfExactlyZero);
1653 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1654 positive zero unless rounding to minus infinity, except that
1655 adding two like-signed zeroes gives that zero. */
1656 if (category == fcZero) {
1657 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
1658 sign = (rounding_mode == rmTowardNegative);
1664 /* Normalized addition. */
1665 IEEEFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs,
1666 roundingMode rounding_mode) {
1667 return addOrSubtract(rhs, rounding_mode, false);
1670 /* Normalized subtraction. */
1671 IEEEFloat::opStatus IEEEFloat::subtract(const IEEEFloat &rhs,
1672 roundingMode rounding_mode) {
1673 return addOrSubtract(rhs, rounding_mode, true);
1676 /* Normalized multiply. */
1677 IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs,
1678 roundingMode rounding_mode) {
1682 fs = multiplySpecials(rhs);
1684 if (isFiniteNonZero()) {
1685 lostFraction lost_fraction = multiplySignificand(rhs, nullptr);
1686 fs = normalize(rounding_mode, lost_fraction);
1687 if (lost_fraction != lfExactlyZero)
1688 fs = (opStatus) (fs | opInexact);
1694 /* Normalized divide. */
1695 IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs,
1696 roundingMode rounding_mode) {
1700 fs = divideSpecials(rhs);
1702 if (isFiniteNonZero()) {
1703 lostFraction lost_fraction = divideSignificand(rhs);
1704 fs = normalize(rounding_mode, lost_fraction);
1705 if (lost_fraction != lfExactlyZero)
1706 fs = (opStatus) (fs | opInexact);
1712 /* Normalized remainder. This is not currently correct in all cases. */
1713 IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) {
1715 IEEEFloat V = *this;
1716 unsigned int origSign = sign;
1718 fs = V.divide(rhs, rmNearestTiesToEven);
1719 if (fs == opDivByZero)
1722 int parts = partCount();
1723 integerPart *x = new integerPart[parts];
1725 fs = V.convertToInteger(makeMutableArrayRef(x, parts),
1726 parts * integerPartWidth, true, rmNearestTiesToEven,
1728 if (fs == opInvalidOp) {
1733 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1734 rmNearestTiesToEven);
1735 assert(fs==opOK); // should always work
1737 fs = V.multiply(rhs, rmNearestTiesToEven);
1738 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1740 fs = subtract(V, rmNearestTiesToEven);
1741 assert(fs==opOK || fs==opInexact); // likewise
1744 sign = origSign; // IEEE754 requires this
1749 /* Normalized llvm frem (C fmod). */
1750 IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) {
1752 fs = modSpecials(rhs);
1754 while (isFiniteNonZero() && rhs.isFiniteNonZero() &&
1755 compareAbsoluteValue(rhs) != cmpLessThan) {
1756 IEEEFloat V = scalbn(rhs, ilogb(*this) - ilogb(rhs), rmNearestTiesToEven);
1757 if (compareAbsoluteValue(V) == cmpLessThan)
1758 V = scalbn(V, -1, rmNearestTiesToEven);
1761 fs = subtract(V, rmNearestTiesToEven);
1767 /* Normalized fused-multiply-add. */
1768 IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand,
1769 const IEEEFloat &addend,
1770 roundingMode rounding_mode) {
1773 /* Post-multiplication sign, before addition. */
1774 sign ^= multiplicand.sign;
1776 /* If and only if all arguments are normal do we need to do an
1777 extended-precision calculation. */
1778 if (isFiniteNonZero() &&
1779 multiplicand.isFiniteNonZero() &&
1780 addend.isFinite()) {
1781 lostFraction lost_fraction;
1783 lost_fraction = multiplySignificand(multiplicand, &addend);
1784 fs = normalize(rounding_mode, lost_fraction);
1785 if (lost_fraction != lfExactlyZero)
1786 fs = (opStatus) (fs | opInexact);
1788 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1789 positive zero unless rounding to minus infinity, except that
1790 adding two like-signed zeroes gives that zero. */
1791 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign)
1792 sign = (rounding_mode == rmTowardNegative);
1794 fs = multiplySpecials(multiplicand);
1796 /* FS can only be opOK or opInvalidOp. There is no more work
1797 to do in the latter case. The IEEE-754R standard says it is
1798 implementation-defined in this case whether, if ADDEND is a
1799 quiet NaN, we raise invalid op; this implementation does so.
1801 If we need to do the addition we can do so with normal
1804 fs = addOrSubtract(addend, rounding_mode, false);
1810 /* Rounding-mode corrrect round to integral value. */
1811 IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) {
1814 // If the exponent is large enough, we know that this value is already
1815 // integral, and the arithmetic below would potentially cause it to saturate
1816 // to +/-Inf. Bail out early instead.
1817 if (isFiniteNonZero() && exponent+1 >= (int)semanticsPrecision(*semantics))
1820 // The algorithm here is quite simple: we add 2^(p-1), where p is the
1821 // precision of our format, and then subtract it back off again. The choice
1822 // of rounding modes for the addition/subtraction determines the rounding mode
1823 // for our integral rounding as well.
1824 // NOTE: When the input value is negative, we do subtraction followed by
1825 // addition instead.
1826 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
1827 IntegerConstant <<= semanticsPrecision(*semantics)-1;
1828 IEEEFloat MagicConstant(*semantics);
1829 fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
1830 rmNearestTiesToEven);
1831 MagicConstant.sign = sign;
1836 // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly.
1837 bool inputSign = isNegative();
1839 fs = add(MagicConstant, rounding_mode);
1840 if (fs != opOK && fs != opInexact)
1843 fs = subtract(MagicConstant, rounding_mode);
1845 // Restore the input sign.
1846 if (inputSign != isNegative())
1853 /* Comparison requires normalized numbers. */
1854 IEEEFloat::cmpResult IEEEFloat::compare(const IEEEFloat &rhs) const {
1857 assert(semantics == rhs.semantics);
1859 switch (PackCategoriesIntoKey(category, rhs.category)) {
1861 llvm_unreachable(nullptr);
1863 case PackCategoriesIntoKey(fcNaN, fcZero):
1864 case PackCategoriesIntoKey(fcNaN, fcNormal):
1865 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1866 case PackCategoriesIntoKey(fcNaN, fcNaN):
1867 case PackCategoriesIntoKey(fcZero, fcNaN):
1868 case PackCategoriesIntoKey(fcNormal, fcNaN):
1869 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1870 return cmpUnordered;
1872 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1873 case PackCategoriesIntoKey(fcInfinity, fcZero):
1874 case PackCategoriesIntoKey(fcNormal, fcZero):
1878 return cmpGreaterThan;
1880 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1881 case PackCategoriesIntoKey(fcZero, fcInfinity):
1882 case PackCategoriesIntoKey(fcZero, fcNormal):
1884 return cmpGreaterThan;
1888 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1889 if (sign == rhs.sign)
1894 return cmpGreaterThan;
1896 case PackCategoriesIntoKey(fcZero, fcZero):
1899 case PackCategoriesIntoKey(fcNormal, fcNormal):
1903 /* Two normal numbers. Do they have the same sign? */
1904 if (sign != rhs.sign) {
1906 result = cmpLessThan;
1908 result = cmpGreaterThan;
1910 /* Compare absolute values; invert result if negative. */
1911 result = compareAbsoluteValue(rhs);
1914 if (result == cmpLessThan)
1915 result = cmpGreaterThan;
1916 else if (result == cmpGreaterThan)
1917 result = cmpLessThan;
1924 /// IEEEFloat::convert - convert a value of one floating point type to another.
1925 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
1926 /// records whether the transformation lost information, i.e. whether
1927 /// converting the result back to the original type will produce the
1928 /// original value (this is almost the same as return value==fsOK, but there
1929 /// are edge cases where this is not so).
1931 IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
1932 roundingMode rounding_mode,
1934 lostFraction lostFraction;
1935 unsigned int newPartCount, oldPartCount;
1938 const fltSemantics &fromSemantics = *semantics;
1940 lostFraction = lfExactlyZero;
1941 newPartCount = partCountForBits(toSemantics.precision + 1);
1942 oldPartCount = partCount();
1943 shift = toSemantics.precision - fromSemantics.precision;
1945 bool X86SpecialNan = false;
1946 if (&fromSemantics == &semX87DoubleExtended &&
1947 &toSemantics != &semX87DoubleExtended && category == fcNaN &&
1948 (!(*significandParts() & 0x8000000000000000ULL) ||
1949 !(*significandParts() & 0x4000000000000000ULL))) {
1950 // x86 has some unusual NaNs which cannot be represented in any other
1951 // format; note them here.
1952 X86SpecialNan = true;
1955 // If this is a truncation of a denormal number, and the target semantics
1956 // has larger exponent range than the source semantics (this can happen
1957 // when truncating from PowerPC double-double to double format), the
1958 // right shift could lose result mantissa bits. Adjust exponent instead
1959 // of performing excessive shift.
1960 if (shift < 0 && isFiniteNonZero()) {
1961 int exponentChange = significandMSB() + 1 - fromSemantics.precision;
1962 if (exponent + exponentChange < toSemantics.minExponent)
1963 exponentChange = toSemantics.minExponent - exponent;
1964 if (exponentChange < shift)
1965 exponentChange = shift;
1966 if (exponentChange < 0) {
1967 shift -= exponentChange;
1968 exponent += exponentChange;
1972 // If this is a truncation, perform the shift before we narrow the storage.
1973 if (shift < 0 && (isFiniteNonZero() || category==fcNaN))
1974 lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
1976 // Fix the storage so it can hold to new value.
1977 if (newPartCount > oldPartCount) {
1978 // The new type requires more storage; make it available.
1979 integerPart *newParts;
1980 newParts = new integerPart[newPartCount];
1981 APInt::tcSet(newParts, 0, newPartCount);
1982 if (isFiniteNonZero() || category==fcNaN)
1983 APInt::tcAssign(newParts, significandParts(), oldPartCount);
1985 significand.parts = newParts;
1986 } else if (newPartCount == 1 && oldPartCount != 1) {
1987 // Switch to built-in storage for a single part.
1988 integerPart newPart = 0;
1989 if (isFiniteNonZero() || category==fcNaN)
1990 newPart = significandParts()[0];
1992 significand.part = newPart;
1995 // Now that we have the right storage, switch the semantics.
1996 semantics = &toSemantics;
1998 // If this is an extension, perform the shift now that the storage is
2000 if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
2001 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
2003 if (isFiniteNonZero()) {
2004 fs = normalize(rounding_mode, lostFraction);
2005 *losesInfo = (fs != opOK);
2006 } else if (category == fcNaN) {
2007 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2009 // For x87 extended precision, we want to make a NaN, not a special NaN if
2010 // the input wasn't special either.
2011 if (!X86SpecialNan && semantics == &semX87DoubleExtended)
2012 APInt::tcSetBit(significandParts(), semantics->precision - 1);
2014 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
2015 // does not give you back the same bits. This is dubious, and we
2016 // don't currently do it. You're really supposed to get
2017 // an invalid operation signal at runtime, but nobody does that.
2027 /* Convert a floating point number to an integer according to the
2028 rounding mode. If the rounded integer value is out of range this
2029 returns an invalid operation exception and the contents of the
2030 destination parts are unspecified. If the rounded value is in
2031 range but the floating point number is not the exact integer, the C
2032 standard doesn't require an inexact exception to be raised. IEEE
2033 854 does require it so we do that.
2035 Note that for conversions to integer type the C standard requires
2036 round-to-zero to always be used. */
2037 IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
2038 MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned,
2039 roundingMode rounding_mode, bool *isExact) const {
2040 lostFraction lost_fraction;
2041 const integerPart *src;
2042 unsigned int dstPartsCount, truncatedBits;
2046 /* Handle the three special cases first. */
2047 if (category == fcInfinity || category == fcNaN)
2050 dstPartsCount = partCountForBits(width);
2051 assert(dstPartsCount <= parts.size() && "Integer too big");
2053 if (category == fcZero) {
2054 APInt::tcSet(parts.data(), 0, dstPartsCount);
2055 // Negative zero can't be represented as an int.
2060 src = significandParts();
2062 /* Step 1: place our absolute value, with any fraction truncated, in
2065 /* Our absolute value is less than one; truncate everything. */
2066 APInt::tcSet(parts.data(), 0, dstPartsCount);
2067 /* For exponent -1 the integer bit represents .5, look at that.
2068 For smaller exponents leftmost truncated bit is 0. */
2069 truncatedBits = semantics->precision -1U - exponent;
2071 /* We want the most significant (exponent + 1) bits; the rest are
2073 unsigned int bits = exponent + 1U;
2075 /* Hopelessly large in magnitude? */
2079 if (bits < semantics->precision) {
2080 /* We truncate (semantics->precision - bits) bits. */
2081 truncatedBits = semantics->precision - bits;
2082 APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits);
2084 /* We want at least as many bits as are available. */
2085 APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision,
2087 APInt::tcShiftLeft(parts.data(), dstPartsCount,
2088 bits - semantics->precision);
2093 /* Step 2: work out any lost fraction, and increment the absolute
2094 value if we would round away from zero. */
2095 if (truncatedBits) {
2096 lost_fraction = lostFractionThroughTruncation(src, partCount(),
2098 if (lost_fraction != lfExactlyZero &&
2099 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2100 if (APInt::tcIncrement(parts.data(), dstPartsCount))
2101 return opInvalidOp; /* Overflow. */
2104 lost_fraction = lfExactlyZero;
2107 /* Step 3: check if we fit in the destination. */
2108 unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1;
2112 /* Negative numbers cannot be represented as unsigned. */
2116 /* It takes omsb bits to represent the unsigned integer value.
2117 We lose a bit for the sign, but care is needed as the
2118 maximally negative integer is a special case. */
2119 if (omsb == width &&
2120 APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb)
2123 /* This case can happen because of rounding. */
2128 APInt::tcNegate (parts.data(), dstPartsCount);
2130 if (omsb >= width + !isSigned)
2134 if (lost_fraction == lfExactlyZero) {
2141 /* Same as convertToSignExtendedInteger, except we provide
2142 deterministic values in case of an invalid operation exception,
2143 namely zero for NaNs and the minimal or maximal value respectively
2144 for underflow or overflow.
2145 The *isExact output tells whether the result is exact, in the sense
2146 that converting it back to the original floating point type produces
2147 the original value. This is almost equivalent to result==opOK,
2148 except for negative zeroes.
2151 IEEEFloat::convertToInteger(MutableArrayRef<integerPart> parts,
2152 unsigned int width, bool isSigned,
2153 roundingMode rounding_mode, bool *isExact) const {
2156 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2159 if (fs == opInvalidOp) {
2160 unsigned int bits, dstPartsCount;
2162 dstPartsCount = partCountForBits(width);
2163 assert(dstPartsCount <= parts.size() && "Integer too big");
2165 if (category == fcNaN)
2170 bits = width - isSigned;
2172 APInt::tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits);
2173 if (sign && isSigned)
2174 APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1);
2180 /* Convert an unsigned integer SRC to a floating point number,
2181 rounding according to ROUNDING_MODE. The sign of the floating
2182 point number is not modified. */
2183 IEEEFloat::opStatus IEEEFloat::convertFromUnsignedParts(
2184 const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) {
2185 unsigned int omsb, precision, dstCount;
2187 lostFraction lost_fraction;
2189 category = fcNormal;
2190 omsb = APInt::tcMSB(src, srcCount) + 1;
2191 dst = significandParts();
2192 dstCount = partCount();
2193 precision = semantics->precision;
2195 /* We want the most significant PRECISION bits of SRC. There may not
2196 be that many; extract what we can. */
2197 if (precision <= omsb) {
2198 exponent = omsb - 1;
2199 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2201 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2203 exponent = precision - 1;
2204 lost_fraction = lfExactlyZero;
2205 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2208 return normalize(rounding_mode, lost_fraction);
2211 IEEEFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned,
2212 roundingMode rounding_mode) {
2213 unsigned int partCount = Val.getNumWords();
2217 if (isSigned && api.isNegative()) {
2222 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2225 /* Convert a two's complement integer SRC to a floating point number,
2226 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2227 integer is signed, in which case it must be sign-extended. */
2229 IEEEFloat::convertFromSignExtendedInteger(const integerPart *src,
2230 unsigned int srcCount, bool isSigned,
2231 roundingMode rounding_mode) {
2235 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2238 /* If we're signed and negative negate a copy. */
2240 copy = new integerPart[srcCount];
2241 APInt::tcAssign(copy, src, srcCount);
2242 APInt::tcNegate(copy, srcCount);
2243 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2247 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2253 /* FIXME: should this just take a const APInt reference? */
2255 IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2256 unsigned int width, bool isSigned,
2257 roundingMode rounding_mode) {
2258 unsigned int partCount = partCountForBits(width);
2259 APInt api = APInt(width, makeArrayRef(parts, partCount));
2262 if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2267 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2271 IEEEFloat::convertFromHexadecimalString(StringRef s,
2272 roundingMode rounding_mode) {
2273 lostFraction lost_fraction = lfExactlyZero;
2275 category = fcNormal;
2279 integerPart *significand = significandParts();
2280 unsigned partsCount = partCount();
2281 unsigned bitPos = partsCount * integerPartWidth;
2282 bool computedTrailingFraction = false;
2284 // Skip leading zeroes and any (hexa)decimal point.
2285 StringRef::iterator begin = s.begin();
2286 StringRef::iterator end = s.end();
2287 StringRef::iterator dot;
2288 StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2289 StringRef::iterator firstSignificantDigit = p;
2292 integerPart hex_value;
2295 assert(dot == end && "String contains multiple dots");
2300 hex_value = hexDigitValue(*p);
2301 if (hex_value == -1U)
2306 // Store the number while we have space.
2309 hex_value <<= bitPos % integerPartWidth;
2310 significand[bitPos / integerPartWidth] |= hex_value;
2311 } else if (!computedTrailingFraction) {
2312 lost_fraction = trailingHexadecimalFraction(p, end, hex_value);
2313 computedTrailingFraction = true;
2317 /* Hex floats require an exponent but not a hexadecimal point. */
2318 assert(p != end && "Hex strings require an exponent");
2319 assert((*p == 'p' || *p == 'P') && "Invalid character in significand");
2320 assert(p != begin && "Significand has no digits");
2321 assert((dot == end || p - begin != 1) && "Significand has no digits");
2323 /* Ignore the exponent if we are zero. */
2324 if (p != firstSignificantDigit) {
2327 /* Implicit hexadecimal point? */
2331 /* Calculate the exponent adjustment implicit in the number of
2332 significant digits. */
2333 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2334 if (expAdjustment < 0)
2336 expAdjustment = expAdjustment * 4 - 1;
2338 /* Adjust for writing the significand starting at the most
2339 significant nibble. */
2340 expAdjustment += semantics->precision;
2341 expAdjustment -= partsCount * integerPartWidth;
2343 /* Adjust for the given exponent. */
2344 exponent = totalExponent(p + 1, end, expAdjustment);
2347 return normalize(rounding_mode, lost_fraction);
2351 IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2352 unsigned sigPartCount, int exp,
2353 roundingMode rounding_mode) {
2354 unsigned int parts, pow5PartCount;
2355 fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
2356 integerPart pow5Parts[maxPowerOfFiveParts];
2359 isNearest = (rounding_mode == rmNearestTiesToEven ||
2360 rounding_mode == rmNearestTiesToAway);
2362 parts = partCountForBits(semantics->precision + 11);
2364 /* Calculate pow(5, abs(exp)). */
2365 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2367 for (;; parts *= 2) {
2368 opStatus sigStatus, powStatus;
2369 unsigned int excessPrecision, truncatedBits;
2371 calcSemantics.precision = parts * integerPartWidth - 1;
2372 excessPrecision = calcSemantics.precision - semantics->precision;
2373 truncatedBits = excessPrecision;
2375 IEEEFloat decSig(calcSemantics, uninitialized);
2376 decSig.makeZero(sign);
2377 IEEEFloat pow5(calcSemantics);
2379 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2380 rmNearestTiesToEven);
2381 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2382 rmNearestTiesToEven);
2383 /* Add exp, as 10^n = 5^n * 2^n. */
2384 decSig.exponent += exp;
2386 lostFraction calcLostFraction;
2387 integerPart HUerr, HUdistance;
2388 unsigned int powHUerr;
2391 /* multiplySignificand leaves the precision-th bit set to 1. */
2392 calcLostFraction = decSig.multiplySignificand(pow5, nullptr);
2393 powHUerr = powStatus != opOK;
2395 calcLostFraction = decSig.divideSignificand(pow5);
2396 /* Denormal numbers have less precision. */
2397 if (decSig.exponent < semantics->minExponent) {
2398 excessPrecision += (semantics->minExponent - decSig.exponent);
2399 truncatedBits = excessPrecision;
2400 if (excessPrecision > calcSemantics.precision)
2401 excessPrecision = calcSemantics.precision;
2403 /* Extra half-ulp lost in reciprocal of exponent. */
2404 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2407 /* Both multiplySignificand and divideSignificand return the
2408 result with the integer bit set. */
2409 assert(APInt::tcExtractBit
2410 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2412 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2414 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2415 excessPrecision, isNearest);
2417 /* Are we guaranteed to round correctly if we truncate? */
2418 if (HUdistance >= HUerr) {
2419 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2420 calcSemantics.precision - excessPrecision,
2422 /* Take the exponent of decSig. If we tcExtract-ed less bits
2423 above we must adjust our exponent to compensate for the
2424 implicit right shift. */
2425 exponent = (decSig.exponent + semantics->precision
2426 - (calcSemantics.precision - excessPrecision));
2427 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2430 return normalize(rounding_mode, calcLostFraction);
2436 IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
2440 /* Scan the text. */
2441 StringRef::iterator p = str.begin();
2442 interpretDecimal(p, str.end(), &D);
2444 /* Handle the quick cases. First the case of no significant digits,
2445 i.e. zero, and then exponents that are obviously too large or too
2446 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2447 definitely overflows if
2449 (exp - 1) * L >= maxExponent
2451 and definitely underflows to zero where
2453 (exp + 1) * L <= minExponent - precision
2455 With integer arithmetic the tightest bounds for L are
2457 93/28 < L < 196/59 [ numerator <= 256 ]
2458 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2461 // Test if we have a zero number allowing for strings with no null terminators
2462 // and zero decimals with non-zero exponents.
2464 // We computed firstSigDigit by ignoring all zeros and dots. Thus if
2465 // D->firstSigDigit equals str.end(), every digit must be a zero and there can
2466 // be at most one dot. On the other hand, if we have a zero with a non-zero
2467 // exponent, then we know that D.firstSigDigit will be non-numeric.
2468 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
2472 /* Check whether the normalized exponent is high enough to overflow
2473 max during the log-rebasing in the max-exponent check below. */
2474 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2475 fs = handleOverflow(rounding_mode);
2477 /* If it wasn't, then it also wasn't high enough to overflow max
2478 during the log-rebasing in the min-exponent check. Check that it
2479 won't overflow min in either check, then perform the min-exponent
2481 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
2482 (D.normalizedExponent + 1) * 28738 <=
2483 8651 * (semantics->minExponent - (int) semantics->precision)) {
2484 /* Underflow to zero and round. */
2485 category = fcNormal;
2487 fs = normalize(rounding_mode, lfLessThanHalf);
2489 /* We can finally safely perform the max-exponent check. */
2490 } else if ((D.normalizedExponent - 1) * 42039
2491 >= 12655 * semantics->maxExponent) {
2492 /* Overflow and round. */
2493 fs = handleOverflow(rounding_mode);
2495 integerPart *decSignificand;
2496 unsigned int partCount;
2498 /* A tight upper bound on number of bits required to hold an
2499 N-digit decimal integer is N * 196 / 59. Allocate enough space
2500 to hold the full significand, and an extra part required by
2502 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2503 partCount = partCountForBits(1 + 196 * partCount / 59);
2504 decSignificand = new integerPart[partCount + 1];
2507 /* Convert to binary efficiently - we do almost all multiplication
2508 in an integerPart. When this would overflow do we do a single
2509 bignum multiplication, and then revert again to multiplication
2510 in an integerPart. */
2512 integerPart decValue, val, multiplier;
2520 if (p == str.end()) {
2524 decValue = decDigitValue(*p++);
2525 assert(decValue < 10U && "Invalid character in significand");
2527 val = val * 10 + decValue;
2528 /* The maximum number that can be multiplied by ten with any
2529 digit added without overflowing an integerPart. */
2530 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2532 /* Multiply out the current part. */
2533 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2534 partCount, partCount + 1, false);
2536 /* If we used another part (likely but not guaranteed), increase
2538 if (decSignificand[partCount])
2540 } while (p <= D.lastSigDigit);
2542 category = fcNormal;
2543 fs = roundSignificandWithExponent(decSignificand, partCount,
2544 D.exponent, rounding_mode);
2546 delete [] decSignificand;
2552 bool IEEEFloat::convertFromStringSpecials(StringRef str) {
2553 if (str.equals("inf") || str.equals("INFINITY")) {
2558 if (str.equals("-inf") || str.equals("-INFINITY")) {
2563 if (str.equals("nan") || str.equals("NaN")) {
2564 makeNaN(false, false);
2568 if (str.equals("-nan") || str.equals("-NaN")) {
2569 makeNaN(false, true);
2576 IEEEFloat::opStatus IEEEFloat::convertFromString(StringRef str,
2577 roundingMode rounding_mode) {
2578 assert(!str.empty() && "Invalid string length");
2580 // Handle special cases.
2581 if (convertFromStringSpecials(str))
2584 /* Handle a leading minus sign. */
2585 StringRef::iterator p = str.begin();
2586 size_t slen = str.size();
2587 sign = *p == '-' ? 1 : 0;
2588 if (*p == '-' || *p == '+') {
2591 assert(slen && "String has no digits");
2594 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2595 assert(slen - 2 && "Invalid string");
2596 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
2600 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
2603 /* Write out a hexadecimal representation of the floating point value
2604 to DST, which must be of sufficient size, in the C99 form
2605 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2606 excluding the terminating NUL.
2608 If UPPERCASE, the output is in upper case, otherwise in lower case.
2610 HEXDIGITS digits appear altogether, rounding the value if
2611 necessary. If HEXDIGITS is 0, the minimal precision to display the
2612 number precisely is used instead. If nothing would appear after
2613 the decimal point it is suppressed.
2615 The decimal exponent is always printed and has at least one digit.
2616 Zero values display an exponent of zero. Infinities and NaNs
2617 appear as "infinity" or "nan" respectively.
2619 The above rules are as specified by C99. There is ambiguity about
2620 what the leading hexadecimal digit should be. This implementation
2621 uses whatever is necessary so that the exponent is displayed as
2622 stored. This implies the exponent will fall within the IEEE format
2623 range, and the leading hexadecimal digit will be 0 (for denormals),
2624 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2625 any other digits zero).
2627 unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits,
2629 roundingMode rounding_mode) const {
2638 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2639 dst += sizeof infinityL - 1;
2643 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2644 dst += sizeof NaNU - 1;
2649 *dst++ = upperCase ? 'X': 'x';
2651 if (hexDigits > 1) {
2653 memset (dst, '0', hexDigits - 1);
2654 dst += hexDigits - 1;
2656 *dst++ = upperCase ? 'P': 'p';
2661 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2667 return static_cast<unsigned int>(dst - p);
2670 /* Does the hard work of outputting the correctly rounded hexadecimal
2671 form of a normal floating point number with the specified number of
2672 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2673 digits necessary to print the value precisely is output. */
2674 char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2676 roundingMode rounding_mode) const {
2677 unsigned int count, valueBits, shift, partsCount, outputDigits;
2678 const char *hexDigitChars;
2679 const integerPart *significand;
2684 *dst++ = upperCase ? 'X': 'x';
2687 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2689 significand = significandParts();
2690 partsCount = partCount();
2692 /* +3 because the first digit only uses the single integer bit, so
2693 we have 3 virtual zero most-significant-bits. */
2694 valueBits = semantics->precision + 3;
2695 shift = integerPartWidth - valueBits % integerPartWidth;
2697 /* The natural number of digits required ignoring trailing
2698 insignificant zeroes. */
2699 outputDigits = (valueBits - significandLSB () + 3) / 4;
2701 /* hexDigits of zero means use the required number for the
2702 precision. Otherwise, see if we are truncating. If we are,
2703 find out if we need to round away from zero. */
2705 if (hexDigits < outputDigits) {
2706 /* We are dropping non-zero bits, so need to check how to round.
2707 "bits" is the number of dropped bits. */
2709 lostFraction fraction;
2711 bits = valueBits - hexDigits * 4;
2712 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2713 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2715 outputDigits = hexDigits;
2718 /* Write the digits consecutively, and start writing in the location
2719 of the hexadecimal point. We move the most significant digit
2720 left and add the hexadecimal point later. */
2723 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2725 while (outputDigits && count) {
2728 /* Put the most significant integerPartWidth bits in "part". */
2729 if (--count == partsCount)
2730 part = 0; /* An imaginary higher zero part. */
2732 part = significand[count] << shift;
2735 part |= significand[count - 1] >> (integerPartWidth - shift);
2737 /* Convert as much of "part" to hexdigits as we can. */
2738 unsigned int curDigits = integerPartWidth / 4;
2740 if (curDigits > outputDigits)
2741 curDigits = outputDigits;
2742 dst += partAsHex (dst, part, curDigits, hexDigitChars);
2743 outputDigits -= curDigits;
2749 /* Note that hexDigitChars has a trailing '0'. */
2752 *q = hexDigitChars[hexDigitValue (*q) + 1];
2753 } while (*q == '0');
2756 /* Add trailing zeroes. */
2757 memset (dst, '0', outputDigits);
2758 dst += outputDigits;
2761 /* Move the most significant digit to before the point, and if there
2762 is something after the decimal point add it. This must come
2763 after rounding above. */
2770 /* Finally output the exponent. */
2771 *dst++ = upperCase ? 'P': 'p';
2773 return writeSignedDecimal (dst, exponent);
2776 hash_code hash_value(const IEEEFloat &Arg) {
2777 if (!Arg.isFiniteNonZero())
2778 return hash_combine((uint8_t)Arg.category,
2779 // NaN has no sign, fix it at zero.
2780 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
2781 Arg.semantics->precision);
2783 // Normal floats need their exponent and significand hashed.
2784 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
2785 Arg.semantics->precision, Arg.exponent,
2787 Arg.significandParts(),
2788 Arg.significandParts() + Arg.partCount()));
2791 // Conversion from APFloat to/from host float/double. It may eventually be
2792 // possible to eliminate these and have everybody deal with APFloats, but that
2793 // will take a while. This approach will not easily extend to long double.
2794 // Current implementation requires integerPartWidth==64, which is correct at
2795 // the moment but could be made more general.
2797 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2798 // the actual IEEE respresentations. We compensate for that here.
2800 APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
2801 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended);
2802 assert(partCount()==2);
2804 uint64_t myexponent, mysignificand;
2806 if (isFiniteNonZero()) {
2807 myexponent = exponent+16383; //bias
2808 mysignificand = significandParts()[0];
2809 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2810 myexponent = 0; // denormal
2811 } else if (category==fcZero) {
2814 } else if (category==fcInfinity) {
2815 myexponent = 0x7fff;
2816 mysignificand = 0x8000000000000000ULL;
2818 assert(category == fcNaN && "Unknown category");
2819 myexponent = 0x7fff;
2820 mysignificand = significandParts()[0];
2824 words[0] = mysignificand;
2825 words[1] = ((uint64_t)(sign & 1) << 15) |
2826 (myexponent & 0x7fffLL);
2827 return APInt(80, words);
2830 APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const {
2831 assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy);
2832 assert(partCount()==2);
2838 // Convert number to double. To avoid spurious underflows, we re-
2839 // normalize against the "double" minExponent first, and only *then*
2840 // truncate the mantissa. The result of that second conversion
2841 // may be inexact, but should never underflow.
2842 // Declare fltSemantics before APFloat that uses it (and
2843 // saves pointer to it) to ensure correct destruction order.
2844 fltSemantics extendedSemantics = *semantics;
2845 extendedSemantics.minExponent = semIEEEdouble.minExponent;
2846 IEEEFloat extended(*this);
2847 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2848 assert(fs == opOK && !losesInfo);
2851 IEEEFloat u(extended);
2852 fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
2853 assert(fs == opOK || fs == opInexact);
2855 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
2857 // If conversion was exact or resulted in a special case, we're done;
2858 // just set the second double to zero. Otherwise, re-convert back to
2859 // the extended format and compute the difference. This now should
2860 // convert exactly to double.
2861 if (u.isFiniteNonZero() && losesInfo) {
2862 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2863 assert(fs == opOK && !losesInfo);
2866 IEEEFloat v(extended);
2867 v.subtract(u, rmNearestTiesToEven);
2868 fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
2869 assert(fs == opOK && !losesInfo);
2871 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
2876 return APInt(128, words);
2879 APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
2880 assert(semantics == (const llvm::fltSemantics*)&semIEEEquad);
2881 assert(partCount()==2);
2883 uint64_t myexponent, mysignificand, mysignificand2;
2885 if (isFiniteNonZero()) {
2886 myexponent = exponent+16383; //bias
2887 mysignificand = significandParts()[0];
2888 mysignificand2 = significandParts()[1];
2889 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
2890 myexponent = 0; // denormal
2891 } else if (category==fcZero) {
2893 mysignificand = mysignificand2 = 0;
2894 } else if (category==fcInfinity) {
2895 myexponent = 0x7fff;
2896 mysignificand = mysignificand2 = 0;
2898 assert(category == fcNaN && "Unknown category!");
2899 myexponent = 0x7fff;
2900 mysignificand = significandParts()[0];
2901 mysignificand2 = significandParts()[1];
2905 words[0] = mysignificand;
2906 words[1] = ((uint64_t)(sign & 1) << 63) |
2907 ((myexponent & 0x7fff) << 48) |
2908 (mysignificand2 & 0xffffffffffffLL);
2910 return APInt(128, words);
2913 APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
2914 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble);
2915 assert(partCount()==1);
2917 uint64_t myexponent, mysignificand;
2919 if (isFiniteNonZero()) {
2920 myexponent = exponent+1023; //bias
2921 mysignificand = *significandParts();
2922 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2923 myexponent = 0; // denormal
2924 } else if (category==fcZero) {
2927 } else if (category==fcInfinity) {
2931 assert(category == fcNaN && "Unknown category!");
2933 mysignificand = *significandParts();
2936 return APInt(64, ((((uint64_t)(sign & 1) << 63) |
2937 ((myexponent & 0x7ff) << 52) |
2938 (mysignificand & 0xfffffffffffffLL))));
2941 APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
2942 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle);
2943 assert(partCount()==1);
2945 uint32_t myexponent, mysignificand;
2947 if (isFiniteNonZero()) {
2948 myexponent = exponent+127; //bias
2949 mysignificand = (uint32_t)*significandParts();
2950 if (myexponent == 1 && !(mysignificand & 0x800000))
2951 myexponent = 0; // denormal
2952 } else if (category==fcZero) {
2955 } else if (category==fcInfinity) {
2959 assert(category == fcNaN && "Unknown category!");
2961 mysignificand = (uint32_t)*significandParts();
2964 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
2965 (mysignificand & 0x7fffff)));
2968 APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
2969 assert(semantics == (const llvm::fltSemantics*)&semIEEEhalf);
2970 assert(partCount()==1);
2972 uint32_t myexponent, mysignificand;
2974 if (isFiniteNonZero()) {
2975 myexponent = exponent+15; //bias
2976 mysignificand = (uint32_t)*significandParts();
2977 if (myexponent == 1 && !(mysignificand & 0x400))
2978 myexponent = 0; // denormal
2979 } else if (category==fcZero) {
2982 } else if (category==fcInfinity) {
2986 assert(category == fcNaN && "Unknown category!");
2988 mysignificand = (uint32_t)*significandParts();
2991 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |
2992 (mysignificand & 0x3ff)));
2995 // This function creates an APInt that is just a bit map of the floating
2996 // point constant as it would appear in memory. It is not a conversion,
2997 // and treating the result as a normal integer is unlikely to be useful.
2999 APInt IEEEFloat::bitcastToAPInt() const {
3000 if (semantics == (const llvm::fltSemantics*)&semIEEEhalf)
3001 return convertHalfAPFloatToAPInt();
3003 if (semantics == (const llvm::fltSemantics*)&semIEEEsingle)
3004 return convertFloatAPFloatToAPInt();
3006 if (semantics == (const llvm::fltSemantics*)&semIEEEdouble)
3007 return convertDoubleAPFloatToAPInt();
3009 if (semantics == (const llvm::fltSemantics*)&semIEEEquad)
3010 return convertQuadrupleAPFloatToAPInt();
3012 if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy)
3013 return convertPPCDoubleDoubleAPFloatToAPInt();
3015 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended &&
3017 return convertF80LongDoubleAPFloatToAPInt();
3020 float IEEEFloat::convertToFloat() const {
3021 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle &&
3022 "Float semantics are not IEEEsingle");
3023 APInt api = bitcastToAPInt();
3024 return api.bitsToFloat();
3027 double IEEEFloat::convertToDouble() const {
3028 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble &&
3029 "Float semantics are not IEEEdouble");
3030 APInt api = bitcastToAPInt();
3031 return api.bitsToDouble();
3034 /// Integer bit is explicit in this format. Intel hardware (387 and later)
3035 /// does not support these bit patterns:
3036 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3037 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3038 /// exponent = 0, integer bit 1 ("pseudodenormal")
3039 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3040 /// At the moment, the first two are treated as NaNs, the second two as Normal.
3041 void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
3042 assert(api.getBitWidth()==80);
3043 uint64_t i1 = api.getRawData()[0];
3044 uint64_t i2 = api.getRawData()[1];
3045 uint64_t myexponent = (i2 & 0x7fff);
3046 uint64_t mysignificand = i1;
3048 initialize(&semX87DoubleExtended);
3049 assert(partCount()==2);
3051 sign = static_cast<unsigned int>(i2>>15);
3052 if (myexponent==0 && mysignificand==0) {
3053 // exponent, significand meaningless
3055 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3056 // exponent, significand meaningless
3057 category = fcInfinity;
3058 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
3059 // exponent meaningless
3061 significandParts()[0] = mysignificand;
3062 significandParts()[1] = 0;
3064 category = fcNormal;
3065 exponent = myexponent - 16383;
3066 significandParts()[0] = mysignificand;
3067 significandParts()[1] = 0;
3068 if (myexponent==0) // denormal
3073 void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) {
3074 assert(api.getBitWidth()==128);
3075 uint64_t i1 = api.getRawData()[0];
3076 uint64_t i2 = api.getRawData()[1];
3080 // Get the first double and convert to our format.
3081 initFromDoubleAPInt(APInt(64, i1));
3082 fs = convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3083 assert(fs == opOK && !losesInfo);
3086 // Unless we have a special case, add in second double.
3087 if (isFiniteNonZero()) {
3088 IEEEFloat v(semIEEEdouble, APInt(64, i2));
3089 fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3090 assert(fs == opOK && !losesInfo);
3093 add(v, rmNearestTiesToEven);
3097 void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
3098 assert(api.getBitWidth()==128);
3099 uint64_t i1 = api.getRawData()[0];
3100 uint64_t i2 = api.getRawData()[1];
3101 uint64_t myexponent = (i2 >> 48) & 0x7fff;
3102 uint64_t mysignificand = i1;
3103 uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
3105 initialize(&semIEEEquad);
3106 assert(partCount()==2);
3108 sign = static_cast<unsigned int>(i2>>63);
3109 if (myexponent==0 &&
3110 (mysignificand==0 && mysignificand2==0)) {
3111 // exponent, significand meaningless
3113 } else if (myexponent==0x7fff &&
3114 (mysignificand==0 && mysignificand2==0)) {
3115 // exponent, significand meaningless
3116 category = fcInfinity;
3117 } else if (myexponent==0x7fff &&
3118 (mysignificand!=0 || mysignificand2 !=0)) {
3119 // exponent meaningless
3121 significandParts()[0] = mysignificand;
3122 significandParts()[1] = mysignificand2;
3124 category = fcNormal;
3125 exponent = myexponent - 16383;
3126 significandParts()[0] = mysignificand;
3127 significandParts()[1] = mysignificand2;
3128 if (myexponent==0) // denormal
3131 significandParts()[1] |= 0x1000000000000LL; // integer bit
3135 void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
3136 assert(api.getBitWidth()==64);
3137 uint64_t i = *api.getRawData();
3138 uint64_t myexponent = (i >> 52) & 0x7ff;
3139 uint64_t mysignificand = i & 0xfffffffffffffLL;
3141 initialize(&semIEEEdouble);
3142 assert(partCount()==1);
3144 sign = static_cast<unsigned int>(i>>63);
3145 if (myexponent==0 && mysignificand==0) {
3146 // exponent, significand meaningless
3148 } else if (myexponent==0x7ff && mysignificand==0) {
3149 // exponent, significand meaningless
3150 category = fcInfinity;
3151 } else if (myexponent==0x7ff && mysignificand!=0) {
3152 // exponent meaningless
3154 *significandParts() = mysignificand;
3156 category = fcNormal;
3157 exponent = myexponent - 1023;
3158 *significandParts() = mysignificand;
3159 if (myexponent==0) // denormal
3162 *significandParts() |= 0x10000000000000LL; // integer bit
3166 void IEEEFloat::initFromFloatAPInt(const APInt &api) {
3167 assert(api.getBitWidth()==32);
3168 uint32_t i = (uint32_t)*api.getRawData();
3169 uint32_t myexponent = (i >> 23) & 0xff;
3170 uint32_t mysignificand = i & 0x7fffff;
3172 initialize(&semIEEEsingle);
3173 assert(partCount()==1);
3176 if (myexponent==0 && mysignificand==0) {
3177 // exponent, significand meaningless
3179 } else if (myexponent==0xff && mysignificand==0) {
3180 // exponent, significand meaningless
3181 category = fcInfinity;
3182 } else if (myexponent==0xff && mysignificand!=0) {
3183 // sign, exponent, significand meaningless
3185 *significandParts() = mysignificand;
3187 category = fcNormal;
3188 exponent = myexponent - 127; //bias
3189 *significandParts() = mysignificand;
3190 if (myexponent==0) // denormal
3193 *significandParts() |= 0x800000; // integer bit
3197 void IEEEFloat::initFromHalfAPInt(const APInt &api) {
3198 assert(api.getBitWidth()==16);
3199 uint32_t i = (uint32_t)*api.getRawData();
3200 uint32_t myexponent = (i >> 10) & 0x1f;
3201 uint32_t mysignificand = i & 0x3ff;
3203 initialize(&semIEEEhalf);
3204 assert(partCount()==1);
3207 if (myexponent==0 && mysignificand==0) {
3208 // exponent, significand meaningless
3210 } else if (myexponent==0x1f && mysignificand==0) {
3211 // exponent, significand meaningless
3212 category = fcInfinity;
3213 } else if (myexponent==0x1f && mysignificand!=0) {
3214 // sign, exponent, significand meaningless
3216 *significandParts() = mysignificand;
3218 category = fcNormal;
3219 exponent = myexponent - 15; //bias
3220 *significandParts() = mysignificand;
3221 if (myexponent==0) // denormal
3224 *significandParts() |= 0x400; // integer bit
3228 /// Treat api as containing the bits of a floating point number. Currently
3229 /// we infer the floating point type from the size of the APInt. The
3230 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
3231 /// when the size is anything else).
3232 void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
3233 if (Sem == &semIEEEhalf)
3234 return initFromHalfAPInt(api);
3235 if (Sem == &semIEEEsingle)
3236 return initFromFloatAPInt(api);
3237 if (Sem == &semIEEEdouble)
3238 return initFromDoubleAPInt(api);
3239 if (Sem == &semX87DoubleExtended)
3240 return initFromF80LongDoubleAPInt(api);
3241 if (Sem == &semIEEEquad)
3242 return initFromQuadrupleAPInt(api);
3243 if (Sem == &semPPCDoubleDoubleLegacy)
3244 return initFromPPCDoubleDoubleAPInt(api);
3246 llvm_unreachable(nullptr);
3249 /// Make this number the largest magnitude normal number in the given
3251 void IEEEFloat::makeLargest(bool Negative) {
3252 // We want (in interchange format):
3253 // sign = {Negative}
3255 // significand = 1..1
3256 category = fcNormal;
3258 exponent = semantics->maxExponent;
3260 // Use memset to set all but the highest integerPart to all ones.
3261 integerPart *significand = significandParts();
3262 unsigned PartCount = partCount();
3263 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
3265 // Set the high integerPart especially setting all unused top bits for
3266 // internal consistency.
3267 const unsigned NumUnusedHighBits =
3268 PartCount*integerPartWidth - semantics->precision;
3269 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
3270 ? (~integerPart(0) >> NumUnusedHighBits)
3274 /// Make this number the smallest magnitude denormal number in the given
3276 void IEEEFloat::makeSmallest(bool Negative) {
3277 // We want (in interchange format):
3278 // sign = {Negative}
3280 // significand = 0..01
3281 category = fcNormal;
3283 exponent = semantics->minExponent;
3284 APInt::tcSet(significandParts(), 1, partCount());
3287 void IEEEFloat::makeSmallestNormalized(bool Negative) {
3288 // We want (in interchange format):
3289 // sign = {Negative}
3291 // significand = 10..0
3293 category = fcNormal;
3296 exponent = semantics->minExponent;
3297 significandParts()[partCountForBits(semantics->precision) - 1] |=
3298 (((integerPart)1) << ((semantics->precision - 1) % integerPartWidth));
3301 IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
3302 initFromAPInt(&Sem, API);
3305 IEEEFloat::IEEEFloat(float f) {
3306 initFromAPInt(&semIEEEsingle, APInt::floatToBits(f));
3309 IEEEFloat::IEEEFloat(double d) {
3310 initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d));
3314 void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
3315 Buffer.append(Str.begin(), Str.end());
3318 /// Removes data from the given significand until it is no more
3319 /// precise than is required for the desired precision.
3320 void AdjustToPrecision(APInt &significand,
3321 int &exp, unsigned FormatPrecision) {
3322 unsigned bits = significand.getActiveBits();
3324 // 196/59 is a very slight overestimate of lg_2(10).
3325 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
3327 if (bits <= bitsRequired) return;
3329 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3330 if (!tensRemovable) return;
3332 exp += tensRemovable;
3334 APInt divisor(significand.getBitWidth(), 1);
3335 APInt powten(significand.getBitWidth(), 10);
3337 if (tensRemovable & 1)
3339 tensRemovable >>= 1;
3340 if (!tensRemovable) break;
3344 significand = significand.udiv(divisor);
3346 // Truncate the significand down to its active bit count.
3347 significand = significand.trunc(significand.getActiveBits());
3351 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
3352 int &exp, unsigned FormatPrecision) {
3353 unsigned N = buffer.size();
3354 if (N <= FormatPrecision) return;
3356 // The most significant figures are the last ones in the buffer.
3357 unsigned FirstSignificant = N - FormatPrecision;
3360 // FIXME: this probably shouldn't use 'round half up'.
3362 // Rounding down is just a truncation, except we also want to drop
3363 // trailing zeros from the new result.
3364 if (buffer[FirstSignificant - 1] < '5') {
3365 while (FirstSignificant < N && buffer[FirstSignificant] == '0')
3368 exp += FirstSignificant;
3369 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3373 // Rounding up requires a decimal add-with-carry. If we continue
3374 // the carry, the newly-introduced zeros will just be truncated.
3375 for (unsigned I = FirstSignificant; I != N; ++I) {
3376 if (buffer[I] == '9') {
3384 // If we carried through, we have exactly one digit of precision.
3385 if (FirstSignificant == N) {
3386 exp += FirstSignificant;
3388 buffer.push_back('1');
3392 exp += FirstSignificant;
3393 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3397 void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
3398 unsigned FormatMaxPadding, bool TruncateZero) const {
3402 return append(Str, "-Inf");
3404 return append(Str, "+Inf");
3406 case fcNaN: return append(Str, "NaN");
3412 if (!FormatMaxPadding) {
3414 append(Str, "0.0E+0");
3417 if (FormatPrecision > 1)
3418 Str.append(FormatPrecision - 1, '0');
3419 append(Str, "e+00");
3432 // Decompose the number into an APInt and an exponent.
3433 int exp = exponent - ((int) semantics->precision - 1);
3434 APInt significand(semantics->precision,
3435 makeArrayRef(significandParts(),
3436 partCountForBits(semantics->precision)));
3438 // Set FormatPrecision if zero. We want to do this before we
3439 // truncate trailing zeros, as those are part of the precision.
3440 if (!FormatPrecision) {
3441 // We use enough digits so the number can be round-tripped back to an
3442 // APFloat. The formula comes from "How to Print Floating-Point Numbers
3443 // Accurately" by Steele and White.
3444 // FIXME: Using a formula based purely on the precision is conservative;
3445 // we can print fewer digits depending on the actual value being printed.
3447 // FormatPrecision = 2 + floor(significandBits / lg_2(10))
3448 FormatPrecision = 2 + semantics->precision * 59 / 196;
3451 // Ignore trailing binary zeros.
3452 int trailingZeros = significand.countTrailingZeros();
3453 exp += trailingZeros;
3454 significand.lshrInPlace(trailingZeros);
3456 // Change the exponent from 2^e to 10^e.
3459 } else if (exp > 0) {
3461 significand = significand.zext(semantics->precision + exp);
3462 significand <<= exp;
3464 } else { /* exp < 0 */
3467 // We transform this using the identity:
3468 // (N)(2^-e) == (N)(5^e)(10^-e)
3469 // This means we have to multiply N (the significand) by 5^e.
3470 // To avoid overflow, we have to operate on numbers large
3471 // enough to store N * 5^e:
3472 // log2(N * 5^e) == log2(N) + e * log2(5)
3473 // <= semantics->precision + e * 137 / 59
3474 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
3476 unsigned precision = semantics->precision + (137 * texp + 136) / 59;
3478 // Multiply significand by 5^e.
3479 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
3480 significand = significand.zext(precision);
3481 APInt five_to_the_i(precision, 5);
3483 if (texp & 1) significand *= five_to_the_i;
3487 five_to_the_i *= five_to_the_i;
3491 AdjustToPrecision(significand, exp, FormatPrecision);
3493 SmallVector<char, 256> buffer;
3496 unsigned precision = significand.getBitWidth();
3497 APInt ten(precision, 10);
3498 APInt digit(precision, 0);
3500 bool inTrail = true;
3501 while (significand != 0) {
3502 // digit <- significand % 10
3503 // significand <- significand / 10
3504 APInt::udivrem(significand, ten, significand, digit);
3506 unsigned d = digit.getZExtValue();
3508 // Drop trailing zeros.
3509 if (inTrail && !d) exp++;
3511 buffer.push_back((char) ('0' + d));
3516 assert(!buffer.empty() && "no characters in buffer!");
3518 // Drop down to FormatPrecision.
3519 // TODO: don't do more precise calculations above than are required.
3520 AdjustToPrecision(buffer, exp, FormatPrecision);
3522 unsigned NDigits = buffer.size();
3524 // Check whether we should use scientific notation.
3525 bool FormatScientific;
3526 if (!FormatMaxPadding)
3527 FormatScientific = true;
3532 // But we shouldn't make the number look more precise than it is.
3533 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
3534 NDigits + (unsigned) exp > FormatPrecision);
3536 // Power of the most significant digit.
3537 int MSD = exp + (int) (NDigits - 1);
3540 FormatScientific = false;
3542 // 765e-5 == 0.00765
3544 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
3549 // Scientific formatting is pretty straightforward.
3550 if (FormatScientific) {
3551 exp += (NDigits - 1);
3553 Str.push_back(buffer[NDigits-1]);
3555 if (NDigits == 1 && TruncateZero)
3558 for (unsigned I = 1; I != NDigits; ++I)
3559 Str.push_back(buffer[NDigits-1-I]);
3560 // Fill with zeros up to FormatPrecision.
3561 if (!TruncateZero && FormatPrecision > NDigits - 1)
3562 Str.append(FormatPrecision - NDigits + 1, '0');
3563 // For !TruncateZero we use lower 'e'.
3564 Str.push_back(TruncateZero ? 'E' : 'e');
3566 Str.push_back(exp >= 0 ? '+' : '-');
3567 if (exp < 0) exp = -exp;
3568 SmallVector<char, 6> expbuf;
3570 expbuf.push_back((char) ('0' + (exp % 10)));
3573 // Exponent always at least two digits if we do not truncate zeros.
3574 if (!TruncateZero && expbuf.size() < 2)
3575 expbuf.push_back('0');
3576 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
3577 Str.push_back(expbuf[E-1-I]);
3581 // Non-scientific, positive exponents.
3583 for (unsigned I = 0; I != NDigits; ++I)
3584 Str.push_back(buffer[NDigits-1-I]);
3585 for (unsigned I = 0; I != (unsigned) exp; ++I)
3590 // Non-scientific, negative exponents.
3592 // The number of digits to the left of the decimal point.
3593 int NWholeDigits = exp + (int) NDigits;
3596 if (NWholeDigits > 0) {
3597 for (; I != (unsigned) NWholeDigits; ++I)
3598 Str.push_back(buffer[NDigits-I-1]);
3601 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
3605 for (unsigned Z = 1; Z != NZeros; ++Z)
3609 for (; I != NDigits; ++I)
3610 Str.push_back(buffer[NDigits-I-1]);
3613 bool IEEEFloat::getExactInverse(APFloat *inv) const {
3614 // Special floats and denormals have no exact inverse.
3615 if (!isFiniteNonZero())
3618 // Check that the number is a power of two by making sure that only the
3619 // integer bit is set in the significand.
3620 if (significandLSB() != semantics->precision - 1)
3624 IEEEFloat reciprocal(*semantics, 1ULL);
3625 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
3628 // Avoid multiplication with a denormal, it is not safe on all platforms and
3629 // may be slower than a normal division.
3630 if (reciprocal.isDenormal())
3633 assert(reciprocal.isFiniteNonZero() &&
3634 reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
3637 *inv = APFloat(reciprocal, *semantics);
3642 bool IEEEFloat::isSignaling() const {
3646 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
3647 // first bit of the trailing significand being 0.
3648 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
3651 /// IEEE-754R 2008 5.3.1: nextUp/nextDown.
3653 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
3654 /// appropriate sign switching before/after the computation.
3655 IEEEFloat::opStatus IEEEFloat::next(bool nextDown) {
3656 // If we are performing nextDown, swap sign so we have -x.
3660 // Compute nextUp(x)
3661 opStatus result = opOK;
3663 // Handle each float category separately.
3666 // nextUp(+inf) = +inf
3669 // nextUp(-inf) = -getLargest()
3673 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
3674 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
3675 // change the payload.
3676 if (isSignaling()) {
3677 result = opInvalidOp;
3678 // For consistency, propagate the sign of the sNaN to the qNaN.
3679 makeNaN(false, isNegative(), nullptr);
3683 // nextUp(pm 0) = +getSmallest()
3684 makeSmallest(false);
3687 // nextUp(-getSmallest()) = -0
3688 if (isSmallest() && isNegative()) {
3689 APInt::tcSet(significandParts(), 0, partCount());
3695 // nextUp(getLargest()) == INFINITY
3696 if (isLargest() && !isNegative()) {
3697 APInt::tcSet(significandParts(), 0, partCount());
3698 category = fcInfinity;
3699 exponent = semantics->maxExponent + 1;
3703 // nextUp(normal) == normal + inc.
3705 // If we are negative, we need to decrement the significand.
3707 // We only cross a binade boundary that requires adjusting the exponent
3709 // 1. exponent != semantics->minExponent. This implies we are not in the
3710 // smallest binade or are dealing with denormals.
3711 // 2. Our significand excluding the integral bit is all zeros.
3712 bool WillCrossBinadeBoundary =
3713 exponent != semantics->minExponent && isSignificandAllZeros();
3715 // Decrement the significand.
3717 // We always do this since:
3718 // 1. If we are dealing with a non-binade decrement, by definition we
3719 // just decrement the significand.
3720 // 2. If we are dealing with a normal -> normal binade decrement, since
3721 // we have an explicit integral bit the fact that all bits but the
3722 // integral bit are zero implies that subtracting one will yield a
3723 // significand with 0 integral bit and 1 in all other spots. Thus we
3724 // must just adjust the exponent and set the integral bit to 1.
3725 // 3. If we are dealing with a normal -> denormal binade decrement,
3726 // since we set the integral bit to 0 when we represent denormals, we
3727 // just decrement the significand.
3728 integerPart *Parts = significandParts();
3729 APInt::tcDecrement(Parts, partCount());
3731 if (WillCrossBinadeBoundary) {
3732 // Our result is a normal number. Do the following:
3733 // 1. Set the integral bit to 1.
3734 // 2. Decrement the exponent.
3735 APInt::tcSetBit(Parts, semantics->precision - 1);
3739 // If we are positive, we need to increment the significand.
3741 // We only cross a binade boundary that requires adjusting the exponent if
3742 // the input is not a denormal and all of said input's significand bits
3743 // are set. If all of said conditions are true: clear the significand, set
3744 // the integral bit to 1, and increment the exponent. If we have a
3745 // denormal always increment since moving denormals and the numbers in the
3746 // smallest normal binade have the same exponent in our representation.
3747 bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes();
3749 if (WillCrossBinadeBoundary) {
3750 integerPart *Parts = significandParts();
3751 APInt::tcSet(Parts, 0, partCount());
3752 APInt::tcSetBit(Parts, semantics->precision - 1);
3753 assert(exponent != semantics->maxExponent &&
3754 "We can not increment an exponent beyond the maxExponent allowed"
3755 " by the given floating point semantics.");
3758 incrementSignificand();
3764 // If we are performing nextDown, swap sign so we have -nextUp(-x)
3771 void IEEEFloat::makeInf(bool Negative) {
3772 category = fcInfinity;
3774 exponent = semantics->maxExponent + 1;
3775 APInt::tcSet(significandParts(), 0, partCount());
3778 void IEEEFloat::makeZero(bool Negative) {
3781 exponent = semantics->minExponent-1;
3782 APInt::tcSet(significandParts(), 0, partCount());
3785 void IEEEFloat::makeQuiet() {
3787 APInt::tcSetBit(significandParts(), semantics->precision - 2);
3790 int ilogb(const IEEEFloat &Arg) {
3792 return IEEEFloat::IEK_NaN;
3794 return IEEEFloat::IEK_Zero;
3795 if (Arg.isInfinity())
3796 return IEEEFloat::IEK_Inf;
3797 if (!Arg.isDenormal())
3798 return Arg.exponent;
3800 IEEEFloat Normalized(Arg);
3801 int SignificandBits = Arg.getSemantics().precision - 1;
3803 Normalized.exponent += SignificandBits;
3804 Normalized.normalize(IEEEFloat::rmNearestTiesToEven, lfExactlyZero);
3805 return Normalized.exponent - SignificandBits;
3808 IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode RoundingMode) {
3809 auto MaxExp = X.getSemantics().maxExponent;
3810 auto MinExp = X.getSemantics().minExponent;
3812 // If Exp is wildly out-of-scale, simply adding it to X.exponent will
3813 // overflow; clamp it to a safe range before adding, but ensure that the range
3814 // is large enough that the clamp does not change the result. The range we
3815 // need to support is the difference between the largest possible exponent and
3816 // the normalized exponent of half the smallest denormal.
3818 int SignificandBits = X.getSemantics().precision - 1;
3819 int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;
3821 // Clamp to one past the range ends to let normalize handle overlflow.
3822 X.exponent += std::min(std::max(Exp, -MaxIncrement - 1), MaxIncrement);
3823 X.normalize(RoundingMode, lfExactlyZero);
3829 IEEEFloat frexp(const IEEEFloat &Val, int &Exp, IEEEFloat::roundingMode RM) {
3832 // Quiet signalling nans.
3833 if (Exp == IEEEFloat::IEK_NaN) {
3834 IEEEFloat Quiet(Val);
3839 if (Exp == IEEEFloat::IEK_Inf)
3842 // 1 is added because frexp is defined to return a normalized fraction in
3843 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
3844 Exp = Exp == IEEEFloat::IEK_Zero ? 0 : Exp + 1;
3845 return scalbn(Val, -Exp, RM);
3848 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S)
3850 Floats(new APFloat[2]{APFloat(semIEEEdouble), APFloat(semIEEEdouble)}) {
3851 assert(Semantics == &semPPCDoubleDouble);
3854 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag)
3856 Floats(new APFloat[2]{APFloat(semIEEEdouble, uninitialized),
3857 APFloat(semIEEEdouble, uninitialized)}) {
3858 assert(Semantics == &semPPCDoubleDouble);
3861 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I)
3862 : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I),
3863 APFloat(semIEEEdouble)}) {
3864 assert(Semantics == &semPPCDoubleDouble);
3867 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I)
3869 Floats(new APFloat[2]{
3870 APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])),
3871 APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
3872 assert(Semantics == &semPPCDoubleDouble);
3875 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First,
3878 Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
3879 assert(Semantics == &semPPCDoubleDouble);
3880 assert(&Floats[0].getSemantics() == &semIEEEdouble);
3881 assert(&Floats[1].getSemantics() == &semIEEEdouble);
3884 DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS)
3885 : Semantics(RHS.Semantics),
3886 Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]),
3887 APFloat(RHS.Floats[1])}
3889 assert(Semantics == &semPPCDoubleDouble);
3892 DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS)
3893 : Semantics(RHS.Semantics), Floats(std::move(RHS.Floats)) {
3894 RHS.Semantics = &semBogus;
3895 assert(Semantics == &semPPCDoubleDouble);
3898 DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) {
3899 if (Semantics == RHS.Semantics && RHS.Floats) {
3900 Floats[0] = RHS.Floats[0];
3901 Floats[1] = RHS.Floats[1];
3902 } else if (this != &RHS) {
3903 this->~DoubleAPFloat();
3904 new (this) DoubleAPFloat(RHS);
3909 // Implement addition, subtraction, multiplication and division based on:
3910 // "Software for Doubled-Precision Floating-Point Computations",
3911 // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
3912 APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa,
3913 const APFloat &c, const APFloat &cc,
3917 Status |= z.add(c, RM);
3918 if (!z.isFinite()) {
3919 if (!z.isInfinity()) {
3920 Floats[0] = std::move(z);
3921 Floats[1].makeZero(/* Neg = */ false);
3922 return (opStatus)Status;
3925 auto AComparedToC = a.compareAbsoluteValue(c);
3927 Status |= z.add(aa, RM);
3928 if (AComparedToC == APFloat::cmpGreaterThan) {
3929 // z = cc + aa + c + a;
3930 Status |= z.add(c, RM);
3931 Status |= z.add(a, RM);
3933 // z = cc + aa + a + c;
3934 Status |= z.add(a, RM);
3935 Status |= z.add(c, RM);
3937 if (!z.isFinite()) {
3938 Floats[0] = std::move(z);
3939 Floats[1].makeZero(/* Neg = */ false);
3940 return (opStatus)Status;
3944 Status |= zz.add(cc, RM);
3945 if (AComparedToC == APFloat::cmpGreaterThan) {
3946 // Floats[1] = a - z + c + zz;
3948 Status |= Floats[1].subtract(z, RM);
3949 Status |= Floats[1].add(c, RM);
3950 Status |= Floats[1].add(zz, RM);
3952 // Floats[1] = c - z + a + zz;
3954 Status |= Floats[1].subtract(z, RM);
3955 Status |= Floats[1].add(a, RM);
3956 Status |= Floats[1].add(zz, RM);
3961 Status |= q.subtract(z, RM);
3963 // zz = q + c + (a - (q + z)) + aa + cc;
3964 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
3966 Status |= zz.add(c, RM);
3967 Status |= q.add(z, RM);
3968 Status |= q.subtract(a, RM);
3970 Status |= zz.add(q, RM);
3971 Status |= zz.add(aa, RM);
3972 Status |= zz.add(cc, RM);
3973 if (zz.isZero() && !zz.isNegative()) {
3974 Floats[0] = std::move(z);
3975 Floats[1].makeZero(/* Neg = */ false);
3979 Status |= Floats[0].add(zz, RM);
3980 if (!Floats[0].isFinite()) {
3981 Floats[1].makeZero(/* Neg = */ false);
3982 return (opStatus)Status;
3984 Floats[1] = std::move(z);
3985 Status |= Floats[1].subtract(Floats[0], RM);
3986 Status |= Floats[1].add(zz, RM);
3988 return (opStatus)Status;
3991 APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS,
3992 const DoubleAPFloat &RHS,
3995 if (LHS.getCategory() == fcNaN) {
3999 if (RHS.getCategory() == fcNaN) {
4003 if (LHS.getCategory() == fcZero) {
4007 if (RHS.getCategory() == fcZero) {
4011 if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity &&
4012 LHS.isNegative() != RHS.isNegative()) {
4013 Out.makeNaN(false, Out.isNegative(), nullptr);
4016 if (LHS.getCategory() == fcInfinity) {
4020 if (RHS.getCategory() == fcInfinity) {
4024 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal);
4026 APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]),
4028 assert(&A.getSemantics() == &semIEEEdouble);
4029 assert(&AA.getSemantics() == &semIEEEdouble);
4030 assert(&C.getSemantics() == &semIEEEdouble);
4031 assert(&CC.getSemantics() == &semIEEEdouble);
4032 assert(&Out.Floats[0].getSemantics() == &semIEEEdouble);
4033 assert(&Out.Floats[1].getSemantics() == &semIEEEdouble);
4034 return Out.addImpl(A, AA, C, CC, RM);
4037 APFloat::opStatus DoubleAPFloat::add(const DoubleAPFloat &RHS,
4039 return addWithSpecial(*this, RHS, *this, RM);
4042 APFloat::opStatus DoubleAPFloat::subtract(const DoubleAPFloat &RHS,
4045 auto Ret = add(RHS, RM);
4050 APFloat::opStatus DoubleAPFloat::multiply(const DoubleAPFloat &RHS,
4051 APFloat::roundingMode RM) {
4052 const auto &LHS = *this;
4054 /* Interesting observation: For special categories, finding the lowest
4055 common ancestor of the following layered graph gives the correct
4064 e.g. NaN * NaN = NaN
4066 Normal * Zero = Zero
4069 if (LHS.getCategory() == fcNaN) {
4073 if (RHS.getCategory() == fcNaN) {
4077 if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) ||
4078 (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) {
4079 Out.makeNaN(false, false, nullptr);
4082 if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) {
4086 if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) {
4090 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal &&
4091 "Special cases not handled exhaustively");
4094 APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1];
4097 Status |= T.multiply(C, RM);
4098 if (!T.isFiniteNonZero()) {
4100 Floats[1].makeZero(/* Neg = */ false);
4101 return (opStatus)Status;
4104 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
4107 Status |= Tau.fusedMultiplyAdd(C, T, RM);
4112 Status |= V.multiply(D, RM);
4115 Status |= W.multiply(C, RM);
4116 Status |= V.add(W, RM);
4118 Status |= Tau.add(V, RM);
4122 Status |= U.add(Tau, RM);
4125 if (!U.isFinite()) {
4126 Floats[1].makeZero(/* Neg = */ false);
4128 // Floats[1] = (t - u) + tau
4129 Status |= T.subtract(U, RM);
4130 Status |= T.add(Tau, RM);
4133 return (opStatus)Status;
4136 APFloat::opStatus DoubleAPFloat::divide(const DoubleAPFloat &RHS,
4137 APFloat::roundingMode RM) {
4138 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4139 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4141 Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM);
4142 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4146 APFloat::opStatus DoubleAPFloat::remainder(const DoubleAPFloat &RHS) {
4147 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4148 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4150 Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4151 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4155 APFloat::opStatus DoubleAPFloat::mod(const DoubleAPFloat &RHS) {
4156 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4157 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4158 auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4159 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4164 DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand,
4165 const DoubleAPFloat &Addend,
4166 APFloat::roundingMode RM) {
4167 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4168 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4169 auto Ret = Tmp.fusedMultiplyAdd(
4170 APFloat(semPPCDoubleDoubleLegacy, Multiplicand.bitcastToAPInt()),
4171 APFloat(semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()), RM);
4172 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4176 APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) {
4177 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4178 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4179 auto Ret = Tmp.roundToIntegral(RM);
4180 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4184 void DoubleAPFloat::changeSign() {
4185 Floats[0].changeSign();
4186 Floats[1].changeSign();
4190 DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat &RHS) const {
4191 auto Result = Floats[0].compareAbsoluteValue(RHS.Floats[0]);
4192 if (Result != cmpEqual)
4194 Result = Floats[1].compareAbsoluteValue(RHS.Floats[1]);
4195 if (Result == cmpLessThan || Result == cmpGreaterThan) {
4196 auto Against = Floats[0].isNegative() ^ Floats[1].isNegative();
4197 auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative();
4198 if (Against && !RHSAgainst)
4200 if (!Against && RHSAgainst)
4201 return cmpGreaterThan;
4202 if (!Against && !RHSAgainst)
4204 if (Against && RHSAgainst)
4205 return (cmpResult)(cmpLessThan + cmpGreaterThan - Result);
4210 APFloat::fltCategory DoubleAPFloat::getCategory() const {
4211 return Floats[0].getCategory();
4214 bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); }
4216 void DoubleAPFloat::makeInf(bool Neg) {
4217 Floats[0].makeInf(Neg);
4218 Floats[1].makeZero(/* Neg = */ false);
4221 void DoubleAPFloat::makeZero(bool Neg) {
4222 Floats[0].makeZero(Neg);
4223 Floats[1].makeZero(/* Neg = */ false);
4226 void DoubleAPFloat::makeLargest(bool Neg) {
4227 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4228 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
4229 Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
4234 void DoubleAPFloat::makeSmallest(bool Neg) {
4235 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4236 Floats[0].makeSmallest(Neg);
4237 Floats[1].makeZero(/* Neg = */ false);
4240 void DoubleAPFloat::makeSmallestNormalized(bool Neg) {
4241 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4242 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull));
4244 Floats[0].changeSign();
4245 Floats[1].makeZero(/* Neg = */ false);
4248 void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) {
4249 Floats[0].makeNaN(SNaN, Neg, fill);
4250 Floats[1].makeZero(/* Neg = */ false);
4253 APFloat::cmpResult DoubleAPFloat::compare(const DoubleAPFloat &RHS) const {
4254 auto Result = Floats[0].compare(RHS.Floats[0]);
4255 // |Float[0]| > |Float[1]|
4256 if (Result == APFloat::cmpEqual)
4257 return Floats[1].compare(RHS.Floats[1]);
4261 bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat &RHS) const {
4262 return Floats[0].bitwiseIsEqual(RHS.Floats[0]) &&
4263 Floats[1].bitwiseIsEqual(RHS.Floats[1]);
4266 hash_code hash_value(const DoubleAPFloat &Arg) {
4268 return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1]));
4269 return hash_combine(Arg.Semantics);
4272 APInt DoubleAPFloat::bitcastToAPInt() const {
4273 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4275 Floats[0].bitcastToAPInt().getRawData()[0],
4276 Floats[1].bitcastToAPInt().getRawData()[0],
4278 return APInt(128, 2, Data);
4281 APFloat::opStatus DoubleAPFloat::convertFromString(StringRef S,
4283 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4284 APFloat Tmp(semPPCDoubleDoubleLegacy);
4285 auto Ret = Tmp.convertFromString(S, RM);
4286 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4290 APFloat::opStatus DoubleAPFloat::next(bool nextDown) {
4291 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4292 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4293 auto Ret = Tmp.next(nextDown);
4294 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4299 DoubleAPFloat::convertToInteger(MutableArrayRef<integerPart> Input,
4300 unsigned int Width, bool IsSigned,
4301 roundingMode RM, bool *IsExact) const {
4302 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4303 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
4304 .convertToInteger(Input, Width, IsSigned, RM, IsExact);
4307 APFloat::opStatus DoubleAPFloat::convertFromAPInt(const APInt &Input,
4310 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4311 APFloat Tmp(semPPCDoubleDoubleLegacy);
4312 auto Ret = Tmp.convertFromAPInt(Input, IsSigned, RM);
4313 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4318 DoubleAPFloat::convertFromSignExtendedInteger(const integerPart *Input,
4319 unsigned int InputSize,
4320 bool IsSigned, roundingMode RM) {
4321 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4322 APFloat Tmp(semPPCDoubleDoubleLegacy);
4323 auto Ret = Tmp.convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM);
4324 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4329 DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart *Input,
4330 unsigned int InputSize,
4331 bool IsSigned, roundingMode RM) {
4332 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4333 APFloat Tmp(semPPCDoubleDoubleLegacy);
4334 auto Ret = Tmp.convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM);
4335 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4339 unsigned int DoubleAPFloat::convertToHexString(char *DST,
4340 unsigned int HexDigits,
4342 roundingMode RM) const {
4343 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4344 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
4345 .convertToHexString(DST, HexDigits, UpperCase, RM);
4348 bool DoubleAPFloat::isDenormal() const {
4349 return getCategory() == fcNormal &&
4350 (Floats[0].isDenormal() || Floats[1].isDenormal() ||
4351 // (double)(Hi + Lo) == Hi defines a normal number.
4352 Floats[0].compare(Floats[0] + Floats[1]) != cmpEqual);
4355 bool DoubleAPFloat::isSmallest() const {
4356 if (getCategory() != fcNormal)
4358 DoubleAPFloat Tmp(*this);
4359 Tmp.makeSmallest(this->isNegative());
4360 return Tmp.compare(*this) == cmpEqual;
4363 bool DoubleAPFloat::isLargest() const {
4364 if (getCategory() != fcNormal)
4366 DoubleAPFloat Tmp(*this);
4367 Tmp.makeLargest(this->isNegative());
4368 return Tmp.compare(*this) == cmpEqual;
4371 bool DoubleAPFloat::isInteger() const {
4372 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4373 APFloat Tmp(semPPCDoubleDoubleLegacy);
4374 (void)Tmp.add(Floats[0], rmNearestTiesToEven);
4375 (void)Tmp.add(Floats[1], rmNearestTiesToEven);
4376 return Tmp.isInteger();
4379 void DoubleAPFloat::toString(SmallVectorImpl<char> &Str,
4380 unsigned FormatPrecision,
4381 unsigned FormatMaxPadding,
4382 bool TruncateZero) const {
4383 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4384 APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
4385 .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero);
4388 bool DoubleAPFloat::getExactInverse(APFloat *inv) const {
4389 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4390 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4392 return Tmp.getExactInverse(nullptr);
4393 APFloat Inv(semPPCDoubleDoubleLegacy);
4394 auto Ret = Tmp.getExactInverse(&Inv);
4395 *inv = APFloat(semPPCDoubleDouble, Inv.bitcastToAPInt());
4399 DoubleAPFloat scalbn(DoubleAPFloat Arg, int Exp, APFloat::roundingMode RM) {
4400 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4401 return DoubleAPFloat(semPPCDoubleDouble, scalbn(Arg.Floats[0], Exp, RM),
4402 scalbn(Arg.Floats[1], Exp, RM));
4405 DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
4406 APFloat::roundingMode RM) {
4407 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4408 APFloat First = frexp(Arg.Floats[0], Exp, RM);
4409 APFloat Second = Arg.Floats[1];
4410 if (Arg.getCategory() == APFloat::fcNormal)
4411 Second = scalbn(Second, -Exp, RM);
4412 return DoubleAPFloat(semPPCDoubleDouble, std::move(First), std::move(Second));
4415 } // End detail namespace
4417 APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
4418 if (usesLayout<IEEEFloat>(Semantics)) {
4419 new (&IEEE) IEEEFloat(std::move(F));
4422 if (usesLayout<DoubleAPFloat>(Semantics)) {
4424 DoubleAPFloat(Semantics, APFloat(std::move(F), F.getSemantics()),
4425 APFloat(semIEEEdouble));
4428 llvm_unreachable("Unexpected semantics");
4431 APFloat::opStatus APFloat::convertFromString(StringRef Str, roundingMode RM) {
4432 APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str, RM));
4435 hash_code hash_value(const APFloat &Arg) {
4436 if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics()))
4437 return hash_value(Arg.U.IEEE);
4438 if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics()))
4439 return hash_value(Arg.U.Double);
4440 llvm_unreachable("Unexpected semantics");
4443 APFloat::APFloat(const fltSemantics &Semantics, StringRef S)
4444 : APFloat(Semantics) {
4445 convertFromString(S, rmNearestTiesToEven);
4448 APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics,
4449 roundingMode RM, bool *losesInfo) {
4450 if (&getSemantics() == &ToSemantics)
4452 if (usesLayout<IEEEFloat>(getSemantics()) &&
4453 usesLayout<IEEEFloat>(ToSemantics))
4454 return U.IEEE.convert(ToSemantics, RM, losesInfo);
4455 if (usesLayout<IEEEFloat>(getSemantics()) &&
4456 usesLayout<DoubleAPFloat>(ToSemantics)) {
4457 assert(&ToSemantics == &semPPCDoubleDouble);
4458 auto Ret = U.IEEE.convert(semPPCDoubleDoubleLegacy, RM, losesInfo);
4459 *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt());
4462 if (usesLayout<DoubleAPFloat>(getSemantics()) &&
4463 usesLayout<IEEEFloat>(ToSemantics)) {
4464 auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo);
4465 *this = APFloat(std::move(getIEEE()), ToSemantics);
4468 llvm_unreachable("Unexpected semantics");
4471 APFloat APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE) {
4475 return APFloat(semIEEEhalf, APInt::getAllOnesValue(BitWidth));
4477 return APFloat(semIEEEsingle, APInt::getAllOnesValue(BitWidth));
4479 return APFloat(semIEEEdouble, APInt::getAllOnesValue(BitWidth));
4481 return APFloat(semX87DoubleExtended, APInt::getAllOnesValue(BitWidth));
4483 return APFloat(semIEEEquad, APInt::getAllOnesValue(BitWidth));
4485 llvm_unreachable("Unknown floating bit width");
4488 assert(BitWidth == 128);
4489 return APFloat(semPPCDoubleDouble, APInt::getAllOnesValue(BitWidth));
4493 void APFloat::print(raw_ostream &OS) const {
4494 SmallVector<char, 16> Buffer;
4496 OS << Buffer << "\n";
4499 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4500 LLVM_DUMP_METHOD void APFloat::dump() const { print(dbgs()); }
4503 void APFloat::Profile(FoldingSetNodeID &NID) const {
4504 NID.Add(bitcastToAPInt());
4507 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
4508 an APSInt, whose initial bit-width and signed-ness are used to determine the
4509 precision of the conversion.
4511 APFloat::opStatus APFloat::convertToInteger(APSInt &result,
4512 roundingMode rounding_mode,
4513 bool *isExact) const {
4514 unsigned bitWidth = result.getBitWidth();
4515 SmallVector<uint64_t, 4> parts(result.getNumWords());
4516 opStatus status = convertToInteger(parts, bitWidth, result.isSigned(),
4517 rounding_mode, isExact);
4518 // Keeps the original signed-ness.
4519 result = APInt(bitWidth, parts);
4523 } // End llvm namespace
4525 #undef APFLOAT_DISPATCH_ON_SEMANTICS