1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/APSInt.h"
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/FoldingSet.h"
19 #include "llvm/ADT/Hashing.h"
20 #include "llvm/ADT/StringExtras.h"
21 #include "llvm/ADT/StringRef.h"
22 #include "llvm/Support/Debug.h"
23 #include "llvm/Support/ErrorHandling.h"
24 #include "llvm/Support/MathExtras.h"
25 #include "llvm/Support/raw_ostream.h"
// Expands to a chain of layout checks on getSemantics(): forwards METHOD_CALL
// to U.IEEE or U.Double and returns its result; any other layout reaches
// llvm_unreachable.
29 #define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL) \
31 if (usesLayout<IEEEFloat>(getSemantics())) \
32 return U.IEEE.METHOD_CALL; \
33 if (usesLayout<DoubleAPFloat>(getSemantics())) \
34 return U.Double.METHOD_CALL; \
35 llvm_unreachable("Unexpected semantics"); \
40 // TODO: Remove these and use APInt qualified types directly.
// integerPart aliases the APInt word type and integerPartWidth is the number
// of bits in one such word; all multi-word significand code below is written
// in terms of these two names.
41 typedef APInt::WordType integerPart;
42 const unsigned int integerPartWidth = APInt::APINT_BITS_PER_WORD;
44 /// A macro used to combine two fcCategory enums into one key which can be used
45 /// in a switch statement to classify how the interaction of two APFloat's
46 /// categories affects an operation.
48 /// TODO: If clang source code is ever allowed to use constexpr in its own
49 /// codebase, change this into a static inline function.
// The key is lhs * 4 + rhs with both arguments parenthesized, so distinct
// pairs map to distinct keys only while each category value stays in the
// range 0..3 (TODO confirm against the fcCategory definition).
50 #define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
52 /* Assumed in hexadecimal significand parsing, and conversion to
53 hexadecimal strings. */
// One hexadecimal digit is 4 bits, so a part must hold a whole number of
// hex digits for the nibble-wise code paths to line up with part boundaries.
54 static_assert(integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
57 /* Represents floating point arithmetic semantics. */
// The aggregate initializers of the sem* constants in this file list their
// values in the member order below: maxExponent, minExponent, precision,
// sizeInBits.
59 /* The largest E such that 2^E is representable; this matches the
60 definition of IEEE 754. */
61 APFloatBase::ExponentType maxExponent;
63 /* The smallest E such that 2^E is a normalized number; this
64 matches the definition of IEEE 754. */
65 APFloatBase::ExponentType minExponent;
67 /* Number of bits in the significand. This includes the integer
69 unsigned int precision;
71 /* Number of bits actually used in the semantics. */
72 unsigned int sizeInBits;
// Each initializer is {maxExponent, minExponent, precision, sizeInBits}.
// Precision counts the integer bit, e.g. 24 for single and 53 for double.
75 static const fltSemantics semIEEEhalf = {15, -14, 11, 16};
76 static const fltSemantics semIEEEsingle = {127, -126, 24, 32};
77 static const fltSemantics semIEEEdouble = {1023, -1022, 53, 64};
78 static const fltSemantics semIEEEquad = {16383, -16382, 113, 128};
// x87 extended precision: a full 64-bit significand stored in 80 bits.
79 static const fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
// Placeholder semantics with all fields zero; the move assignment operator in
// this file leaves the moved-from object pointing at it.
80 static const fltSemantics semBogus = {0, 0, 0, 0};
82 /* The IBM double-double semantics. Such a number consists of a pair of IEEE
83 64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal,
84 (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo.
85 Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent
86 to each other, and two 11-bit exponents.
88 Note: we need to make the value different from semBogus as otherwise
89 an unsafe optimization may collapse both values to a single address,
90 and we heavily rely on them having distinct addresses. */
// The field values are placeholders: the leading -1 exists only so that the
// object differs from semBogus, which is all zeroes.
91 static const fltSemantics semPPCDoubleDouble = {-1, 0, 0, 0};
93 /* These are legacy semantics for the fallback, inaccurate implementation of
94 IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the
95 operation. It's equivalent to having an IEEE number with consecutive 106
96 bits of mantissa and 11 bits of exponent.
98 It's not equivalent to IBM double-double. For example, a legit IBM
99 double-double, 1 + epsilon:
101 1 + epsilon = 1 + (1 >> 1076)
103 is not representable by a consecutive 106 bits of mantissa.
105 Currently, these semantics are used in the following way:
107 semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) ->
108 (64-bit APInt, 64-bit APInt) -> (128-bit APInt) ->
109 semPPCDoubleDoubleLegacy -> IEEE operations
111 We use bitcastToAPInt() to get the bit representation (in APInt) of the
112 underlying IEEEdouble, then use the APInt constructor to construct the
115 TODO: Implement all operations in semPPCDoubleDouble, and delete these
// The visible part of the initializer sets maxExponent to 1023 and raises
// minExponent by 53 relative to IEEE double (-1022 + 53).
117 static const fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53,
// Public accessors for the file-local semantics objects. Each returns a const
// reference, so callers can identify a format by the address of the object.
120 const fltSemantics &APFloatBase::IEEEhalf() {
123 const fltSemantics &APFloatBase::IEEEsingle() {
124 return semIEEEsingle;
126 const fltSemantics &APFloatBase::IEEEdouble() {
127 return semIEEEdouble;
129 const fltSemantics &APFloatBase::IEEEquad() {
132 const fltSemantics &APFloatBase::x87DoubleExtended() {
133 return semX87DoubleExtended;
135 const fltSemantics &APFloatBase::Bogus() {
138 const fltSemantics &APFloatBase::PPCDoubleDouble() {
139 return semPPCDoubleDouble;
142 /* A tight upper bound on number of parts required to hold the value
145 power * 815 / (351 * integerPartWidth) + 1
147 However, whilst the result may require only this many parts,
148 because we are multiplying two values to get it, the
149 multiplication may require an extra part with the excess part
150 being zero (consider the trivial case of 1 * 1, tcFullMultiply
151 requires two parts to hold the single-part result). So we add an
152 extra one to guarantee enough space whilst multiplying. */
// maxExponent and maxPrecision equal the largest values among the semantics
// constants defined above (16383 and 113, both from semIEEEquad).
153 const unsigned int maxExponent = 16383;
154 const unsigned int maxPrecision = 113;
155 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
// The leading 2 is the +1 of the bound plus the extra multiplication part
// described in the comment above.
156 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815)
157 / (351 * integerPartWidth));
// Read-only accessors that expose individual fltSemantics fields.
159 unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {
160 return semantics.precision;
162 APFloatBase::ExponentType
163 APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) {
164 return semantics.maxExponent;
166 APFloatBase::ExponentType
167 APFloatBase::semanticsMinExponent(const fltSemantics &semantics) {
168 return semantics.minExponent;
170 unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {
171 return semantics.sizeInBits;
// getSizeInBits returns the same field as semanticsSizeInBits above.
174 unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) {
175 return Sem.sizeInBits;
178 /* A bunch of private, handy routines. */
// Number of integerPart words needed to hold BITS bits, rounding up.
180 static inline unsigned int
181 partCountForBits(unsigned int bits)
183 return ((bits) + integerPartWidth - 1) / integerPartWidth;
186 /* Returns 0U-9U. Return values >= 10U are not digits. */
// Callers in this file compare the result against 10U to reject characters
// that are not decimal digits.
187 static inline unsigned int
188 decDigitValue(unsigned int c)
// Parses an optionally signed decimal exponent from [begin, end). At least
// one digit is required and every character must be a decimal digit; both
// conditions are enforced with asserts only. Once the magnitude reaches
// overlargeExponent (24000) it is pinned to that value and p is moved to end.
// The returned int carries the parsed sign.
193 /* Return the value of a decimal exponent of the form
196 If the exponent overflows, returns a large exponent with the
199 readExponent(StringRef::iterator begin, StringRef::iterator end)
202 unsigned int absExponent;
203 const unsigned int overlargeExponent = 24000; /* FIXME. */
204 StringRef::iterator p = begin;
206 assert(p != end && "Exponent has no digits");
208 isNegative = (*p == '-');
209 if (*p == '-' || *p == '+') {
211 assert(p != end && "Exponent has no digits");
214 absExponent = decDigitValue(*p++);
215 assert(absExponent < 10U && "Invalid character in exponent");
217 for (; p != end; ++p) {
220 value = decDigitValue(*p);
221 assert(value < 10U && "Invalid character in exponent");
// Accumulate in a temporary so the saturation test below still sees the
// magnitude from before this digit was appended.
223 value += absExponent * 10;
224 if (absExponent >= overlargeExponent) {
225 absExponent = overlargeExponent;
226 p = end; /* outwit assert below */
232 assert(p == end && "Invalid exponent in exponent");
235 return -(int) absExponent;
237 return (int) absExponent;
240 /* This is ugly and needs cleaning up, but I don't immediately see
241 how whilst remaining safe. */
// Parses an optionally signed decimal exponent from [p, end) and adds
// exponentAdjustment to it. The parsed magnitude, the adjustment and the sum
// are each checked against the 16-bit range (-32768 .. 32767).
// NOTE(review): the final clamp to -32768 or 32767 is presumably taken only
// when one of those checks flagged overflow - confirm against the full body.
243 totalExponent(StringRef::iterator p, StringRef::iterator end,
244 int exponentAdjustment)
246 int unsignedExponent;
247 bool negative, overflow;
250 assert(p != end && "Exponent has no digits");
252 negative = *p == '-';
253 if (*p == '-' || *p == '+') {
255 assert(p != end && "Exponent has no digits");
258 unsignedExponent = 0;
260 for (; p != end; ++p) {
263 value = decDigitValue(*p);
264 assert(value < 10U && "Invalid character in exponent");
266 unsignedExponent = unsignedExponent * 10 + value;
267 if (unsignedExponent > 32767) {
273 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
277 exponent = unsignedExponent;
279 exponent = -exponent;
280 exponent += exponentAdjustment;
281 if (exponent > 32767 || exponent < -32768)
// The saturated value follows the sign of the parsed exponent, not the sign
// of the adjusted sum.
286 exponent = negative ? -32768: 32767;
// Scans forward from BEGIN over leading zero digits, then over a decimal
// point if one follows, then over any further zero digits.
// NOTE(review): the position of the point is presumably stored through the
// DOT out-parameter and the final scan position returned - confirm.
291 static StringRef::iterator
292 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
293 StringRef::iterator *dot)
295 StringRef::iterator p = begin;
297 while (p != end && *p == '0')
300 if (p != end && *p == '.') {
// A string consisting of a lone decimal point has no digits at all.
303 assert(end - begin != 1 && "Significand has no digits");
305 while (p != end && *p == '0')
312 /* Given a normal decimal floating point number of the form
316 where the decimal point and exponent are optional, fill out the
317 structure D. Exponent is appropriate if the significand is
318 treated as an integer, and normalizedExponent if the significand
319 is taken to have the decimal point after a single leading
322 If the value is zero, D->firstSigDigit points to a non-digit, and
323 the return exponent is zero.
// firstSigDigit and lastSigDigit are pointers into the parsed string;
// interpretDecimal assigns them from its scan position.
326 const char *firstSigDigit;
327 const char *lastSigDigit;
329 int normalizedExponent;
// Fills *D from the decimal string [begin, end): skips leading zeroes and a
// decimal point, scans the significand digits, reads an optional exponent
// introduced by e or E, drops trailing zeroes, and finally corrects both
// exponents for the position of the decimal point. Malformed input is caught
// by asserts only.
333 interpretDecimal(StringRef::iterator begin, StringRef::iterator end,
336 StringRef::iterator dot = end;
337 StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot);
339 D->firstSigDigit = p;
341 D->normalizedExponent = 0;
343 for (; p != end; ++p) {
345 assert(dot == end && "String contains multiple dots");
350 if (decDigitValue(*p) >= 10U)
355 assert((*p == 'e' || *p == 'E') && "Invalid character in significand");
356 assert(p != begin && "Significand has no digits");
357 assert((dot == end || p - begin != 1) && "Significand has no digits");
359 /* p points to the first non-digit in the string */
360 D->exponent = readExponent(p + 1, end);
362 /* Implied decimal point? */
367 /* If number is all zeroes accept any exponent. */
368 if (p != D->firstSigDigit) {
369 /* Drop insignificant trailing zeroes. */
374 while (p != begin && *p == '0');
375 while (p != begin && *p == '.');
378 /* Adjust the exponents for any decimal point. */
// (dot - p) counts the characters between the last significant digit and the
// point; the boolean term removes the point itself from that count.
379 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
380 D->normalizedExponent = (D->exponent +
381 static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
382 - (dot > D->firstSigDigit && dot < p)));
// Classifies the discarded tail of a hexadecimal significand as a
// lostFraction. A first digit of 1 to 7 is less than half; the comment below
// implies any digit other than 0 or 8 is decided immediately. For 0 or 8 the
// remaining characters are scanned for the first character that is neither a
// zero digit nor a decimal point.
388 /* Return the trailing fraction of a hexadecimal number.
389 DIGITVALUE is the first hex digit of the fraction, P points to
392 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
393 unsigned int digitValue)
395 unsigned int hexDigit;
397 /* If the first trailing digit isn't 0 or 8 we can work out the
398 fraction immediately. */
400 return lfMoreThanHalf;
401 else if (digitValue < 8 && digitValue > 0)
402 return lfLessThanHalf;
404 // Otherwise we need to find the first non-zero digit.
405 while (p != end && (*p == '0' || *p == '.'))
408 assert(p != end && "Invalid trailing hexadecimal fraction!");
410 hexDigit = hexDigitValue(*p);
412 /* If we ran off the end it is exactly zero or one-half, otherwise
415 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
417 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
420 /* Return the fraction lost were a bignum truncated losing the least
421 significant BITS bits. */
// The classification is driven by the index of the lowest set bit (tcLSB)
// and by the value of bit BITS - 1, the most significant bit being dropped.
423 lostFractionThroughTruncation(const integerPart *parts,
424 unsigned int partCount,
429 lsb = APInt::tcLSB(parts, partCount);
431 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
433 return lfExactlyZero;
435 return lfExactlyHalf;
// The range check keeps tcExtractBit from reading beyond the supplied parts
// when BITS exceeds the width of the bignum.
436 if (bits <= partCount * integerPartWidth &&
437 APInt::tcExtractBit(parts, bits - 1))
438 return lfMoreThanHalf;
440 return lfLessThanHalf;
443 /* Shift DST right BITS bits noting lost fraction. */
// The lost fraction has to be computed before the shift, because the shift
// discards the low bits it is derived from.
445 shiftRight(integerPart *dst, unsigned int parts, unsigned int bits)
447 lostFraction lost_fraction;
449 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
451 APInt::tcShiftRight(dst, parts, bits);
453 return lost_fraction;
456 /* Combine the effect of two lost fractions. */
// A non-zero less significant loss nudges an exact zero up to less-than-half
// and an exact half up to more-than-half; every other value of
// moreSignificant is returned unchanged.
458 combineLostFractions(lostFraction moreSignificant,
459 lostFraction lessSignificant)
461 if (lessSignificant != lfExactlyZero) {
462 if (moreSignificant == lfExactlyZero)
463 moreSignificant = lfLessThanHalf;
464 else if (moreSignificant == lfExactlyHalf)
465 moreSignificant = lfMoreThanHalf;
468 return moreSignificant;
471 /* The error from the true value, in half-ulps, on multiplying two
472 floating point numbers, which differ from the value they
473 approximate by at most HUERR1 and HUERR2 half-ulps, is strictly less
474 than the returned value.
476 See "How to Read Floating Point Numbers Accurately" by William D
// With exact inputs the bound is 2 * inexactMultiply; otherwise it is
// inexactMultiply + 2 * (HUerr1 + HUerr2). inexactMultiply is a bool that is
// used arithmetically as 0 or 1.
479 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
// Precondition: at least one error is below 2, or the errors sum to under 8.
481 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
483 if (HUerr1 + HUerr2 == 0)
484 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
486 return inexactMultiply + 2 * (HUerr1 + HUerr2);
// Isolates the low BITS + 1 bits of the word that holds bit BITS and measures
// their distance from a boundary value. Several branches return an all-ones
// integerPart as a stand-in for a very large distance.
489 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
490 when the least significant BITS are truncated. BITS cannot be
493 ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest)
495 unsigned int count, partBits;
496 integerPart part, boundary;
// count indexes the word containing bit BITS; partBits is how many low bits
// of that word are kept by the mask below.
501 count = bits / integerPartWidth;
502 partBits = bits % integerPartWidth + 1;
504 part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits));
507 boundary = (integerPart) 1 << (partBits - 1);
// Unsigned wraparound makes the smaller of the two differences the true
// distance between part and boundary.
512 if (part - boundary <= boundary - part)
513 return part - boundary;
515 return boundary - part;
518 if (part == boundary) {
521 return ~(integerPart) 0; /* A lot. */
524 } else if (part == boundary - 1) {
527 return ~(integerPart) 0; /* A lot. */
532 return ~(integerPart) 0; /* A lot. */
535 /* Place pow(5, power) in DST, and return the number of parts used.
536 DST must be at least one part larger than size of the answer. */
// The low three bits of POWER select a seed from a small table; the remaining
// bits are consumed one at a time in the loop, which squares cached powers of
// five on demand and multiplies them into the running product. p1 and p2 are
// the two buffers that the running product alternates between.
538 powerOf5(integerPart *dst, unsigned int power)
540 static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125,
// pow5s caches successive squarings laid out back to back; its first entry is
// 78125 * 5, which is 5 to the power 8.
542 integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
543 pow5s[0] = 78125 * 5;
545 unsigned int partsCount[16] = { 1 };
546 integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
548 assert(power <= maxExponent);
553 *p1 = firstEightPowers[power & 7];
559 for (unsigned int n = 0; power; power >>= 1, n++) {
564 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
566 pc = partsCount[n - 1];
// Square the previous cached power, which sits pc parts before pow5.
567 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
// tcFullMultiply writes the full double-width product; the test below checks
// whether its top part is zero.
569 if (pow5[pc - 1] == 0)
577 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
579 if (p2[result - 1] == 0)
582 /* Now result is in p1 with partsCount parts and p2 is scratch
593 APInt::tcAssign(dst, p1, result);
598 /* Zero at the end to avoid modular arithmetic when adding one; used
599 when rounding up during hexadecimal output. */
// Both digit tables therefore hold 17 characters: index 16 maps back to the
// zero digit.
600 static const char hexDigitsLower[] = "0123456789abcdef0";
601 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
// Lower and upper case spellings of the special value names.
602 static const char infinityL[] = "infinity";
603 static const char infinityU[] = "INFINITY";
604 static const char NaNL[] = "nan";
605 static const char NaNU[] = "NAN";
// Emits the COUNT most significant nibbles of PART into DST using the digit
// table HEXDIGITCHARS. COUNT must be between 1 and the number of nibbles in
// one part.
607 /* Write out an integerPart in hexadecimal, starting with the most
608 significant nibble. Write out exactly COUNT hexdigits, return
611 partAsHex (char *dst, integerPart part, unsigned int count,
612 const char *hexDigitChars)
614 unsigned int result = count;
616 assert(count != 0 && count <= integerPartWidth / 4);
// Discard the nibbles that are not wanted so the digits to print sit in the
// low bits of part.
618 part >>= (integerPartWidth - 4 * count);
620 dst[count] = hexDigitChars[part & 0xf];
627 /* Write out an unsigned decimal integer. */
// writeSignedDecimal below assigns the return value back to its own dst
// pointer, so the result is presumably the position just past the last
// character written - TODO confirm against the function body.
629 writeUnsignedDecimal (char *dst, unsigned int n)
645 /* Write out a signed decimal integer. */
// Delegates the digits to writeUnsignedDecimal and keeps the pointer it
// returns.
647 writeSignedDecimal (char *dst, int value)
// Negating after the cast to unsigned yields the magnitude without signed
// overflow, even for the most negative int.
651 dst = writeUnsignedDecimal(dst, -(unsigned) value);
653 dst = writeUnsignedDecimal(dst, value);
// Binds this object to OURSEMANTICS and sets up storage for the significand.
// NOTE(review): the heap allocation is presumably taken only when more than
// one part is needed, with the inline significand.part used otherwise (see
// significandParts) - confirm.
660 void IEEEFloat::initialize(const fltSemantics *ourSemantics) {
663 semantics = ourSemantics;
666 significand.parts = new integerPart[count];
// Releases the heap array that initialize allocates for the significand.
// NOTE(review): presumably guarded so that the inline single-part storage is
// never passed to delete[] - confirm.
669 void IEEEFloat::freeSignificand() {
671 delete [] significand.parts;
// Copies the value of RHS into this object. Both objects must already share
// the same semantics. The significand is copied only for finite non-zero
// values and NaNs.
674 void IEEEFloat::assign(const IEEEFloat &rhs) {
675 assert(semantics == rhs.semantics);
678 category = rhs.category;
679 exponent = rhs.exponent;
680 if (isFiniteNonZero() || category == fcNaN)
681 copySignificand(rhs);
// Copies significand words from RHS into this object. This object must be a
// finite non-zero value or a NaN, and RHS must have at least as many parts as
// this object.
684 void IEEEFloat::copySignificand(const IEEEFloat &rhs) {
685 assert(isFiniteNonZero() || category == fcNaN);
686 assert(rhs.partCount() >= partCount());
688 APInt::tcAssign(significandParts(), rhs.significandParts(),
692 /* Make this number a NaN. The significand payload is taken from FILL
693 when one is supplied and is zeroed otherwise. The quiet bit is cleared
694 to form a signalling NaN or set to form a quiet NaN. */
695 void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
699 integerPart *significand = significandParts();
700 unsigned numParts = partCount();
702 // Set the significand bits to the fill.
// Zero first when there is no fill or the fill is too short to cover every
// part, so no word is left uninitialized.
703 if (!fill || fill->getNumWords() < numParts)
704 APInt::tcSet(significand, 0, numParts);
706 APInt::tcAssign(significand, fill->getRawData(),
707 std::min(fill->getNumWords(), numParts));
709 // Zero out the excess bits of the significand.
// NOTE(review): the word size is hard-coded as 64 here rather than taken from
// integerPartWidth; the two agree only while APInt words are 64 bits wide.
710 unsigned bitsToPreserve = semantics->precision - 1;
711 unsigned part = bitsToPreserve / 64;
712 bitsToPreserve %= 64;
713 significand[part] &= ((1ULL << bitsToPreserve) - 1);
714 for (part++; part != numParts; ++part)
715 significand[part] = 0;
// The quiet bit is the bit just below the integer bit.
718 unsigned QNaNBit = semantics->precision - 2;
721 // We always have to clear the QNaN bit to make it an SNaN.
722 APInt::tcClearBit(significand, QNaNBit);
724 // If there are no bits set in the payload, we have to set
725 // *something* to make it a NaN instead of an infinity;
726 // conventionally, this is the next bit down from the QNaN bit.
727 if (APInt::tcIsZero(significand, numParts))
728 APInt::tcSetBit(significand, QNaNBit - 1);
730 // We always have to set the QNaN bit to make it a QNaN.
731 APInt::tcSetBit(significand, QNaNBit);
734 // For x87 extended precision, we want to make a NaN, not a
735 // pseudo-NaN. Maybe we should expose the ability to make
// QNaNBit + 1 is precision - 1, the explicit integer bit of the x87 format.
737 if (semantics == &semX87DoubleExtended)
738 APInt::tcSetBit(significand, QNaNBit + 1);
// Copy assignment. When the semantics differ, this object is re-initialized
// for the semantics of RHS so that its significand storage has the right
// size.
741 IEEEFloat &IEEEFloat::operator=(const IEEEFloat &rhs) {
743 if (semantics != rhs.semantics) {
745 initialize(rhs.semantics);
// Move assignment. Takes over the semantics, significand, exponent and
// category of RHS by plain member copy, then points RHS at semBogus.
// NOTE(review): resetting rhs.semantics presumably keeps the destructor of
// RHS from releasing the significand storage that was just taken - confirm.
753 IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) {
756 semantics = rhs.semantics;
757 significand = rhs.significand;
758 exponent = rhs.exponent;
759 category = rhs.category;
762 rhs.semantics = &semBogus;
// True for a finite non-zero value whose exponent equals minExponent and
// whose integer bit (bit precision - 1 of the significand) is clear.
766 bool IEEEFloat::isDenormal() const {
767 return isFiniteNonZero() && (exponent == semantics->minExponent) &&
768 (APInt::tcExtractBit(significandParts(),
769 semantics->precision - 1) == 0);
// True when this value has the smallest non-zero magnitude of its format.
772 bool IEEEFloat::isSmallest() const {
773 // The smallest number by magnitude in our format will be the smallest
774 // denormal, i.e. the floating point number with exponent being minimum
775 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
776 return isFiniteNonZero() && exponent == semantics->minExponent &&
777 significandMSB() == 0;
780 bool IEEEFloat::isSignificandAllOnes() const {
781 // Test if the significand excluding the integral bit is all ones. This allows
782 // us to test for binade boundaries.
783 const integerPart *Parts = significandParts();
784 const unsigned PartCount = partCount();
785 for (unsigned i = 0; i < PartCount - 1; i++)
789 // Set the unused high bits to all ones when we compare.
790 const unsigned NumHighBits =
791 PartCount*integerPartWidth - semantics->precision + 1;
792 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to "
793 "fill than integerPartWidth");
794 const integerPart HighBitFill =
795 ~integerPart(0) << (integerPartWidth - NumHighBits);
796 if (~(Parts[PartCount - 1] | HighBitFill))
802 bool IEEEFloat::isSignificandAllZeros() const {
803 // Test if the significand excluding the integral bit is all zeros. This
804 // allows us to test for binade boundaries.
805 const integerPart *Parts = significandParts();
806 const unsigned PartCount = partCount();
808 for (unsigned i = 0; i < PartCount - 1; i++)
812 const unsigned NumHighBits =
813 PartCount*integerPartWidth - semantics->precision + 1;
814 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to "
815 "clear than integerPartWidth");
816 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
818 if (Parts[PartCount - 1] & HighBitMask)
// True when this value has the largest finite magnitude of its format.
824 bool IEEEFloat::isLargest() const {
825 // The largest number by magnitude in our format will be the floating point
826 // number with maximum exponent and with significand that is all ones.
827 return isFiniteNonZero() && exponent == semantics->maxExponent
828 && isSignificandAllOnes();
// True when the value is finite and unchanged by rounding toward zero to an
// integral value. The check works on a copy, so this object is not modified.
831 bool IEEEFloat::isInteger() const {
832 // This could be made more efficient; I'm going for obviously correct.
833 if (!isFinite()) return false;
834 IEEEFloat truncated = *this;
835 truncated.roundToIntegral(rmTowardZero);
836 return compare(truncated) == cmpEqual;
// Representation-level equality: the semantics and category must match. The
// exponent is compared only for finite non-zero values, and the significand
// words are compared for every category other than zero and infinity.
839 bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const {
842 if (semantics != rhs.semantics ||
843 category != rhs.category ||
846 if (category==fcZero || category==fcInfinity)
849 if (isFiniteNonZero() && exponent != rhs.exponent)
852 return std::equal(significandParts(), significandParts() + partCount(),
853 rhs.significandParts());
// Constructs the value VALUE in the given semantics. The integer is placed in
// the lowest significand word with the exponent set to precision - 1, and
// normalize then brings the representation into canonical form.
856 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) {
857 initialize(&ourSemantics);
861 exponent = ourSemantics.precision - 1;
862 significandParts()[0] = value;
863 normalize(rmNearestTiesToEven, lfExactlyZero);
// Constructs a value in the given semantics; storage is set up by initialize.
866 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) {
867 initialize(&ourSemantics);
872 // Delegate to the previous constructor, because later copy constructor may
873 // actually inspects category, which can't be garbage.
// The tag argument is unused; it only selects this overload.
874 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, uninitializedTag tag)
875 : IEEEFloat(ourSemantics) {}
// Copy constructor: sets up storage for the semantics of RHS first.
877 IEEEFloat::IEEEFloat(const IEEEFloat &rhs) {
878 initialize(rhs.semantics);
// Move constructor: starts from semBogus and then reuses move assignment.
882 IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) {
883 *this = std::move(rhs);
// Destructor: releases the significand storage through freeSignificand.
886 IEEEFloat::~IEEEFloat() { freeSignificand(); }
// Number of integerPart words in the significand storage. The count is sized
// for precision + 1 bits, one more bit than the precision itself.
888 unsigned int IEEEFloat::partCount() const {
889 return partCountForBits(semantics->precision + 1);
// Const overload: forwards to the non-const overload below via const_cast.
892 const integerPart *IEEEFloat::significandParts() const {
893 return const_cast<IEEEFloat *>(this)->significandParts();
// Returns a pointer to the least significant significand word: either the
// heap array (significand.parts) or the inline word (significand.part).
896 integerPart *IEEEFloat::significandParts() {
898 return significand.parts;
900 return &significand.part;
// Sets every word of the significand to zero.
903 void IEEEFloat::zeroSignificand() {
904 APInt::tcSet(significandParts(), 0, partCount());
907 /* Increment an fcNormal floating point number's significand. */
// Adds one to the multi-word significand and captures the carry out of the
// top word so it can be checked.
908 void IEEEFloat::incrementSignificand() {
911 carry = APInt::tcIncrement(significandParts(), partCount());
913 /* Our callers should never cause us to overflow. */
918 /* Add the significand of the RHS. Returns the carry flag. */
// Both operands must share semantics and exponent, so that the significands
// are aligned before the word-wise addition.
919 integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) {
922 parts = significandParts();
924 assert(semantics == rhs.semantics);
925 assert(exponent == rhs.exponent);
927 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
// Both operands must share semantics and exponent, so that the significands
// are aligned before the word-wise subtraction. BORROW is passed straight
// through to tcSubtract as the incoming borrow.
930 /* Subtract the significand of the RHS with a borrow flag. Returns
932 integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
933 integerPart borrow) {
936 parts = significandParts();
938 assert(semantics == rhs.semantics);
939 assert(exponent == rhs.exponent);
941 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
// Outline: form the full double-width product in fullSignificand, optionally
// add ADDEND to it at extended precision (the fused multiply-add path), then
// shift the result back down to PRECISION bits while accumulating the lost
// fraction. The result is not necessarily normalized on return.
945 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
946 on to the full-precision result of the multiplication. Returns the
948 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
949 const IEEEFloat *addend) {
950 unsigned int omsb; // One, not zero, based MSB.
951 unsigned int partsCount, newPartsCount, precision;
952 integerPart *lhsSignificand;
953 integerPart scratch[4];
954 integerPart *fullSignificand;
955 lostFraction lost_fraction;
958 assert(semantics == rhs.semantics);
960 precision = semantics->precision;
962 // Allocate space for twice as many bits as the original significand, plus one
963 // extra bit for the addition to overflow into.
964 newPartsCount = partCountForBits(precision * 2 + 1);
// Products of up to four parts use the stack buffer; larger ones go on the
// heap and are released at the end of the function.
966 if (newPartsCount > 4)
967 fullSignificand = new integerPart[newPartsCount];
969 fullSignificand = scratch;
971 lhsSignificand = significandParts();
972 partsCount = partCount();
974 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
975 rhs.significandParts(), partsCount, partsCount);
977 lost_fraction = lfExactlyZero;
978 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
979 exponent += rhs.exponent;
981 // Assume the operands involved in the multiplication are single-precision
982 // FP, and the two multiplicands are:
983 // *this = a23 . a22 ... a0 * 2^e1
984 // rhs = b23 . b22 ... b0 * 2^e2
985 // the result of multiplication is:
986 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
987 // Note that there are three significant bits at the left-hand side of the
988 // radix point: two for the multiplication, and an overflow bit for the
989 // addition (that will always be zero at this point). Move the radix point
990 // toward left by two bits, and adjust exponent accordingly.
993 if (addend && addend->isNonZero()) {
994 // The intermediate result of the multiplication has "2 * precision"
995 // significant bits; adjust the addend to be consistent with mul result.
// The significand and semantics of this object are swapped out temporarily so
// that the ordinary add path can operate on the wide product; both are
// restored once the addition is done.
997 Significand savedSignificand = significand;
998 const fltSemantics *savedSemantics = semantics;
999 fltSemantics extendedSemantics;
1001 unsigned int extendedPrecision;
1003 // Normalize our MSB to one below the top bit to allow for overflow.
1004 extendedPrecision = 2 * precision + 1;
1005 if (omsb != extendedPrecision - 1) {
1006 assert(extendedPrecision > omsb);
1007 APInt::tcShiftLeft(fullSignificand, newPartsCount,
1008 (extendedPrecision - 1) - omsb);
1009 exponent -= (extendedPrecision - 1) - omsb;
1012 /* Create new semantics. */
1013 extendedSemantics = *semantics;
1014 extendedSemantics.precision = extendedPrecision;
1016 if (newPartsCount == 1)
1017 significand.part = fullSignificand[0];
1019 significand.parts = fullSignificand;
1020 semantics = &extendedSemantics;
1022 IEEEFloat extendedAddend(*addend);
1023 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
1024 assert(status == opOK);
1027 // Shift the significand of the addend right by one bit. This guarantees
1028 // that the high bit of the significand is zero (same as fullSignificand),
1029 // so the addition will overflow (if it does overflow at all) into the top bit.
1030 lost_fraction = extendedAddend.shiftSignificandRight(1);
1031 assert(lost_fraction == lfExactlyZero &&
1032 "Lost precision while shifting addend for fused-multiply-add.");
1034 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
1036 /* Restore our state. */
// In the single-part case the sum lives in the inline word, so it is copied
// back into fullSignificand before the saved significand is restored.
1037 if (newPartsCount == 1)
1038 fullSignificand[0] = significand.part;
1039 significand = savedSignificand;
1040 semantics = savedSemantics;
1042 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1045 // Convert the result having "2 * precision" significant-bits back to the one
1046 // having "precision" significant-bits. First, move the radix point from
1047 // position "2*precision - 1" to "precision - 1". The exponent needs to be
1048 // adjusted by "2*precision - 1" - "precision - 1" = "precision".
// NOTE(review): the code subtracts precision + 1 although the derivation above
// arrives at precision; the extra one presumably pairs with the radix point
// shift described earlier in this function - confirm.
1049 exponent -= precision + 1;
1051 // In case MSB resides at the left-hand side of radix point, shift the
1052 // mantissa right by some amount to make sure the MSB reside right before
1053 // the radix point (i.e. "MSB . rest-significant-bits").
1055 // Note that the result is not normalized when "omsb < precision". So, the
1056 // caller needs to call IEEEFloat::normalize() if normalized value is
1058 if (omsb > precision) {
1059 unsigned int bits, significantParts;
1062 bits = omsb - precision;
1063 significantParts = partCountForBits(omsb);
1064 lf = shiftRight(fullSignificand, significantParts, bits);
// The fraction lost by this shift is more significant than any fraction lost
// during the addition above.
1065 lost_fraction = combineLostFractions(lf, lost_fraction);
1069 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
1071 if (newPartsCount > 4)
1072 delete [] fullSignificand;
1074 return lost_fraction;
1077 /* Divide this significand by the significand of RHS, leaving the quotient in place. Returns the lost fraction. */
// Outline: copy both significands into scratch space, normalize each so its
// top bit sits at position precision - 1, run a bit-at-a-time restoring long
// division that sets quotient bits in the significand of this object, and
// classify the remainder against the divisor to obtain the lost fraction.
1078 lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
1079 unsigned int bit, i, partsCount;
1080 const integerPart *rhsSignificand;
1081 integerPart *lhsSignificand, *dividend, *divisor;
1082 integerPart scratch[4];
1083 lostFraction lost_fraction;
1085 assert(semantics == rhs.semantics);
1087 lhsSignificand = significandParts();
1088 rhsSignificand = rhs.significandParts();
1089 partsCount = partCount();
// One allocation holds both working copies: the dividend first, then the
// divisor immediately after it.
1092 dividend = new integerPart[partsCount * 2];
1096 divisor = dividend + partsCount;
1098 /* Copy the dividend and divisor as they will be modified in-place. */
1099 for (i = 0; i < partsCount; i++) {
1100 dividend[i] = lhsSignificand[i];
1101 divisor[i] = rhsSignificand[i];
1102 lhsSignificand[i] = 0;
1105 exponent -= rhs.exponent;
1107 unsigned int precision = semantics->precision;
1109 /* Normalize the divisor. */
1110 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1113 APInt::tcShiftLeft(divisor, partsCount, bit);
1116 /* Normalize the dividend. */
1117 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1120 APInt::tcShiftLeft(dividend, partsCount, bit);
1123 /* Ensure the dividend >= divisor initially for the loop below.
1124 Incidentally, this means that the division loop below is
1125 guaranteed to set the integer bit to one. */
1126 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1128 APInt::tcShiftLeft(dividend, partsCount, 1);
1129 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1132 /* Long division. */
// Produces one quotient bit per iteration, from the most significant bit down.
1133 for (bit = precision; bit; bit -= 1) {
1134 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1135 APInt::tcSubtract(dividend, divisor, 0, partsCount);
1136 APInt::tcSetBit(lhsSignificand, bit - 1);
1139 APInt::tcShiftLeft(dividend, partsCount, 1);
1142 /* Figure out the lost fraction. */
// The remainder has already been shifted left once more by the loop, so
// comparing it with the divisor tells whether it exceeds, equals or falls
// short of one half.
1143 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1146 lost_fraction = lfMoreThanHalf;
1148 lost_fraction = lfExactlyHalf;
1149 else if (APInt::tcIsZero(dividend, partsCount))
1150 lost_fraction = lfExactlyZero;
1152 lost_fraction = lfLessThanHalf;
1157 return lost_fraction;
// Index of the most significant set bit of the significand, as reported by
// APInt::tcMSB over all parts.
1160 unsigned int IEEEFloat::significandMSB() const {
1161 return APInt::tcMSB(significandParts(), partCount());
// Index of the least significant set bit of the significand, as reported by
// APInt::tcLSB over all parts.
1164 unsigned int IEEEFloat::significandLSB() const {
1165 return APInt::tcLSB(significandParts(), partCount());
1168 /* Note that a zero result is NOT normalized to fcZero. */
// Shifts the significand right by BITS and returns the fraction that was
// shifted out.
// NOTE(review): the assert implies the exponent is raised by BITS to keep the
// value unchanged - confirm against the full body.
1169 lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
1170 /* Our exponent should not overflow. */
1171 assert((ExponentType) (exponent + bits) >= exponent);
1175 return shiftRight(significandParts(), partCount(), bits);
1178 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
// BITS must be smaller than the precision, and the significand must still be
// non-zero after the shift.
1179 void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
1180 assert(bits < semantics->precision);
1183 unsigned int partsCount = partCount();
1185 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1188 assert(!APInt::tcIsZero(significandParts(), partsCount));
// Compares the magnitudes of two finite non-zero values with identical
// semantics: the exponents decide first, and the significands break a tie.
1192 IEEEFloat::cmpResult
1193 IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const {
1196 assert(semantics == rhs.semantics);
1197 assert(isFiniteNonZero());
1198 assert(rhs.isFiniteNonZero());
1200 compare = exponent - rhs.exponent;
1202 /* If exponents are equal, do an unsigned bignum comparison of the
1205 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1209 return cmpGreaterThan;
1210 else if (compare < 0)
1216 /* Handle overflow. Sign is preserved. We either become infinity or
1217 the largest finite number. */
// Infinity is chosen for both round-to-nearest modes and for the directed
// mode that rounds away from zero for the current sign; every other mode
// saturates at the largest finite magnitude.
1218 IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
1220 if (rounding_mode == rmNearestTiesToEven ||
1221 rounding_mode == rmNearestTiesToAway ||
1222 (rounding_mode == rmTowardPositive && !sign) ||
1223 (rounding_mode == rmTowardNegative && sign)) {
1224 category = fcInfinity;
1225 return (opStatus) (opOverflow | opInexact);
1228 /* Otherwise we become the largest finite number. */
1229 category = fcNormal;
1230 exponent = semantics->maxExponent;
// Sets the low PRECISION bits of the significand, i.e. an all-ones value.
1231 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1232 semantics->precision);
1237 /* Returns TRUE if, when truncating the current number, with BIT the
1238 new LSB, with the given lost fraction and rounding mode, the result
1239 would need to be rounded away from zero (i.e., by increasing the
1240 significand). This routine must work for fcZero of both signs, and
1241 fcNormal numbers. */
1242 bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,
1243 lostFraction lost_fraction,
1244 unsigned int bit) const {
1245 /* NaNs and infinities should not have lost fractions. */
1246 assert(isFiniteNonZero() || category == fcZero);
1248 /* Current callers never pass this so we don't handle it. */
1249 assert(lost_fraction != lfExactlyZero);
1251 switch (rounding_mode) {
// Ties-to-away: round up on an exact tie or anything above it.
1252 case rmNearestTiesToAway:
1253 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
// Ties-to-even: more than half is tested first; an exact tie is then decided
// by the current value of bit BIT in the significand.
1255 case rmNearestTiesToEven:
1256 if (lost_fraction == lfMoreThanHalf)
1259 /* Our zeroes don't have a significand to test. */
1260 if (lost_fraction == lfExactlyHalf && category != fcZero)
1261 return APInt::tcExtractBit(significandParts(), bit);
// NOTE(review): the bodies of the directed-rounding cases are not visible in
// this listing; presumably they depend on the sign -- confirm.
1268 case rmTowardPositive:
1271 case rmTowardNegative:
1274 llvm_unreachable("Invalid rounding mode found");
/// Bring a finite non-zero value into the canonical form of its semantics and
/// round it. LOST_FRACTION describes bits already discarded below the
/// significand; ROUNDING_MODE decides how they are rounded. The returned
/// status can carry overflow, underflow and inexact flags (all three are
/// produced below).
1277 IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
1278 lostFraction lost_fraction) {
1279 unsigned int omsb; /* One, not zero, based MSB. */
// Only finite non-zero values are processed; every other category takes the
// early exit guarded by this test.
1282 if (!isFiniteNonZero())
1285 /* Before rounding normalize the exponent of fcNormal numbers. */
1286 omsb = significandMSB() + 1;
1289 /* OMSB is numbered from 1. We want to place it in the integer
1290 bit numbered PRECISION if possible, with a compensating change in
1292 exponentChange = omsb - semantics->precision;
1294 /* If the resulting exponent is too high, overflow according to
1295 the rounding mode. */
1296 if (exponent + exponentChange > semantics->maxExponent)
1297 return handleOverflow(rounding_mode);
1299 /* Subnormal numbers have exponent minExponent, and their MSB
1300 is forced based on that. */
1301 if (exponent + exponentChange < semantics->minExponent)
1302 exponentChange = semantics->minExponent - exponent;
1304 /* Shifting left is easy as we don't lose precision. */
1305 if (exponentChange < 0) {
// A left shift is only expected when no bits were lost earlier.
1306 assert(lost_fraction == lfExactlyZero);
1308 shiftSignificandLeft(-exponentChange);
1313 if (exponentChange > 0) {
1316 /* Shift right and capture any new lost fraction. */
1317 lf = shiftSignificandRight(exponentChange);
// Merge the newly shifted-out bits with the fraction passed in by the caller.
1319 lost_fraction = combineLostFractions(lf, lost_fraction);
1321 /* Keep OMSB up-to-date. */
1322 if (omsb > (unsigned) exponentChange)
1323 omsb -= exponentChange;
1329 /* Now round the number according to rounding_mode given the lost
1332 /* As specified in IEEE 754, since we do not trap we do not report
1333 underflow for exact results. */
1334 if (lost_fraction == lfExactlyZero) {
1335 /* Canonicalize zeroes. */
1342 /* Increment the significand if we're rounding away from zero. */
// Bit 0 is passed as the new LSB consulted by the ties-to-even rule.
1343 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1345 exponent = semantics->minExponent;
1347 incrementSignificand();
// Recompute the MSB position, which the increment may have moved.
1348 omsb = significandMSB() + 1;
1350 /* Did the significand increment overflow? */
1351 if (omsb == (unsigned) semantics->precision + 1) {
1352 /* Renormalize by incrementing the exponent and shifting our
1353 significand right one. However if we already have the
1354 maximum exponent we overflow to infinity. */
1355 if (exponent == semantics->maxExponent) {
1356 category = fcInfinity;
1358 return (opStatus) (opOverflow | opInexact);
1361 shiftSignificandRight(1);
1367 /* The normal case - we were and are not denormal, and any
1368 significand increment above didn't overflow. */
1369 if (omsb == semantics->precision)
1372 /* We have a non-zero denormal. */
1373 assert(omsb < semantics->precision);
1375 /* Canonicalize zeroes. */
1379 /* The fcZero case is a denormal that underflowed to zero. */
// Reaching this point means bits were lost and the result is below the normal
// range, so both underflow and inexact are reported.
1380 return (opStatus) (opUnderflow | opInexact);
/// Settle an addition or subtraction from the operand categories alone when
/// that is possible. The caller, addOrSubtract, treats an opDivByZero return
/// value as "not a simple case" and then performs the significand arithmetic
/// itself.
1383 IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
// Dispatch on the (lhs category, rhs category) pair packed into one key.
1385 switch (PackCategoriesIntoKey(category, rhs.category)) {
1387 llvm_unreachable(nullptr);
// NOTE(review): the statement for this first group is not visible in this
// listing; presumably *this is already the result and is left unchanged --
// confirm.
1389 case PackCategoriesIntoKey(fcNaN, fcZero):
1390 case PackCategoriesIntoKey(fcNaN, fcNormal):
1391 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1392 case PackCategoriesIntoKey(fcNaN, fcNaN):
1393 case PackCategoriesIntoKey(fcNormal, fcZero):
1394 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1395 case PackCategoriesIntoKey(fcInfinity, fcZero):
// Only RHS is a NaN: take its sign (flipped for subtraction) and its
// significand.
1398 case PackCategoriesIntoKey(fcZero, fcNaN):
1399 case PackCategoriesIntoKey(fcNormal, fcNaN):
1400 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1401 // We need to be sure to flip the sign here for subtraction because we
1402 // don't have a separate negate operation so -NaN becomes 0 - NaN here.
1403 sign = rhs.sign ^ subtract;
1405 copySignificand(rhs);
// Finite or zero LHS with infinite RHS: the result is RHS's infinity, with
// the sign flipped for subtraction.
1408 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1409 case PackCategoriesIntoKey(fcZero, fcInfinity):
1410 category = fcInfinity;
1411 sign = rhs.sign ^ subtract;
// Zero LHS with finite non-zero RHS: the sign comes from RHS, flipped for
// subtraction. NOTE(review): the statement that takes RHS's value is not
// visible in this listing -- confirm.
1414 case PackCategoriesIntoKey(fcZero, fcNormal):
1416 sign = rhs.sign ^ subtract;
1419 case PackCategoriesIntoKey(fcZero, fcZero):
1420 /* Sign depends on rounding mode; handled by caller. */
1423 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1424 /* Differently signed infinities can only be validly
// The test below is true when the signs differ for an addition, or match for
// a subtraction.
1426 if (((sign ^ rhs.sign)!=0) != subtract) {
// Two finite non-zero operands: this is the case the caller finishes itself.
1433 case PackCategoriesIntoKey(fcNormal, fcNormal):
/// Combine the significands of *this and RHS for an addition or subtraction
/// and return the fraction lost while aligning the operands. The result is
/// left for the caller to normalize (addOrSubtract passes the returned lost
/// fraction straight to normalize()).
1438 /* Add or subtract two normal numbers. */
1439 lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
1442 lostFraction lost_fraction;
1445 /* Determine if the operation on the absolute values is effectively
1446 an addition or subtraction. */
// Differing operand signs flip the requested operation.
1447 subtract ^= static_cast<bool>(sign ^ rhs.sign);
1449 /* Are we bigger exponent-wise than the RHS? */
1450 bits = exponent - rhs.exponent;
1452 /* Subtraction is more subtle than one might naively expect. */
// RHS is const, so all shifting of the right-hand operand happens on a copy.
1454 IEEEFloat temp_rhs(rhs);
// Equal exponents: nothing is shifted, so nothing is lost; REVERSE records
// whether *this has the smaller magnitude.
1458 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
1459 lost_fraction = lfExactlyZero;
1460 } else if (bits > 0) {
// *this has the larger exponent: the copy is shifted right by one bit less
// than the exponent gap and *this is shifted left by one bit.
1461 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1462 shiftSignificandLeft(1);
// Otherwise the roles are swapped: *this is shifted right, the copy left.
1465 lost_fraction = shiftSignificandRight(-bits - 1);
1466 temp_rhs.shiftSignificandLeft(1);
// The second argument is non-zero exactly when alignment lost some bits.
// NOTE(review): presumably it acts as a borrow-in for the subtraction --
// confirm against subtractSignificand.
1471 carry = temp_rhs.subtractSignificand
1472 (*this, lost_fraction != lfExactlyZero);
// The difference was computed in the copy; move it into *this.
1473 copySignificand(temp_rhs);
1476 carry = subtractSignificand
1477 (temp_rhs, lost_fraction != lfExactlyZero);
1480 /* Invert the lost fraction - it was on the RHS and
1482 if (lost_fraction == lfLessThanHalf)
1483 lost_fraction = lfMoreThanHalf;
1484 else if (lost_fraction == lfMoreThanHalf)
1485 lost_fraction = lfLessThanHalf;
1487 /* The code above is intended to ensure that no borrow is
// Addition path: whichever operand has the smaller exponent is shifted right
// by the full exponent gap before the significands are added.
1493 IEEEFloat temp_rhs(rhs);
1495 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1496 carry = addSignificand(temp_rhs);
1498 lost_fraction = shiftSignificandRight(-bits);
1499 carry = addSignificand(rhs);
1502 /* We have a guard bit; generating a carry cannot happen. */
1507 return lost_fraction;
/// Settle a multiplication from the operand categories where one operand is
/// zero, infinite or NaN. multiply() calls this first and only multiplies the
/// significands afterwards if *this is still finite and non-zero.
1510 IEEEFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
// Dispatch on the (lhs category, rhs category) pair packed into one key.
1511 switch (PackCategoriesIntoKey(category, rhs.category)) {
1513 llvm_unreachable(nullptr);
// LHS is a NaN. NOTE(review): the statements for this group are not visible
// in this listing.
1515 case PackCategoriesIntoKey(fcNaN, fcZero):
1516 case PackCategoriesIntoKey(fcNaN, fcNormal):
1517 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1518 case PackCategoriesIntoKey(fcNaN, fcNaN):
// Only RHS is a NaN: its significand is copied into *this.
1522 case PackCategoriesIntoKey(fcZero, fcNaN):
1523 case PackCategoriesIntoKey(fcNormal, fcNaN):
1524 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1527 copySignificand(rhs);
// An infinity times a non-zero, non-NaN operand is an infinity.
1530 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1531 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1532 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1533 category = fcInfinity;
// At least one zero operand and no infinity. NOTE(review): presumably the
// result category becomes fcZero; the statement is not visible -- confirm.
1536 case PackCategoriesIntoKey(fcZero, fcNormal):
1537 case PackCategoriesIntoKey(fcNormal, fcZero):
1538 case PackCategoriesIntoKey(fcZero, fcZero):
// Zero times infinity. NOTE(review): presumably reported as an invalid
// operation producing a NaN; the statements are not visible -- confirm.
1542 case PackCategoriesIntoKey(fcZero, fcInfinity):
1543 case PackCategoriesIntoKey(fcInfinity, fcZero):
// Two finite non-zero operands: left for the caller's significand multiply.
1547 case PackCategoriesIntoKey(fcNormal, fcNormal):
/// Settle a division from the operand categories where one operand is zero,
/// infinite or NaN. divide() calls this first and only divides the
/// significands afterwards if *this is still finite and non-zero.
1552 IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
// Dispatch on the (lhs category, rhs category) pair packed into one key.
1553 switch (PackCategoriesIntoKey(category, rhs.category)) {
1555 llvm_unreachable(nullptr);
// Only RHS is a NaN: its significand is copied into *this.
1557 case PackCategoriesIntoKey(fcZero, fcNaN):
1558 case PackCategoriesIntoKey(fcNormal, fcNaN):
1559 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1561 copySignificand(rhs);
// No statement separates copySignificand from the next label (source lines
// 1561 and 1562 are adjacent), so control falls through into the group
// below.
1562 case PackCategoriesIntoKey(fcNaN, fcZero):
1563 case PackCategoriesIntoKey(fcNaN, fcNormal):
1564 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1565 case PackCategoriesIntoKey(fcNaN, fcNaN):
// NOTE(review): a single statement (source line 1566) sits between the group
// above and the labels below and is not visible in this listing; whether it
// also falls through cannot be told from here -- confirm.
1567 case PackCategoriesIntoKey(fcInfinity, fcZero):
1568 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1569 case PackCategoriesIntoKey(fcZero, fcInfinity):
1570 case PackCategoriesIntoKey(fcZero, fcNormal):
// Finite non-zero divided by infinity. NOTE(review): presumably yields zero;
// the statement is not visible -- confirm.
1573 case PackCategoriesIntoKey(fcNormal, fcInfinity):
// Finite non-zero divided by zero: the result is an infinity.
1577 case PackCategoriesIntoKey(fcNormal, fcZero):
1578 category = fcInfinity;
// inf/inf and 0/0. NOTE(review): presumably reported as invalid operations
// producing a NaN; the statements are not visible -- confirm.
1581 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1582 case PackCategoriesIntoKey(fcZero, fcZero):
// Two finite non-zero operands: left for the caller's significand divide.
1586 case PackCategoriesIntoKey(fcNormal, fcNormal):
/// Settle a remainder operation from the operand categories where one
/// operand is zero, infinite or NaN. mod() calls this before its reduction
/// loop.
1591 IEEEFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
// Dispatch on the (lhs category, rhs category) pair packed into one key.
1592 switch (PackCategoriesIntoKey(category, rhs.category)) {
1594 llvm_unreachable(nullptr);
// LHS is a NaN, LHS is zero with a non-zero non-NaN RHS, or a finite LHS
// with an infinite RHS. NOTE(review): presumably *this is already the result
// and is left unchanged; the statement is not visible -- confirm.
1596 case PackCategoriesIntoKey(fcNaN, fcZero):
1597 case PackCategoriesIntoKey(fcNaN, fcNormal):
1598 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1599 case PackCategoriesIntoKey(fcNaN, fcNaN):
1600 case PackCategoriesIntoKey(fcZero, fcInfinity):
1601 case PackCategoriesIntoKey(fcZero, fcNormal):
1602 case PackCategoriesIntoKey(fcNormal, fcInfinity):
// Only RHS is a NaN: its significand is copied into *this.
1605 case PackCategoriesIntoKey(fcZero, fcNaN):
1606 case PackCategoriesIntoKey(fcNormal, fcNaN):
1607 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1610 copySignificand(rhs);
// Zero divisor or infinite dividend. NOTE(review): presumably reported as an
// invalid operation producing a NaN; the statements are not visible --
// confirm.
1613 case PackCategoriesIntoKey(fcNormal, fcZero):
1614 case PackCategoriesIntoKey(fcInfinity, fcZero):
1615 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1616 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1617 case PackCategoriesIntoKey(fcZero, fcZero):
// Two finite non-zero operands: left for the caller's reduction loop.
1621 case PackCategoriesIntoKey(fcNormal, fcNormal):
/// Change the sign of *this in place.
// NOTE(review): the single statement forming the body is not visible in this
// listing; presumably it inverts the sign field -- confirm.
1627 void IEEEFloat::changeSign() {
1628 /* Look mummy, this one's easy. */
/// Add RHS to *this, or subtract it, rounding with ROUNDING_MODE. Special
/// category combinations are settled by addOrSubtractSpecials; otherwise the
/// significands are combined and the result normalized.
1632 /* Normalized addition or subtraction. */
1633 IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
1634 roundingMode rounding_mode,
1638 fs = addOrSubtractSpecials(rhs, subtract);
1640 /* This return code means it was not a simple case. */
// opDivByZero is used here purely as a sentinel from addOrSubtractSpecials.
1641 if (fs == opDivByZero) {
1642 lostFraction lost_fraction;
1644 lost_fraction = addOrSubtractSignificand(rhs, subtract);
// The status of the whole operation is then whatever normalize reports.
1645 fs = normalize(rounding_mode, lost_fraction);
1647 /* Can only be zero if we lost no fraction. */
1648 assert(category != fcZero || lost_fraction == lfExactlyZero);
1651 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1652 positive zero unless rounding to minus infinity, except that
1653 adding two like-signed zeroes gives that zero. */
1654 if (category == fcZero) {
// The sign is recomputed unless RHS is a zero whose effective sign (after
// accounting for subtraction) equals the sign of *this.
1655 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
1656 sign = (rounding_mode == rmTowardNegative);
1662 /* Normalized addition. */
1663 IEEEFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs,
1664 roundingMode rounding_mode) {
1665 return addOrSubtract(rhs, rounding_mode, false);
1668 /* Normalized subtraction. */
1669 IEEEFloat::opStatus IEEEFloat::subtract(const IEEEFloat &rhs,
1670 roundingMode rounding_mode) {
1671 return addOrSubtract(rhs, rounding_mode, true);
/// Multiply *this by RHS in place, rounding with ROUNDING_MODE, and return
/// the status of the operation.
1674 /* Normalized multiply. */
1675 IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs,
1676 roundingMode rounding_mode) {
// Category-only cases (zero, infinity, NaN operands) are settled first.
1680 fs = multiplySpecials(rhs);
// Only a still finite, non-zero value needs the significand product.
1682 if (isFiniteNonZero()) {
// nullptr: no addend is folded in (fusedMultiplyAdd passes one instead).
1683 lostFraction lost_fraction = multiplySignificand(rhs, nullptr);
1684 fs = normalize(rounding_mode, lost_fraction);
// Any lost bits make the result inexact, whatever normalize reported.
1685 if (lost_fraction != lfExactlyZero)
1686 fs = (opStatus) (fs | opInexact);
/// Divide *this by RHS in place, rounding with ROUNDING_MODE, and return the
/// status of the operation.
1692 /* Normalized divide. */
1693 IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs,
1694 roundingMode rounding_mode) {
// Category-only cases (zero, infinity, NaN operands) are settled first.
1698 fs = divideSpecials(rhs);
// Only a still finite, non-zero value needs the significand quotient.
1700 if (isFiniteNonZero()) {
1701 lostFraction lost_fraction = divideSignificand(rhs);
1702 fs = normalize(rounding_mode, lost_fraction);
// Any lost bits make the result inexact, whatever normalize reported.
1703 if (lost_fraction != lfExactlyZero)
1704 fs = (opStatus) (fs | opInexact);
/// Replace *this with *this - RHS * N, where N is the quotient *this / RHS
/// rounded to an integer with round-to-nearest-even. All intermediate steps
/// also use round-to-nearest-even.
1710 /* Normalized remainder. This is not currently correct in all cases. */
1711 IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) {
// V is a working copy; *this is only modified by the final subtraction.
1713 IEEEFloat V = *this;
// Remember the sign of the dividend so it can be reapplied to the result.
1714 unsigned int origSign = sign;
// Step 1: V = *this / RHS.
1716 fs = V.divide(rhs, rmNearestTiesToEven);
1717 if (fs == opDivByZero)
// Step 2: round the quotient to an integer held in a temporary part array
// as wide as the significand storage.
// NOTE(review): X is allocated with new[]; the matching delete[] calls are
// not visible in this listing -- confirm every exit path releases it.
1720 int parts = partCount();
1721 integerPart *x = new integerPart[parts];
1723 fs = V.convertToInteger(makeMutableArrayRef(x, parts),
1724 parts * integerPartWidth, true, rmNearestTiesToEven,
1726 if (fs == opInvalidOp) {
// Step 3: convert the integer quotient back to floating point.
1731 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1732 rmNearestTiesToEven);
1733 assert(fs==opOK); // should always work
// Step 4: V = N * RHS.
1735 fs = V.multiply(rhs, rmNearestTiesToEven);
1736 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
// Step 5: *this -= N * RHS.
1738 fs = subtract(V, rmNearestTiesToEven);
1739 assert(fs==opOK || fs==opInexact); // likewise
// NOTE(review): the condition guarding this assignment is not visible in
// this listing -- confirm when the original sign is restored.
1742 sign = origSign; // IEEE754 requires this
1747 /* Normalized llvm frem (C fmod). */
1748 IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) {
1750 fs = modSpecials(rhs);
1752 while (isFiniteNonZero() && rhs.isFiniteNonZero() &&
1753 compareAbsoluteValue(rhs) != cmpLessThan) {
1754 IEEEFloat V = scalbn(rhs, ilogb(*this) - ilogb(rhs), rmNearestTiesToEven);
1755 if (compareAbsoluteValue(V) == cmpLessThan)
1756 V = scalbn(V, -1, rmNearestTiesToEven);
1759 fs = subtract(V, rmNearestTiesToEven);
/// Compute *this * MULTIPLICAND + ADDEND in place, rounding with
/// ROUNDING_MODE, and return the status of the operation.
1765 /* Normalized fused-multiply-add. */
1766 IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand,
1767 const IEEEFloat &addend,
1768 roundingMode rounding_mode) {
1771 /* Post-multiplication sign, before addition. */
1772 sign ^= multiplicand.sign;
1774 /* If and only if all arguments are normal do we need to do an
1775 extended-precision calculation. */
// Note that the addend only has to be finite here, so a zero addend also
// takes this path.
1776 if (isFiniteNonZero() &&
1777 multiplicand.isFiniteNonZero() &&
1778 addend.isFinite()) {
1779 lostFraction lost_fraction;
// The addend is handed to multiplySignificand so that product and sum are
// formed before the single normalize below.
1781 lost_fraction = multiplySignificand(multiplicand, &addend);
1782 fs = normalize(rounding_mode, lost_fraction);
// Any lost bits make the result inexact, whatever normalize reported.
1783 if (lost_fraction != lfExactlyZero)
1784 fs = (opStatus) (fs | opInexact);
1786 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1787 positive zero unless rounding to minus infinity, except that
1788 adding two like-signed zeroes gives that zero. */
// A zero reached through underflow is excluded and keeps its sign.
1789 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign)
1790 sign = (rounding_mode == rmTowardNegative);
// Otherwise at least one operand is special: settle the product by category
// first.
1792 fs = multiplySpecials(multiplicand);
1794 /* FS can only be opOK or opInvalidOp. There is no more work
1795 to do in the latter case. The IEEE-754R standard says it is
1796 implementation-defined in this case whether, if ADDEND is a
1797 quiet NaN, we raise invalid op; this implementation does so.
1799 If we need to do the addition we can do so with normal
1802 fs = addOrSubtract(addend, rounding_mode, false);
/// Round *this in place to an integral value using ROUNDING_MODE and return
/// the status of the operation.
1808 /* Rounding-mode correct round to integral value. */
1809 IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) {
1812 // If the exponent is large enough, we know that this value is already
1813 // integral, and the arithmetic below would potentially cause it to saturate
1814 // to +/-Inf. Bail out early instead.
1815 if (isFiniteNonZero() && exponent+1 >= (int)semanticsPrecision(*semantics))
1818 // The algorithm here is quite simple: we add 2^(p-1), where p is the
1819 // precision of our format, and then subtract it back off again. The choice
1820 // of rounding modes for the addition/subtraction determines the rounding mode
1821 // for our integral rounding as well.
1822 // NOTE: When the input value is negative, we do subtraction followed by
1823 // addition instead.
// Build the integer 1 << (p-1) in an APInt that is NextPowerOf2(p) bits wide.
1824 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
1825 IntegerConstant <<= semanticsPrecision(*semantics)-1;
1826 IEEEFloat MagicConstant(*semantics);
1827 fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
1828 rmNearestTiesToEven);
// Giving the constant the sign of the input is what turns the add/subtract
// pair below into subtract/add for negative inputs.
1829 MagicConstant.sign = sign;
1834 // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly.
1835 bool inputSign = isNegative();
1837 fs = add(MagicConstant, rounding_mode);
// Anything other than an exact or inexact result from the addition is
// handled before the subtraction is attempted.
1838 if (fs != opOK && fs != opInexact)
1841 fs = subtract(MagicConstant, rounding_mode);
1843 // Restore the input sign.
1844 if (inputSign != isNegative())
/// Order *this relative to RHS. Both operands must have the same semantics
/// (asserted). Any NaN operand yields cmpUnordered.
1851 /* Comparison requires normalized numbers. */
1852 IEEEFloat::cmpResult IEEEFloat::compare(const IEEEFloat &rhs) const {
1855 assert(semantics == rhs.semantics);
// Dispatch on the (lhs category, rhs category) pair packed into one key.
1857 switch (PackCategoriesIntoKey(category, rhs.category)) {
1859 llvm_unreachable(nullptr);
// A NaN on either side is unordered.
1861 case PackCategoriesIntoKey(fcNaN, fcZero):
1862 case PackCategoriesIntoKey(fcNaN, fcNormal):
1863 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1864 case PackCategoriesIntoKey(fcNaN, fcNaN):
1865 case PackCategoriesIntoKey(fcZero, fcNaN):
1866 case PackCategoriesIntoKey(fcNormal, fcNaN):
1867 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1868 return cmpUnordered;
// LHS has the larger magnitude class. NOTE(review): presumably the outcome
// depends on the sign of *this; the test is not visible -- confirm.
1870 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1871 case PackCategoriesIntoKey(fcInfinity, fcZero):
1872 case PackCategoriesIntoKey(fcNormal, fcZero):
1876 return cmpGreaterThan;
// RHS has the larger magnitude class. NOTE(review): presumably the outcome
// depends on the sign of RHS; the test is not visible -- confirm.
1878 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1879 case PackCategoriesIntoKey(fcZero, fcInfinity):
1880 case PackCategoriesIntoKey(fcZero, fcNormal):
1882 return cmpGreaterThan;
// Two infinities are first checked for having the same sign.
1886 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1887 if (sign == rhs.sign)
1892 return cmpGreaterThan;
1894 case PackCategoriesIntoKey(fcZero, fcZero):
1897 case PackCategoriesIntoKey(fcNormal, fcNormal):
1901 /* Two normal numbers. Do they have the same sign? */
1902 if (sign != rhs.sign) {
1904 result = cmpLessThan;
1906 result = cmpGreaterThan;
1908 /* Compare absolute values; invert result if negative. */
1909 result = compareAbsoluteValue(rhs);
// Swap less-than and greater-than; any other result is left as it is.
1912 if (result == cmpLessThan)
1913 result = cmpGreaterThan;
1914 else if (result == cmpGreaterThan)
1915 result = cmpLessThan;
1922 /// IEEEFloat::convert - convert a value of one floating point type to another.
1923 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
1924 /// records whether the transformation lost information, i.e. whether
1925 /// converting the result back to the original type will produce the
1926 /// original value (this is almost the same as return value==opOK, but there
1927 /// are edge cases where this is not so).
1929 IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
1930 roundingMode rounding_mode,
1932 lostFraction lostFraction;
1933 unsigned int newPartCount, oldPartCount;
1936 const fltSemantics &fromSemantics = *semantics;
1938 lostFraction = lfExactlyZero;
// Storage is sized for one bit more than the target precision.
1939 newPartCount = partCountForBits(toSemantics.precision + 1);
1940 oldPartCount = partCount();
// Positive SHIFT widens the significand, negative SHIFT narrows it.
1941 shift = toSemantics.precision - fromSemantics.precision;
1943 bool X86SpecialNan = false;
// A NaN leaving the x87 extended format is flagged when either of the two
// top bits of its first significand part is clear.
1944 if (&fromSemantics == &semX87DoubleExtended &&
1945 &toSemantics != &semX87DoubleExtended && category == fcNaN &&
1946 (!(*significandParts() & 0x8000000000000000ULL) ||
1947 !(*significandParts() & 0x4000000000000000ULL))) {
1948 // x86 has some unusual NaNs which cannot be represented in any other
1949 // format; note them here.
1950 X86SpecialNan = true;
1953 // If this is a truncation of a denormal number, and the target semantics
1954 // has larger exponent range than the source semantics (this can happen
1955 // when truncating from PowerPC double-double to double format), the
1956 // right shift could lose result mantissa bits. Adjust exponent instead
1957 // of performing excessive shift.
1958 if (shift < 0 && isFiniteNonZero()) {
// Distance from the current MSB position to the source precision; zero or
// negative when the value is denormal in the source format.
1959 int exponentChange = significandMSB() + 1 - fromSemantics.precision;
// Clamp so the exponent does not go below the target minimum ...
1960 if (exponent + exponentChange < toSemantics.minExponent)
1961 exponentChange = toSemantics.minExponent - exponent;
// ... and so no more than the pending right shift is absorbed.
1962 if (exponentChange < shift)
1963 exponentChange = shift;
1964 if (exponentChange < 0) {
1965 shift -= exponentChange;
1966 exponent += exponentChange;
1970 // If this is a truncation, perform the shift before we narrow the storage.
1971 if (shift < 0 && (isFiniteNonZero() || category==fcNaN))
1972 lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
1974 // Fix the storage so it can hold to new value.
1975 if (newPartCount > oldPartCount) {
1976 // The new type requires more storage; make it available.
1977 integerPart *newParts;
1978 newParts = new integerPart[newPartCount];
// Zero-fill first so the parts beyond the old significand are defined.
1979 APInt::tcSet(newParts, 0, newPartCount);
1980 if (isFiniteNonZero() || category==fcNaN)
1981 APInt::tcAssign(newParts, significandParts(), oldPartCount);
// NOTE(review): the release of the previous storage is not visible in this
// listing -- confirm it happens before the pointer is replaced.
1983 significand.parts = newParts;
1984 } else if (newPartCount == 1 && oldPartCount != 1) {
1985 // Switch to built-in storage for a single part.
1986 integerPart newPart = 0;
1987 if (isFiniteNonZero() || category==fcNaN)
1988 newPart = significandParts()[0];
1990 significand.part = newPart;
1993 // Now that we have the right storage, switch the semantics.
1994 semantics = &toSemantics;
1996 // If this is an extension, perform the shift now that the storage is
1998 if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
1999 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
2001 if (isFiniteNonZero()) {
// Finite values are rounded into the new format; any status other than opOK
// counts as lost information.
2002 fs = normalize(rounding_mode, lostFraction);
2003 *losesInfo = (fs != opOK);
2004 } else if (category == fcNaN) {
// A NaN loses information if payload bits were shifted out or if it was one
// of the x87-only NaNs noted above.
2005 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2007 // For x87 extended precision, we want to make a NaN, not a special NaN if
2008 // the input wasn't special either.
2009 if (!X86SpecialNan && semantics == &semX87DoubleExtended)
2010 APInt::tcSetBit(significandParts(), semantics->precision - 1);
2012 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
2013 // does not give you back the same bits. This is dubious, and we
2014 // don't currently do it. You're really supposed to get
2015 // an invalid operation signal at runtime, but nobody does that.
/// PARTS receives the integer, WIDTH is its size in bits, ISSIGNED selects a
/// signed destination, and ISEXACT is an output flag (its assignments are not
/// visible in this listing).
2025 /* Convert a floating point number to an integer according to the
2026 rounding mode. If the rounded integer value is out of range this
2027 returns an invalid operation exception and the contents of the
2028 destination parts are unspecified. If the rounded value is in
2029 range but the floating point number is not the exact integer, the C
2030 standard doesn't require an inexact exception to be raised. IEEE
2031 854 does require it so we do that.
2033 Note that for conversions to integer type the C standard requires
2034 round-to-zero to always be used. */
2035 IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
2036 MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned,
2037 roundingMode rounding_mode, bool *isExact) const {
2038 lostFraction lost_fraction;
2039 const integerPart *src;
2040 unsigned int dstPartsCount, truncatedBits;
2044 /* Handle the three special cases first. */
2045 if (category == fcInfinity || category == fcNaN)
// The destination must provide enough parts for WIDTH bits.
2048 dstPartsCount = partCountForBits(width);
2049 assert(dstPartsCount <= parts.size() && "Integer too big");
2051 if (category == fcZero) {
2052 APInt::tcSet(parts.data(), 0, dstPartsCount);
2053 // Negative zero can't be represented as an int.
2058 src = significandParts();
2060 /* Step 1: place our absolute value, with any fraction truncated, in
2063 /* Our absolute value is less than one; truncate everything. */
2064 APInt::tcSet(parts.data(), 0, dstPartsCount);
2065 /* For exponent -1 the integer bit represents .5, look at that.
2066 For smaller exponents leftmost truncated bit is 0. */
2067 truncatedBits = semantics->precision -1U - exponent;
2069 /* We want the most significant (exponent + 1) bits; the rest are
2071 unsigned int bits = exponent + 1U;
2073 /* Hopelessly large in magnitude? */
2077 if (bits < semantics->precision) {
2078 /* We truncate (semantics->precision - bits) bits. */
2079 truncatedBits = semantics->precision - bits;
// Copy the top BITS bits of the significand, skipping the truncated low bits.
2080 APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits);
2082 /* We want at least as many bits as are available. */
2083 APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision,
// The value has more integer bits than the significand holds, so the copied
// bits are moved up by the difference.
2085 APInt::tcShiftLeft(parts.data(), dstPartsCount,
2086 bits - semantics->precision);
2091 /* Step 2: work out any lost fraction, and increment the absolute
2092 value if we would round away from zero. */
2093 if (truncatedBits) {
2094 lost_fraction = lostFractionThroughTruncation(src, partCount(),
// TRUNCATEDBITS is passed as the index of the new least significant bit.
2096 if (lost_fraction != lfExactlyZero &&
2097 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
// A carry out of the destination parts means the value does not fit.
2098 if (APInt::tcIncrement(parts.data(), dstPartsCount))
2099 return opInvalidOp; /* Overflow. */
2102 lost_fraction = lfExactlyZero;
2105 /* Step 3: check if we fit in the destination. */
// One-based position of the highest set bit of the magnitude.
2106 unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1;
2110 /* Negative numbers cannot be represented as unsigned. */
2114 /* It takes omsb bits to represent the unsigned integer value.
2115 We lose a bit for the sign, but care is needed as the
2116 maximally negative integer is a special case. */
// A magnitude using all WIDTH bits is singled out when it has more than one
// bit set, i.e. when its lowest and highest set bits differ.
2117 if (omsb == width &&
2118 APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb)
2121 /* This case can happen because of rounding. */
// Negate the magnitude in place (two's complement).
2126 APInt::tcNegate (parts.data(), dstPartsCount);
// Non-negative values: WIDTH bits are available when unsigned, one fewer when
// signed.
2128 if (omsb >= width + !isSigned)
2132 if (lost_fraction == lfExactlyZero) {
2139 /* Same as convertToSignExtendedInteger, except we provide
2140 deterministic values in case of an invalid operation exception,
2141 namely zero for NaNs and the minimal or maximal value respectively
2142 for underflow or overflow.
2143 The *isExact output tells whether the result is exact, in the sense
2144 that converting it back to the original floating point type produces
2145 the original value. This is almost equivalent to result==opOK,
2146 except for negative zeroes.
2149 IEEEFloat::convertToInteger(MutableArrayRef<integerPart> parts,
2150 unsigned int width, bool isSigned,
2151 roundingMode rounding_mode, bool *isExact) const {
// The real conversion is delegated; only an invalid-operation result needs
// the fix-up below.
2154 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2157 if (fs == opInvalidOp) {
2158 unsigned int bits, dstPartsCount;
2160 dstPartsCount = partCountForBits(width);
2161 assert(dstPartsCount <= parts.size() && "Integer too big");
// BITS is the number of low bits to set in the replacement value.
// NOTE(review): only one of the assignments to BITS is visible in this
// listing -- confirm the NaN and negative cases.
2163 if (category == fcNaN)
2168 bits = width - isSigned;
2170 APInt::tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits);
// For a negative value and a signed destination the set bits are moved up
// by WIDTH - 1 positions.
2171 if (sign && isSigned)
2172 APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1);
/// SRC points at SRCCOUNT integer parts. The return value is the status
/// reported by normalize().
2178 /* Convert an unsigned integer SRC to a floating point number,
2179 rounding according to ROUNDING_MODE. The sign of the floating
2180 point number is not modified. */
2181 IEEEFloat::opStatus IEEEFloat::convertFromUnsignedParts(
2182 const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) {
2183 unsigned int omsb, precision, dstCount;
2185 lostFraction lost_fraction;
2187 category = fcNormal;
// One-based position of the highest set bit of SRC.
2188 omsb = APInt::tcMSB(src, srcCount) + 1;
2189 dst = significandParts();
2190 dstCount = partCount();
2191 precision = semantics->precision;
2193 /* We want the most significant PRECISION bits of SRC. There may not
2194 be that many; extract what we can. */
2195 if (precision <= omsb) {
// SRC has at least PRECISION significant bits: keep the top PRECISION of
// them and record what the dropped low bits amount to.
2196 exponent = omsb - 1;
2197 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2199 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
// SRC is narrower than the significand: copy all OMSB bits, nothing is lost.
2201 exponent = precision - 1;
2202 lost_fraction = lfExactlyZero;
2203 APInt::tcExtract(dst, dstCount, src, omsb, 0);
// normalize() performs the rounding and any final exponent adjustment.
2206 return normalize(rounding_mode, lost_fraction);
/// Convert the integer VAL to floating point, rounding with ROUNDING_MODE.
/// ISSIGNED selects whether VAL is interpreted as signed.
2209 IEEEFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned,
2210 roundingMode rounding_mode) {
2211 unsigned int partCount = Val.getNumWords();
// NOTE(review): the declaration of API and the body of this branch are not
// visible in this listing; presumably API is a copy of VAL that is negated
// here, with the sign recorded -- confirm.
2215 if (isSigned && api.isNegative()) {
// The words of API are then converted as an unsigned magnitude.
2220 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2223 /* Convert a two's complement integer SRC to a floating point number,
2224 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2225 integer is signed, in which case it must be sign-extended. */
2227 IEEEFloat::convertFromSignExtendedInteger(const integerPart *src,
2228 unsigned int srcCount, bool isSigned,
2229 roundingMode rounding_mode) {
// The sign is read from the topmost bit of the SRCCOUNT parts.
2233 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2236 /* If we're signed and negative negate a copy. */
// SRC is const, so the magnitude is built in a temporary buffer.
// NOTE(review): COPY is allocated with new[]; the matching delete[] is not
// visible in this listing -- confirm it is released.
2238 copy = new integerPart[srcCount];
2239 APInt::tcAssign(copy, src, srcCount);
2240 APInt::tcNegate(copy, srcCount);
2241 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
// Otherwise SRC is converted directly.
2245 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
/// Convert the WIDTH-bit integer stored in PARTS to floating point, rounding
/// with ROUNDING_MODE. ISSIGNED selects whether bit WIDTH - 1 is treated as a
/// sign bit.
2251 /* FIXME: should this just take a const APInt reference? */
2253 IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2254 unsigned int width, bool isSigned,
2255 roundingMode rounding_mode) {
2256 unsigned int partCount = partCountForBits(width);
// Wrap the raw parts in an APInt of exactly WIDTH bits.
2257 APInt api = APInt(width, makeArrayRef(parts, partCount));
// NOTE(review): the body of this branch is not visible in this listing;
// presumably it records the sign and negates API -- confirm.
2260 if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2265 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
/// Parse the hexadecimal significand and the exponent in S into *this,
/// rounding with ROUNDING_MODE. Malformed input is rejected by assertions.
/// The return value is the status reported by normalize().
2269 IEEEFloat::convertFromHexadecimalString(StringRef s,
2270 roundingMode rounding_mode) {
2271 lostFraction lost_fraction = lfExactlyZero;
2273 category = fcNormal;
2277 integerPart *significand = significandParts();
2278 unsigned partsCount = partCount();
// Digits are stored from the top of the significand storage downwards;
// BITPOS starts one past the highest bit.
2279 unsigned bitPos = partsCount * integerPartWidth;
2280 bool computedTrailingFraction = false;
2282 // Skip leading zeroes and any (hexa)decimal point.
2283 StringRef::iterator begin = s.begin();
2284 StringRef::iterator end = s.end();
2285 StringRef::iterator dot;
2286 StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2287 StringRef::iterator firstSignificantDigit = p;
2290 integerPart hex_value;
2293 assert(dot == end && "String contains multiple dots");
// A value of -1U from hexDigitValue is tested for explicitly.
// NOTE(review): presumably it marks a non-hex character that ends the
// significand -- confirm.
2298 hex_value = hexDigitValue(*p);
2299 if (hex_value == -1U)
2304 // Store the number while we have space.
// Place the nibble at BITPOS within its part.
2307 hex_value <<= bitPos % integerPartWidth;
2308 significand[bitPos / integerPartWidth] |= hex_value;
// Once storage is full, the remaining digits are summarised as a lost
// fraction, computed only once.
2309 } else if (!computedTrailingFraction) {
2310 lost_fraction = trailingHexadecimalFraction(p, end, hex_value);
2311 computedTrailingFraction = true;
2315 /* Hex floats require an exponent but not a hexadecimal point. */
2316 assert(p != end && "Hex strings require an exponent");
2317 assert((*p == 'p' || *p == 'P') && "Invalid character in significand");
2318 assert(p != begin && "Significand has no digits");
2319 assert((dot == end || p - begin != 1) && "Significand has no digits");
2321 /* Ignore the exponent if we are zero. */
2322 if (p != firstSignificantDigit) {
2325 /* Implicit hexadecimal point? */
2329 /* Calculate the exponent adjustment implicit in the number of
2330 significant digits. */
2331 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2332 if (expAdjustment < 0)
// Each hexadecimal digit contributes four bits.
2334 expAdjustment = expAdjustment * 4 - 1;
2336 /* Adjust for writing the significand starting at the most
2337 significant nibble. */
2338 expAdjustment += semantics->precision;
2339 expAdjustment -= partsCount * integerPartWidth;
2341 /* Adjust for the given exponent. */
// P + 1 skips the 'p' / 'P' exponent marker asserted above.
2342 exponent = totalExponent(p + 1, end, expAdjustment);
2345 return normalize(rounding_mode, lost_fraction);
// Converts the integer significand held in decSigParts (sigPartCount parts),
// scaled by 10^exp, into this value and rounds it with rounding_mode.
// The product or quotient with 5^|exp| is formed in a temporary wide format
// (calcSemantics).  The number of parts doubles on each pass of the loop until
// the half-ulp error bound (HUerr) no longer exceeds the distance to a
// rounding boundary (HUdistance).  The result is whatever normalize() returns.
2349 IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2350 unsigned sigPartCount, int exp,
2351 roundingMode rounding_mode) {
2352 unsigned int parts, pow5PartCount;
2353 fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
2354 integerPart pow5Parts[maxPowerOfFiveParts];
2357 isNearest = (rounding_mode == rmNearestTiesToEven ||
2358 rounding_mode == rmNearestTiesToAway);
// Initial working width: the target precision plus 11 extra bits.
2360 parts = partCountForBits(semantics->precision + 11);
2362 /* Calculate pow(5, abs(exp)). */
2363 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
// Each pass uses twice as many parts as the previous one.
2365 for (;; parts *= 2) {
2366 opStatus sigStatus, powStatus;
2367 unsigned int excessPrecision, truncatedBits;
2369 calcSemantics.precision = parts * integerPartWidth - 1;
2370 excessPrecision = calcSemantics.precision - semantics->precision;
2371 truncatedBits = excessPrecision;
2373 IEEEFloat decSig(calcSemantics, uninitialized);
2374 decSig.makeZero(sign);
2375 IEEEFloat pow5(calcSemantics);
2377 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2378 rmNearestTiesToEven);
2379 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2380 rmNearestTiesToEven);
2381 /* Add exp, as 10^n = 5^n * 2^n. */
2382 decSig.exponent += exp;
2384 lostFraction calcLostFraction;
2385 integerPart HUerr, HUdistance;
2386 unsigned int powHUerr;
2389 /* multiplySignificand leaves the precision-th bit set to 1. */
2390 calcLostFraction = decSig.multiplySignificand(pow5, nullptr);
// 1 if converting 5^|exp| to the working format was inexact, otherwise 0.
2391 powHUerr = powStatus != opOK;
2393 calcLostFraction = decSig.divideSignificand(pow5);
2394 /* Denormal numbers have less precision. */
2395 if (decSig.exponent < semantics->minExponent) {
2396 excessPrecision += (semantics->minExponent - decSig.exponent);
2397 truncatedBits = excessPrecision;
2398 if (excessPrecision > calcSemantics.precision)
2399 excessPrecision = calcSemantics.precision;
2401 /* Extra half-ulp lost in reciprocal of exponent. */
2402 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2405 /* Both multiplySignificand and divideSignificand return the
2406 result with the integer bit set. */
2407 assert(APInt::tcExtractBit
2408 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2410 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2412 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2413 excessPrecision, isNearest);
2415 /* Are we guaranteed to round correctly if we truncate? */
2416 if (HUdistance >= HUerr) {
2417 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2418 calcSemantics.precision - excessPrecision,
2420 /* Take the exponent of decSig. If we tcExtract-ed less bits
2421 above we must adjust our exponent to compensate for the
2422 implicit right shift. */
2423 exponent = (decSig.exponent + semantics->precision
2424 - (calcSemantics.precision - excessPrecision));
2425 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2428 return normalize(rounding_mode, calcLostFraction);
// Parses the decimal literal in str into this value, rounding according to
// rounding_mode.  interpretDecimal() scans the text first.  A chain of quick
// checks then handles zero and exponents that certainly overflow or
// underflow.  Otherwise the digits are accumulated into a heap-allocated
// multi-part integer and passed to roundSignificandWithExponent().
2434 IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
2438 /* Scan the text. */
2439 StringRef::iterator p = str.begin();
2440 interpretDecimal(p, str.end(), &D);
2442 /* Handle the quick cases. First the case of no significant digits,
2443 i.e. zero, and then exponents that are obviously too large or too
2444 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2445 definitely overflows if
2447 (exp - 1) * L >= maxExponent
2449 and definitely underflows to zero where
2451 (exp + 1) * L <= minExponent - precision
2453 With integer arithmetic the tightest bounds for L are
2455 93/28 < L < 196/59 [ numerator <= 256 ]
2456 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2459 // Test if we have a zero number allowing for strings with no null terminators
2460 // and zero decimals with non-zero exponents.
2462 // We computed firstSigDigit by ignoring all zeros and dots. Thus if
2463 // D->firstSigDigit equals str.end(), every digit must be a zero and there can
2464 // be at most one dot. On the other hand, if we have a zero with a non-zero
2465 // exponent, then we know that D.firstSigDigit will be non-numeric.
2466 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
2470 /* Check whether the normalized exponent is high enough to overflow
2471 max during the log-rebasing in the max-exponent check below. */
2472 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2473 fs = handleOverflow(rounding_mode);
2475 /* If it wasn't, then it also wasn't high enough to overflow max
2476 during the log-rebasing in the min-exponent check. Check that it
2477 won't overflow min in either check, then perform the min-exponent
2479 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
2480 (D.normalizedExponent + 1) * 28738 <=
2481 8651 * (semantics->minExponent - (int) semantics->precision)) {
2482 /* Underflow to zero and round. */
2483 category = fcNormal;
2485 fs = normalize(rounding_mode, lfLessThanHalf);
2487 /* We can finally safely perform the max-exponent check. */
2488 } else if ((D.normalizedExponent - 1) * 42039
2489 >= 12655 * semantics->maxExponent) {
2490 /* Overflow and round. */
2491 fs = handleOverflow(rounding_mode);
2493 integerPart *decSignificand;
2494 unsigned int partCount;
2496 /* A tight upper bound on number of bits required to hold an
2497 N-digit decimal integer is N * 196 / 59. Allocate enough space
2498 to hold the full significand, and an extra part required by
2500 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2501 partCount = partCountForBits(1 + 196 * partCount / 59);
// Raw allocation; it is released by the delete [] at the end of this branch.
2502 decSignificand = new integerPart[partCount + 1];
2505 /* Convert to binary efficiently - we do almost all multiplication
2506 in an integerPart. When this would overflow do we do a single
2507 bignum multiplication, and then revert again to multiplication
2508 in an integerPart. */
2510 integerPart decValue, val, multiplier;
2518 if (p == str.end()) {
2522 decValue = decDigitValue(*p++);
2523 assert(decValue < 10U && "Invalid character in significand");
2525 val = val * 10 + decValue;
2526 /* The maximum number that can be multiplied by ten with any
2527 digit added without overflowing an integerPart. */
2528 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2530 /* Multiply out the current part. */
2531 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2532 partCount, partCount + 1, false);
2534 /* If we used another part (likely but not guaranteed), increase
2536 if (decSignificand[partCount])
2538 } while (p <= D.lastSigDigit);
2540 category = fcNormal;
2541 fs = roundSignificandWithExponent(decSignificand, partCount,
2542 D.exponent, rounding_mode);
2544 delete [] decSignificand;
// Recognises the literal spellings of infinity ("inf", "INFINITY") and NaN
// ("nan", "NaN"), each optionally preceded by '-'.  Matching is exact via
// StringRef::equals.  convertFromString() tests the bool result to decide
// whether the string has already been handled here.
2550 bool IEEEFloat::convertFromStringSpecials(StringRef str) {
2551 if (str.equals("inf") || str.equals("INFINITY")) {
2556 if (str.equals("-inf") || str.equals("-INFINITY")) {
2561 if (str.equals("nan") || str.equals("NaN")) {
2562 makeNaN(false, false);
2566 if (str.equals("-nan") || str.equals("-NaN")) {
// Same call as for "nan", but with the second argument set to true.
2567 makeNaN(false, true);
// Parses str as a floating-point literal into this value using rounding_mode.
// str must be non-empty (asserted).  The special spellings are tried first.
// Then an optional leading sign is examined.  A following "0x" or "0X"
// selects the hexadecimal parser; anything else goes to the decimal parser.
2574 IEEEFloat::opStatus IEEEFloat::convertFromString(StringRef str,
2575 roundingMode rounding_mode) {
2576 assert(!str.empty() && "Invalid string length");
2578 // Handle special cases.
2579 if (convertFromStringSpecials(str))
2582 /* Handle a leading minus sign. */
2583 StringRef::iterator p = str.begin();
2584 size_t slen = str.size();
// sign is 1 only for a leading '-'; a leading '+' or a digit gives 0.
2585 sign = *p == '-' ? 1 : 0;
2586 if (*p == '-' || *p == '+') {
2589 assert(slen && "String has no digits");
2592 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
// The hexadecimal parser is given the text after the two-character prefix.
2593 assert(slen - 2 && "Invalid string");
2594 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
2598 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
2601 /* Write out a hexadecimal representation of the floating point value
2602 to DST, which must be of sufficient size, in the C99 form
2603 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2604 excluding the terminating NUL.
2606 If UPPERCASE, the output is in upper case, otherwise in lower case.
2608 HEXDIGITS digits appear altogether, rounding the value if
2609 necessary. If HEXDIGITS is 0, the minimal precision to display the
2610 number precisely is used instead. If nothing would appear after
2611 the decimal point it is suppressed.
2613 The decimal exponent is always printed and has at least one digit.
2614 Zero values display an exponent of zero. Infinities and NaNs
2615 appear as "infinity" or "nan" respectively.
2617 The above rules are as specified by C99. There is ambiguity about
2618 what the leading hexadecimal digit should be. This implementation
2619 uses whatever is necessary so that the exponent is displayed as
2620 stored. This implies the exponent will fall within the IEEE format
2621 range, and the leading hexadecimal digit will be 0 (for denormals),
2622 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2623 any other digits zero).
2625 unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits,
2627 roundingMode rounding_mode) const {
// NOTE(review): the copy length comes from infinityU but the advance from
// infinityL; presumably both arrays have the same size - confirm.
2636 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2637 dst += sizeof infinityL - 1;
2641 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2642 dst += sizeof NaNU - 1;
2647 *dst++ = upperCase ? 'X': 'x';
// With more than one requested digit, hexDigits - 1 '0' characters are written.
2649 if (hexDigits > 1) {
2651 memset (dst, '0', hexDigits - 1);
2652 dst += hexDigits - 1;
2654 *dst++ = upperCase ? 'P': 'p';
2659 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
// Character count measured from p (presumably the initial dst - confirm).
2665 return static_cast<unsigned int>(dst - p);
2668 /* Does the hard work of outputting the correctly rounded hexadecimal
2669 form of a normal floating point number with the specified number of
2670 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2671 digits necessary to print the value precisely is output. */
// Writes into dst and returns the value produced by writeSignedDecimal(),
// which is called last to append the exponent after the 'p' or 'P' marker.
2672 char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2674 roundingMode rounding_mode) const {
2675 unsigned int count, valueBits, shift, partsCount, outputDigits;
2676 const char *hexDigitChars;
2677 const integerPart *significand;
2682 *dst++ = upperCase ? 'X': 'x';
2685 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2687 significand = significandParts();
2688 partsCount = partCount();
2690 /* +3 because the first digit only uses the single integer bit, so
2691 we have 3 virtual zero most-significant-bits. */
2692 valueBits = semantics->precision + 3;
// Left shift that moves the top of valueBits to the top of an integerPart.
2693 shift = integerPartWidth - valueBits % integerPartWidth;
2695 /* The natural number of digits required ignoring trailing
2696 insignificant zeroes. */
2697 outputDigits = (valueBits - significandLSB () + 3) / 4;
2699 /* hexDigits of zero means use the required number for the
2700 precision. Otherwise, see if we are truncating. If we are,
2701 find out if we need to round away from zero. */
2703 if (hexDigits < outputDigits) {
2704 /* We are dropping non-zero bits, so need to check how to round.
2705 "bits" is the number of dropped bits. */
2707 lostFraction fraction;
2709 bits = valueBits - hexDigits * 4;
2710 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2711 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2713 outputDigits = hexDigits;
2716 /* Write the digits consecutively, and start writing in the location
2717 of the hexadecimal point. We move the most significant digit
2718 left and add the hexadecimal point later. */
// Number of integerPart-sized chunks needed to cover valueBits (rounded up).
2721 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2723 while (outputDigits && count) {
2726 /* Put the most significant integerPartWidth bits in "part". */
2727 if (--count == partsCount)
2728 part = 0; /* An imaginary higher zero part. */
2730 part = significand[count] << shift;
2733 part |= significand[count - 1] >> (integerPartWidth - shift);
2735 /* Convert as much of "part" to hexdigits as we can. */
2736 unsigned int curDigits = integerPartWidth / 4;
2738 if (curDigits > outputDigits)
2739 curDigits = outputDigits;
2740 dst += partAsHex (dst, part, curDigits, hexDigitChars);
2741 outputDigits -= curDigits;
2747 /* Note that hexDigitChars has a trailing '0'. */
// Replace the digit at q with the next entry of hexDigitChars; the loop
// repeats while the replacement is '0'.
2750 *q = hexDigitChars[hexDigitValue (*q) + 1];
2751 } while (*q == '0');
2754 /* Add trailing zeroes. */
2755 memset (dst, '0', outputDigits);
2756 dst += outputDigits;
2759 /* Move the most significant digit to before the point, and if there
2760 is something after the decimal point add it. This must come
2761 after rounding above. */
2768 /* Finally output the exponent. */
2769 *dst++ = upperCase ? 'P': 'p';
2771 return writeSignedDecimal (dst, exponent);
// Computes a hash for Arg.  Values that are not finite non-zero contribute
// only category, sign and precision.  Finite non-zero values additionally
// contribute the exponent and the significandParts() words.
2774 hash_code hash_value(const IEEEFloat &Arg) {
2775 if (!Arg.isFiniteNonZero())
2776 return hash_combine((uint8_t)Arg.category,
2777 // NaN has no sign, fix it at zero.
2778 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
2779 Arg.semantics->precision);
2781 // Normal floats need their exponent and significand hashed.
2782 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
2783 Arg.semantics->precision, Arg.exponent,
2785 Arg.significandParts(),
2786 Arg.significandParts() + Arg.partCount()));
2789 // Conversion from APFloat to/from host float/double. It may eventually be
2790 // possible to eliminate these and have everybody deal with APFloats, but that
2791 // will take a while. This approach will not easily extend to long double.
2792 // Current implementation requires integerPartWidth==64, which is correct at
2793 // the moment but could be made more general.
2795 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2796 // the actual IEEE representations. We compensate for that here.
// Packs this x87 double-extended value into an 80-bit APInt.  words[0]
// receives the 64-bit significand.  words[1] holds the sign in bit 15 and the
// 15-bit biased exponent in the low bits.
2798 APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
2799 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended);
2800 assert(partCount()==2);
2802 uint64_t myexponent, mysignificand;
2804 if (isFiniteNonZero()) {
2805 myexponent = exponent+16383; //bias
2806 mysignificand = significandParts()[0];
// A biased exponent of 1 with the top significand bit clear is stored with
// an exponent field of 0.
2807 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2808 myexponent = 0; // denormal
2809 } else if (category==fcZero) {
2812 } else if (category==fcInfinity) {
2813 myexponent = 0x7fff;
2814 mysignificand = 0x8000000000000000ULL;
2816 assert(category == fcNaN && "Unknown category");
2817 myexponent = 0x7fff;
2818 mysignificand = significandParts()[0];
2822 words[0] = mysignificand;
2823 words[1] = ((uint64_t)(sign & 1) << 15) |
2824 (myexponent & 0x7fffLL);
2825 return APInt(80, words);
// Produces the 128-bit image of this PPC double-double value as two 64-bit
// words.  words[0] is the value converted to double.  When that conversion
// lost information, words[1] is the double image of the remainder.
2828 APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const {
2829 assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy);
2830 assert(partCount()==2);
2836 // Convert number to double. To avoid spurious underflows, we re-
2837 // normalize against the "double" minExponent first, and only *then*
2838 // truncate the mantissa. The result of that second conversion
2839 // may be inexact, but should never underflow.
2840 // Declare fltSemantics before APFloat that uses it (and
2841 // saves pointer to it) to ensure correct destruction order.
2842 fltSemantics extendedSemantics = *semantics;
2843 extendedSemantics.minExponent = semIEEEdouble.minExponent;
2844 IEEEFloat extended(*this);
2845 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2846 assert(fs == opOK && !losesInfo);
// u is the leading double of the pair.
2849 IEEEFloat u(extended);
2850 fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
2851 assert(fs == opOK || fs == opInexact);
2853 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
2855 // If conversion was exact or resulted in a special case, we're done;
2856 // just set the second double to zero. Otherwise, re-convert back to
2857 // the extended format and compute the difference. This now should
2858 // convert exactly to double.
2859 if (u.isFiniteNonZero() && losesInfo) {
2860 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2861 assert(fs == opOK && !losesInfo);
// v = extended - u, i.e. what the first double could not represent.
2864 IEEEFloat v(extended);
2865 v.subtract(u, rmNearestTiesToEven);
2866 fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
2867 assert(fs == opOK && !losesInfo);
2869 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
2874 return APInt(128, words);
// Packs this IEEE quad value into a 128-bit APInt.  words[0] is the low 64
// significand bits.  words[1] holds the sign in bit 63, the 15-bit biased
// exponent from bit 48, and the low 48 bits of the upper significand part.
2877 APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
2878 assert(semantics == (const llvm::fltSemantics*)&semIEEEquad);
2879 assert(partCount()==2);
2881 uint64_t myexponent, mysignificand, mysignificand2;
2883 if (isFiniteNonZero()) {
2884 myexponent = exponent+16383; //bias
2885 mysignificand = significandParts()[0];
2886 mysignificand2 = significandParts()[1];
// Bit 48 of the upper part is the integer bit; when it is clear at biased
// exponent 1 the exponent field is stored as 0.
2887 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
2888 myexponent = 0; // denormal
2889 } else if (category==fcZero) {
2891 mysignificand = mysignificand2 = 0;
2892 } else if (category==fcInfinity) {
2893 myexponent = 0x7fff;
2894 mysignificand = mysignificand2 = 0;
2896 assert(category == fcNaN && "Unknown category!");
2897 myexponent = 0x7fff;
2898 mysignificand = significandParts()[0];
2899 mysignificand2 = significandParts()[1];
2903 words[0] = mysignificand;
2904 words[1] = ((uint64_t)(sign & 1) << 63) |
2905 ((myexponent & 0x7fff) << 48) |
2906 (mysignificand2 & 0xffffffffffffLL);
2908 return APInt(128, words);
// Packs this IEEE double value into a 64-bit APInt: sign in bit 63, the
// 11-bit biased exponent from bit 52, and the low 52 significand bits.
2911 APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
2912 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble);
2913 assert(partCount()==1);
2915 uint64_t myexponent, mysignificand;
2917 if (isFiniteNonZero()) {
2918 myexponent = exponent+1023; //bias
2919 mysignificand = *significandParts();
// Bit 52 is the integer bit; when it is clear at biased exponent 1 the
// exponent field is stored as 0.
2920 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2921 myexponent = 0; // denormal
2922 } else if (category==fcZero) {
2925 } else if (category==fcInfinity) {
2929 assert(category == fcNaN && "Unknown category!");
2931 mysignificand = *significandParts();
2934 return APInt(64, ((((uint64_t)(sign & 1) << 63) |
2935 ((myexponent & 0x7ff) << 52) |
2936 (mysignificand & 0xfffffffffffffLL))));
// Packs this IEEE single value into a 32-bit APInt: sign in bit 31, the
// 8-bit biased exponent from bit 23, and the low 23 significand bits.
2939 APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
2940 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle);
2941 assert(partCount()==1);
2943 uint32_t myexponent, mysignificand;
2945 if (isFiniteNonZero()) {
2946 myexponent = exponent+127; //bias
2947 mysignificand = (uint32_t)*significandParts();
// Bit 23 is the integer bit; when it is clear at biased exponent 1 the
// exponent field is stored as 0.
2948 if (myexponent == 1 && !(mysignificand & 0x800000))
2949 myexponent = 0; // denormal
2950 } else if (category==fcZero) {
2953 } else if (category==fcInfinity) {
2957 assert(category == fcNaN && "Unknown category!");
2959 mysignificand = (uint32_t)*significandParts();
2962 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
2963 (mysignificand & 0x7fffff)));
// Packs this IEEE half value into a 16-bit APInt: sign in bit 15, the
// 5-bit biased exponent from bit 10, and the low 10 significand bits.
2966 APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
2967 assert(semantics == (const llvm::fltSemantics*)&semIEEEhalf);
2968 assert(partCount()==1);
2970 uint32_t myexponent, mysignificand;
2972 if (isFiniteNonZero()) {
2973 myexponent = exponent+15; //bias
2974 mysignificand = (uint32_t)*significandParts();
// Bit 10 is the integer bit; when it is clear at biased exponent 1 the
// exponent field is stored as 0.
2975 if (myexponent == 1 && !(mysignificand & 0x400))
2976 myexponent = 0; // denormal
2977 } else if (category==fcZero) {
2980 } else if (category==fcInfinity) {
2984 assert(category == fcNaN && "Unknown category!");
2986 mysignificand = (uint32_t)*significandParts();
2989 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |
2990 (mysignificand & 0x3ff)));
2993 // This function creates an APInt that is just a bit map of the floating
2994 // point constant as it would appear in memory. It is not a conversion,
2995 // and treating the result as a normal integer is unlikely to be useful.
// The format-specific packer is chosen by comparing the semantics pointer
// against each known semantics object.  x87 double-extended is the final
// case and is guarded by an assert instead of a test.
2997 APInt IEEEFloat::bitcastToAPInt() const {
2998 if (semantics == (const llvm::fltSemantics*)&semIEEEhalf)
2999 return convertHalfAPFloatToAPInt();
3001 if (semantics == (const llvm::fltSemantics*)&semIEEEsingle)
3002 return convertFloatAPFloatToAPInt();
3004 if (semantics == (const llvm::fltSemantics*)&semIEEEdouble)
3005 return convertDoubleAPFloatToAPInt();
3007 if (semantics == (const llvm::fltSemantics*)&semIEEEquad)
3008 return convertQuadrupleAPFloatToAPInt();
3010 if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy)
3011 return convertPPCDoubleDoubleAPFloatToAPInt();
3013 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended &&
3015 return convertF80LongDoubleAPFloatToAPInt();
// Returns this value as a host float by reinterpreting its bit image.
// Only valid for IEEE single semantics (asserted).
3018 float IEEEFloat::convertToFloat() const {
3019 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle &&
3020 "Float semantics are not IEEEsingle");
3021 APInt api = bitcastToAPInt();
3022 return api.bitsToFloat();
// Returns this value as a host double by reinterpreting its bit image.
// Only valid for IEEE double semantics (asserted).
3025 double IEEEFloat::convertToDouble() const {
3026 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble &&
3027 "Float semantics are not IEEEdouble");
3028 APInt api = bitcastToAPInt();
3029 return api.bitsToDouble();
3032 /// Integer bit is explicit in this format. Intel hardware (387 and later)
3033 /// does not support these bit patterns:
3034 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3035 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3036 /// exponent = 0, integer bit 1 ("pseudodenormal")
3037 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3038 /// At the moment, the first two are treated as NaNs, the second two as Normal.
// Initializes this value from an 80-bit image.  Raw word 0 is the
// significand.  Raw word 1 carries the 15-bit exponent in its low bits and
// the sign above it.
3039 void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
3040 assert(api.getBitWidth()==80);
3041 uint64_t i1 = api.getRawData()[0];
3042 uint64_t i2 = api.getRawData()[1];
3043 uint64_t myexponent = (i2 & 0x7fff);
3044 uint64_t mysignificand = i1;
3046 initialize(&semX87DoubleExtended);
3047 assert(partCount()==2);
3049 sign = static_cast<unsigned int>(i2>>15);
3050 if (myexponent==0 && mysignificand==0) {
3051 // exponent, significand meaningless
3053 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3054 // exponent, significand meaningless
3055 category = fcInfinity;
3056 } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
3057 // exponent meaningless
3059 significandParts()[0] = mysignificand;
3060 significandParts()[1] = 0;
3062 category = fcNormal;
// Remove the exponent bias.
3063 exponent = myexponent - 16383;
3064 significandParts()[0] = mysignificand;
3065 significandParts()[1] = 0;
3066 if (myexponent==0) // denormal
// Initializes this value from a 128-bit PPC double-double image.  Raw word 0
// is read as a double and converted to the double-double semantics.  If the
// result is finite and non-zero, raw word 1 is converted the same way and
// added to it.
3071 void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) {
3072 assert(api.getBitWidth()==128);
3073 uint64_t i1 = api.getRawData()[0];
3074 uint64_t i2 = api.getRawData()[1];
3078 // Get the first double and convert to our format.
3079 initFromDoubleAPInt(APInt(64, i1));
3080 fs = convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3081 assert(fs == opOK && !losesInfo);
3084 // Unless we have a special case, add in second double.
3085 if (isFiniteNonZero()) {
3086 IEEEFloat v(semIEEEdouble, APInt(64, i2));
3087 fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3088 assert(fs == opOK && !losesInfo);
3091 add(v, rmNearestTiesToEven);
// Initializes this value from a 128-bit IEEE quad image.  The exponent is
// bits 48-62 of the high word, the sign is bit 63, and the significand is
// the low word plus the low 48 bits of the high word.
3095 void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
3096 assert(api.getBitWidth()==128);
3097 uint64_t i1 = api.getRawData()[0];
3098 uint64_t i2 = api.getRawData()[1];
3099 uint64_t myexponent = (i2 >> 48) & 0x7fff;
3100 uint64_t mysignificand = i1;
3101 uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
3103 initialize(&semIEEEquad);
3104 assert(partCount()==2);
3106 sign = static_cast<unsigned int>(i2>>63);
3107 if (myexponent==0 &&
3108 (mysignificand==0 && mysignificand2==0)) {
3109 // exponent, significand meaningless
3111 } else if (myexponent==0x7fff &&
3112 (mysignificand==0 && mysignificand2==0)) {
3113 // exponent, significand meaningless
3114 category = fcInfinity;
3115 } else if (myexponent==0x7fff &&
3116 (mysignificand!=0 || mysignificand2 !=0)) {
3117 // exponent meaningless
3119 significandParts()[0] = mysignificand;
3120 significandParts()[1] = mysignificand2;
3122 category = fcNormal;
// Remove the exponent bias.
3123 exponent = myexponent - 16383;
3124 significandParts()[0] = mysignificand;
3125 significandParts()[1] = mysignificand2;
3126 if (myexponent==0) // denormal
3129 significandParts()[1] |= 0x1000000000000LL; // integer bit
// Initializes this value from a 64-bit IEEE double image: sign in bit 63,
// exponent in bits 52-62, significand in the low 52 bits.
3133 void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
3134 assert(api.getBitWidth()==64);
3135 uint64_t i = *api.getRawData();
3136 uint64_t myexponent = (i >> 52) & 0x7ff;
3137 uint64_t mysignificand = i & 0xfffffffffffffLL;
3139 initialize(&semIEEEdouble);
3140 assert(partCount()==1);
3142 sign = static_cast<unsigned int>(i>>63);
3143 if (myexponent==0 && mysignificand==0) {
3144 // exponent, significand meaningless
3146 } else if (myexponent==0x7ff && mysignificand==0) {
3147 // exponent, significand meaningless
3148 category = fcInfinity;
3149 } else if (myexponent==0x7ff && mysignificand!=0) {
3150 // exponent meaningless
3152 *significandParts() = mysignificand;
3154 category = fcNormal;
// Remove the exponent bias.
3155 exponent = myexponent - 1023;
3156 *significandParts() = mysignificand;
3157 if (myexponent==0) // denormal
3160 *significandParts() |= 0x10000000000000LL; // integer bit
// Initializes this value from a 32-bit IEEE single image: exponent in
// bits 23-30, significand in the low 23 bits.
3164 void IEEEFloat::initFromFloatAPInt(const APInt &api) {
3165 assert(api.getBitWidth()==32);
3166 uint32_t i = (uint32_t)*api.getRawData();
3167 uint32_t myexponent = (i >> 23) & 0xff;
3168 uint32_t mysignificand = i & 0x7fffff;
3170 initialize(&semIEEEsingle);
3171 assert(partCount()==1);
3174 if (myexponent==0 && mysignificand==0) {
3175 // exponent, significand meaningless
3177 } else if (myexponent==0xff && mysignificand==0) {
3178 // exponent, significand meaningless
3179 category = fcInfinity;
3180 } else if (myexponent==0xff && mysignificand!=0) {
3181 // exponent meaningless; the significand bits are stored below
3183 *significandParts() = mysignificand;
3185 category = fcNormal;
3186 exponent = myexponent - 127; //bias
3187 *significandParts() = mysignificand;
3188 if (myexponent==0) // denormal
3191 *significandParts() |= 0x800000; // integer bit
// Initializes this value from a 16-bit IEEE half image: exponent in
// bits 10-14, significand in the low 10 bits.
3195 void IEEEFloat::initFromHalfAPInt(const APInt &api) {
3196 assert(api.getBitWidth()==16);
3197 uint32_t i = (uint32_t)*api.getRawData();
3198 uint32_t myexponent = (i >> 10) & 0x1f;
3199 uint32_t mysignificand = i & 0x3ff;
3201 initialize(&semIEEEhalf);
3202 assert(partCount()==1);
3205 if (myexponent==0 && mysignificand==0) {
3206 // exponent, significand meaningless
3208 } else if (myexponent==0x1f && mysignificand==0) {
3209 // exponent, significand meaningless
3210 category = fcInfinity;
3211 } else if (myexponent==0x1f && mysignificand!=0) {
3212 // exponent meaningless; the significand bits are stored below
3214 *significandParts() = mysignificand;
3216 category = fcNormal;
3217 exponent = myexponent - 15; //bias
3218 *significandParts() = mysignificand;
3219 if (myexponent==0) // denormal
3222 *significandParts() |= 0x400; // integer bit
3226 /// Treat api as containing the bits of a floating point number. The
3227 /// layout is selected by comparing Sem against the known semantics objects
3228 /// (half, single, double, x87 extended, quad, PPC double-double); any other
3229 /// Sem reaches llvm_unreachable.
3230 void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
3231 if (Sem == &semIEEEhalf)
3232 return initFromHalfAPInt(api);
3233 if (Sem == &semIEEEsingle)
3234 return initFromFloatAPInt(api);
3235 if (Sem == &semIEEEdouble)
3236 return initFromDoubleAPInt(api);
3237 if (Sem == &semX87DoubleExtended)
3238 return initFromF80LongDoubleAPInt(api);
3239 if (Sem == &semIEEEquad)
3240 return initFromQuadrupleAPInt(api);
3241 if (Sem == &semPPCDoubleDoubleLegacy)
3242 return initFromPPCDoubleDoubleAPInt(api);
3244 llvm_unreachable(nullptr);
3247 /// Make this number the largest magnitude normal number in the given
/// semantics.
3249 void IEEEFloat::makeLargest(bool Negative) {
3250 // We want (in interchange format):
3251 // sign = {Negative}
3253 // significand = 1..1
3254 category = fcNormal;
3256 exponent = semantics->maxExponent;
3258 // Use memset to set all but the highest integerPart to all ones.
3259 integerPart *significand = significandParts();
3260 unsigned PartCount = partCount();
3261 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
3263 // Set the high integerPart especially setting all unused top bits for
3264 // internal consistency.
// Bits of the top part that lie above the format precision.
3265 const unsigned NumUnusedHighBits =
3266 PartCount*integerPartWidth - semantics->precision;
3267 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
3268 ? (~integerPart(0) >> NumUnusedHighBits)
3272 /// Make this number the smallest magnitude denormal number in the given
/// semantics.
3274 void IEEEFloat::makeSmallest(bool Negative) {
3275 // We want (in interchange format):
3276 // sign = {Negative}
3278 // significand = 0..01
3279 category = fcNormal;
3281 exponent = semantics->minExponent;
// Significand becomes the integer 1 across all parts.
3282 APInt::tcSet(significandParts(), 1, partCount());
// Sets this value to a normal number at the minimum exponent with the
// integer bit set; the target bit pattern is described in the comment below.
3285 void IEEEFloat::makeSmallestNormalized(bool Negative) {
3286 // We want (in interchange format):
3287 // sign = {Negative}
3289 // significand = 10..0
3291 category = fcNormal;
3294 exponent = semantics->minExponent;
// Set bit (precision - 1), which lives in the last part needed for the
// format precision.
3295 significandParts()[partCountForBits(semantics->precision) - 1] |=
3296 (((integerPart)1) << ((semantics->precision - 1) % integerPartWidth));
// Constructs a value with semantics Sem from the raw bit image API.
3299 IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
3300 initFromAPInt(&Sem, API);
// Constructs an IEEE single value from the bit image of the host float f.
3303 IEEEFloat::IEEEFloat(float f) {
3304 initFromAPInt(&semIEEEsingle, APInt::floatToBits(f));
// Constructs an IEEE double value from the bit image of the host double d.
3307 IEEEFloat::IEEEFloat(double d) {
3308 initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d));
// Appends the characters of Str to the end of Buffer.
3312 void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
3313 Buffer.append(Str.begin(), Str.end());
3316 /// Removes data from the given significand until it is no more
3317 /// precise than is required for the desired precision.
/// exp is increased by the number of decimal digits removed, and the
/// significand is divided by `divisor` and truncated to its active bits.
3318 void AdjustToPrecision(APInt &significand,
3319 int &exp, unsigned FormatPrecision) {
3320 unsigned bits = significand.getActiveBits();
3322 // 196/59 is a very slight overestimate of lg_2(10).
3323 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
3325 if (bits <= bitsRequired) return;
// Convert the surplus bit count into a whole number of decimal digits.
3327 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3328 if (!tensRemovable) return;
3330 exp += tensRemovable;
3332 APInt divisor(significand.getBitWidth(), 1);
3333 APInt powten(significand.getBitWidth(), 10);
// Walk the bits of tensRemovable from least significant upward.
3335 if (tensRemovable & 1)
3337 tensRemovable >>= 1;
3338 if (!tensRemovable) break;
3342 significand = significand.udiv(divisor);
3344 // Truncate the significand down to its active bit count.
3345 significand = significand.trunc(significand.getActiveBits());
// Shortens the decimal digit buffer to at most FormatPrecision digits.
// Digits are stored least significant first.  Dropped digits are erased from
// the front of the buffer and exp is increased by the number dropped.
3349 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
3350 int &exp, unsigned FormatPrecision) {
3351 unsigned N = buffer.size();
3352 if (N <= FormatPrecision) return;
3354 // The most significant figures are the last ones in the buffer.
3355 unsigned FirstSignificant = N - FormatPrecision;
3358 // FIXME: this probably shouldn't use 'round half up'.
3360 // Rounding down is just a truncation, except we also want to drop
3361 // trailing zeros from the new result.
3362 if (buffer[FirstSignificant - 1] < '5') {
3363 while (FirstSignificant < N && buffer[FirstSignificant] == '0')
3366 exp += FirstSignificant;
3367 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3371 // Rounding up requires a decimal add-with-carry. If we continue
3372 // the carry, the newly-introduced zeros will just be truncated.
3373 for (unsigned I = FirstSignificant; I != N; ++I) {
3374 if (buffer[I] == '9') {
3382 // If we carried through, we have exactly one digit of precision.
3383 if (FirstSignificant == N) {
3384 exp += FirstSignificant;
3386 buffer.push_back('1');
3390 exp += FirstSignificant;
3391 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
/// Appends a decimal rendering of this value to \p Str.
///
/// \param FormatPrecision  maximum number of significant decimal digits; when
///        zero, a value derived from semantics->precision is used instead.
/// \param FormatMaxPadding when zero, scientific notation is always chosen;
///        otherwise it bounds how far the exponent may stray before
///        scientific notation is used.
/// NOTE(review): many short lines (switch labels, else arms, closing braces,
/// single-character pushes) are absent from this listing.
3395 void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
3396 unsigned FormatMaxPadding) const {
// Non-finite values are written with fixed spellings.
3400 return append(Str, "-Inf");
3402 return append(Str, "+Inf");
3404 case fcNaN: return append(Str, "NaN");
// Presumably the zero case: with no padding budget it is written in
// scientific form (the surrounding case label is not shown here).
3410 if (!FormatMaxPadding)
3411 append(Str, "0.0E+0");
3423 // Decompose the number into an APInt and an exponent.
3424 int exp = exponent - ((int) semantics->precision - 1);
3425 APInt significand(semantics->precision,
3426 makeArrayRef(significandParts(),
3427 partCountForBits(semantics->precision)));
3429 // Set FormatPrecision if zero. We want to do this before we
3430 // truncate trailing zeros, as those are part of the precision.
3431 if (!FormatPrecision) {
3432 // We use enough digits so the number can be round-tripped back to an
3433 // APFloat. The formula comes from "How to Print Floating-Point Numbers
3434 // Accurately" by Steele and White.
3435 // FIXME: Using a formula based purely on the precision is conservative;
3436 // we can print fewer digits depending on the actual value being printed.
3438 // FormatPrecision = 2 + floor(significandBits / lg_2(10))
// 59 / 196 is the integer-arithmetic stand-in for 1 / lg_2(10).
3439 FormatPrecision = 2 + semantics->precision * 59 / 196;
3442 // Ignore trailing binary zeros.
3443 int trailingZeros = significand.countTrailingZeros();
3444 exp += trailingZeros;
3445 significand = significand.lshr(trailingZeros);
3447 // Change the exponent from 2^e to 10^e.
3450 } else if (exp > 0) {
// Positive binary exponent: widen the integer and shift the power of two in.
3452 significand = significand.zext(semantics->precision + exp);
3453 significand <<= exp;
3455 } else { /* exp < 0 */
3458 // We transform this using the identity:
3459 // (N)(2^-e) == (N)(5^e)(10^-e)
3460 // This means we have to multiply N (the significand) by 5^e.
3461 // To avoid overflow, we have to operate on numbers large
3462 // enough to store N * 5^e:
3463 // log2(N * 5^e) == log2(N) + e * log2(5)
3464 // <= semantics->precision + e * 137 / 59
3465 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
// The + 136 rounds the division by 59 upward.
3467 unsigned precision = semantics->precision + (137 * texp + 136) / 59;
3469 // Multiply significand by 5^e.
3470 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
3471 significand = significand.zext(precision);
3472 APInt five_to_the_i(precision, 5);
// Square-and-multiply over the bits of texp.
3474 if (texp & 1) significand *= five_to_the_i;
3478 five_to_the_i *= five_to_the_i;
// The first argument here is an APInt, so this resolves to an overload other
// than the SmallVectorImpl<char> one used further down.
3482 AdjustToPrecision(significand, exp, FormatPrecision);
3484 SmallVector<char, 256> buffer;
3487 unsigned precision = significand.getBitWidth();
3488 APInt ten(precision, 10);
3489 APInt digit(precision, 0);
// Peel off decimal digits, least significant first.
3491 bool inTrail = true;
3492 while (significand != 0) {
3493 // digit <- significand % 10
3494 // significand <- significand / 10
3495 APInt::udivrem(significand, ten, significand, digit);
3497 unsigned d = digit.getZExtValue();
3499 // Drop trailing zeros.
3500 if (inTrail && !d) exp++;
3502 buffer.push_back((char) ('0' + d));
3507 assert(!buffer.empty() && "no characters in buffer!");
3509 // Drop down to FormatPrecision.
3510 // TODO: don't do more precise calculations above than are required.
3511 AdjustToPrecision(buffer, exp, FormatPrecision);
3513 unsigned NDigits = buffer.size();
3515 // Check whether we should use scientific notation.
3516 bool FormatScientific;
3517 if (!FormatMaxPadding)
3518 FormatScientific = true;
3523 // But we shouldn't make the number look more precise than it is.
3524 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
3525 NDigits + (unsigned) exp > FormatPrecision);
3527 // Power of the most significant digit.
3528 int MSD = exp + (int) (NDigits - 1);
3531 FormatScientific = false;
3533 // 765e-5 == 0.00765
3535 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
3540 // Scientific formatting is pretty straightforward.
3541 if (FormatScientific) {
// Re-express exp as the power of the leading digit.
3542 exp += (NDigits - 1);
// The buffer is least significant first, so it is read from the back.
3544 Str.push_back(buffer[NDigits-1]);
3549 for (unsigned I = 1; I != NDigits; ++I)
3550 Str.push_back(buffer[NDigits-1-I]);
// Exponent sign, then its magnitude; digits are gathered in reverse in expbuf
// and emitted back to front.
3553 Str.push_back(exp >= 0 ? '+' : '-');
3554 if (exp < 0) exp = -exp;
3555 SmallVector<char, 6> expbuf;
3557 expbuf.push_back((char) ('0' + (exp % 10)));
3560 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
3561 Str.push_back(expbuf[E-1-I]);
3565 // Non-scientific, positive exponents.
3567 for (unsigned I = 0; I != NDigits; ++I)
3568 Str.push_back(buffer[NDigits-1-I]);
3569 for (unsigned I = 0; I != (unsigned) exp; ++I)
3574 // Non-scientific, negative exponents.
3576 // The number of digits to the left of the decimal point.
3577 int NWholeDigits = exp + (int) NDigits;
3580 if (NWholeDigits > 0) {
3581 for (; I != (unsigned) NWholeDigits; ++I)
3582 Str.push_back(buffer[NDigits-I-1]);
3585 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
3589 for (unsigned Z = 1; Z != NZeros; ++Z)
// Remaining digits form the fractional part.
3593 for (; I != NDigits; ++I)
3594 Str.push_back(buffer[NDigits-I-1]);
/// Tests whether this value has an exactly representable reciprocal and, on
/// the visible success path, stores it through \p inv.
///
/// The checks shown require a finite non-zero power of two whose reciprocal
/// divides with status opOK and is not a denormal.
/// NOTE(review): the return statements and any null check on \p inv are not
/// shown in this listing.
3597 bool IEEEFloat::getExactInverse(APFloat *inv) const {
3598 // Special floats and denormals have no exact inverse.
3599 if (!isFiniteNonZero())
3602 // Check that the number is a power of two by making sure that only the
3603 // integer bit is set in the significand.
3604 if (significandLSB() != semantics->precision - 1)
// Compute 1 / *this in the same semantics.
3608 IEEEFloat reciprocal(*semantics, 1ULL);
3609 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
3612 // Avoid multiplication with a denormal, it is not safe on all platforms and
3613 // may be slower than a normal division.
3614 if (reciprocal.isDenormal())
// The reciprocal of a power of two must itself be a power of two.
3617 assert(reciprocal.isFiniteNonZero() &&
3618 reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
3621 *inv = APFloat(reciprocal, *semantics);
/// Returns true when significand bit (precision - 2) is clear, which the
/// comment below identifies as the signaling-NaN encoding.
/// NOTE(review): lines between the signature and the return are not shown in
/// this listing (possibly a guard for non-NaN values); confirm.
3626 bool IEEEFloat::isSignaling() const {
3630 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
3631 // first bit of the trailing significand being 0.
3632 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
3635 /// IEEE-754R 2008 5.3.1: nextUp/nextDown.
3637 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
3638 /// appropriate sign switching before/after the computation.
///
/// \param nextDown selects nextDown instead of nextUp.
/// \returns opOK on the paths shown, or opInvalidOp when the input is a
///          signaling NaN.
/// NOTE(review): case labels, break statements, sign changes and the final
/// return are absent from this listing.
3639 IEEEFloat::opStatus IEEEFloat::next(bool nextDown) {
3640 // If we are performing nextDown, swap sign so we have -x.
3644 // Compute nextUp(x)
3645 opStatus result = opOK;
3647 // Handle each float category separately.
3650 // nextUp(+inf) = +inf
3653 // nextUp(-inf) = -getLargest()
3657 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
3658 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
3659 // change the payload.
3660 if (isSignaling()) {
3661 result = opInvalidOp;
3662 // For consistency, propagate the sign of the sNaN to the qNaN.
3663 makeNaN(false, isNegative(), nullptr);
3667 // nextUp(pm 0) = +getSmallest()
3668 makeSmallest(false);
3671 // nextUp(-getSmallest()) = -0
3672 if (isSmallest() && isNegative()) {
3673 APInt::tcSet(significandParts(), 0, partCount());
3679 // nextUp(getLargest()) == INFINITY
3680 if (isLargest() && !isNegative()) {
// Same field values that makeInf writes: cleared significand, exponent one
// past maxExponent.
3681 APInt::tcSet(significandParts(), 0, partCount());
3682 category = fcInfinity;
3683 exponent = semantics->maxExponent + 1;
3687 // nextUp(normal) == normal + inc.
3689 // If we are negative, we need to decrement the significand.
3691 // We only cross a binade boundary that requires adjusting the exponent
3693 // 1. exponent != semantics->minExponent. This implies we are not in the
3694 // smallest binade or are dealing with denormals.
3695 // 2. Our significand excluding the integral bit is all zeros.
3696 bool WillCrossBinadeBoundary =
3697 exponent != semantics->minExponent && isSignificandAllZeros();
3699 // Decrement the significand.
3701 // We always do this since:
3702 // 1. If we are dealing with a non-binade decrement, by definition we
3703 // just decrement the significand.
3704 // 2. If we are dealing with a normal -> normal binade decrement, since
3705 // we have an explicit integral bit the fact that all bits but the
3706 // integral bit are zero implies that subtracting one will yield a
3707 // significand with 0 integral bit and 1 in all other spots. Thus we
3708 // must just adjust the exponent and set the integral bit to 1.
3709 // 3. If we are dealing with a normal -> denormal binade decrement,
3710 // since we set the integral bit to 0 when we represent denormals, we
3711 // just decrement the significand.
3712 integerPart *Parts = significandParts();
3713 APInt::tcDecrement(Parts, partCount());
3715 if (WillCrossBinadeBoundary) {
3716 // Our result is a normal number. Do the following:
3717 // 1. Set the integral bit to 1.
3718 // 2. Decrement the exponent.
3719 APInt::tcSetBit(Parts, semantics->precision - 1);
3723 // If we are positive, we need to increment the significand.
3725 // We only cross a binade boundary that requires adjusting the exponent if
3726 // the input is not a denormal and all of said input's significand bits
3727 // are set. If all of said conditions are true: clear the significand, set
3728 // the integral bit to 1, and increment the exponent. If we have a
3729 // denormal always increment since moving denormals and the numbers in the
3730 // smallest normal binade have the same exponent in our representation.
3731 bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes();
3733 if (WillCrossBinadeBoundary) {
3734 integerPart *Parts = significandParts();
3735 APInt::tcSet(Parts, 0, partCount());
3736 APInt::tcSetBit(Parts, semantics->precision - 1);
// The getLargest case was handled above, so the exponent still has headroom.
3737 assert(exponent != semantics->maxExponent &&
3738 "We can not increment an exponent beyond the maxExponent allowed"
3739 " by the given floating point semantics.");
3742 incrementSignificand();
3748 // If we are performing nextDown, swap sign so we have -nextUp(-x)
/// Rewrites this value as an infinity: category fcInfinity, exponent one past
/// maxExponent, significand cleared.
/// NOTE(review): the line consuming \p Negative is not shown in this listing.
3755 void IEEEFloat::makeInf(bool Negative) {
3756 category = fcInfinity;
3758 exponent = semantics->maxExponent + 1;
3759 APInt::tcSet(significandParts(), 0, partCount());
/// Rewrites this value as a zero: exponent one below minExponent and a
/// cleared significand.
/// NOTE(review): the lines setting the category and consuming \p Negative are
/// not shown in this listing.
3762 void IEEEFloat::makeZero(bool Negative) {
3765 exponent = semantics->minExponent-1;
3766 APInt::tcSet(significandParts(), 0, partCount());
/// Sets significand bit (precision - 2), the same bit isSignaling() tests,
/// so the value no longer reads as signaling.
3769 void IEEEFloat::makeQuiet() {
3771 APInt::tcSetBit(significandParts(), semantics->precision - 2);
/// Returns the binary exponent of \p Arg.
///
/// The sentinels IEK_NaN, IEK_Zero and IEK_Inf are returned for the special
/// categories; non-denormal values return their stored exponent directly.
/// NOTE(review): the conditions guarding the first two returns are not shown
/// in this listing.
3774 int ilogb(const IEEEFloat &Arg) {
3776 return IEEEFloat::IEK_NaN;
3778 return IEEEFloat::IEK_Zero;
3779 if (Arg.isInfinity())
3780 return IEEEFloat::IEK_Inf;
3781 if (!Arg.isDenormal())
3782 return Arg.exponent;
// Denormal: lift a copy by SignificandBits, let normalize() settle the
// exponent, then take the bias back out.
3784 IEEEFloat Normalized(Arg);
3785 int SignificandBits = Arg.getSemantics().precision - 1;
3787 Normalized.exponent += SignificandBits;
3788 Normalized.normalize(IEEEFloat::rmNearestTiesToEven, lfExactlyZero);
3789 return Normalized.exponent - SignificandBits;
/// Adds \p Exp (clamped) to the exponent of \p X and renormalizes using
/// \p RoundingMode.
///
/// X is taken by value, so the caller's object is untouched.
/// NOTE(review): the return statement is not shown in this listing.
3792 IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode RoundingMode) {
3793 auto MaxExp = X.getSemantics().maxExponent;
3794 auto MinExp = X.getSemantics().minExponent;
3796 // If Exp is wildly out-of-scale, simply adding it to X.exponent will
3797 // overflow; clamp it to a safe range before adding, but ensure that the range
3798 // is large enough that the clamp does not change the result. The range we
3799 // need to support is the difference between the largest possible exponent and
3800 // the normalized exponent of half the smallest denormal.
3802 int SignificandBits = X.getSemantics().precision - 1;
3803 int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;
3805 // Clamp to one past the range ends to let normalize handle overflow.
3806 X.exponent += std::min(std::max(Exp, -MaxIncrement - 1), MaxIncrement);
3807 X.normalize(RoundingMode, lfExactlyZero);
/// Splits \p Val into a fraction (the return value) and a binary exponent
/// written to \p Exp.
///
/// On the final path Exp is 0 for the IEK_Zero sentinel and otherwise one more
/// than the value held before; the fraction is scalbn(Val, -Exp, RM).
/// NOTE(review): the statement that first assigns Exp and the early returns
/// for NaN and infinity are not shown in this listing.
3813 IEEEFloat frexp(const IEEEFloat &Val, int &Exp, IEEEFloat::roundingMode RM) {
3816 // Quiet signalling nans.
3817 if (Exp == IEEEFloat::IEK_NaN) {
3818 IEEEFloat Quiet(Val);
3823 if (Exp == IEEEFloat::IEK_Inf)
3826 // 1 is added because frexp is defined to return a normalized fraction in
3827 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
3828 Exp = Exp == IEEEFloat::IEK_Zero ? 0 : Exp + 1;
3829 return scalbn(Val, -Exp, RM);
/// Builds a double-double whose two components are default-constructed
/// semIEEEdouble values. Only semPPCDoubleDouble is accepted (asserted).
/// NOTE(review): the initializer for Semantics is not shown in this listing.
3832 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S)
3834 Floats(new APFloat[2]{APFloat(semIEEEdouble), APFloat(semIEEEdouble)}) {
3835 assert(Semantics == &semPPCDoubleDouble);
/// Same as the single-argument constructor, but both semIEEEdouble components
/// are created with the uninitialized tag.
/// NOTE(review): the initializer for Semantics is not shown in this listing.
3838 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag)
3840 Floats(new APFloat[2]{APFloat(semIEEEdouble, uninitialized),
3841 APFloat(semIEEEdouble, uninitialized)}) {
3842 assert(Semantics == &semPPCDoubleDouble);
/// Builds a double-double from the integer \p I: the first component is
/// constructed from I, the second is a default semIEEEdouble value.
3845 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I)
3846 : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I),
3847 APFloat(semIEEEdouble)}) {
3848 assert(Semantics == &semPPCDoubleDouble);
/// Builds a double-double from a raw bit pattern: raw word 0 of \p I becomes
/// the bits of Floats[0] and raw word 1 the bits of Floats[1].
/// NOTE(review): assumes I holds at least two words (bitcastToAPInt in this
/// file produces 128 bits); no width check is visible here. Confirm.
3851 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I)
3853 Floats(new APFloat[2]{
3854 APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])),
3855 APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
3856 assert(Semantics == &semPPCDoubleDouble);
/// Builds a double-double by moving two existing APFloats in as the
/// components. Both must already use semIEEEdouble (asserted).
/// NOTE(review): the remainder of the parameter list and the Semantics
/// initializer are not shown in this listing.
3859 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First,
3862 Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
3863 assert(Semantics == &semPPCDoubleDouble);
3864 assert(&Floats[0].getSemantics() == &semIEEEdouble);
3865 assert(&Floats[1].getSemantics() == &semIEEEdouble);
/// Copy constructor: when RHS owns components, both are copied into a freshly
/// allocated pair.
/// NOTE(review): the alternative arm of the conditional (RHS.Floats null) is
/// not shown in this listing.
3868 DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS)
3869 : Semantics(RHS.Semantics),
3870 Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]),
3871 APFloat(RHS.Floats[1])}
3873 assert(Semantics == &semPPCDoubleDouble);
/// Move constructor: takes over the component storage of \p RHS and marks the
/// source with semBogus semantics.
3876 DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS)
3877 : Semantics(RHS.Semantics), Floats(std::move(RHS.Floats)) {
3878 RHS.Semantics = &semBogus;
3879 assert(Semantics == &semPPCDoubleDouble);
/// Copy assignment.
///
/// When both sides share semantics and RHS owns components, the two
/// components are assigned in place. Otherwise, unless self-assigning, this
/// object is destroyed and copy-constructed again from RHS.
/// NOTE(review): the return statement is not shown in this listing.
3882 DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) {
3883 if (Semantics == RHS.Semantics && RHS.Floats) {
3884 Floats[0] = RHS.Floats[0];
3885 Floats[1] = RHS.Floats[1];
3886 } else if (this != &RHS) {
// Destroy-then-placement-new reuses the copy constructor for the slow path.
3887 this->~DoubleAPFloat();
3888 new (this) DoubleAPFloat(RHS);
3893 // Implement addition, subtraction, multiplication and division based on:
3894 // "Software for Doubled-Precision Floating-Point Computations",
3895 // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
//
// addImpl adds the double-double (a, aa) to (c, cc) and stores the result in
// Floats[0] / Floats[1]. The status of every component operation is OR-ed
// into Status, which is returned cast to opStatus.
// NOTE(review): the declarations of Status, z, zz and q, the tail of the
// parameter list and several else / closing-brace lines are not shown in this
// listing.
3896 APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa,
3897 const APFloat &c, const APFloat &cc,
3901 Status |= z.add(c, RM);
// Non-finite high sum: a result that is not an infinity is final here, with
// the low component forced to +0.
3902 if (!z.isFinite()) {
3903 if (!z.isInfinity()) {
3904 Floats[0] = std::move(z);
3905 Floats[1].makeZero(/* Neg = */ false);
3906 return (opStatus)Status;
// Infinity: retry the sum adding the larger-magnitude high part last.
3909 auto AComparedToC = a.compareAbsoluteValue(c);
3911 Status |= z.add(aa, RM);
3912 if (AComparedToC == APFloat::cmpGreaterThan) {
3913 // z = cc + aa + c + a;
3914 Status |= z.add(c, RM);
3915 Status |= z.add(a, RM);
3917 // z = cc + aa + a + c;
3918 Status |= z.add(a, RM);
3919 Status |= z.add(c, RM);
3921 if (!z.isFinite()) {
3922 Floats[0] = std::move(z);
3923 Floats[1].makeZero(/* Neg = */ false);
3924 return (opStatus)Status;
3928 Status |= zz.add(cc, RM);
3929 if (AComparedToC == APFloat::cmpGreaterThan) {
3930 // Floats[1] = a - z + c + zz;
3932 Status |= Floats[1].subtract(z, RM);
3933 Status |= Floats[1].add(c, RM);
3934 Status |= Floats[1].add(zz, RM);
3936 // Floats[1] = c - z + a + zz;
3938 Status |= Floats[1].subtract(z, RM);
3939 Status |= Floats[1].add(a, RM);
3940 Status |= Floats[1].add(zz, RM);
3945 Status |= q.subtract(z, RM);
3947 // zz = q + c + (a - (q + z)) + aa + cc;
3948 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
3950 Status |= zz.add(c, RM);
3951 Status |= q.add(z, RM);
3952 Status |= q.subtract(a, RM);
3954 Status |= zz.add(q, RM);
3955 Status |= zz.add(aa, RM);
3956 Status |= zz.add(cc, RM);
// A +0 correction term means z is already the full answer.
3957 if (zz.isZero() && !zz.isNegative()) {
3958 Floats[0] = std::move(z);
3959 Floats[1].makeZero(/* Neg = */ false);
3963 Status |= Floats[0].add(zz, RM);
3964 if (!Floats[0].isFinite()) {
3965 Floats[1].makeZero(/* Neg = */ false);
3966 return (opStatus)Status;
// Low component = (z - Floats[0]) + zz.
3968 Floats[1] = std::move(z);
3969 Status |= Floats[1].subtract(Floats[0], RM);
3970 Status |= Floats[1].add(zz, RM);
3972 return (opStatus)Status;
/// Adds \p LHS and \p RHS into Out, peeling off NaN, zero and infinity operands
/// before delegating the normal + normal case to Out.addImpl.
///
/// Infinities of opposite sign produce a NaN in Out.
/// NOTE(review): the bodies of most special-case branches, the tail of the
/// parameter list and the declaration of CC are not shown in this listing.
3975 APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS,
3976 const DoubleAPFloat &RHS,
3979 if (LHS.getCategory() == fcNaN) {
3983 if (RHS.getCategory() == fcNaN) {
3987 if (LHS.getCategory() == fcZero) {
3991 if (RHS.getCategory() == fcZero) {
3995 if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity &&
3996 LHS.isNegative() != RHS.isNegative()) {
3997 Out.makeNaN(false, Out.isNegative(), nullptr);
4000 if (LHS.getCategory() == fcInfinity) {
4004 if (RHS.getCategory() == fcInfinity) {
4008 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal);
// Work on copies of the components; Out may be the same object as LHS (add()
// passes *this for both).
4010 APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]),
4012 assert(&A.getSemantics() == &semIEEEdouble);
4013 assert(&AA.getSemantics() == &semIEEEdouble);
4014 assert(&C.getSemantics() == &semIEEEdouble);
4015 assert(&CC.getSemantics() == &semIEEEdouble);
4016 assert(&Out.Floats[0].getSemantics() == &semIEEEdouble);
4017 assert(&Out.Floats[1].getSemantics() == &semIEEEdouble);
4018 return Out.addImpl(A, AA, C, CC, RM);
/// In-place addition: *this += RHS, implemented by addWithSpecial with *this
/// as both the left operand and the destination.
4021 APFloat::opStatus DoubleAPFloat::add(const DoubleAPFloat &RHS,
4023 return addWithSpecial(*this, RHS, *this, RM);
/// In-place subtraction built on add().
/// NOTE(review): only the add call is visible; any sign changes around it and
/// the return are not shown in this listing.
4026 APFloat::opStatus DoubleAPFloat::subtract(const DoubleAPFloat &RHS,
4029 auto Ret = add(RHS, RM);
/// In-place multiplication: *this *= RHS.
///
/// Special categories are resolved first (zero times infinity yields NaN);
/// the normal case multiplies component-wise and accumulates the status of
/// each step into Status.
/// NOTE(review): the declarations of Out, Status, T, Tau, V, W and U, the
/// bodies of the special-case branches, and the terminator of the block
/// comment below are not shown in this listing.
4034 APFloat::opStatus DoubleAPFloat::multiply(const DoubleAPFloat &RHS,
4035 APFloat::roundingMode RM) {
4036 const auto &LHS = *this;
4038 /* Interesting observation: For special categories, finding the lowest
4039 common ancestor of the following layered graph gives the correct
4048 e.g. NaN * NaN = NaN
4050 Normal * Zero = Zero
4053 if (LHS.getCategory() == fcNaN) {
4057 if (RHS.getCategory() == fcNaN) {
4061 if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) ||
4062 (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) {
4063 Out.makeNaN(false, false, nullptr);
4066 if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) {
4070 if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) {
4074 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal &&
4075 "Special cases not handled exhaustively");
4078 APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1];
4081 Status |= T.multiply(C, RM);
4082 if (!T.isFiniteNonZero()) {
4084 Floats[1].makeZero(/* Neg = */ false);
4085 return (opStatus)Status;
4088 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
4091 Status |= Tau.fusedMultiplyAdd(C, T, RM);
// Cross terms: V picks up a product with D, W a product with C, then they
// are summed into Tau.
4096 Status |= V.multiply(D, RM);
4099 Status |= W.multiply(C, RM);
4100 Status |= V.add(W, RM);
4102 Status |= Tau.add(V, RM);
4106 Status |= U.add(Tau, RM);
4109 if (!U.isFinite()) {
4110 Floats[1].makeZero(/* Neg = */ false);
4112 // Floats[1] = (t - u) + tau
4113 Status |= T.subtract(U, RM);
4114 Status |= T.add(Tau, RM);
4117 return (opStatus)Status;
/// In-place division, delegated to the legacy representation: both operands
/// are rebuilt as semPPCDoubleDoubleLegacy values from their bit patterns,
/// divided there, and the quotient's bits are loaded back into *this.
/// NOTE(review): the line capturing the status and the return are not shown
/// in this listing.
4120 APFloat::opStatus DoubleAPFloat::divide(const DoubleAPFloat &RHS,
4121 APFloat::roundingMode RM) {
4122 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4123 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4125 Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM);
4126 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
/// In-place remainder, delegated to the legacy representation: operands are
/// rebuilt as semPPCDoubleDoubleLegacy values from their bit patterns and the
/// result's bits are loaded back into *this.
/// NOTE(review): the line capturing the status and the return are not shown
/// in this listing.
4130 APFloat::opStatus DoubleAPFloat::remainder(const DoubleAPFloat &RHS) {
4131 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4132 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4134 Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4135 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
/// In-place mod, delegated to the legacy representation: operands are rebuilt
/// as semPPCDoubleDoubleLegacy values from their bit patterns and the result's
/// bits are loaded back into *this. Ret holds the delegated status.
/// NOTE(review): the return statement is not shown in this listing.
4139 APFloat::opStatus DoubleAPFloat::mod(const DoubleAPFloat &RHS) {
4140 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4141 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4142 auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4143 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
/// In-place fused multiply-add, delegated to the legacy representation:
/// *this, \p Multiplicand and \p Addend are rebuilt as
/// semPPCDoubleDoubleLegacy values from their bit patterns and the result's
/// bits are loaded back into *this. Ret holds the delegated status.
/// NOTE(review): the return type line and the return statement are not shown
/// in this listing.
4148 DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand,
4149 const DoubleAPFloat &Addend,
4150 APFloat::roundingMode RM) {
4151 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4152 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4153 auto Ret = Tmp.fusedMultiplyAdd(
4154 APFloat(semPPCDoubleDoubleLegacy, Multiplicand.bitcastToAPInt()),
4155 APFloat(semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()), RM);
4156 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
/// In-place round-to-integral, delegated to the legacy representation via a
/// bit-pattern round trip. Ret holds the delegated status.
/// NOTE(review): the return statement is not shown in this listing.
4160 APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) {
4161 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4162 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4163 auto Ret = Tmp.roundToIntegral(RM);
4164 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
/// Negates the value by flipping the sign of both components.
4168 void DoubleAPFloat::changeSign() {
4169 Floats[0].changeSign();
4170 Floats[1].changeSign();
/// Compares magnitudes of two double-doubles.
///
/// The high components decide unless they compare equal. On a tie the low
/// components are compared, taking into account whether each low component
/// has the opposite sign of its high component (Against / RHSAgainst).
/// NOTE(review): the return type line and several return statements are not
/// shown in this listing.
4174 DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat &RHS) const {
4175 auto Result = Floats[0].compareAbsoluteValue(RHS.Floats[0]);
4176 if (Result != cmpEqual)
4178 Result = Floats[1].compareAbsoluteValue(RHS.Floats[1]);
4179 if (Result == cmpLessThan || Result == cmpGreaterThan) {
// True when the low component has the opposite sign to the high one.
4180 auto Against = Floats[0].isNegative() ^ Floats[1].isNegative();
4181 auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative();
4182 if (Against && !RHSAgainst)
4184 if (!Against && RHSAgainst)
4185 return cmpGreaterThan;
4186 if (!Against && !RHSAgainst)
4188 if (Against && RHSAgainst)
// Both low parts oppose their high parts: swap less-than and greater-than.
4189 return (cmpResult)(cmpLessThan + cmpGreaterThan - Result);
/// The category of a double-double is that of its first component.
4194 APFloat::fltCategory DoubleAPFloat::getCategory() const {
4195 return Floats[0].getCategory();
/// The sign of a double-double is that of its first component.
4198 bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); }
/// Sets the value to an infinity of the requested sign: the first component
/// becomes the infinity and the second becomes +0.
4200 void DoubleAPFloat::makeInf(bool Neg) {
4201 Floats[0].makeInf(Neg);
4202 Floats[1].makeZero(/* Neg = */ false);
/// Sets the value to a zero of the requested sign: the first component carries
/// the sign and the second becomes +0.
4205 void DoubleAPFloat::makeZero(bool Neg) {
4206 Floats[0].makeZero(Neg);
4207 Floats[1].makeZero(/* Neg = */ false);
/// Loads both components from fixed 64-bit patterns, presumably the largest
/// finite double-double, judging by the function name.
/// NOTE(review): the handling of \p Neg is not shown in this listing.
4210 void DoubleAPFloat::makeLargest(bool Neg) {
4211 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4212 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
4213 Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
/// Sets the first component to its own makeSmallest(Neg) value and the second
/// component to +0.
4218 void DoubleAPFloat::makeSmallest(bool Neg) {
4219 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4220 Floats[0].makeSmallest(Neg);
4221 Floats[1].makeZero(/* Neg = */ false);
/// Loads the first component from a fixed 64-bit pattern and sets the second
/// component to +0.
/// NOTE(review): the condition guarding changeSign() (presumably Neg) is not
/// shown in this listing.
4224 void DoubleAPFloat::makeSmallestNormalized(bool Neg) {
4225 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4226 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull));
4228 Floats[0].changeSign();
4229 Floats[1].makeZero(/* Neg = */ false);
/// Sets the value to a NaN: the arguments are forwarded to the first
/// component and the second component becomes +0.
4232 void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) {
4233 Floats[0].makeNaN(SNaN, Neg, fill);
4234 Floats[1].makeZero(/* Neg = */ false);
/// Ordered comparison: the first components decide unless they compare equal,
/// in which case the second components decide.
/// NOTE(review): the final return for the non-equal case is not shown in this
/// listing.
4237 APFloat::cmpResult DoubleAPFloat::compare(const DoubleAPFloat &RHS) const {
4238 auto Result = Floats[0].compare(RHS.Floats[0]);
4239 // |Float[0]| > |Float[1]|
4240 if (Result == APFloat::cmpEqual)
4241 return Floats[1].compare(RHS.Floats[1]);
/// True only when both components are bitwise equal to their counterparts in
/// \p RHS.
4245 bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat &RHS) const {
4246 return Floats[0].bitwiseIsEqual(RHS.Floats[0]) &&
4247 Floats[1].bitwiseIsEqual(RHS.Floats[1]);
/// Hash of a double-double: the first return combines the hashes of both
/// components, the second hashes only the semantics pointer.
/// NOTE(review): the condition selecting between the two returns is not shown
/// in this listing.
4250 hash_code hash_value(const DoubleAPFloat &Arg) {
4252 return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1]));
4253 return hash_combine(Arg.Semantics);
/// Returns the 128-bit pattern of this value: the first raw word of
/// Floats[0] followed by the first raw word of Floats[1].
/// NOTE(review): the declaration of the Data array is not shown in this
/// listing.
4256 APInt DoubleAPFloat::bitcastToAPInt() const {
4257 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4259 Floats[0].bitcastToAPInt().getRawData()[0],
4260 Floats[1].bitcastToAPInt().getRawData()[0],
4262 return APInt(128, 2, Data);
/// Parses \p S through a semPPCDoubleDoubleLegacy temporary and loads the
/// resulting bit pattern into *this. Ret holds the parse status.
/// NOTE(review): the tail of the parameter list and the return are not shown
/// in this listing.
4265 APFloat::opStatus DoubleAPFloat::convertFromString(StringRef S,
4267 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4268 APFloat Tmp(semPPCDoubleDoubleLegacy);
4269 auto Ret = Tmp.convertFromString(S, RM);
4270 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
/// nextUp / nextDown, delegated to the legacy representation via a
/// bit-pattern round trip. Ret holds the delegated status.
/// NOTE(review): the return statement is not shown in this listing.
4274 APFloat::opStatus DoubleAPFloat::next(bool nextDown) {
4275 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4276 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4277 auto Ret = Tmp.next(nextDown);
4278 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
/// Converts to an integer by building a semPPCDoubleDoubleLegacy temporary
/// from this value's bits and forwarding every argument to its
/// convertToInteger. *this is not modified (const member).
/// NOTE(review): the return type line is not shown in this listing.
4283 DoubleAPFloat::convertToInteger(MutableArrayRef<integerPart> Input,
4284 unsigned int Width, bool IsSigned,
4285 roundingMode RM, bool *IsExact) const {
4286 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4287 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
4288 .convertToInteger(Input, Width, IsSigned, RM, IsExact);
/// Converts \p Input through a semPPCDoubleDoubleLegacy temporary and loads
/// the resulting bit pattern into *this. Ret holds the conversion status.
/// NOTE(review): the tail of the parameter list and the return are not shown
/// in this listing.
4291 APFloat::opStatus DoubleAPFloat::convertFromAPInt(const APInt &Input,
4294 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4295 APFloat Tmp(semPPCDoubleDoubleLegacy);
4296 auto Ret = Tmp.convertFromAPInt(Input, IsSigned, RM);
4297 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
/// Converts a sign-extended integer through a semPPCDoubleDoubleLegacy
/// temporary and loads the resulting bit pattern into *this. Ret holds the
/// conversion status.
/// NOTE(review): the return type line and the return statement are not shown
/// in this listing.
4302 DoubleAPFloat::convertFromSignExtendedInteger(const integerPart *Input,
4303 unsigned int InputSize,
4304 bool IsSigned, roundingMode RM) {
4305 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4306 APFloat Tmp(semPPCDoubleDoubleLegacy);
4307 auto Ret = Tmp.convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM);
4308 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
/// Converts a zero-extended integer through a semPPCDoubleDoubleLegacy
/// temporary and loads the resulting bit pattern into *this. Ret holds the
/// conversion status.
/// NOTE(review): the return type line and the return statement are not shown
/// in this listing.
4313 DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart *Input,
4314 unsigned int InputSize,
4315 bool IsSigned, roundingMode RM) {
4316 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4317 APFloat Tmp(semPPCDoubleDoubleLegacy);
4318 auto Ret = Tmp.convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM);
4319 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
/// Writes a hexadecimal rendering into \p DST by forwarding to a
/// semPPCDoubleDoubleLegacy temporary built from this value's bits, and
/// returns whatever that call returns.
/// NOTE(review): the UpperCase parameter declaration is not shown in this
/// listing.
4323 unsigned int DoubleAPFloat::convertToHexString(char *DST,
4324 unsigned int HexDigits,
4326 roundingMode RM) const {
4327 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4328 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
4329 .convertToHexString(DST, HexDigits, UpperCase, RM);
/// A normal-category double-double is reported as denormal when either
/// component is denormal, or when the first component differs from the
/// sum of both components (see the inline comment).
4332 bool DoubleAPFloat::isDenormal() const {
4333 return getCategory() == fcNormal &&
4334 (Floats[0].isDenormal() || Floats[1].isDenormal() ||
4335 // (double)(Hi + Lo) == Hi defines a normal number.
4336 Floats[0].compare(Floats[0] + Floats[1]) != cmpEqual);
/// True when this value compares equal to makeSmallest() of the same sign.
/// The comparison is made against a scratch copy, so *this is untouched.
/// NOTE(review): the early return for non-normal categories is not shown in
/// this listing.
4339 bool DoubleAPFloat::isSmallest() const {
4340 if (getCategory() != fcNormal)
4342 DoubleAPFloat Tmp(*this);
4343 Tmp.makeSmallest(this->isNegative());
4344 return Tmp.compare(*this) == cmpEqual;
/// True when this value compares equal to makeLargest() of the same sign.
/// The comparison is made against a scratch copy, so *this is untouched.
/// NOTE(review): the early return for non-normal categories is not shown in
/// this listing.
4347 bool DoubleAPFloat::isLargest() const {
4348 if (getCategory() != fcNormal)
4350 DoubleAPFloat Tmp(*this);
4351 Tmp.makeLargest(this->isNegative());
4352 return Tmp.compare(*this) == cmpEqual;
/// Sums both components into a semPPCDoubleDoubleLegacy temporary and asks
/// that temporary whether it is an integer. The add statuses are discarded.
/// NOTE(review): Tmp uses semPPCDoubleDoubleLegacy while the components are
/// asserted elsewhere in this file to be semIEEEdouble; confirm that add()
/// accepts operands with differing semantics.
4355 bool DoubleAPFloat::isInteger() const {
4356 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4357 APFloat Tmp(semPPCDoubleDoubleLegacy);
4358 (void)Tmp.add(Floats[0], rmNearestTiesToEven);
4359 (void)Tmp.add(Floats[1], rmNearestTiesToEven);
4360 return Tmp.isInteger();
/// Appends a decimal rendering to \p Str by forwarding to a
/// semPPCDoubleDoubleLegacy temporary built from this value's bits.
4363 void DoubleAPFloat::toString(SmallVectorImpl<char> &Str,
4364 unsigned FormatPrecision,
4365 unsigned FormatMaxPadding) const {
4366 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4367 APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
4368 .toString(Str, FormatPrecision, FormatMaxPadding);
/// Exact-inverse query delegated to the legacy representation.
///
/// One path only asks whether an inverse exists (nullptr is passed on); the
/// other obtains the inverse as a legacy value and stores it through \p inv
/// re-tagged as semPPCDoubleDouble.
/// NOTE(review): the condition choosing between the two paths (presumably a
/// null check on inv) and the final return are not shown in this listing.
4371 bool DoubleAPFloat::getExactInverse(APFloat *inv) const {
4372 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4373 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4375 return Tmp.getExactInverse(nullptr);
4376 APFloat Inv(semPPCDoubleDoubleLegacy);
4377 auto Ret = Tmp.getExactInverse(&Inv);
4378 *inv = APFloat(semPPCDoubleDouble, Inv.bitcastToAPInt());
/// Scales a double-double by 2^Exp by applying scalbn to each component
/// independently with the same exponent and rounding mode.
4382 DoubleAPFloat scalbn(DoubleAPFloat Arg, int Exp, APFloat::roundingMode RM) {
4383 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4384 return DoubleAPFloat(semPPCDoubleDouble, scalbn(Arg.Floats[0], Exp, RM),
4385 scalbn(Arg.Floats[1], Exp, RM));
/// frexp for double-doubles: \p Exp and the first fraction come from frexp on
/// the first component. For normal-category inputs the second component is
/// scaled by the same -Exp; otherwise it is passed through unchanged.
4388 DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
4389 APFloat::roundingMode RM) {
4390 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4391 APFloat First = frexp(Arg.Floats[0], Exp, RM);
4392 APFloat Second = Arg.Floats[1];
4393 if (Arg.getCategory() == APFloat::fcNormal)
4394 Second = scalbn(Second, -Exp, RM);
4395 return DoubleAPFloat(semPPCDoubleDouble, std::move(First), std::move(Second));
4398 } // End detail namespace
// Constructs the storage from an IEEEFloat, choosing the member to initialize
// from the layout that Semantics uses. For the IEEEFloat layout F is moved
// into the IEEE member; for the DoubleAPFloat layout F becomes the first
// component and a default semIEEEdouble value the second.
// NOTE(review): in the DoubleAPFloat branch F is passed through std::move and
// F.getSemantics() is read inside the same argument list. C++ does not fix the
// order in which function arguments are evaluated, so confirm the semantics
// cannot be read from an already moved-from F (caching the reference in a
// local first would remove the doubt).
// NOTE(review): the placement-new target line of the second branch and the
// return statements are not shown in this listing.
4400 APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
4401 if (usesLayout<IEEEFloat>(Semantics)) {
4402 new (&IEEE) IEEEFloat(std::move(F));
4405 if (usesLayout<DoubleAPFloat>(Semantics)) {
4407 DoubleAPFloat(Semantics, APFloat(std::move(F), F.getSemantics()),
4408 APFloat(semIEEEdouble));
4411 llvm_unreachable("Unexpected semantics");
// Forwards to convertFromString on whichever representation (U.IEEE or
// U.Double) matches the current semantics, via the dispatch macro defined at
// the top of this file.
4414 APFloat::opStatus APFloat::convertFromString(StringRef Str, roundingMode RM) {
4415 APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str, RM));
// Hashes an APFloat by delegating to the hash_value overload of the active
// representation; any other layout is treated as unreachable.
4418 hash_code hash_value(const APFloat &Arg) {
4419 if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics()))
4420 return hash_value(Arg.U.IEEE);
4421 if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics()))
4422 return hash_value(Arg.U.Double);
4423 llvm_unreachable("Unexpected semantics");
// Constructs a value in the given semantics and then parses S into it using
// round-to-nearest-even. The status returned by the parse is discarded.
4426 APFloat::APFloat(const fltSemantics &Semantics, StringRef S)
4427 : APFloat(Semantics) {
4428 convertFromString(S, rmNearestTiesToEven);
// Converts this value in place to ToSemantics.
//
// Four layout combinations are handled:
//   - same semantics: early exit (the returned status is not shown here);
//   - IEEE to IEEE: forwarded to IEEEFloat::convert;
//   - IEEE to double-double: converted to the legacy double-double format,
//     then rebuilt from the resulting bit pattern;
//   - double-double to IEEE: converted through getIEEE(), then moved into a
//     new APFloat tagged with ToSemantics.
// NOTE(review): the return statements of the first, third and fourth cases
// are not shown in this listing.
4431 APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics,
4432 roundingMode RM, bool *losesInfo) {
4433 if (&getSemantics() == &ToSemantics)
4435 if (usesLayout<IEEEFloat>(getSemantics()) &&
4436 usesLayout<IEEEFloat>(ToSemantics))
4437 return U.IEEE.convert(ToSemantics, RM, losesInfo);
4438 if (usesLayout<IEEEFloat>(getSemantics()) &&
4439 usesLayout<DoubleAPFloat>(ToSemantics)) {
4440 assert(&ToSemantics == &semPPCDoubleDouble);
4441 auto Ret = U.IEEE.convert(semPPCDoubleDoubleLegacy, RM, losesInfo);
4442 *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt());
4445 if (usesLayout<DoubleAPFloat>(getSemantics()) &&
4446 usesLayout<IEEEFloat>(ToSemantics)) {
4447 auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo);
4448 *this = APFloat(std::move(getIEEE()), ToSemantics);
4451 llvm_unreachable("Unexpected semantics");
// Returns a float whose bit pattern is all ones, choosing the semantics from
// BitWidth. The visible returns cover half, single, double, x87 extended and
// quad; an unrecognized width is unreachable. A further path asserts a width
// of 128 and yields a semPPCDoubleDouble value.
// NOTE(review): the switch, its case labels and the isIEEE test are not shown
// in this listing.
4454 APFloat APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE) {
4458 return APFloat(semIEEEhalf, APInt::getAllOnesValue(BitWidth));
4460 return APFloat(semIEEEsingle, APInt::getAllOnesValue(BitWidth));
4462 return APFloat(semIEEEdouble, APInt::getAllOnesValue(BitWidth));
4464 return APFloat(semX87DoubleExtended, APInt::getAllOnesValue(BitWidth));
4466 return APFloat(semIEEEquad, APInt::getAllOnesValue(BitWidth));
4468 llvm_unreachable("Unknown floating bit width");
4471 assert(BitWidth == 128);
4472 return APFloat(semPPCDoubleDouble, APInt::getAllOnesValue(BitWidth));
// Writes the contents of a local character buffer to OS followed by a
// newline.
// NOTE(review): the statement that fills Buffer (presumably a toString call)
// is not shown in this listing.
4476 void APFloat::print(raw_ostream &OS) const {
4477 SmallVector<char, 16> Buffer;
4479 OS << Buffer << "\n";
// Debug helper, compiled only when NDEBUG is undefined or LLVM_ENABLE_DUMP is
// defined: prints this value to the dbgs() stream.
// NOTE(review): the matching endif line is not shown in this listing.
4482 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4483 LLVM_DUMP_METHOD void APFloat::dump() const { print(dbgs()); }
// Adds this value to a FoldingSet profile using its raw bit pattern.
4486 void APFloat::Profile(FoldingSetNodeID &NID) const {
4487 NID.Add(bitcastToAPInt());
// APSInt overload of convertToInteger. The bit width and signedness already
// held by result select the conversion; the words produced are then stored
// back into result with that same width.
// NOTE(review): the terminator of the block comment below and the final
// return of status are not shown in this listing.
4490 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
4491 an APSInt, whose initial bit-width and signed-ness are used to determine the
4492 precision of the conversion.
4494 APFloat::opStatus APFloat::convertToInteger(APSInt &result,
4495 roundingMode rounding_mode,
4496 bool *isExact) const {
4497 unsigned bitWidth = result.getBitWidth();
4498 SmallVector<uint64_t, 4> parts(result.getNumWords());
4499 opStatus status = convertToInteger(parts, bitWidth, result.isSigned(),
4500 rounding_mode, isExact);
4501 // Keeps the original signed-ness.
4502 result = APInt(bitWidth, parts);
4506 } // End llvm namespace
4508 #undef APFLOAT_DISPATCH_ON_SEMANTICS