1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements a class to represent arbitrary precision floating
10 // point values and provide a variety of arithmetic operations on them.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/ADT/APFloat.h"
15 #include "llvm/ADT/APSInt.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/FoldingSet.h"
18 #include "llvm/ADT/Hashing.h"
19 #include "llvm/ADT/StringExtras.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Config/llvm-config.h"
22 #include "llvm/Support/Debug.h"
23 #include "llvm/Support/Error.h"
24 #include "llvm/Support/MathExtras.h"
25 #include "llvm/Support/raw_ostream.h"
29 #define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL) \
31 if (usesLayout<IEEEFloat>(getSemantics())) \
32 return U.IEEE.METHOD_CALL; \
33 if (usesLayout<DoubleAPFloat>(getSemantics())) \
34 return U.Double.METHOD_CALL; \
35 llvm_unreachable("Unexpected semantics"); \
40 /// A macro used to combine two fcCategory enums into one key which can be used
41 /// in a switch statement to classify how the interaction of two APFloat's
42 /// categories affects an operation.
44 /// TODO: If clang source code is ever allowed to use constexpr in its own
45 /// codebase, change this into a static inline function.
46 #define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
48 /* Assumed in hexadecimal significand parsing, and conversion to
49 hexadecimal strings. */
50 static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
53 /* Represents floating point arithmetic semantics. */
55 /* The largest E such that 2^E is representable; this matches the
56 definition of IEEE 754. */
57 APFloatBase::ExponentType maxExponent;
59 /* The smallest E such that 2^E is a normalized number; this
60 matches the definition of IEEE 754. */
61 APFloatBase::ExponentType minExponent;
63 /* Number of bits in the significand. This includes the integer
65 unsigned int precision;
67 /* Number of bits actually used in the semantics. */
68 unsigned int sizeInBits;
// Each initializer lists the fltSemantics fields in declaration order:
// {maxExponent, minExponent, precision, sizeInBits}.
71 static const fltSemantics semIEEEhalf = {15, -14, 11, 16};
72 static const fltSemantics semIEEEsingle = {127, -126, 24, 32};
73 static const fltSemantics semIEEEdouble = {1023, -1022, 53, 64};
74 static const fltSemantics semIEEEquad = {16383, -16382, 113, 128};
75 static const fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
// All-zero placeholder semantics.
76 static const fltSemantics semBogus = {0, 0, 0, 0};
78 /* The IBM double-double semantics. Such a number consists of a pair of IEEE
79 64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal,
80 (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo.
81 Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent
82 to each other, and two 11-bit exponents.
84 Note: we need to make the value different from semBogus as otherwise
85 an unsafe optimization may collapse both values to a single address,
86 and we heavily rely on them having distinct addresses. */
87 static const fltSemantics semPPCDoubleDouble = {-1, 0, 0, 0};
89 /* These are legacy semantics for the fallback, inaccurate implementation of
90 IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the
91 operation. It's equivalent to having an IEEE number with consecutive 106
92 bits of mantissa and 11 bits of exponent.
94 It's not equivalent to IBM double-double. For example, a legit IBM
95 double-double, 1 + epsilon:
97 1 + epsilon = 1 + (1 >> 1076)
99 is not representable by a consecutive 106 bits of mantissa.
101 Currently, these semantics are used in the following way:
103 semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) ->
104 (64-bit APInt, 64-bit APInt) -> (128-bit APInt) ->
105 semPPCDoubleDoubleLegacy -> IEEE operations
107 We use bitcastToAPInt() to get the bit representation (in APInt) of the
108 underlying IEEEdouble, then use the APInt constructor to construct the
111 TODO: Implement all operations in semPPCDoubleDouble, and delete these
113 static const fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53,
116 const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) {
124 case S_x87DoubleExtended:
125 return x87DoubleExtended();
128 case S_PPCDoubleDouble:
129 return PPCDoubleDouble();
131 llvm_unreachable("Unrecognised floating semantics");
134 APFloatBase::Semantics
135 APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) {
136 if (&Sem == &llvm::APFloat::IEEEhalf())
138 else if (&Sem == &llvm::APFloat::IEEEsingle())
140 else if (&Sem == &llvm::APFloat::IEEEdouble())
142 else if (&Sem == &llvm::APFloat::x87DoubleExtended())
143 return S_x87DoubleExtended;
144 else if (&Sem == &llvm::APFloat::IEEEquad())
146 else if (&Sem == &llvm::APFloat::PPCDoubleDouble())
147 return S_PPCDoubleDouble;
149 llvm_unreachable("Unknown floating semantics");
152 const fltSemantics &APFloatBase::IEEEhalf() {
155 const fltSemantics &APFloatBase::IEEEsingle() {
156 return semIEEEsingle;
158 const fltSemantics &APFloatBase::IEEEdouble() {
159 return semIEEEdouble;
161 const fltSemantics &APFloatBase::IEEEquad() {
164 const fltSemantics &APFloatBase::x87DoubleExtended() {
165 return semX87DoubleExtended;
167 const fltSemantics &APFloatBase::Bogus() {
170 const fltSemantics &APFloatBase::PPCDoubleDouble() {
171 return semPPCDoubleDouble;
174 /* A tight upper bound on number of parts required to hold the value
177 power * 815 / (351 * integerPartWidth) + 1
179 However, whilst the result may require only this many parts,
180 because we are multiplying two values to get it, the
181 multiplication may require an extra part with the excess part
182 being zero (consider the trivial case of 1 * 1, tcFullMultiply
183 requires two parts to hold the single-part result). So we add an
184 extra one to guarantee enough space whilst multiplying. */
185 const unsigned int maxExponent = 16383;
186 const unsigned int maxPrecision = 113;
187 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
188 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth));
190 unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {
191 return semantics.precision;
193 APFloatBase::ExponentType
194 APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) {
195 return semantics.maxExponent;
197 APFloatBase::ExponentType
198 APFloatBase::semanticsMinExponent(const fltSemantics &semantics) {
199 return semantics.minExponent;
201 unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {
202 return semantics.sizeInBits;
205 unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) {
206 return Sem.sizeInBits;
209 /* A bunch of private, handy routines. */
// Build an Error from the message ERR: a StringError carrying
// inconvertibleErrorCode().
211 static inline Error createError(const Twine &Err) {
212 return make_error<StringError>(Err, inconvertibleErrorCode());
// Number of integerPartWidth-bit parts needed to hold BITS bits, i.e. BITS
// divided by integerPartWidth, rounded up.
215 static inline unsigned int
216 partCountForBits(unsigned int bits)
218 return ((bits) + APFloatBase::integerPartWidth - 1) / APFloatBase::integerPartWidth;
221 /* Returns 0U-9U. Return values >= 10U are not digits. */
222 static inline unsigned int
223 decDigitValue(unsigned int c)
228 /* Return the value of a decimal exponent of the form
231 If the exponent overflows, returns a large exponent with the
233 static Expected<int> readExponent(StringRef::iterator begin,
234 StringRef::iterator end) {
236 unsigned int absExponent;
237 const unsigned int overlargeExponent = 24000; /* FIXME. */
238 StringRef::iterator p = begin;
240 // Treat no exponent as 0 to match binutils
241 if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) {
245 isNegative = (*p == '-');
246 if (*p == '-' || *p == '+') {
249 return createError("Exponent has no digits");
252 absExponent = decDigitValue(*p++);
253 if (absExponent >= 10U)
254 return createError("Invalid character in exponent");
256 for (; p != end; ++p) {
259 value = decDigitValue(*p);
261 return createError("Invalid character in exponent");
263 absExponent = absExponent * 10U + value;
264 if (absExponent >= overlargeExponent) {
265 absExponent = overlargeExponent;
271 return -(int) absExponent;
273 return (int) absExponent;
276 /* This is ugly and needs cleaning up, but I don't immediately see
277 how whilst remaining safe. */
278 static Expected<int> totalExponent(StringRef::iterator p,
279 StringRef::iterator end,
280 int exponentAdjustment) {
281 int unsignedExponent;
282 bool negative, overflow;
286 return createError("Exponent has no digits");
288 negative = *p == '-';
289 if (*p == '-' || *p == '+') {
292 return createError("Exponent has no digits");
295 unsignedExponent = 0;
297 for (; p != end; ++p) {
300 value = decDigitValue(*p);
302 return createError("Invalid character in exponent");
304 unsignedExponent = unsignedExponent * 10 + value;
305 if (unsignedExponent > 32767) {
311 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
315 exponent = unsignedExponent;
317 exponent = -exponent;
318 exponent += exponentAdjustment;
319 if (exponent > 32767 || exponent < -32768)
324 exponent = negative ? -32768: 32767;
329 static Expected<StringRef::iterator>
330 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
331 StringRef::iterator *dot) {
332 StringRef::iterator p = begin;
334 while (p != end && *p == '0')
337 if (p != end && *p == '.') {
340 if (end - begin == 1)
341 return createError("Significand has no digits");
343 while (p != end && *p == '0')
350 /* Given a normal decimal floating point number of the form
354 where the decimal point and exponent are optional, fill out the
355 structure D. Exponent is appropriate if the significand is
356 treated as an integer, and normalizedExponent if the significand
357 is taken to have the decimal point after a single leading
360 If the value is zero, V->firstSigDigit points to a non-digit, and
361 the return exponent is zero.
364 const char *firstSigDigit;
365 const char *lastSigDigit;
367 int normalizedExponent;
370 static Error interpretDecimal(StringRef::iterator begin,
371 StringRef::iterator end, decimalInfo *D) {
372 StringRef::iterator dot = end;
374 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
376 return PtrOrErr.takeError();
377 StringRef::iterator p = *PtrOrErr;
379 D->firstSigDigit = p;
381 D->normalizedExponent = 0;
383 for (; p != end; ++p) {
386 return createError("String contains multiple dots");
391 if (decDigitValue(*p) >= 10U)
396 if (*p != 'e' && *p != 'E')
397 return createError("Invalid character in significand");
399 return createError("Significand has no digits");
400 if (dot != end && p - begin == 1)
401 return createError("Significand has no digits");
403 /* p points to the first non-digit in the string */
404 auto ExpOrErr = readExponent(p + 1, end);
406 return ExpOrErr.takeError();
407 D->exponent = *ExpOrErr;
409 /* Implied decimal point? */
414 /* If number is all zeroes accept any exponent. */
415 if (p != D->firstSigDigit) {
416 /* Drop insignificant trailing zeroes. */
421 while (p != begin && *p == '0');
422 while (p != begin && *p == '.');
425 /* Adjust the exponents for any decimal point. */
426 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
427 D->normalizedExponent = (D->exponent +
428 static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
429 - (dot > D->firstSigDigit && dot < p)));
433 return Error::success();
436 /* Return the trailing fraction of a hexadecimal number.
437 DIGITVALUE is the first hex digit of the fraction, P points to
439 static Expected<lostFraction>
440 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
441 unsigned int digitValue) {
442 unsigned int hexDigit;
444 /* If the first trailing digit isn't 0 or 8 we can work out the
445 fraction immediately. */
447 return lfMoreThanHalf;
448 else if (digitValue < 8 && digitValue > 0)
449 return lfLessThanHalf;
451 // Otherwise we need to find the first non-zero digit.
452 while (p != end && (*p == '0' || *p == '.'))
456 return createError("Invalid trailing hexadecimal fraction!");
458 hexDigit = hexDigitValue(*p);
460 /* If we ran off the end it is exactly zero or one-half, otherwise
463 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
465 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
468 /* Return the fraction lost were a bignum truncated losing the least
469 significant BITS bits. */
471 lostFractionThroughTruncation(const APFloatBase::integerPart *parts,
472 unsigned int partCount,
477 lsb = APInt::tcLSB(parts, partCount);
479 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
481 return lfExactlyZero;
483 return lfExactlyHalf;
484 if (bits <= partCount * APFloatBase::integerPartWidth &&
485 APInt::tcExtractBit(parts, bits - 1))
486 return lfMoreThanHalf;
488 return lfLessThanHalf;
491 /* Shift DST right BITS bits noting lost fraction. */
// DST is a bignum of PARTS parts and is modified in place; the return value
// is the lost fraction reported by lostFractionThroughTruncation.
493 shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
495 lostFraction lost_fraction;
// Classify the low BITS bits first: the shift below discards them.
497 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
499 APInt::tcShiftRight(dst, parts, bits);
501 return lost_fraction;
504 /* Combine the effect of two lost fractions. */
// MORESIGNIFICANT is returned unchanged when LESSSIGNIFICANT is lfExactlyZero.
// Otherwise an exact MORESIGNIFICANT is pushed to the inexact category just
// above it: lfExactlyZero becomes lfLessThanHalf and lfExactlyHalf becomes
// lfMoreThanHalf. The two inexact categories are left as they are.
506 combineLostFractions(lostFraction moreSignificant,
507 lostFraction lessSignificant)
509 if (lessSignificant != lfExactlyZero) {
510 if (moreSignificant == lfExactlyZero)
511 moreSignificant = lfLessThanHalf;
512 else if (moreSignificant == lfExactlyHalf)
513 moreSignificant = lfMoreThanHalf;
516 return moreSignificant;
519 /* The error from the true value, in half-ulps, on multiplying two
520 floating point numbers, which differ from the value they
521 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
522 than the returned value.
524 See "How to Read Floating Point Numbers Accurately" by William D
527 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
529 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
531 if (HUerr1 + HUerr2 == 0)
532 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
534 return inexactMultiply + 2 * (HUerr1 + HUerr2);
537 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
538 when the least significant BITS are truncated. BITS cannot be
540 static APFloatBase::integerPart
541 ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits,
543 unsigned int count, partBits;
544 APFloatBase::integerPart part, boundary;
549 count = bits / APFloatBase::integerPartWidth;
550 partBits = bits % APFloatBase::integerPartWidth + 1;
552 part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits));
555 boundary = (APFloatBase::integerPart) 1 << (partBits - 1);
560 if (part - boundary <= boundary - part)
561 return part - boundary;
563 return boundary - part;
566 if (part == boundary) {
569 return ~(APFloatBase::integerPart) 0; /* A lot. */
572 } else if (part == boundary - 1) {
575 return ~(APFloatBase::integerPart) 0; /* A lot. */
580 return ~(APFloatBase::integerPart) 0; /* A lot. */
583 /* Place pow(5, power) in DST, and return the number of parts used.
584 DST must be at least one part larger than size of the answer. */
586 powerOf5(APFloatBase::integerPart *dst, unsigned int power) {
587 static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
588 APFloatBase::integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
589 pow5s[0] = 78125 * 5;
591 unsigned int partsCount[16] = { 1 };
592 APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
594 assert(power <= maxExponent);
599 *p1 = firstEightPowers[power & 7];
605 for (unsigned int n = 0; power; power >>= 1, n++) {
610 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
612 pc = partsCount[n - 1];
613 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
615 if (pow5[pc - 1] == 0)
621 APFloatBase::integerPart *tmp;
623 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
625 if (p2[result - 1] == 0)
628 /* Now result is in p1 with partsCount parts and p2 is scratch
639 APInt::tcAssign(dst, p1, result);
644 /* Zero at the end to avoid modular arithmetic when adding one; used
645 when rounding up during hexadecimal output. */
646 static const char hexDigitsLower[] = "0123456789abcdef0";
647 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
648 static const char infinityL[] = "infinity";
649 static const char infinityU[] = "INFINITY";
650 static const char NaNL[] = "nan";
651 static const char NaNU[] = "NAN";
653 /* Write out an integerPart in hexadecimal, starting with the most
654 significant nibble. Write out exactly COUNT hexdigits, return
657 partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count,
658 const char *hexDigitChars)
660 unsigned int result = count;
662 assert(count != 0 && count <= APFloatBase::integerPartWidth / 4);
664 part >>= (APFloatBase::integerPartWidth - 4 * count);
666 dst[count] = hexDigitChars[part & 0xf];
673 /* Write out an unsigned decimal integer. */
675 writeUnsignedDecimal (char *dst, unsigned int n)
691 /* Write out a signed decimal integer. */
693 writeSignedDecimal (char *dst, int value)
697 dst = writeUnsignedDecimal(dst, -(unsigned) value);
699 dst = writeUnsignedDecimal(dst, value);
706 void IEEEFloat::initialize(const fltSemantics *ourSemantics) {
709 semantics = ourSemantics;
712 significand.parts = new integerPart[count];
715 void IEEEFloat::freeSignificand() {
717 delete [] significand.parts;
720 void IEEEFloat::assign(const IEEEFloat &rhs) {
721 assert(semantics == rhs.semantics);
724 category = rhs.category;
725 exponent = rhs.exponent;
726 if (isFiniteNonZero() || category == fcNaN)
727 copySignificand(rhs);
730 void IEEEFloat::copySignificand(const IEEEFloat &rhs) {
731 assert(isFiniteNonZero() || category == fcNaN);
732 assert(rhs.partCount() >= partCount());
734 APInt::tcAssign(significandParts(), rhs.significandParts(),
738 /* Make this number a NaN, with an arbitrary but deterministic value
739 for the significand. If double or longer, this is a signalling NaN,
740 which may not be ideal. If float, this is QNaN(0). */
// NOTE(review): the body below clears or sets the QNaN bit explicitly, so the
// "signalling NaN for double or longer" sentence above may be stale - confirm.
// FILL, when non-null, supplies the payload bits copied into the significand.
741 void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
745 integerPart *significand = significandParts();
746 unsigned numParts = partCount();
748 // Set the significand bits to the fill.
// With no fill, or a fill narrower than the significand, start from zeros.
749 if (!fill || fill->getNumWords() < numParts)
750 APInt::tcSet(significand, 0, numParts);
752 APInt::tcAssign(significand, fill->getRawData(),
753 std::min(fill->getNumWords(), numParts));
755 // Zero out the excess bits of the significand.
756 unsigned bitsToPreserve = semantics->precision - 1;
// NOTE(review): the literal 64 (and 1ULL below) presumably stands for
// integerPartWidth - confirm the two are always equal.
757 unsigned part = bitsToPreserve / 64;
758 bitsToPreserve %= 64;
759 significand[part] &= ((1ULL << bitsToPreserve) - 1);
760 for (part++; part != numParts; ++part)
761 significand[part] = 0;
// The quiet-NaN flag is bit precision - 2 of the significand.
764 unsigned QNaNBit = semantics->precision - 2;
767 // We always have to clear the QNaN bit to make it an SNaN.
768 APInt::tcClearBit(significand, QNaNBit);
770 // If there are no bits set in the payload, we have to set
771 // *something* to make it a NaN instead of an infinity;
772 // conventionally, this is the next bit down from the QNaN bit.
773 if (APInt::tcIsZero(significand, numParts))
774 APInt::tcSetBit(significand, QNaNBit - 1);
776 // We always have to set the QNaN bit to make it a QNaN.
777 APInt::tcSetBit(significand, QNaNBit);
780 // For x87 extended precision, we want to make a NaN, not a
781 // pseudo-NaN. Maybe we should expose the ability to make
783 if (semantics == &semX87DoubleExtended)
784 APInt::tcSetBit(significand, QNaNBit + 1);
787 IEEEFloat &IEEEFloat::operator=(const IEEEFloat &rhs) {
789 if (semantics != rhs.semantics) {
791 initialize(rhs.semantics);
// Move assignment: take over rhs's semantics, significand, exponent and
// category, then point rhs at semBogus.
799 IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) {
802 semantics = rhs.semantics;
803 significand = rhs.significand;
804 exponent = rhs.exponent;
805 category = rhs.category;
// NOTE(review): presumably this stops rhs from releasing the significand
// storage that now belongs to *this - confirm against freeSignificand().
808 rhs.semantics = &semBogus;
// True when the value is finite and non-zero, its exponent equals the
// semantics' minExponent, and bit (precision - 1) of the significand is clear.
812 bool IEEEFloat::isDenormal() const {
813 return isFiniteNonZero() && (exponent == semantics->minExponent) &&
814 (APInt::tcExtractBit(significandParts(),
815 semantics->precision - 1) == 0);
818 bool IEEEFloat::isSmallest() const {
819 // The smallest number by magnitude in our format will be the smallest
820 // denormal, i.e. the floating point number with exponent being minimum
821 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
822 return isFiniteNonZero() && exponent == semantics->minExponent &&
823 significandMSB() == 0;
826 bool IEEEFloat::isSignificandAllOnes() const {
827 // Test if the significand excluding the integral bit is all ones. This allows
828 // us to test for binade boundaries.
829 const integerPart *Parts = significandParts();
830 const unsigned PartCount = partCount();
831 for (unsigned i = 0; i < PartCount - 1; i++)
835 // Set the unused high bits to all ones when we compare.
836 const unsigned NumHighBits =
837 PartCount*integerPartWidth - semantics->precision + 1;
838 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to "
839 "fill than integerPartWidth");
840 const integerPart HighBitFill =
841 ~integerPart(0) << (integerPartWidth - NumHighBits);
842 if (~(Parts[PartCount - 1] | HighBitFill))
848 bool IEEEFloat::isSignificandAllZeros() const {
849 // Test if the significand excluding the integral bit is all zeros. This
850 // allows us to test for binade boundaries.
851 const integerPart *Parts = significandParts();
852 const unsigned PartCount = partCount();
854 for (unsigned i = 0; i < PartCount - 1; i++)
858 const unsigned NumHighBits =
859 PartCount*integerPartWidth - semantics->precision + 1;
860 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to "
861 "clear than integerPartWidth");
862 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
864 if (Parts[PartCount - 1] & HighBitMask)
870 bool IEEEFloat::isLargest() const {
871 // The largest number by magnitude in our format will be the floating point
872 // number with maximum exponent and with significand that is all ones.
873 return isFiniteNonZero() && exponent == semantics->maxExponent
874 && isSignificandAllOnes();
877 bool IEEEFloat::isInteger() const {
878 // This could be made more efficient; I'm going for obviously correct.
879 if (!isFinite()) return false;
880 IEEEFloat truncated = *this;
881 truncated.roundToIntegral(rmTowardZero);
882 return compare(truncated) == cmpEqual;
885 bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const {
888 if (semantics != rhs.semantics ||
889 category != rhs.category ||
892 if (category==fcZero || category==fcInfinity)
895 if (isFiniteNonZero() && exponent != rhs.exponent)
898 return std::equal(significandParts(), significandParts() + partCount(),
899 rhs.significandParts());
902 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) {
903 initialize(&ourSemantics);
907 exponent = ourSemantics.precision - 1;
908 significandParts()[0] = value;
909 normalize(rmNearestTiesToEven, lfExactlyZero);
912 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) {
913 initialize(&ourSemantics);
918 // Delegate to the previous constructor, because a later copy constructor may
919 // actually inspect category, which can't be garbage.
920 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, uninitializedTag tag)
921 : IEEEFloat(ourSemantics) {}
923 IEEEFloat::IEEEFloat(const IEEEFloat &rhs) {
924 initialize(rhs.semantics);
928 IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) {
929 *this = std::move(rhs);
932 IEEEFloat::~IEEEFloat() { freeSignificand(); }
934 unsigned int IEEEFloat::partCount() const {
935 return partCountForBits(semantics->precision + 1);
938 const IEEEFloat::integerPart *IEEEFloat::significandParts() const {
939 return const_cast<IEEEFloat *>(this)->significandParts();
942 IEEEFloat::integerPart *IEEEFloat::significandParts() {
944 return significand.parts;
946 return &significand.part;
949 void IEEEFloat::zeroSignificand() {
950 APInt::tcSet(significandParts(), 0, partCount());
953 /* Increment an fcNormal floating point number's significand. */
954 void IEEEFloat::incrementSignificand() {
957 carry = APInt::tcIncrement(significandParts(), partCount());
959 /* Our callers should never cause us to overflow. */
964 /* Add the significand of the RHS. Returns the carry flag. */
965 IEEEFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) {
968 parts = significandParts();
970 assert(semantics == rhs.semantics);
971 assert(exponent == rhs.exponent);
973 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
976 /* Subtract the significand of the RHS with a borrow flag. Returns
978 IEEEFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
979 integerPart borrow) {
982 parts = significandParts();
984 assert(semantics == rhs.semantics);
985 assert(exponent == rhs.exponent);
987 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
991 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
992 on to the full-precision result of the multiplication. Returns the
// Forms the full-width product of this significand and rhs's, folds in the
// addend when it is non-zero, then narrows the result back to "precision"
// bits in this significand. Returns the fraction lost along the way.
994 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
996 unsigned int omsb; // One, not zero, based MSB.
997 unsigned int partsCount, newPartsCount, precision;
998 integerPart *lhsSignificand;
// Stack buffer for products of up to 4 parts; larger ones use the heap.
999 integerPart scratch[4];
1000 integerPart *fullSignificand;
1001 lostFraction lost_fraction;
1004 assert(semantics == rhs.semantics);
1006 precision = semantics->precision;
1008 // Allocate space for twice as many bits as the original significand, plus one
1009 // extra bit for the addition to overflow into.
1010 newPartsCount = partCountForBits(precision * 2 + 1);
1012 if (newPartsCount > 4)
1013 fullSignificand = new integerPart[newPartsCount];
1015 fullSignificand = scratch;
1017 lhsSignificand = significandParts();
1018 partsCount = partCount();
1020 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
1021 rhs.significandParts(), partsCount, partsCount);
1023 lost_fraction = lfExactlyZero;
1024 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1025 exponent += rhs.exponent;
1027 // Assume the operands involved in the multiplication are single-precision
1028 // FP, and the two multiplicands are:
1029 // *this = a23 . a22 ... a0 * 2^e1
1030 // rhs = b23 . b22 ... b0 * 2^e2
1031 // the result of multiplication is:
1032 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
1033 // Note that there are three significant bits at the left-hand side of the
1034 // radix point: two for the multiplication, and an overflow bit for the
1035 // addition (that will always be zero at this point). Move the radix point
1036 // toward left by two bits, and adjust exponent accordingly.
1039 if (addend.isNonZero()) {
1040 // The intermediate result of the multiplication has "2 * precision"
1041 // significant bits; adjust the addend to be consistent with mul result.
1043 Significand savedSignificand = significand;
1044 const fltSemantics *savedSemantics = semantics;
1045 fltSemantics extendedSemantics;
1047 unsigned int extendedPrecision;
1049 // Normalize our MSB to one below the top bit to allow for overflow.
1050 extendedPrecision = 2 * precision + 1;
1051 if (omsb != extendedPrecision - 1) {
1052 assert(extendedPrecision > omsb);
1053 APInt::tcShiftLeft(fullSignificand, newPartsCount,
1054 (extendedPrecision - 1) - omsb);
1055 exponent -= (extendedPrecision - 1) - omsb;
1058 /* Create new semantics. */
1059 extendedSemantics = *semantics;
1060 extendedSemantics.precision = extendedPrecision;
// Temporarily make *this use the wide product as its significand, under the
// extended semantics, so the addition below runs at full width.
1062 if (newPartsCount == 1)
1063 significand.part = fullSignificand[0];
1065 significand.parts = fullSignificand;
1066 semantics = &extendedSemantics;
1068 // Make a copy so we can convert it to the extended semantics.
1069 // Note that we cannot convert the addend directly, as the extendedSemantics
1070 // is a local variable (which we take a reference to).
1071 IEEEFloat extendedAddend(addend);
1072 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
1073 assert(status == opOK);
1076 // Shift the significand of the addend right by one bit. This guarantees
1077 // that the high bit of the significand is zero (same as fullSignificand),
1078 // so the addition will overflow (if it does overflow at all) into the top bit.
1079 lost_fraction = extendedAddend.shiftSignificandRight(1);
1080 assert(lost_fraction == lfExactlyZero &&
1081 "Lost precision while shifting addend for fused-multiply-add.");
1083 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
1085 /* Restore our state. */
1086 if (newPartsCount == 1)
1087 fullSignificand[0] = significand.part;
1088 significand = savedSignificand;
1089 semantics = savedSemantics;
1091 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1094 // Convert the result having "2 * precision" significant-bits back to the one
1095 // having "precision" significant-bits. First, move the radix point from
1096 // position "2*precision - 1" to "precision - 1". The exponent needs to be
1097 // adjusted by "2*precision - 1" - "precision - 1" = "precision".
// NOTE(review): the statement below subtracts precision + 1, not precision as
// the comment above derives - confirm which one is intended.
1098 exponent -= precision + 1;
1100 // In case MSB resides at the left-hand side of radix point, shift the
1101 // mantissa right by some amount to make sure the MSB reside right before
1102 // the radix point (i.e. "MSB . rest-significant-bits").
1104 // Note that the result is not normalized when "omsb < precision". So, the
1105 // caller needs to call IEEEFloat::normalize() if normalized value is
1107 if (omsb > precision) {
1108 unsigned int bits, significantParts;
1111 bits = omsb - precision;
1112 significantParts = partCountForBits(omsb);
1113 lf = shiftRight(fullSignificand, significantParts, bits);
1114 lost_fraction = combineLostFractions(lf, lost_fraction);
// Copy the low partsCount parts of the wide result back into this significand.
1118 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
// Release the wide buffer only if it was heap-allocated above.
1120 if (newPartsCount > 4)
1121 delete [] fullSignificand;
1123 return lost_fraction;
1126 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) {
1127 return multiplySignificand(rhs, IEEEFloat(*semantics));
1130 /* Divide the significand of this value by that of RHS, leaving the quotient in this significand. */
1131 lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
1132 unsigned int bit, i, partsCount;
1133 const integerPart *rhsSignificand;
1134 integerPart *lhsSignificand, *dividend, *divisor;
1135 integerPart scratch[4];
1136 lostFraction lost_fraction;
1138 assert(semantics == rhs.semantics);
1140 lhsSignificand = significandParts();
1141 rhsSignificand = rhs.significandParts();
1142 partsCount = partCount();
1145 dividend = new integerPart[partsCount * 2];
1149 divisor = dividend + partsCount;
1151 /* Copy the dividend and divisor as they will be modified in-place. */
1152 for (i = 0; i < partsCount; i++) {
1153 dividend[i] = lhsSignificand[i];
1154 divisor[i] = rhsSignificand[i];
1155 lhsSignificand[i] = 0;
1158 exponent -= rhs.exponent;
1160 unsigned int precision = semantics->precision;
1162 /* Normalize the divisor. */
1163 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1166 APInt::tcShiftLeft(divisor, partsCount, bit);
1169 /* Normalize the dividend. */
1170 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1173 APInt::tcShiftLeft(dividend, partsCount, bit);
1176 /* Ensure the dividend >= divisor initially for the loop below.
1177 Incidentally, this means that the division loop below is
1178 guaranteed to set the integer bit to one. */
1179 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1181 APInt::tcShiftLeft(dividend, partsCount, 1);
1182 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1185 /* Long division. */
1186 for (bit = precision; bit; bit -= 1) {
1187 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1188 APInt::tcSubtract(dividend, divisor, 0, partsCount);
1189 APInt::tcSetBit(lhsSignificand, bit - 1);
1192 APInt::tcShiftLeft(dividend, partsCount, 1);
1195 /* Figure out the lost fraction. */
1196 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1199 lost_fraction = lfMoreThanHalf;
1201 lost_fraction = lfExactlyHalf;
1202 else if (APInt::tcIsZero(dividend, partsCount))
1203 lost_fraction = lfExactlyZero;
1205 lost_fraction = lfLessThanHalf;
1210 return lost_fraction;
1213 unsigned int IEEEFloat::significandMSB() const {
1214 return APInt::tcMSB(significandParts(), partCount());
1217 unsigned int IEEEFloat::significandLSB() const {
1218 return APInt::tcLSB(significandParts(), partCount());
// Shift the significand right by BITS bits and return the lostFraction that
// shiftRight reports for the bits shifted out.
1221 /* Note that a zero result is NOT normalized to fcZero. */
1222 lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
1223 /* Our exponent should not overflow. */
// The cast back to ExponentType makes the check catch wrap-around in the
// exponent's own type.
1224 assert((ExponentType) (exponent + bits) >= exponent);
1228 return shiftRight(significandParts(), partCount(), bits);
1231 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
// BITS must be smaller than the precision of our semantics (asserted), and
// the significand must still be non-zero after the shift (asserted).
1232 void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
1233 assert(bits < semantics->precision);
1236 unsigned int partsCount = partCount();
1238 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1241 assert(!APInt::tcIsZero(significandParts(), partsCount));
1245 IEEEFloat::cmpResult
1246 IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const {
1249 assert(semantics == rhs.semantics);
1250 assert(isFiniteNonZero());
1251 assert(rhs.isFiniteNonZero());
1253 compare = exponent - rhs.exponent;
1255 /* If exponents are equal, do an unsigned bignum comparison of the
1258 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1262 return cmpGreaterThan;
1263 else if (compare < 0)
1269 /* Handle overflow. Sign is preserved. We either become infinity or
1270 the largest finite number. */
1271 IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
// Both round-to-nearest modes, and the directed mode that points away from
// zero for our sign, overflow to infinity and report overflow plus inexact.
1273 if (rounding_mode == rmNearestTiesToEven ||
1274 rounding_mode == rmNearestTiesToAway ||
1275 (rounding_mode == rmTowardPositive && !sign) ||
1276 (rounding_mode == rmTowardNegative && sign)) {
1277 category = fcInfinity;
1278 return (opStatus) (opOverflow | opInexact);
1281 /* Otherwise we become the largest finite number. */
// Largest finite value: maximum exponent, with the low `precision` bits of
// the significand set.
1282 category = fcNormal;
1283 exponent = semantics->maxExponent;
1284 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1285 semantics->precision);
1290 /* Returns TRUE if, when truncating the current number, with BIT the
1291 new LSB, with the given lost fraction and rounding mode, the result
1292 would need to be rounded away from zero (i.e., by increasing the
1293 significand). This routine must work for fcZero of both signs, and
1294 fcNormal numbers. */
// LOST_FRACTION must not be lfExactlyZero (asserted); callers are expected
// to filter that case out themselves.
1295 bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,
1296 lostFraction lost_fraction,
1297 unsigned int bit) const {
1298 /* NaNs and infinities should not have lost fractions. */
1299 assert(isFiniteNonZero() || category == fcZero);
1301 /* Current callers never pass this so we don't handle it. */
1302 assert(lost_fraction != lfExactlyZero);
1304 switch (rounding_mode) {
// Ties-to-away: round up on an exact half or anything above it.
1305 case rmNearestTiesToAway:
1306 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
// Ties-to-even: an exact half is settled by the value of bit BIT of the
// significand, i.e. whether the kept part is currently odd.
1308 case rmNearestTiesToEven:
1309 if (lost_fraction == lfMoreThanHalf)
1312 /* Our zeroes don't have a significand to test. */
1313 if (lost_fraction == lfExactlyHalf && category != fcZero)
1314 return APInt::tcExtractBit(significandParts(), bit);
1321 case rmTowardPositive:
1324 case rmTowardNegative:
1327 llvm_unreachable("Invalid rounding mode found");
// Bring this value into the canonical form for its semantics and round it
// according to ROUNDING_MODE. LOST_FRACTION describes what the caller has
// already discarded below the significand. The return value is a set of
// opStatus flags; the visible paths produce overflow, underflow and inexact
// combinations. Values that are not finite non-zero take an early exit.
1330 IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
1331 lostFraction lost_fraction) {
1332 unsigned int omsb; /* One, not zero, based MSB. */
1335 if (!isFiniteNonZero())
1338 /* Before rounding normalize the exponent of fcNormal numbers. */
1339 omsb = significandMSB() + 1;
1342 /* OMSB is numbered from 1. We want to place it in the integer
1343 bit numbered PRECISION if possible, with a compensating change in
1345 exponentChange = omsb - semantics->precision;
1347 /* If the resulting exponent is too high, overflow according to
1348 the rounding mode. */
1349 if (exponent + exponentChange > semantics->maxExponent)
1350 return handleOverflow(rounding_mode);
1352 /* Subnormal numbers have exponent minExponent, and their MSB
1353 is forced based on that. */
1354 if (exponent + exponentChange < semantics->minExponent)
1355 exponentChange = semantics->minExponent - exponent;
1357 /* Shifting left is easy as we don't lose precision. */
// A negative exponentChange means the significand has spare room at the top;
// no fraction may be pending in that case (asserted).
1358 if (exponentChange < 0) {
1359 assert(lost_fraction == lfExactlyZero);
1361 shiftSignificandLeft(-exponentChange);
// A positive exponentChange shifts bits out at the bottom; they are merged
// with the fraction the caller already lost.
1366 if (exponentChange > 0) {
1369 /* Shift right and capture any new lost fraction. */
1370 lf = shiftSignificandRight(exponentChange);
1372 lost_fraction = combineLostFractions(lf, lost_fraction);
1374 /* Keep OMSB up-to-date. */
1375 if (omsb > (unsigned) exponentChange)
1376 omsb -= exponentChange;
1382 /* Now round the number according to rounding_mode given the lost
1385 /* As specified in IEEE 754, since we do not trap we do not report
1386 underflow for exact results. */
1387 if (lost_fraction == lfExactlyZero) {
1388 /* Canonicalize zeroes. */
1395 /* Increment the significand if we're rounding away from zero. */
// Bit 0 is passed as the LSB position because the significand is already in
// its final position at this point.
1396 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1398 exponent = semantics->minExponent;
1400 incrementSignificand();
1401 omsb = significandMSB() + 1;
1403 /* Did the significand increment overflow? */
1404 if (omsb == (unsigned) semantics->precision + 1) {
1405 /* Renormalize by incrementing the exponent and shifting our
1406 significand right one. However if we already have the
1407 maximum exponent we overflow to infinity. */
1408 if (exponent == semantics->maxExponent) {
1409 category = fcInfinity;
1411 return (opStatus) (opOverflow | opInexact);
1414 shiftSignificandRight(1);
1420 /* The normal case - we were and are not denormal, and any
1421 significand increment above didn't overflow. */
1422 if (omsb == semantics->precision)
1425 /* We have a non-zero denormal. */
1426 assert(omsb < semantics->precision);
1428 /* Canonicalize zeroes. */
1432 /* The fcZero case is a denormal that underflowed to zero. */
1433 return (opStatus) (opUnderflow | opInexact);
// Resolve an addition or subtraction by dispatching on the pair of operand
// categories (this, rhs). `subtract` selects subtraction, which is treated
// as adding rhs with its sign flipped (see the `rhs.sign ^ subtract` uses).
// NOTE(review): addOrSubtract treats an opDivByZero result from here as
// "not a simple case"; presumably the fcNormal/fcNormal arm returns it -
// that return is not visible here, confirm.
1436 IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
1438 switch (PackCategoriesIntoKey(category, rhs.category)) {
1440 llvm_unreachable(nullptr);
// Cases where the left operand already determines the result.
1442 case PackCategoriesIntoKey(fcNaN, fcZero):
1443 case PackCategoriesIntoKey(fcNaN, fcNormal):
1444 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1445 case PackCategoriesIntoKey(fcNaN, fcNaN):
1446 case PackCategoriesIntoKey(fcNormal, fcZero):
1447 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1448 case PackCategoriesIntoKey(fcInfinity, fcZero):
// Right operand is a NaN: its payload is copied into this value.
1451 case PackCategoriesIntoKey(fcZero, fcNaN):
1452 case PackCategoriesIntoKey(fcNormal, fcNaN):
1453 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1454 // We need to be sure to flip the sign here for subtraction because we
1455 // don't have a separate negate operation so -NaN becomes 0 - NaN here.
1456 sign = rhs.sign ^ subtract;
1458 copySignificand(rhs);
// Finite (or zero) plus/minus infinity: result is that infinity, with the
// sign flipped for subtraction.
1461 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1462 case PackCategoriesIntoKey(fcZero, fcInfinity):
1463 category = fcInfinity;
1464 sign = rhs.sign ^ subtract;
1467 case PackCategoriesIntoKey(fcZero, fcNormal):
1469 sign = rhs.sign ^ subtract;
1472 case PackCategoriesIntoKey(fcZero, fcZero):
1473 /* Sign depends on rounding mode; handled by caller. */
1476 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1477 /* Differently signed infinities can only be validly
// The test below holds when the two infinities have opposite effective
// signs for this operation.
1479 if (((sign ^ rhs.sign)!=0) != subtract) {
1486 case PackCategoriesIntoKey(fcNormal, fcNormal):
1491 /* Add or subtract two normal numbers. */
// Works on the significands only and returns the lostFraction produced while
// aligning the operand with the smaller exponent. `subtract` is first
// combined with the operand signs so that the rest of the routine deals with
// magnitudes.
1492 lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
1495 lostFraction lost_fraction;
1498 /* Determine if the operation on the absolute values is effectively
1499 an addition or subtraction. */
1500 subtract ^= static_cast<bool>(sign ^ rhs.sign);
1502 /* Are we bigger exponent-wise than the RHS? */
1503 bits = exponent - rhs.exponent;
1505 /* Subtraction is more subtle than one might naively expect. */
// A copy of rhs is used so the operand can be shifted without touching the
// caller's value.
1507 IEEEFloat temp_rhs(rhs);
1510 lost_fraction = lfExactlyZero;
// When the exponents differ, the smaller operand is shifted right by one bit
// less than the exponent gap and the larger one left by one bit.
1511 else if (bits > 0) {
1512 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1513 shiftSignificandLeft(1);
1515 lost_fraction = shiftSignificandRight(-bits - 1);
1516 temp_rhs.shiftSignificandLeft(1);
1519 // Should we reverse the subtraction.
// A non-zero lost fraction is passed on as the borrow argument of
// subtractSignificand.
1520 if (compareAbsoluteValue(temp_rhs) == cmpLessThan) {
1521 carry = temp_rhs.subtractSignificand
1522 (*this, lost_fraction != lfExactlyZero);
1523 copySignificand(temp_rhs);
1526 carry = subtractSignificand
1527 (temp_rhs, lost_fraction != lfExactlyZero);
1530 /* Invert the lost fraction - it was on the RHS and
1532 if (lost_fraction == lfLessThanHalf)
1533 lost_fraction = lfMoreThanHalf;
1534 else if (lost_fraction == lfMoreThanHalf)
1535 lost_fraction = lfLessThanHalf;
1537 /* The code above is intended to ensure that no borrow is
// Effective addition: align the operand with the smaller exponent by
// shifting it right, then add the significands.
1543 IEEEFloat temp_rhs(rhs);
1545 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1546 carry = addSignificand(temp_rhs);
1548 lost_fraction = shiftSignificandRight(-bits);
1549 carry = addSignificand(rhs);
1552 /* We have a guard bit; generating a carry cannot happen. */
1557 return lost_fraction;
// Resolve the category of a product by dispatching on the pair of operand
// categories (this, rhs). The significand arithmetic for two normal values
// is not done here.
// NOTE(review): most per-case statements and returns are not visible in this
// view; the grouping notes below rely only on the visible case labels.
1560 IEEEFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
1561 switch (PackCategoriesIntoKey(category, rhs.category)) {
1563 llvm_unreachable(nullptr);
// Left operand is a NaN.
1565 case PackCategoriesIntoKey(fcNaN, fcZero):
1566 case PackCategoriesIntoKey(fcNaN, fcNormal):
1567 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1568 case PackCategoriesIntoKey(fcNaN, fcNaN):
// Only the right operand is a NaN: its payload is copied into this value.
1572 case PackCategoriesIntoKey(fcZero, fcNaN):
1573 case PackCategoriesIntoKey(fcNormal, fcNaN):
1574 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1577 copySignificand(rhs);
// Infinity times a non-zero, non-NaN value is infinity.
1580 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1581 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1582 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1583 category = fcInfinity;
// At least one zero factor and no infinity.
1586 case PackCategoriesIntoKey(fcZero, fcNormal):
1587 case PackCategoriesIntoKey(fcNormal, fcZero):
1588 case PackCategoriesIntoKey(fcZero, fcZero):
// Zero times infinity.
1592 case PackCategoriesIntoKey(fcZero, fcInfinity):
1593 case PackCategoriesIntoKey(fcInfinity, fcZero):
1597 case PackCategoriesIntoKey(fcNormal, fcNormal):
// Resolve the category of a quotient (this / rhs) by dispatching on the pair
// of operand categories. The significand arithmetic for two normal values is
// not done here.
// NOTE(review): most per-case statements and returns are not visible in this
// view; the grouping notes below rely only on the visible case labels.
1602 IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
1603 switch (PackCategoriesIntoKey(category, rhs.category)) {
1605 llvm_unreachable(nullptr);
// Only the divisor is a NaN: its payload is copied into this value.
1607 case PackCategoriesIntoKey(fcZero, fcNaN):
1608 case PackCategoriesIntoKey(fcNormal, fcNaN):
1609 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1611 copySignificand(rhs);
// Dividend is a NaN.
1613 case PackCategoriesIntoKey(fcNaN, fcZero):
1614 case PackCategoriesIntoKey(fcNaN, fcNormal):
1615 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1616 case PackCategoriesIntoKey(fcNaN, fcNaN):
1619 case PackCategoriesIntoKey(fcInfinity, fcZero):
1620 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1621 case PackCategoriesIntoKey(fcZero, fcInfinity):
1622 case PackCategoriesIntoKey(fcZero, fcNormal):
1625 case PackCategoriesIntoKey(fcNormal, fcInfinity):
// Normal value divided by zero becomes infinity.
1629 case PackCategoriesIntoKey(fcNormal, fcZero):
1630 category = fcInfinity;
// Infinity / infinity and zero / zero.
1633 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1634 case PackCategoriesIntoKey(fcZero, fcZero):
1638 case PackCategoriesIntoKey(fcNormal, fcNormal):
// Resolve the category of a remainder-style operation (this mod rhs) by
// dispatching on the pair of operand categories. Called by mod() before its
// reduction loop.
// NOTE(review): the per-case statements and returns are mostly not visible
// in this view; the grouping notes below rely only on the visible labels.
1643 IEEEFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
1644 switch (PackCategoriesIntoKey(category, rhs.category)) {
1646 llvm_unreachable(nullptr);
// Left operand is a NaN, or it is zero or normal with a non-zero, non-NaN rhs
// that is infinite (or normal, for a zero left operand).
1648 case PackCategoriesIntoKey(fcNaN, fcZero):
1649 case PackCategoriesIntoKey(fcNaN, fcNormal):
1650 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1651 case PackCategoriesIntoKey(fcNaN, fcNaN):
1652 case PackCategoriesIntoKey(fcZero, fcInfinity):
1653 case PackCategoriesIntoKey(fcZero, fcNormal):
1654 case PackCategoriesIntoKey(fcNormal, fcInfinity):
// Only rhs is a NaN: its payload is copied into this value.
1657 case PackCategoriesIntoKey(fcZero, fcNaN):
1658 case PackCategoriesIntoKey(fcNormal, fcNaN):
1659 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1662 copySignificand(rhs);
// Zero divisor, or infinite left operand.
1665 case PackCategoriesIntoKey(fcNormal, fcZero):
1666 case PackCategoriesIntoKey(fcInfinity, fcZero):
1667 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1668 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1669 case PackCategoriesIntoKey(fcZero, fcZero):
1673 case PackCategoriesIntoKey(fcNormal, fcNormal):
// Flip the sign of this value in place.
// NOTE(review): the statement performing the flip is not visible in this
// view; roundToIntegral calls this to restore a sign that changed.
1679 void IEEEFloat::changeSign() {
1680 /* Look mummy, this one's easy. */
1684 /* Normalized addition or subtraction. */
// Computes this = this + rhs, or this - rhs when `subtract` is true, and
// rounds according to ROUNDING_MODE. Category-only cases are settled by
// addOrSubtractSpecials; the significand arithmetic followed by normalize()
// runs only when that helper signals "not a simple case".
1685 IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
1686 roundingMode rounding_mode,
1690 fs = addOrSubtractSpecials(rhs, subtract);
1692 /* This return code means it was not a simple case. */
// opDivByZero is used here purely as an internal sentinel.
1693 if (fs == opDivByZero) {
1694 lostFraction lost_fraction;
1696 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1697 fs = normalize(rounding_mode, lost_fraction);
1699 /* Can only be zero if we lost no fraction. */
1700 assert(category != fcZero || lost_fraction == lfExactlyZero);
1703 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1704 positive zero unless rounding to minus infinity, except that
1705 adding two like-signed zeroes gives that zero. */
1706 if (category == fcZero) {
// The sign is recomputed unless rhs is a zero whose effective sign for this
// operation matches ours.
1707 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
1708 sign = (rounding_mode == rmTowardNegative);
1714 /* Normalized addition. */
1715 IEEEFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs,
1716 roundingMode rounding_mode) {
1717 return addOrSubtract(rhs, rounding_mode, false);
1720 /* Normalized subtraction. */
1721 IEEEFloat::opStatus IEEEFloat::subtract(const IEEEFloat &rhs,
1722 roundingMode rounding_mode) {
1723 return addOrSubtract(rhs, rounding_mode, true);
1726 /* Normalized multiply. */
// Computes this = this * rhs, rounded according to ROUNDING_MODE.
// multiplySpecials settles the result category first; the significand
// product and normalization run only if the value is finite and non-zero
// afterwards.
1727 IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs,
1728 roundingMode rounding_mode) {
1732 fs = multiplySpecials(rhs);
1734 if (isFiniteNonZero()) {
1735 lostFraction lost_fraction = multiplySignificand(rhs);
1736 fs = normalize(rounding_mode, lost_fraction);
// Any discarded fraction makes the result inexact, whatever normalize
// reported.
1737 if (lost_fraction != lfExactlyZero)
1738 fs = (opStatus) (fs | opInexact);
1744 /* Normalized divide. */
// Computes this = this / rhs, rounded according to ROUNDING_MODE.
// divideSpecials settles the result category first; the significand
// division and normalization run only if the value is finite and non-zero
// afterwards.
1745 IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs,
1746 roundingMode rounding_mode) {
1750 fs = divideSpecials(rhs);
1752 if (isFiniteNonZero()) {
1753 lostFraction lost_fraction = divideSignificand(rhs);
1754 fs = normalize(rounding_mode, lost_fraction);
// Any discarded fraction makes the result inexact, whatever normalize
// reported.
1755 if (lost_fraction != lfExactlyZero)
1756 fs = (opStatus) (fs | opInexact);
1762 /* Normalized remainder. This is not currently correct in all cases. */
// Strategy visible below: compute V = this / rhs, round V to an integer by a
// round trip through convertToInteger, multiply that integer by rhs, and
// subtract the product from this. Every step uses rmNearestTiesToEven.
1763 IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) {
1765 IEEEFloat V = *this;
1766 unsigned int origSign = sign;
1768 fs = V.divide(rhs, rmNearestTiesToEven);
1769 if (fs == opDivByZero)
// Scratch integer as wide as our own significand storage.
1772 int parts = partCount();
1773 integerPart *x = new integerPart[parts];
1775 fs = V.convertToInteger(makeMutableArrayRef(x, parts),
1776 parts * integerPartWidth, true, rmNearestTiesToEven,
1778 if (fs == opInvalidOp) {
1783 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1784 rmNearestTiesToEven);
1785 assert(fs==opOK); // should always work
1787 fs = V.multiply(rhs, rmNearestTiesToEven);
1788 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1790 fs = subtract(V, rmNearestTiesToEven);
1791 assert(fs==opOK || fs==opInexact); // likewise
1794 sign = origSign; // IEEE754 requires this
1799 /* Normalized llvm frem (C fmod). */
// After modSpecials has handled the category-only cases, the loop repeatedly
// subtracts a power-of-two multiple of rhs from this value until its
// magnitude drops below that of rhs.
1800 IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) {
1802 fs = modSpecials(rhs);
1803 unsigned int origSign = sign;
1805 while (isFiniteNonZero() && rhs.isFiniteNonZero() &&
1806 compareAbsoluteValue(rhs) != cmpLessThan) {
// V is rhs rescaled by the difference of the two ilogb values; if that
// exceeds our magnitude it is scaled down by one more binary step.
1807 IEEEFloat V = scalbn(rhs, ilogb(*this) - ilogb(rhs), rmNearestTiesToEven);
1808 if (compareAbsoluteValue(V) == cmpLessThan)
1809 V = scalbn(V, -1, rmNearestTiesToEven);
1812 fs = subtract(V, rmNearestTiesToEven);
1816 sign = origSign; // fmod requires this
1820 /* Normalized fused-multiply-add. */
// Computes this = this * multiplicand + addend. When both factors are finite
// non-zero and the addend is finite, the product and sum are formed together
// by multiplySignificand before a single normalize; otherwise the operation
// falls back to multiplySpecials followed by an ordinary addition.
1821 IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand,
1822 const IEEEFloat &addend,
1823 roundingMode rounding_mode) {
1826 /* Post-multiplication sign, before addition. */
1827 sign ^= multiplicand.sign;
1829 /* If and only if all arguments are normal do we need to do an
1830 extended-precision calculation. */
1831 if (isFiniteNonZero() &&
1832 multiplicand.isFiniteNonZero() &&
1833 addend.isFinite()) {
1834 lostFraction lost_fraction;
1836 lost_fraction = multiplySignificand(multiplicand, addend);
1837 fs = normalize(rounding_mode, lost_fraction);
1838 if (lost_fraction != lfExactlyZero)
1839 fs = (opStatus) (fs | opInexact);
1841 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1842 positive zero unless rounding to minus infinity, except that
1843 adding two like-signed zeroes gives that zero. */
// The underflow check keeps this rule from applying to a zero that came from
// underflow rather than from an exact cancellation.
1844 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign)
1845 sign = (rounding_mode == rmTowardNegative);
1847 fs = multiplySpecials(multiplicand);
1849 /* FS can only be opOK or opInvalidOp. There is no more work
1850 to do in the latter case. The IEEE-754R standard says it is
1851 implementation-defined in this case whether, if ADDEND is a
1852 quiet NaN, we raise invalid op; this implementation does so.
1854 If we need to do the addition we can do so with normal
1857 fs = addOrSubtract(addend, rounding_mode, false);
1863 /* Rounding-mode correct round to integral value. */
// Rounds this value in place to an integral value using ROUNDING_MODE and
// returns the status of the arithmetic used to do so.
1864 IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) {
1867 // If the exponent is large enough, we know that this value is already
1868 // integral, and the arithmetic below would potentially cause it to saturate
1869 // to +/-Inf. Bail out early instead.
1870 if (isFiniteNonZero() && exponent+1 >= (int)semanticsPrecision(*semantics))
1873 // The algorithm here is quite simple: we add 2^(p-1), where p is the
1874 // precision of our format, and then subtract it back off again. The choice
1875 // of rounding modes for the addition/subtraction determines the rounding mode
1876 // for our integral rounding as well.
1877 // NOTE: When the input value is negative, we do subtraction followed by
1878 // addition instead.
// Build 2^(p-1) as an APInt wide enough to hold it, then convert it to a
// float of our semantics and give it our sign.
1879 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
1880 IntegerConstant <<= semanticsPrecision(*semantics)-1;
1881 IEEEFloat MagicConstant(*semantics);
1882 fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
1883 rmNearestTiesToEven);
1884 MagicConstant.sign = sign;
1889 // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly.
1890 bool inputSign = isNegative();
1892 fs = add(MagicConstant, rounding_mode);
// Anything other than OK or inexact from the addition stops the subtraction
// from being attempted.
1893 if (fs != opOK && fs != opInexact)
1896 fs = subtract(MagicConstant, rounding_mode);
1898 // Restore the input sign.
1899 if (inputSign != isNegative())
1906 /* Comparison requires normalized numbers. */
// Orders *this against rhs, taking signs into account, and returns a
// cmpResult. Both operands must share the same semantics (asserted). The
// result is selected by dispatching on the pair of operand categories.
// NOTE(review): several sign tests and returns are not visible in this view,
// so some visible returns below appear without their guarding condition.
1907 IEEEFloat::cmpResult IEEEFloat::compare(const IEEEFloat &rhs) const {
1910 assert(semantics == rhs.semantics);
1912 switch (PackCategoriesIntoKey(category, rhs.category)) {
1914 llvm_unreachable(nullptr);
// Any NaN operand makes the comparison unordered.
1916 case PackCategoriesIntoKey(fcNaN, fcZero):
1917 case PackCategoriesIntoKey(fcNaN, fcNormal):
1918 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1919 case PackCategoriesIntoKey(fcNaN, fcNaN):
1920 case PackCategoriesIntoKey(fcZero, fcNaN):
1921 case PackCategoriesIntoKey(fcNormal, fcNaN):
1922 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1923 return cmpUnordered;
// Left operand has the larger magnitude class.
1925 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1926 case PackCategoriesIntoKey(fcInfinity, fcZero):
1927 case PackCategoriesIntoKey(fcNormal, fcZero):
1931 return cmpGreaterThan;
// Right operand has the larger magnitude class.
1933 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1934 case PackCategoriesIntoKey(fcZero, fcInfinity):
1935 case PackCategoriesIntoKey(fcZero, fcNormal):
1937 return cmpGreaterThan;
1941 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1942 if (sign == rhs.sign)
1947 return cmpGreaterThan;
1949 case PackCategoriesIntoKey(fcZero, fcZero):
1952 case PackCategoriesIntoKey(fcNormal, fcNormal):
1956 /* Two normal numbers. Do they have the same sign? */
1957 if (sign != rhs.sign) {
1959 result = cmpLessThan;
1961 result = cmpGreaterThan;
1963 /* Compare absolute values; invert result if negative. */
1964 result = compareAbsoluteValue(rhs);
// Swapping less-than and greater-than mirrors the magnitude ordering.
1967 if (result == cmpLessThan)
1968 result = cmpGreaterThan;
1969 else if (result == cmpGreaterThan)
1970 result = cmpLessThan;
1977 /// IEEEFloat::convert - convert a value of one floating point type to another.
1978 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
1979 /// records whether the transformation lost information, i.e. whether
1980 /// converting the result back to the original type will produce the
1981 /// original value (this is almost the same as return value==opOK, but there
1982 /// are edge cases where this is not so).
// The conversion happens in place: the significand is shifted to the new
// precision, its storage is resized if the part count changes, `semantics`
// is switched to toSemantics, and finite non-zero values are normalized.
1984 IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
1985 roundingMode rounding_mode,
1987 lostFraction lostFraction;
1988 unsigned int newPartCount, oldPartCount;
1991 const fltSemantics &fromSemantics = *semantics;
1993 lostFraction = lfExactlyZero;
1994 newPartCount = partCountForBits(toSemantics.precision + 1);
1995 oldPartCount = partCount();
// Positive shift means widening, negative means narrowing.
1996 shift = toSemantics.precision - fromSemantics.precision;
1998 bool X86SpecialNan = false;
// Flags an x87 NaN that lacks either of the two top significand bits when
// converting away from the x87 extended format.
1999 if (&fromSemantics == &semX87DoubleExtended &&
2000 &toSemantics != &semX87DoubleExtended && category == fcNaN &&
2001 (!(*significandParts() & 0x8000000000000000ULL) ||
2002 !(*significandParts() & 0x4000000000000000ULL))) {
2003 // x86 has some unusual NaNs which cannot be represented in any other
2004 // format; note them here.
2005 X86SpecialNan = true;
2008 // If this is a truncation of a denormal number, and the target semantics
2009 // has larger exponent range than the source semantics (this can happen
2010 // when truncating from PowerPC double-double to double format), the
2011 // right shift could lose result mantissa bits. Adjust exponent instead
2012 // of performing excessive shift.
2013 if (shift < 0 && isFiniteNonZero()) {
2014 int exponentChange = significandMSB() + 1 - fromSemantics.precision;
2015 if (exponent + exponentChange < toSemantics.minExponent)
2016 exponentChange = toSemantics.minExponent - exponent;
2017 if (exponentChange < shift)
2018 exponentChange = shift;
2019 if (exponentChange < 0) {
2020 shift -= exponentChange;
2021 exponent += exponentChange;
2025 // If this is a truncation, perform the shift before we narrow the storage.
2026 if (shift < 0 && (isFiniteNonZero() || category==fcNaN))
2027 lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
2029 // Fix the storage so it can hold to new value.
2030 if (newPartCount > oldPartCount) {
2031 // The new type requires more storage; make it available.
2032 integerPart *newParts;
2033 newParts = new integerPart[newPartCount];
2034 APInt::tcSet(newParts, 0, newPartCount);
// Only finite non-zero values and NaNs carry a significand worth copying.
2035 if (isFiniteNonZero() || category==fcNaN)
2036 APInt::tcAssign(newParts, significandParts(), oldPartCount);
2038 significand.parts = newParts;
2039 } else if (newPartCount == 1 && oldPartCount != 1) {
2040 // Switch to built-in storage for a single part.
2041 integerPart newPart = 0;
2042 if (isFiniteNonZero() || category==fcNaN)
2043 newPart = significandParts()[0];
2045 significand.part = newPart;
2048 // Now that we have the right storage, switch the semantics.
2049 semantics = &toSemantics;
2051 // If this is an extension, perform the shift now that the storage is
2053 if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
2054 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
2056 if (isFiniteNonZero()) {
2057 fs = normalize(rounding_mode, lostFraction);
2058 *losesInfo = (fs != opOK);
2059 } else if (category == fcNaN) {
// A NaN loses information if payload bits were shifted out or if it was one
// of the special x87 NaNs flagged above.
2060 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2062 // For x87 extended precision, we want to make a NaN, not a special NaN if
2063 // the input wasn't special either.
2064 if (!X86SpecialNan && semantics == &semX87DoubleExtended)
2065 APInt::tcSetBit(significandParts(), semantics->precision - 1);
2067 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
2068 // does not give you back the same bits. This is dubious, and we
2069 // don't currently do it. You're really supposed to get
2070 // an invalid operation signal at runtime, but nobody does that.
2080 /* Convert a floating point number to an integer according to the
2081 rounding mode. If the rounded integer value is out of range this
2082 returns an invalid operation exception and the contents of the
2083 destination parts are unspecified. If the rounded value is in
2084 range but the floating point number is not the exact integer, the C
2085 standard doesn't require an inexact exception to be raised. IEEE
2086 854 does require it so we do that.
2088 Note that for conversions to integer type the C standard requires
2089 round-to-zero to always be used. */
// PARTS receives the result and must provide at least
// partCountForBits(WIDTH) parts (asserted). WIDTH is the integer width in
// bits and ISSIGNED selects a signed destination.
// NOTE(review): the writes to *isExact are not visible in this view.
2090 IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
2091 MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned,
2092 roundingMode rounding_mode, bool *isExact) const {
2093 lostFraction lost_fraction;
2094 const integerPart *src;
2095 unsigned int dstPartsCount, truncatedBits;
2099 /* Handle the three special cases first. */
2100 if (category == fcInfinity || category == fcNaN)
2103 dstPartsCount = partCountForBits(width);
2104 assert(dstPartsCount <= parts.size() && "Integer too big");
2106 if (category == fcZero) {
2107 APInt::tcSet(parts.data(), 0, dstPartsCount);
2108 // Negative zero can't be represented as an int.
2113 src = significandParts();
2115 /* Step 1: place our absolute value, with any fraction truncated, in
2118 /* Our absolute value is less than one; truncate everything. */
2119 APInt::tcSet(parts.data(), 0, dstPartsCount);
2120 /* For exponent -1 the integer bit represents .5, look at that.
2121 For smaller exponents leftmost truncated bit is 0. */
2122 truncatedBits = semantics->precision -1U - exponent;
2124 /* We want the most significant (exponent + 1) bits; the rest are
2126 unsigned int bits = exponent + 1U;
2128 /* Hopelessly large in magnitude? */
2132 if (bits < semantics->precision) {
2133 /* We truncate (semantics->precision - bits) bits. */
2134 truncatedBits = semantics->precision - bits;
2135 APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits);
2137 /* We want at least as many bits as are available. */
2138 APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision,
2140 APInt::tcShiftLeft(parts.data(), dstPartsCount,
2141 bits - semantics->precision);
2146 /* Step 2: work out any lost fraction, and increment the absolute
2147 value if we would round away from zero. */
2148 if (truncatedBits) {
2149 lost_fraction = lostFractionThroughTruncation(src, partCount(),
// truncatedBits doubles as the position of the new LSB within the source
// significand for the ties-to-even test.
2151 if (lost_fraction != lfExactlyZero &&
2152 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2153 if (APInt::tcIncrement(parts.data(), dstPartsCount))
2154 return opInvalidOp; /* Overflow. */
2157 lost_fraction = lfExactlyZero;
2160 /* Step 3: check if we fit in the destination. */
2161 unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1;
2165 /* Negative numbers cannot be represented as unsigned. */
2169 /* It takes omsb bits to represent the unsigned integer value.
2170 We lose a bit for the sign, but care is needed as the
2171 maximally negative integer is a special case. */
// With omsb == width, only a magnitude that is a single set bit (lowest and
// highest set bit coincide) passes this test.
2172 if (omsb == width &&
2173 APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb)
2176 /* This case can happen because of rounding. */
2181 APInt::tcNegate (parts.data(), dstPartsCount);
2183 if (omsb >= width + !isSigned)
2187 if (lost_fraction == lfExactlyZero) {
2194 /* Same as convertToSignExtendedInteger, except we provide
2195 deterministic values in case of an invalid operation exception,
2196 namely zero for NaNs and the minimal or maximal value respectively
2197 for underflow or overflow.
2198 The *isExact output tells whether the result is exact, in the sense
2199 that converting it back to the original floating point type produces
2200 the original value. This is almost equivalent to result==opOK,
2201 except for negative zeroes.
// Parameters are forwarded unchanged to convertToSignExtendedInteger; only
// an opInvalidOp result triggers the saturation logic below.
2204 IEEEFloat::convertToInteger(MutableArrayRef<integerPart> parts,
2205 unsigned int width, bool isSigned,
2206 roundingMode rounding_mode, bool *isExact) const {
2209 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2212 if (fs == opInvalidOp) {
2213 unsigned int bits, dstPartsCount;
2215 dstPartsCount = partCountForBits(width);
2216 assert(dstPartsCount <= parts.size() && "Integer too big");
2218 if (category == fcNaN)
// Number of value bits available: one fewer when a sign bit is reserved.
2223 bits = width - isSigned;
2225 APInt::tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits);
// For a negative value and a signed destination, shift the pattern up into
// the sign-bit position.
2226 if (sign && isSigned)
2227 APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1);
2233 /* Convert an unsigned integer SRC to a floating point number,
2234 rounding according to ROUNDING_MODE. The sign of the floating
2235 point number is not modified. */
// SRC points at SRCCOUNT parts. The value's category is set to fcNormal
// here, and the status returned is that of the final normalize().
2236 IEEEFloat::opStatus IEEEFloat::convertFromUnsignedParts(
2237 const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) {
2238 unsigned int omsb, precision, dstCount;
2240 lostFraction lost_fraction;
2242 category = fcNormal;
2243 omsb = APInt::tcMSB(src, srcCount) + 1;
2244 dst = significandParts();
2245 dstCount = partCount();
2246 precision = semantics->precision;
2248 /* We want the most significant PRECISION bits of SRC. There may not
2249 be that many; extract what we can. */
// Wide source: keep the top PRECISION bits and record what falls off below
// them as the lost fraction.
2250 if (precision <= omsb) {
2251 exponent = omsb - 1;
2252 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2254 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
// Narrow source: every bit fits, so nothing is lost and normalize() is left
// to position the significand.
2256 exponent = precision - 1;
2257 lost_fraction = lfExactlyZero;
2258 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2261 return normalize(rounding_mode, lost_fraction);
// Convert the integer VAL to this floating point value, rounding according
// to ROUNDING_MODE. ISSIGNED says whether VAL is to be read as a signed
// quantity. The magnitude is handed to convertFromUnsignedParts, whose
// status is returned.
// NOTE(review): the declaration of `api` and the body of the negative
// branch are not visible in this view.
2264 IEEEFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned,
2265 roundingMode rounding_mode) {
2266 unsigned int partCount = Val.getNumWords();
2270 if (isSigned && api.isNegative()) {
2275 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2278 /* Convert a two's complement integer SRC to a floating point number,
2279 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2280 integer is signed, in which case it must be sign-extended. */
// SRC points at SRCCOUNT parts and is never modified: a negative input is
// negated in a temporary copy before conversion.
2282 IEEEFloat::convertFromSignExtendedInteger(const integerPart *src,
2283 unsigned int srcCount, bool isSigned,
2284 roundingMode rounding_mode) {
// The sign is read from the topmost bit of the last part.
2288 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2291 /* If we're signed and negative negate a copy. */
2293 copy = new integerPart[srcCount];
2294 APInt::tcAssign(copy, src, srcCount);
2295 APInt::tcNegate(copy, srcCount);
2296 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2300 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
// Convert the WIDTH-bit integer stored in PARTS to this floating point
// value, rounding according to ROUNDING_MODE. With ISSIGNED set, bit
// WIDTH - 1 is tested as the sign bit.
// NOTE(review): the body of the negative branch is not visible in this view.
2306 /* FIXME: should this just take a const APInt reference? */
2308 IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2309 unsigned int width, bool isSigned,
2310 roundingMode rounding_mode) {
2311 unsigned int partCount = partCountForBits(width);
// Wrap the raw parts in an APInt of exactly WIDTH bits.
2312 APInt api = APInt(width, makeArrayRef(parts, partCount));
2315 if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2320 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
/// Parses a hexadecimal significand followed by a binary exponent from s and
/// stores the value in this object. Returns an Error for the malformed inputs
/// diagnosed below, otherwise the status produced by normalize().
2323 Expected<IEEEFloat::opStatus>
2324 IEEEFloat::convertFromHexadecimalString(StringRef s,
2325 roundingMode rounding_mode) {
2326 lostFraction lost_fraction = lfExactlyZero;
2328 category = fcNormal;
2332 integerPart *significand = significandParts();
2333 unsigned partsCount = partCount();
// bitPos starts at the very top of the significand storage; digits are
// written from the most significant end.
2334 unsigned bitPos = partsCount * integerPartWidth;
2335 bool computedTrailingFraction = false;
2337 // Skip leading zeroes and any (hexa)decimal point.
2338 StringRef::iterator begin = s.begin();
2339 StringRef::iterator end = s.end();
2340 StringRef::iterator dot;
2341 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2343 return PtrOrErr.takeError();
2344 StringRef::iterator p = *PtrOrErr;
// Remembered so that an all-zero significand can be detected later.
2345 StringRef::iterator firstSignificantDigit = p;
2348 integerPart hex_value;
2352 return createError("String contains multiple dots");
2357 hex_value = hexDigitValue(*p);
// -1U is the value compared against to detect a character that is not a
// hexadecimal digit.
2358 if (hex_value == -1U)
2363 // Store the number while we have space.
2366 hex_value <<= bitPos % integerPartWidth;
2367 significand[bitPos / integerPartWidth] |= hex_value;
// Once storage is exhausted the remaining digits are summarised a single
// time as a lost fraction.
2368 } else if (!computedTrailingFraction) {
2369 auto FractOrErr = trailingHexadecimalFraction(p, end, hex_value);
2371 return FractOrErr.takeError();
2372 lost_fraction = *FractOrErr;
2373 computedTrailingFraction = true;
2377 /* Hex floats require an exponent but not a hexadecimal point. */
2379 return createError("Hex strings require an exponent");
2380 if (*p != 'p' && *p != 'P')
2381 return createError("Invalid character in significand");
2383 return createError("Significand has no digits");
2384 if (dot != end && p - begin == 1)
2385 return createError("Significand has no digits");
2387 /* Ignore the exponent if we are zero. */
2388 if (p != firstSignificantDigit) {
2391 /* Implicit hexadecimal point? */
2395 /* Calculate the exponent adjustment implicit in the number of
2396 significant digits. */
2397 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2398 if (expAdjustment < 0)
// Each hexadecimal digit contributes four bits of exponent.
2400 expAdjustment = expAdjustment * 4 - 1;
2402 /* Adjust for writing the significand starting at the most
2403 significant nibble. */
2404 expAdjustment += semantics->precision;
2405 expAdjustment -= partsCount * integerPartWidth;
2407 /* Adjust for the given exponent. */
// p points at the exponent marker, so the exponent digits start at p + 1.
2408 auto ExpOrErr = totalExponent(p + 1, end, expAdjustment);
2410 return ExpOrErr.takeError();
2411 exponent = *ExpOrErr;
2414 return normalize(rounding_mode, lost_fraction);
/// Sets this value from the decimal significand decSigParts (sigPartCount
/// parts) scaled by ten to the power exp. The product or quotient with a
/// power of five is computed in a temporary format whose width doubles on
/// every iteration until truncation to this format is known to round
/// correctly; the result is then normalized with rounding_mode.
2418 IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2419 unsigned sigPartCount, int exp,
2420 roundingMode rounding_mode) {
2421 unsigned int parts, pow5PartCount;
// Scratch semantics; the precision field is overwritten on each iteration.
2422 fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
2423 integerPart pow5Parts[maxPowerOfFiveParts];
2426 isNearest = (rounding_mode == rmNearestTiesToEven ||
2427 rounding_mode == rmNearestTiesToAway);
// Initial working width: the target precision plus 11 extra bits.
2429 parts = partCountForBits(semantics->precision + 11);
2431 /* Calculate pow(5, abs(exp)). */
2432 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
// Retry with twice as many parts until the rounding test below succeeds.
2434 for (;; parts *= 2) {
2435 opStatus sigStatus, powStatus;
2436 unsigned int excessPrecision, truncatedBits;
2438 calcSemantics.precision = parts * integerPartWidth - 1;
2439 excessPrecision = calcSemantics.precision - semantics->precision;
2440 truncatedBits = excessPrecision;
2442 IEEEFloat decSig(calcSemantics, uninitialized);
2443 decSig.makeZero(sign);
2444 IEEEFloat pow5(calcSemantics);
// Both conversions may be inexact; their statuses feed the error bound.
2446 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2447 rmNearestTiesToEven);
2448 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2449 rmNearestTiesToEven);
2450 /* Add exp, as 10^n = 5^n * 2^n. */
2451 decSig.exponent += exp;
2453 lostFraction calcLostFraction;
// HUerr and HUdistance are measured in half-ulps.
2454 integerPart HUerr, HUdistance;
2455 unsigned int powHUerr;
2458 /* multiplySignificand leaves the precision-th bit set to 1. */
2459 calcLostFraction = decSig.multiplySignificand(pow5);
2460 powHUerr = powStatus != opOK;
2462 calcLostFraction = decSig.divideSignificand(pow5);
2463 /* Denormal numbers have less precision. */
2464 if (decSig.exponent < semantics->minExponent) {
2465 excessPrecision += (semantics->minExponent - decSig.exponent);
2466 truncatedBits = excessPrecision;
// Never discard more bits than the working precision holds.
2467 if (excessPrecision > calcSemantics.precision)
2468 excessPrecision = calcSemantics.precision;
2470 /* Extra half-ulp lost in reciprocal of exponent. */
2471 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2474 /* Both multiplySignificand and divideSignificand return the
2475 result with the integer bit set. */
2476 assert(APInt::tcExtractBit
2477 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2479 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2481 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2482 excessPrecision, isNearest);
2484 /* Are we guaranteed to round correctly if we truncate? */
2485 if (HUdistance >= HUerr) {
2486 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2487 calcSemantics.precision - excessPrecision,
2489 /* Take the exponent of decSig. If we tcExtract-ed fewer bits
2490 above we must adjust our exponent to compensate for the
2491 implicit right shift. */
2492 exponent = (decSig.exponent + semantics->precision
2493 - (calcSemantics.precision - excessPrecision));
2494 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2497 return normalize(rounding_mode, calcLostFraction);
/// Parses the decimal number in str into this object. Zero, certain overflow
/// and certain underflow are detected from the scanned exponent alone; every
/// other input has its digits accumulated into a heap-allocated bignum that
/// is handed to roundSignificandWithExponent. An Error is returned when the
/// scan fails or a non-digit is met while accumulating the significand.
2502 Expected<IEEEFloat::opStatus>
2503 IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
2507 /* Scan the text. */
2508 StringRef::iterator p = str.begin();
2509 if (Error Err = interpretDecimal(p, str.end(), &D))
2510 return std::move(Err);
2512 /* Handle the quick cases. First the case of no significant digits,
2513 i.e. zero, and then exponents that are obviously too large or too
2514 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2515 definitely overflows if
2517 (exp - 1) * L >= maxExponent
2519 and definitely underflows to zero where
2521 (exp + 1) * L <= minExponent - precision
2523 With integer arithmetic the tightest bounds for L are
2525 93/28 < L < 196/59 [ numerator <= 256 ]
2526 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2529 // Test if we have a zero number allowing for strings with no null terminators
2530 // and zero decimals with non-zero exponents.
2532 // We computed firstSigDigit by ignoring all zeros and dots. Thus if
2533 // D->firstSigDigit equals str.end(), every digit must be a zero and there can
2534 // be at most one dot. On the other hand, if we have a zero with a non-zero
2535 // exponent, then we know that D.firstSigDigit will be non-numeric.
2536 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
2540 /* Check whether the normalized exponent is high enough to overflow
2541 max during the log-rebasing in the max-exponent check below. */
2542 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2543 fs = handleOverflow(rounding_mode);
2545 /* If it wasn't, then it also wasn't high enough to overflow max
2546 during the log-rebasing in the min-exponent check. Check that it
2547 won't overflow min in either check, then perform the min-exponent
2549 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
2550 (D.normalizedExponent + 1) * 28738 <=
2551 8651 * (semantics->minExponent - (int) semantics->precision)) {
2552 /* Underflow to zero and round. */
2553 category = fcNormal;
2555 fs = normalize(rounding_mode, lfLessThanHalf);
2557 /* We can finally safely perform the max-exponent check. */
2558 } else if ((D.normalizedExponent - 1) * 42039
2559 >= 12655 * semantics->maxExponent) {
2560 /* Overflow and round. */
2561 fs = handleOverflow(rounding_mode);
2563 integerPart *decSignificand;
2564 unsigned int partCount;
2566 /* A tight upper bound on number of bits required to hold an
2567 N-digit decimal integer is N * 196 / 59. Allocate enough space
2568 to hold the full significand, and an extra part required by
2570 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2571 partCount = partCountForBits(1 + 196 * partCount / 59);
// Manually owned buffer: it is released on the error return below and again
// after rounding at the end of this branch.
2572 decSignificand = new integerPart[partCount + 1];
2575 /* Convert to binary efficiently - we do almost all multiplication
2576 in an integerPart. When this would overflow we do a single
2577 bignum multiplication, and then revert again to multiplication
2578 in an integerPart. */
2580 integerPart decValue, val, multiplier;
2588 if (p == str.end()) {
2592 decValue = decDigitValue(*p++);
2593 if (decValue >= 10U) {
2594 delete[] decSignificand;
2595 return createError("Invalid character in significand");
2598 val = val * 10 + decValue;
2599 /* The maximum number that can be multiplied by ten with any
2600 digit added without overflowing an integerPart. */
2601 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2603 /* Multiply out the current part. */
2604 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2605 partCount, partCount + 1, false);
2607 /* If we used another part (likely but not guaranteed), increase
2609 if (decSignificand[partCount])
2611 } while (p <= D.lastSigDigit);
2613 category = fcNormal;
2614 fs = roundSignificandWithExponent(decSignificand, partCount,
2615 D.exponent, rounding_mode);
2617 delete [] decSignificand;
/// Recognises the textual spellings of infinity and NaN listed below. Each
/// comparison is made against the whole of str.
// NOTE(review): the accepted spellings are not symmetric between the positive
// and negative infinity lists (the positive list has a form with a leading
// plus, the negative list has forms with a leading minus only) - confirm that
// this is intended.
2623 bool IEEEFloat::convertFromStringSpecials(StringRef str) {
2624 if (str.equals("inf") || str.equals("INFINITY") || str.equals("+Inf")) {
2629 if (str.equals("-inf") || str.equals("-INFINITY") || str.equals("-Inf")) {
2634 if (str.equals("nan") || str.equals("NaN")) {
// The two NaN cases differ only in the second makeNaN argument, presumably
// the sign - confirm against the makeNaN declaration.
2635 makeNaN(false, false);
2639 if (str.equals("-nan") || str.equals("-NaN")) {
2640 makeNaN(false, true);
/// Parses str into this object. Special spellings are tried first, then an
/// optional sign is consumed, and the remainder is dispatched to the
/// hexadecimal parser when it starts with 0x or 0X and to the decimal parser
/// otherwise. Malformed input yields one of the Errors created below.
2647 Expected<IEEEFloat::opStatus>
2648 IEEEFloat::convertFromString(StringRef str, roundingMode rounding_mode) {
2650 return createError("Invalid string length");
2652 // Handle special cases.
2653 if (convertFromStringSpecials(str))
2656 /* Handle a leading sign (minus or plus). */
2657 StringRef::iterator p = str.begin();
2658 size_t slen = str.size();
// Only a minus sets the sign; a plus is accepted by the test below.
2659 sign = *p == '-' ? 1 : 0;
2660 if (*p == '-' || *p == '+') {
2664 return createError("String has no digits");
// A 0x or 0X prefix selects the hexadecimal parser.
2667 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2669 return createError("Invalid string");
// The two prefix characters are not passed on.
2670 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
2674 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
2677 /* Write out a hexadecimal representation of the floating point value
2678 to DST, which must be of sufficient size, in the C99 form
2679 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2680 excluding the terminating NUL.
2682 If UPPERCASE, the output is in upper case, otherwise in lower case.
2684 HEXDIGITS digits appear altogether, rounding the value if
2685 necessary. If HEXDIGITS is 0, the minimal precision to display the
2686 number precisely is used instead. If nothing would appear after
2687 the decimal point it is suppressed.
2689 The decimal exponent is always printed and has at least one digit.
2690 Zero values display an exponent of zero. Infinities and NaNs
2691 appear as "infinity" or "nan" respectively.
2693 The above rules are as specified by C99. There is ambiguity about
2694 what the leading hexadecimal digit should be. This implementation
2695 uses whatever is necessary so that the exponent is displayed as
2696 stored. This implies the exponent will fall within the IEEE format
2697 range, and the leading hexadecimal digit will be 0 (for denormals),
2698 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2699 any other digits zero).
2701 unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits,
2703 roundingMode rounding_mode) const {
// The copy length uses the upper-case spelling and the advance uses the
// lower-case one; this assumes both spellings have equal length - confirm at
// their definitions. Subtracting one leaves out the terminating NUL.
2712 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2713 dst += sizeof infinityL - 1;
2717 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2718 dst += sizeof NaNU - 1;
2723 *dst++ = upperCase ? 'X': 'x';
// When more than one digit is requested the extra digits are written as
// zero characters.
2725 if (hexDigits > 1) {
2727 memset (dst, '0', hexDigits - 1);
2728 dst += hexDigits - 1;
2730 *dst++ = upperCase ? 'P': 'p';
// The helper returns the advanced write position.
2735 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
// The count written is the distance from the saved start pointer p.
2741 return static_cast<unsigned int>(dst - p);
2744 /* Does the hard work of outputting the correctly rounded hexadecimal
2745 form of a normal floating point number with the specified number of
2746 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2747 digits necessary to print the value precisely is output. */
// Returns the write position after the exponent, as produced by
// writeSignedDecimal.
2748 char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2750 roundingMode rounding_mode) const {
2751 unsigned int count, valueBits, shift, partsCount, outputDigits;
2752 const char *hexDigitChars;
2753 const integerPart *significand;
2758 *dst++ = upperCase ? 'X': 'x';
// Digit table matching the requested letter case.
2761 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2763 significand = significandParts();
2764 partsCount = partCount();
2766 /* +3 because the first digit only uses the single integer bit, so
2767 we have 3 virtual zero most-significant-bits. */
2768 valueBits = semantics->precision + 3;
// Left shift that aligns the top of valueBits with the top of a part.
2769 shift = integerPartWidth - valueBits % integerPartWidth;
2771 /* The natural number of digits required ignoring trailing
2772 insignificant zeroes. */
2773 outputDigits = (valueBits - significandLSB () + 3) / 4;
2775 /* hexDigits of zero means use the required number for the
2776 precision. Otherwise, see if we are truncating. If we are,
2777 find out if we need to round away from zero. */
2779 if (hexDigits < outputDigits) {
2780 /* We are dropping non-zero bits, so need to check how to round.
2781 "bits" is the number of dropped bits. */
2783 lostFraction fraction;
2785 bits = valueBits - hexDigits * 4;
2786 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2787 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2789 outputDigits = hexDigits;
2792 /* Write the digits consecutively, and start writing in the location
2793 of the hexadecimal point. We move the most significant digit
2794 left and add the hexadecimal point later. */
// Number of parts needed to cover valueBits, rounded up.
2797 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2799 while (outputDigits && count) {
2802 /* Put the most significant integerPartWidth bits in "part". */
2803 if (--count == partsCount)
2804 part = 0; /* An imaginary higher zero part. */
2806 part = significand[count] << shift;
// Bring in the bits that the shift pulled from the next lower part.
2809 part |= significand[count - 1] >> (integerPartWidth - shift);
2811 /* Convert as much of "part" to hexdigits as we can. */
2812 unsigned int curDigits = integerPartWidth / 4;
2814 if (curDigits > outputDigits)
2815 curDigits = outputDigits;
2816 dst += partAsHex (dst, part, curDigits, hexDigitChars);
2817 outputDigits -= curDigits;
2823 /* Note that hexDigitChars has a trailing '0'. */
// Replace the digit by its successor in the table; the loop continues while
// the incremented digit has wrapped to the zero character.
2826 *q = hexDigitChars[hexDigitValue (*q) + 1];
2827 } while (*q == '0');
2830 /* Add trailing zeroes. */
2831 memset (dst, '0', outputDigits);
2832 dst += outputDigits;
2835 /* Move the most significant digit to before the point, and if there
2836 is something after the decimal point add it. This must come
2837 after rounding above. */
2844 /* Finally output the exponent. */
2845 *dst++ = upperCase ? 'P': 'p';
2847 return writeSignedDecimal (dst, exponent);
/// Computes a hash of Arg. Values that are not finite and non-zero are hashed
/// from category, sign and precision only, so the significand of a NaN does
/// not contribute. All other values also hash the exponent and every
/// significand part. The format is represented by its precision alone.
2850 hash_code hash_value(const IEEEFloat &Arg) {
2851 if (!Arg.isFiniteNonZero())
2852 return hash_combine((uint8_t)Arg.category,
2853 // NaN has no sign, fix it at zero.
2854 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
2855 Arg.semantics->precision);
2857 // Normal floats need their exponent and significand hashed.
2858 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
2859 Arg.semantics->precision, Arg.exponent,
// The significand is supplied as the range of its parts.
2861 Arg.significandParts(),
2862 Arg.significandParts() + Arg.partCount()));
2865 // Conversion from APFloat to/from host float/double. It may eventually be
2866 // possible to eliminate these and have everybody deal with APFloats, but that
2867 // will take a while. This approach will not easily extend to long double.
2868 // Current implementation requires integerPartWidth==64, which is correct at
2869 // the moment but could be made more general.
2871 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2872 // the actual IEEE representations. We compensate for that here.
/// Encodes this x87 double-extended value as an 80-bit APInt: the low word
/// holds the 64-bit significand and the second word holds the sign in bit 15
/// above a 15-bit biased exponent.
2874 APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
2875 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended);
2876 assert(partCount()==2);
2878 uint64_t myexponent, mysignificand;
2880 if (isFiniteNonZero()) {
2881 myexponent = exponent+16383; //bias
2882 mysignificand = significandParts()[0];
// A biased exponent of one with the top significand bit clear is encoded
// with a zero exponent field.
2883 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2884 myexponent = 0; // denormal
2885 } else if (category==fcZero) {
2888 } else if (category==fcInfinity) {
// Infinity: all-ones exponent with only the top significand bit set.
2889 myexponent = 0x7fff;
2890 mysignificand = 0x8000000000000000ULL;
2892 assert(category == fcNaN && "Unknown category");
// NaN: all-ones exponent with the stored significand.
2893 myexponent = 0x7fff;
2894 mysignificand = significandParts()[0];
2898 words[0] = mysignificand;
2899 words[1] = ((uint64_t)(sign & 1) << 15) |
2900 (myexponent & 0x7fffLL);
2901 return APInt(80, words);
/// Encodes this legacy PPC double-double value as a 128-bit APInt made of two
/// IEEE double bit patterns: word 0 is the value rounded to double, and word
/// 1 is the residual computed below when that rounding lost information.
2904 APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const {
2905 assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy);
2906 assert(partCount()==2);
2912 // Convert number to double. To avoid spurious underflows, we re-
2913 // normalize against the "double" minExponent first, and only *then*
2914 // truncate the mantissa. The result of that second conversion
2915 // may be inexact, but should never underflow.
2916 // Declare fltSemantics before APFloat that uses it (and
2917 // saves pointer to it) to ensure correct destruction order.
2918 fltSemantics extendedSemantics = *semantics;
2919 extendedSemantics.minExponent = semIEEEdouble.minExponent;
2920 IEEEFloat extended(*this);
2921 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2922 assert(fs == opOK && !losesInfo);
// u becomes the leading double.
2925 IEEEFloat u(extended);
2926 fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
2927 assert(fs == opOK || fs == opInexact);
2929 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
2931 // If conversion was exact or resulted in a special case, we're done;
2932 // just set the second double to zero. Otherwise, re-convert back to
2933 // the extended format and compute the difference. This now should
2934 // convert exactly to double.
2935 if (u.isFiniteNonZero() && losesInfo) {
2936 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2937 assert(fs == opOK && !losesInfo);
// v is the residual: the extended value minus the leading double.
2940 IEEEFloat v(extended);
2941 v.subtract(u, rmNearestTiesToEven);
2942 fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
2943 assert(fs == opOK && !losesInfo);
2945 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
2950 return APInt(128, words);
/// Encodes this IEEE quad value as a 128-bit APInt. Word 0 holds the low 64
/// significand bits; word 1 holds the sign in bit 63, a 15-bit biased
/// exponent from bit 48 and the upper 48 significand bits.
2953 APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
2954 assert(semantics == (const llvm::fltSemantics*)&semIEEEquad);
2955 assert(partCount()==2);
2957 uint64_t myexponent, mysignificand, mysignificand2;
2959 if (isFiniteNonZero()) {
2960 myexponent = exponent+16383; //bias
2961 mysignificand = significandParts()[0];
2962 mysignificand2 = significandParts()[1];
// Bit 48 of the upper part is the integer bit; when it is clear at the
// minimum exponent the value is encoded with a zero exponent field.
2963 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
2964 myexponent = 0; // denormal
2965 } else if (category==fcZero) {
2967 mysignificand = mysignificand2 = 0;
2968 } else if (category==fcInfinity) {
2969 myexponent = 0x7fff;
2970 mysignificand = mysignificand2 = 0;
2972 assert(category == fcNaN && "Unknown category!");
2973 myexponent = 0x7fff;
2974 mysignificand = significandParts()[0];
2975 mysignificand2 = significandParts()[1];
2979 words[0] = mysignificand;
// The mask drops the integer bit, which this encoding does not store.
2980 words[1] = ((uint64_t)(sign & 1) << 63) |
2981 ((myexponent & 0x7fff) << 48) |
2982 (mysignificand2 & 0xffffffffffffLL);
2984 return APInt(128, words);
/// Encodes this IEEE double value as a 64-bit APInt: sign in bit 63, an
/// 11-bit biased exponent from bit 52 and the low 52 significand bits.
2987 APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
2988 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble);
2989 assert(partCount()==1);
2991 uint64_t myexponent, mysignificand;
2993 if (isFiniteNonZero()) {
2994 myexponent = exponent+1023; //bias
2995 mysignificand = *significandParts();
// Bit 52 is the integer bit; when it is clear at the minimum exponent the
// value is encoded with a zero exponent field.
2996 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2997 myexponent = 0; // denormal
2998 } else if (category==fcZero) {
3001 } else if (category==fcInfinity) {
3005 assert(category == fcNaN && "Unknown category!");
3007 mysignificand = *significandParts();
// The significand mask drops the integer bit, which is not stored.
3010 return APInt(64, ((((uint64_t)(sign & 1) << 63) |
3011 ((myexponent & 0x7ff) << 52) |
3012 (mysignificand & 0xfffffffffffffLL))));
/// Encodes this IEEE single value as a 32-bit APInt: sign in bit 31, an
/// 8-bit biased exponent from bit 23 and the low 23 significand bits.
3015 APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
3016 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle);
3017 assert(partCount()==1);
3019 uint32_t myexponent, mysignificand;
3021 if (isFiniteNonZero()) {
3022 myexponent = exponent+127; //bias
3023 mysignificand = (uint32_t)*significandParts();
// Bit 23 is the integer bit; when it is clear at the minimum exponent the
// value is encoded with a zero exponent field.
3024 if (myexponent == 1 && !(mysignificand & 0x800000))
3025 myexponent = 0; // denormal
3026 } else if (category==fcZero) {
3029 } else if (category==fcInfinity) {
3033 assert(category == fcNaN && "Unknown category!");
3035 mysignificand = (uint32_t)*significandParts();
// The significand mask drops the integer bit, which is not stored.
3038 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
3039 (mysignificand & 0x7fffff)));
/// Encodes this IEEE half value as a 16-bit APInt: sign in bit 15, a 5-bit
/// biased exponent from bit 10 and the low 10 significand bits.
3042 APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
3043 assert(semantics == (const llvm::fltSemantics*)&semIEEEhalf);
3044 assert(partCount()==1);
3046 uint32_t myexponent, mysignificand;
3048 if (isFiniteNonZero()) {
3049 myexponent = exponent+15; //bias
3050 mysignificand = (uint32_t)*significandParts();
// Bit 10 is the integer bit; when it is clear at the minimum exponent the
// value is encoded with a zero exponent field.
3051 if (myexponent == 1 && !(mysignificand & 0x400))
3052 myexponent = 0; // denormal
3053 } else if (category==fcZero) {
3056 } else if (category==fcInfinity) {
3060 assert(category == fcNaN && "Unknown category!");
3062 mysignificand = (uint32_t)*significandParts();
// The significand mask drops the integer bit, which is not stored.
3065 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |
3066 (mysignificand & 0x3ff)));
3069 // This function creates an APInt that is just a bit map of the floating
3070 // point constant as it would appear in memory. It is not a conversion,
3071 // and treating the result as a normal integer is unlikely to be useful.
// The encoder is chosen by comparing the semantics pointer with each known
// format in turn; x87 double-extended is the asserted fallback.
3073 APInt IEEEFloat::bitcastToAPInt() const {
3074 if (semantics == (const llvm::fltSemantics*)&semIEEEhalf)
3075 return convertHalfAPFloatToAPInt();
3077 if (semantics == (const llvm::fltSemantics*)&semIEEEsingle)
3078 return convertFloatAPFloatToAPInt();
3080 if (semantics == (const llvm::fltSemantics*)&semIEEEdouble)
3081 return convertDoubleAPFloatToAPInt();
3083 if (semantics == (const llvm::fltSemantics*)&semIEEEquad)
3084 return convertQuadrupleAPFloatToAPInt();
3086 if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy)
3087 return convertPPCDoubleDoubleAPFloatToAPInt();
3089 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended &&
3091 return convertF80LongDoubleAPFloatToAPInt();
/// Returns this value as a host float by reinterpreting its bit pattern.
/// Asserts that the semantics are IEEE single; no rounding is performed.
3094 float IEEEFloat::convertToFloat() const {
3095 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle &&
3096 "Float semantics are not IEEEsingle");
3097 APInt api = bitcastToAPInt();
3098 return api.bitsToFloat();
/// Returns this value as a host double by reinterpreting its bit pattern.
/// Asserts that the semantics are IEEE double; no rounding is performed.
3101 double IEEEFloat::convertToDouble() const {
3102 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble &&
3103 "Float semantics are not IEEEdouble");
3104 APInt api = bitcastToAPInt();
3105 return api.bitsToDouble();
3108 /// Integer bit is explicit in this format. Intel hardware (387 and later)
3109 /// does not support these bit patterns:
3110 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3111 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3112 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3113 /// exponent = 0, integer bit 1 ("pseudodenormal")
3114 /// At the moment, the first three are treated as NaNs, the last one as Normal.
3115 void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
3116 assert(api.getBitWidth()==80);
// Word 0 is the 64-bit significand; word 1 carries the exponent in its low
// 15 bits with the sign just above.
3117 uint64_t i1 = api.getRawData()[0];
3118 uint64_t i2 = api.getRawData()[1];
3119 uint64_t myexponent = (i2 & 0x7fff);
3120 uint64_t mysignificand = i1;
// The explicit integer bit is the top bit of the significand.
3121 uint8_t myintegerbit = mysignificand >> 63;
3123 initialize(&semX87DoubleExtended);
3124 assert(partCount()==2);
3126 sign = static_cast<unsigned int>(i2>>15);
3127 if (myexponent == 0 && mysignificand == 0) {
3128 // exponent, significand meaningless
3130 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3131 // exponent, significand meaningless
3132 category = fcInfinity;
// All-ones exponent with any other significand, or a regular exponent with
// the integer bit clear: the unsupported patterns described above.
3133 } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) ||
3134 (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) {
3135 // exponent meaningless
3137 significandParts()[0] = mysignificand;
3138 significandParts()[1] = 0;
3140 category = fcNormal;
3141 exponent = myexponent - 16383;
3142 significandParts()[0] = mysignificand;
3143 significandParts()[1] = 0;
3144 if (myexponent==0) // denormal
/// Initialises this object from a 128-bit legacy PPC double-double pattern:
/// the first 64-bit word is loaded as a double and widened, then the second
/// double is added when the first is finite and non-zero.
3149 void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) {
3150 assert(api.getBitWidth()==128);
3151 uint64_t i1 = api.getRawData()[0];
3152 uint64_t i2 = api.getRawData()[1];
3156 // Get the first double and convert to our format.
3157 initFromDoubleAPInt(APInt(64, i1));
3158 fs = convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3159 assert(fs == opOK && !losesInfo);
3162 // Unless we have a special case, add in second double.
3163 if (isFiniteNonZero()) {
3164 IEEEFloat v(semIEEEdouble, APInt(64, i2));
3165 fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3166 assert(fs == opOK && !losesInfo);
3169 add(v, rmNearestTiesToEven);
/// Initialises this object from a 128-bit IEEE quad bit pattern and
/// classifies it from its exponent and significand fields.
3173 void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
3174 assert(api.getBitWidth()==128);
3175 uint64_t i1 = api.getRawData()[0];
3176 uint64_t i2 = api.getRawData()[1];
// The upper word holds the sign (bit 63), a 15-bit exponent from bit 48 and
// the top 48 significand bits; the lower word holds the rest.
3177 uint64_t myexponent = (i2 >> 48) & 0x7fff;
3178 uint64_t mysignificand = i1;
3179 uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
3181 initialize(&semIEEEquad);
3182 assert(partCount()==2);
3184 sign = static_cast<unsigned int>(i2>>63);
3185 if (myexponent==0 &&
3186 (mysignificand==0 && mysignificand2==0)) {
3187 // exponent, significand meaningless
3189 } else if (myexponent==0x7fff &&
3190 (mysignificand==0 && mysignificand2==0)) {
3191 // exponent, significand meaningless
3192 category = fcInfinity;
3193 } else if (myexponent==0x7fff &&
3194 (mysignificand!=0 || mysignificand2 !=0)) {
3195 // exponent meaningless
3197 significandParts()[0] = mysignificand;
3198 significandParts()[1] = mysignificand2;
3200 category = fcNormal;
3201 exponent = myexponent - 16383;
3202 significandParts()[0] = mysignificand;
3203 significandParts()[1] = mysignificand2;
3204 if (myexponent==0) // denormal
3207 significandParts()[1] |= 0x1000000000000LL; // integer bit
/// Initialises this object from a 64-bit IEEE double bit pattern and
/// classifies it from its exponent and significand fields.
3211 void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
3212 assert(api.getBitWidth()==64);
3213 uint64_t i = *api.getRawData();
// Field layout: sign in bit 63, 11 exponent bits from bit 52, and 52
// significand bits.
3214 uint64_t myexponent = (i >> 52) & 0x7ff;
3215 uint64_t mysignificand = i & 0xfffffffffffffLL;
3217 initialize(&semIEEEdouble);
3218 assert(partCount()==1);
3220 sign = static_cast<unsigned int>(i>>63);
3221 if (myexponent==0 && mysignificand==0) {
3222 // exponent, significand meaningless
3224 } else if (myexponent==0x7ff && mysignificand==0) {
3225 // exponent, significand meaningless
3226 category = fcInfinity;
3227 } else if (myexponent==0x7ff && mysignificand!=0) {
3228 // exponent meaningless
3230 *significandParts() = mysignificand;
3232 category = fcNormal;
3233 exponent = myexponent - 1023;
3234 *significandParts() = mysignificand;
3235 if (myexponent==0) // denormal
3238 *significandParts() |= 0x10000000000000LL; // integer bit
/// Initialises this object from a 32-bit IEEE single bit pattern and
/// classifies it from its exponent and significand fields.
3242 void IEEEFloat::initFromFloatAPInt(const APInt &api) {
3243 assert(api.getBitWidth()==32);
3244 uint32_t i = (uint32_t)*api.getRawData();
// Field layout: 8 exponent bits from bit 23 and 23 significand bits.
3245 uint32_t myexponent = (i >> 23) & 0xff;
3246 uint32_t mysignificand = i & 0x7fffff;
3248 initialize(&semIEEEsingle);
3249 assert(partCount()==1);
3252 if (myexponent==0 && mysignificand==0) {
3253 // exponent, significand meaningless
3255 } else if (myexponent==0xff && mysignificand==0) {
3256 // exponent, significand meaningless
3257 category = fcInfinity;
3258 } else if (myexponent==0xff && mysignificand!=0) {
3259 // exponent meaningless (the significand bits are stored below)
3261 *significandParts() = mysignificand;
3263 category = fcNormal;
3264 exponent = myexponent - 127; //bias
3265 *significandParts() = mysignificand;
3266 if (myexponent==0) // denormal
3269 *significandParts() |= 0x800000; // integer bit
/// Initialises this object from a 16-bit IEEE half bit pattern and
/// classifies it from its exponent and significand fields.
3273 void IEEEFloat::initFromHalfAPInt(const APInt &api) {
3274 assert(api.getBitWidth()==16);
3275 uint32_t i = (uint32_t)*api.getRawData();
// Field layout: 5 exponent bits from bit 10 and 10 significand bits.
3276 uint32_t myexponent = (i >> 10) & 0x1f;
3277 uint32_t mysignificand = i & 0x3ff;
3279 initialize(&semIEEEhalf);
3280 assert(partCount()==1);
3283 if (myexponent==0 && mysignificand==0) {
3284 // exponent, significand meaningless
3286 } else if (myexponent==0x1f && mysignificand==0) {
3287 // exponent, significand meaningless
3288 category = fcInfinity;
3289 } else if (myexponent==0x1f && mysignificand!=0) {
3290 // exponent meaningless (the significand bits are stored below)
3292 *significandParts() = mysignificand;
3294 category = fcNormal;
3295 exponent = myexponent - 15; //bias
3296 *significandParts() = mysignificand;
3297 if (myexponent==0) // denormal
3300 *significandParts() |= 0x400; // integer bit
3304 /// Treat api as containing the bits of a floating point number. The
3305 /// format is selected by Sem, which must be one of the semantics tested
3306 /// below; each case forwards to the matching format-specific initialiser
3307 /// and any other value reaches llvm_unreachable.
3308 void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
3309 if (Sem == &semIEEEhalf)
3310 return initFromHalfAPInt(api);
3311 if (Sem == &semIEEEsingle)
3312 return initFromFloatAPInt(api);
3313 if (Sem == &semIEEEdouble)
3314 return initFromDoubleAPInt(api);
3315 if (Sem == &semX87DoubleExtended)
3316 return initFromF80LongDoubleAPInt(api);
3317 if (Sem == &semIEEEquad)
3318 return initFromQuadrupleAPInt(api);
3319 if (Sem == &semPPCDoubleDoubleLegacy)
3320 return initFromPPCDoubleDoubleAPInt(api);
3322 llvm_unreachable(nullptr);
3325 /// Make this number the largest magnitude normal number in the given
/// semantics.
3327 void IEEEFloat::makeLargest(bool Negative) {
3328 // We want (in interchange format):
3329 // sign = {Negative}
3331 // significand = 1..1
3332 category = fcNormal;
3334 exponent = semantics->maxExponent;
3336 // Use memset to set all but the highest integerPart to all ones.
3337 integerPart *significand = significandParts();
3338 unsigned PartCount = partCount();
3339 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
3341 // Set the high integerPart especially setting all unused top bits for
3342 // internal consistency.
3343 const unsigned NumUnusedHighBits =
3344 PartCount*integerPartWidth - semantics->precision;
// The comparison guards the shift: shifting by the full part width is not
// attempted.
3345 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
3346 ? (~integerPart(0) >> NumUnusedHighBits)
3350 /// Make this number the smallest magnitude denormal number in the given
/// semantics.
3352 void IEEEFloat::makeSmallest(bool Negative) {
3353 // We want (in interchange format):
3354 // sign = {Negative}
3356 // significand = 0..01
3357 category = fcNormal;
3359 exponent = semantics->minExponent;
// The significand is set to the integer value one across all parts.
3360 APInt::tcSet(significandParts(), 1, partCount());
/// Makes this number the smallest magnitude value that still has its
/// integer bit set: minimum exponent, with only the top significand bit
/// ORed in by the visible code.
3363 void IEEEFloat::makeSmallestNormalized(bool Negative) {
3364 // We want (in interchange format):
3365 // sign = {Negative}
3367 // significand = 10..0
3369 category = fcNormal;
3372 exponent = semantics->minExponent;
// Bit precision - 1 is the integer bit; locate its part and its offset
// within that part.
3373 significandParts()[partCountForBits(semantics->precision) - 1] |=
3374 (((integerPart)1) << ((semantics->precision - 1) % integerPartWidth));
/// Constructs a value of format Sem from the raw bit pattern in API.
3377 IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
3378 initFromAPInt(&Sem, API);
/// Constructs an IEEE single value from the bit pattern of the host float f.
3381 IEEEFloat::IEEEFloat(float f) {
3382 initFromAPInt(&semIEEEsingle, APInt::floatToBits(f));
/// Constructs an IEEE double value from the bit pattern of the host double d.
3385 IEEEFloat::IEEEFloat(double d) {
3386 initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d));
/// Appends every character of Str to the end of Buffer.
3390 void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
3391 Buffer.append(Str.begin(), Str.end());
3394 /// Removes data from the given significand until it is no more
3395 /// precise than is required for the desired precision.
/// The significand is divided by a power of ten and exp is increased by the
/// same number of decimal digits, both in place.
3396 void AdjustToPrecision(APInt &significand,
3397 int &exp, unsigned FormatPrecision) {
3398 unsigned bits = significand.getActiveBits();
3400 // 196/59 is a very slight overestimate of lg_2(10).
3401 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
// Nothing to remove when the value already fits the requested precision.
3403 if (bits <= bitsRequired) return;
// Translate the surplus bits back into whole decimal digits, rounding down.
3405 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3406 if (!tensRemovable) return;
3408 exp += tensRemovable;
// divisor accumulates the power of ten to divide by; the loop consumes
// tensRemovable one binary digit at a time.
3410 APInt divisor(significand.getBitWidth(), 1);
3411 APInt powten(significand.getBitWidth(), 10);
3413 if (tensRemovable & 1)
3415 tensRemovable >>= 1;
3416 if (!tensRemovable) break;
3420 significand = significand.udiv(divisor);
3422 // Truncate the significand down to its active bit count.
3423 significand = significand.trunc(significand.getActiveBits());
/// Rounds a buffer of decimal digit characters down to at most
/// \p FormatPrecision digits. The buffer is stored least significant digit
/// first (see the comment below), so the dropped digits are erased from the
/// front.
/// \param buffer digit characters, least significant first; edited in place.
/// \param exp decimal exponent; increased by the number of digits dropped.
/// \param FormatPrecision maximum number of digits to keep.
3427 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
3428 int &exp, unsigned FormatPrecision) {
3429 unsigned N = buffer.size();
3430 if (N <= FormatPrecision) return;
3432 // The most significant figures are the last ones in the buffer.
3433 unsigned FirstSignificant = N - FormatPrecision;
3436 // FIXME: this probably shouldn't use 'round half up'.
3438 // Rounding down is just a truncation, except we also want to drop
3439 // trailing zeros from the new result.
// The rounding decision looks only at the most significant dropped digit.
3440 if (buffer[FirstSignificant - 1] < '5') {
3441 while (FirstSignificant < N && buffer[FirstSignificant] == '0')
3444 exp += FirstSignificant;
3445 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3449 // Rounding up requires a decimal add-with-carry. If we continue
3450 // the carry, the newly-introduced zeros will just be truncated.
3451 for (unsigned I = FirstSignificant; I != N; ++I) {
3452 if (buffer[I] == '9') {
3460 // If we carried through, we have exactly one digit of precision.
3461 if (FirstSignificant == N) {
3462 exp += FirstSignificant;
3464 buffer.push_back('1');
3468 exp += FirstSignificant;
3469 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
/// Appends a decimal rendering of this value to \p Str.
/// \param Str output buffer; characters are appended to it.
/// \param FormatPrecision maximum number of significant decimal digits. When
///        zero it is derived from the semantics' precision (see below).
/// \param FormatMaxPadding when zero, scientific notation is always used;
///        otherwise it bounds the zero padding allowed before switching to
///        scientific notation.
/// \param TruncateZero selects 'E' (true) or 'e' (false) as the exponent
///        marker; when false the mantissa is zero-filled up to
///        FormatPrecision and the exponent has at least two digits.
3473 void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
3474 unsigned FormatMaxPadding, bool TruncateZero) const {
3478 return append(Str, "-Inf");
3480 return append(Str, "+Inf");
3482 case fcNaN: return append(Str, "NaN");
3488 if (!FormatMaxPadding) {
3490 append(Str, "0.0E+0");
3493 if (FormatPrecision > 1)
3494 Str.append(FormatPrecision - 1, '0');
3495 append(Str, "e+00");
3508 // Decompose the number into an APInt and an exponent.
// The value is treated as an integer significand times 2^exp, hence the
// subtraction of (precision - 1) from the stored exponent.
3509 int exp = exponent - ((int) semantics->precision - 1);
3510 APInt significand(semantics->precision,
3511 makeArrayRef(significandParts(),
3512 partCountForBits(semantics->precision)));
3514 // Set FormatPrecision if zero. We want to do this before we
3515 // truncate trailing zeros, as those are part of the precision.
3516 if (!FormatPrecision) {
3517 // We use enough digits so the number can be round-tripped back to an
3518 // APFloat. The formula comes from "How to Print Floating-Point Numbers
3519 // Accurately" by Steele and White.
3520 // FIXME: Using a formula based purely on the precision is conservative;
3521 // we can print fewer digits depending on the actual value being printed.
3523 // FormatPrecision = 2 + floor(significandBits / lg_2(10))
3524 FormatPrecision = 2 + semantics->precision * 59 / 196;
3527 // Ignore trailing binary zeros.
3528 int trailingZeros = significand.countTrailingZeros();
3529 exp += trailingZeros;
3530 significand.lshrInPlace(trailingZeros);
3532 // Change the exponent from 2^e to 10^e.
3535 } else if (exp > 0) {
// Positive binary exponent: widen by exp bits and shift it into the integer.
3537 significand = significand.zext(semantics->precision + exp);
3538 significand <<= exp;
3540 } else { /* exp < 0 */
3543 // We transform this using the identity:
3544 // (N)(2^-e) == (N)(5^e)(10^-e)
3545 // This means we have to multiply N (the significand) by 5^e.
3546 // To avoid overflow, we have to operate on numbers large
3547 // enough to store N * 5^e:
3548 // log2(N * 5^e) == log2(N) + e * log2(5)
3549 // <= semantics->precision + e * 137 / 59
3550 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
3552 unsigned precision = semantics->precision + (137 * texp + 136) / 59;
3554 // Multiply significand by 5^e.
3555 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
3556 significand = significand.zext(precision);
3557 APInt five_to_the_i(precision, 5);
3559 if (texp & 1) significand *= five_to_the_i;
3563 five_to_the_i *= five_to_the_i;
// Coarse trim on the integer significand before any digits are extracted.
3567 AdjustToPrecision(significand, exp, FormatPrecision);
3569 SmallVector<char, 256> buffer;
3572 unsigned precision = significand.getBitWidth();
3573 APInt ten(precision, 10);
3574 APInt digit(precision, 0);
// Repeated division by ten yields digits least significant first, which is
// the order the buffer-based AdjustToPrecision expects.
3576 bool inTrail = true;
3577 while (significand != 0) {
3578 // digit <- significand % 10
3579 // significand <- significand / 10
3580 APInt::udivrem(significand, ten, significand, digit);
3582 unsigned d = digit.getZExtValue();
3584 // Drop trailing zeros.
3585 if (inTrail && !d) exp++;
3587 buffer.push_back((char) ('0' + d));
3592 assert(!buffer.empty() && "no characters in buffer!");
3594 // Drop down to FormatPrecision.
3595 // TODO: don't do more precise calculations above than are required.
3596 AdjustToPrecision(buffer, exp, FormatPrecision);
3598 unsigned NDigits = buffer.size();
3600 // Check whether we should use scientific notation.
3601 bool FormatScientific;
3602 if (!FormatMaxPadding)
3603 FormatScientific = true;
3608 // But we shouldn't make the number look more precise than it is.
3609 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
3610 NDigits + (unsigned) exp > FormatPrecision);
3612 // Power of the most significant digit.
3613 int MSD = exp + (int) (NDigits - 1);
3616 FormatScientific = false;
3618 // 765e-5 == 0.00765
3620 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
3625 // Scientific formatting is pretty straightforward.
3626 if (FormatScientific) {
// From here on exp is the power of ten of the leading digit.
3627 exp += (NDigits - 1);
3629 Str.push_back(buffer[NDigits-1]);
3631 if (NDigits == 1 && TruncateZero)
3634 for (unsigned I = 1; I != NDigits; ++I)
3635 Str.push_back(buffer[NDigits-1-I]);
3636 // Fill with zeros up to FormatPrecision.
3637 if (!TruncateZero && FormatPrecision > NDigits - 1)
3638 Str.append(FormatPrecision - NDigits + 1, '0');
3639 // For !TruncateZero we use lower 'e'.
3640 Str.push_back(TruncateZero ? 'E' : 'e');
3642 Str.push_back(exp >= 0 ? '+' : '-');
3643 if (exp < 0) exp = -exp;
// Exponent digits are collected least significant first, then emitted in
// reverse.
3644 SmallVector<char, 6> expbuf;
3646 expbuf.push_back((char) ('0' + (exp % 10)));
3649 // Exponent always at least two digits if we do not truncate zeros.
3650 if (!TruncateZero && expbuf.size() < 2)
3651 expbuf.push_back('0');
3652 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
3653 Str.push_back(expbuf[E-1-I]);
3657 // Non-scientific, positive exponents.
3659 for (unsigned I = 0; I != NDigits; ++I)
3660 Str.push_back(buffer[NDigits-1-I]);
3661 for (unsigned I = 0; I != (unsigned) exp; ++I)
3666 // Non-scientific, negative exponents.
3668 // The number of digits to the left of the decimal point.
3669 int NWholeDigits = exp + (int) NDigits;
3672 if (NWholeDigits > 0) {
3673 for (; I != (unsigned) NWholeDigits; ++I)
3674 Str.push_back(buffer[NDigits-I-1]);
3677 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
3681 for (unsigned Z = 1; Z != NZeros; ++Z)
3685 for (; I != NDigits; ++I)
3686 Str.push_back(buffer[NDigits-I-1]);
/// Tests whether 1/x is exactly representable and, if so, can store it.
/// \param inv destination for the reciprocal.
// NOTE(review): the store to *inv below is unconditional in this excerpt; a
// null check and the return statements are not visible -- confirm.
3689 bool IEEEFloat::getExactInverse(APFloat *inv) const {
3690 // Special floats and denormals have no exact inverse.
3691 if (!isFiniteNonZero())
3694 // Check that the number is a power of two by making sure that only the
3695 // integer bit is set in the significand.
3696 if (significandLSB() != semantics->precision - 1)
// Compute 1 / *this in the same semantics; any status other than opOK is
// tested for here.
3700 IEEEFloat reciprocal(*semantics, 1ULL);
3701 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
3704 // Avoid multiplication with a denormal, it is not safe on all platforms and
3705 // may be slower than a normal division.
3706 if (reciprocal.isDenormal())
3709 assert(reciprocal.isFiniteNonZero() &&
3710 reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
3713 *inv = APFloat(reciprocal, *semantics);
/// Returns true when bit (precision - 2) of the significand, the first bit of
/// the trailing significand, is clear.
// NOTE(review): any preceding not-a-NaN guard is not visible in this excerpt.
3718 bool IEEEFloat::isSignaling() const {
3722 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
3723 // first bit of the trailing significand being 0.
3724 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
3727 /// IEEE-754R 2008 5.3.1: nextUp/nextDown.
3729 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
3730 /// appropriate sign switching before/after the computation.
/// \param nextDown when true, nextDown(x) is computed via the sign swaps
///        described above; otherwise nextUp(x).
/// \returns the status held in `result`: initialised to opOK and set to
///          opInvalidOp for a signaling NaN input.
3731 IEEEFloat::opStatus IEEEFloat::next(bool nextDown) {
3732 // If we are performing nextDown, swap sign so we have -x.
3736 // Compute nextUp(x)
3737 opStatus result = opOK;
3739 // Handle each float category separately.
3742 // nextUp(+inf) = +inf
3745 // nextUp(-inf) = -getLargest()
3749 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
3750 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
3751 // change the payload.
3752 if (isSignaling()) {
3753 result = opInvalidOp;
3754 // For consistency, propagate the sign of the sNaN to the qNaN.
3755 makeNaN(false, isNegative(), nullptr);
3759 // nextUp(pm 0) = +getSmallest()
3760 makeSmallest(false);
3763 // nextUp(-getSmallest()) = -0
3764 if (isSmallest() && isNegative()) {
3765 APInt::tcSet(significandParts(), 0, partCount());
3771 // nextUp(getLargest()) == INFINITY
3772 if (isLargest() && !isNegative()) {
// Same field values that makeInf writes: zero significand, category
// fcInfinity, exponent maxExponent + 1.
3773 APInt::tcSet(significandParts(), 0, partCount());
3774 category = fcInfinity;
3775 exponent = semantics->maxExponent + 1;
3779 // nextUp(normal) == normal + inc.
3781 // If we are negative, we need to decrement the significand.
3783 // We only cross a binade boundary that requires adjusting the exponent
3785 // 1. exponent != semantics->minExponent. This implies we are not in the
3786 // smallest binade or are dealing with denormals.
3787 // 2. Our significand excluding the integral bit is all zeros.
3788 bool WillCrossBinadeBoundary =
3789 exponent != semantics->minExponent && isSignificandAllZeros();
3791 // Decrement the significand.
3793 // We always do this since:
3794 // 1. If we are dealing with a non-binade decrement, by definition we
3795 // just decrement the significand.
3796 // 2. If we are dealing with a normal -> normal binade decrement, since
3797 // we have an explicit integral bit the fact that all bits but the
3798 // integral bit are zero implies that subtracting one will yield a
3799 // significand with 0 integral bit and 1 in all other spots. Thus we
3800 // must just adjust the exponent and set the integral bit to 1.
3801 // 3. If we are dealing with a normal -> denormal binade decrement,
3802 // since we set the integral bit to 0 when we represent denormals, we
3803 // just decrement the significand.
3804 integerPart *Parts = significandParts();
3805 APInt::tcDecrement(Parts, partCount());
3807 if (WillCrossBinadeBoundary) {
3808 // Our result is a normal number. Do the following:
3809 // 1. Set the integral bit to 1.
3810 // 2. Decrement the exponent.
3811 APInt::tcSetBit(Parts, semantics->precision - 1);
3815 // If we are positive, we need to increment the significand.
3817 // We only cross a binade boundary that requires adjusting the exponent if
3818 // the input is not a denormal and all of said input's significand bits
3819 // are set. If all of said conditions are true: clear the significand, set
3820 // the integral bit to 1, and increment the exponent. If we have a
3821 // denormal always increment since moving denormals and the numbers in the
3822 // smallest normal binade have the same exponent in our representation.
3823 bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes();
3825 if (WillCrossBinadeBoundary) {
3826 integerPart *Parts = significandParts();
3827 APInt::tcSet(Parts, 0, partCount());
3828 APInt::tcSetBit(Parts, semantics->precision - 1);
3829 assert(exponent != semantics->maxExponent &&
3830 "We can not increment an exponent beyond the maxExponent allowed"
3831 " by the given floating point semantics.");
3834 incrementSignificand();
3840 // If we are performing nextDown, swap sign so we have -nextUp(-x)
/// Turns this value into an infinity: category fcInfinity, exponent
/// maxExponent + 1, significand cleared.
/// \param Negative sign requested for the result.
// NOTE(review): the statement applying Negative is not visible in this excerpt.
3847 void IEEEFloat::makeInf(bool Negative) {
3848 category = fcInfinity;
3850 exponent = semantics->maxExponent + 1;
3851 APInt::tcSet(significandParts(), 0, partCount());
/// Turns this value into a zero: exponent minExponent - 1 and significand
/// cleared.
/// \param Negative sign requested for the result.
// NOTE(review): the category and sign assignments are not visible in this
// excerpt -- confirm.
3854 void IEEEFloat::makeZero(bool Negative) {
3857 exponent = semantics->minExponent-1;
3858 APInt::tcSet(significandParts(), 0, partCount());
/// Sets bit (precision - 2) of the significand, the same bit that isSignaling
/// tests, so the value no longer reads as signaling.
3861 void IEEEFloat::makeQuiet() {
3863 APInt::tcSetBit(significandParts(), semantics->precision - 2);
/// Returns the binary exponent of \p Arg, or one of the sentinels
/// IEEEFloat::IEK_NaN, IEEEFloat::IEK_Zero or IEEEFloat::IEK_Inf.
/// For a non-denormal value the stored exponent is returned directly; for a
/// denormal a copy is normalized to find the effective exponent.
// NOTE(review): the guards in front of the first two returns are not visible
// in this excerpt.
3866 int ilogb(const IEEEFloat &Arg) {
3868 return IEEEFloat::IEK_NaN;
3870 return IEEEFloat::IEK_Zero;
3871 if (Arg.isInfinity())
3872 return IEEEFloat::IEK_Inf;
3873 if (!Arg.isDenormal())
3874 return Arg.exponent;
3876 IEEEFloat Normalized(Arg);
3877 int SignificandBits = Arg.getSemantics().precision - 1;
// Raise the exponent by SignificandBits before normalizing, then subtract the
// same amount to recover the denormal's true exponent.
3879 Normalized.exponent += SignificandBits;
3880 Normalized.normalize(IEEEFloat::rmNearestTiesToEven, lfExactlyZero);
3881 return Normalized.exponent - SignificandBits;
/// Scales \p X by 2^Exp: the clamped \p Exp is added to X's exponent and the
/// result is re-normalized under \p RoundingMode.
/// \param X value to scale (taken by value and modified locally).
/// \param Exp power of two to scale by; clamped as described below.
/// \param RoundingMode rounding applied by normalize.
3884 IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode RoundingMode) {
3885 auto MaxExp = X.getSemantics().maxExponent;
3886 auto MinExp = X.getSemantics().minExponent;
3888 // If Exp is wildly out-of-scale, simply adding it to X.exponent will
3889 // overflow; clamp it to a safe range before adding, but ensure that the range
3890 // is large enough that the clamp does not change the result. The range we
3891 // need to support is the difference between the largest possible exponent and
3892 // the normalized exponent of half the smallest denormal.
3894 int SignificandBits = X.getSemantics().precision - 1;
3895 int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;
3897 // Clamp to one past the range ends to let normalize handle overflow.
3898 X.exponent += std::min(std::max(Exp, -MaxIncrement - 1), MaxIncrement);
3899 X.normalize(RoundingMode, lfExactlyZero);
/// Splits \p Val into a fraction and a power of two.
/// \param Val value to decompose.
/// \param Exp [out] receives the exponent; 0 when the value classifies as zero.
/// \param RM rounding mode forwarded to scalbn.
/// \returns Val scaled by 2^-Exp for the finite case shown at the end.
// NOTE(review): the statement that first assigns Exp (compared against the
// IEK_* sentinels below) and the NaN/Inf return paths are not visible in this
// excerpt -- presumably Exp comes from ilogb(Val); confirm.
3905 IEEEFloat frexp(const IEEEFloat &Val, int &Exp, IEEEFloat::roundingMode RM) {
3908 // Quiet signalling nans.
3909 if (Exp == IEEEFloat::IEK_NaN) {
3910 IEEEFloat Quiet(Val);
3915 if (Exp == IEEEFloat::IEK_Inf)
3918 // 1 is added because frexp is defined to return a normalized fraction in
3919 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
3920 Exp = Exp == IEEEFloat::IEK_Zero ? 0 : Exp + 1;
3921 return scalbn(Val, -Exp, RM);
/// Constructs a double-double whose two components are default-constructed
/// IEEE doubles. Only semPPCDoubleDouble is accepted (asserted).
// NOTE(review): the initializer for Semantics is not visible in this excerpt.
3924 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S)
3926 Floats(new APFloat[2]{APFloat(semIEEEdouble), APFloat(semIEEEdouble)}) {
3927 assert(Semantics == &semPPCDoubleDouble);
/// Constructs a double-double whose two IEEE double components are built with
/// the `uninitialized` tag. Only semPPCDoubleDouble is accepted (asserted).
// NOTE(review): the initializer for Semantics is not visible in this excerpt.
3930 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag)
3932 Floats(new APFloat[2]{APFloat(semIEEEdouble, uninitialized),
3933 APFloat(semIEEEdouble, uninitialized)}) {
3934 assert(Semantics == &semPPCDoubleDouble);
/// Constructs a double-double from the integer \p I: the first component is
/// APFloat(semIEEEdouble, I) and the second is a default-constructed IEEE
/// double. Only semPPCDoubleDouble is accepted (asserted).
3937 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I)
3938 : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I),
3939 APFloat(semIEEEdouble)}) {
3940 assert(Semantics == &semPPCDoubleDouble);
/// Constructs a double-double from a raw bit pattern: raw word 0 of \p I
/// becomes the bits of the first IEEE double and raw word 1 the bits of the
/// second. Only semPPCDoubleDouble is accepted (asserted).
// NOTE(review): reads I.getRawData()[1], so I presumably must be wider than
// 64 bits -- confirm with callers. The Semantics initializer is not visible.
3943 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I)
3945 Floats(new APFloat[2]{
3946 APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])),
3947 APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
3948 assert(Semantics == &semPPCDoubleDouble);
/// Constructs a double-double by moving \p First and Second into the two
/// component slots. Asserts that the semantics are semPPCDoubleDouble and
/// that both components are IEEE doubles.
// NOTE(review): the rest of the parameter list and the Semantics initializer
// are not visible in this excerpt.
3951 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First,
3954 Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
3955 assert(Semantics == &semPPCDoubleDouble);
3956 assert(&Floats[0].getSemantics() == &semIEEEdouble);
3957 assert(&Floats[1].getSemantics() == &semIEEEdouble);
/// Copy constructor: copies RHS's semantics and, when RHS owns components,
/// allocates a fresh pair holding copies of both.
// NOTE(review): the alternative used when RHS.Floats is null is not visible
// in this excerpt.
3960 DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS)
3961 : Semantics(RHS.Semantics),
3962 Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]),
3963 APFloat(RHS.Floats[1])}
3965 assert(Semantics == &semPPCDoubleDouble);
/// Move constructor: takes over RHS's semantics pointer and component storage,
/// then marks RHS as moved-from by pointing its semantics at semBogus.
3968 DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS)
3969 : Semantics(RHS.Semantics), Floats(std::move(RHS.Floats)) {
3970 RHS.Semantics = &semBogus;
3971 assert(Semantics == &semPPCDoubleDouble);
/// Copy assignment. When both sides share semantics and RHS owns components,
/// the two components are assigned in place; otherwise, unless this is
/// self-assignment, the object is destroyed and copy-constructed again in the
/// same storage.
3974 DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) {
3975 if (Semantics == RHS.Semantics && RHS.Floats) {
3976 Floats[0] = RHS.Floats[0];
3977 Floats[1] = RHS.Floats[1];
3978 } else if (this != &RHS) {
// Explicit destructor call followed by placement new over *this.
3979 this->~DoubleAPFloat();
3980 new (this) DoubleAPFloat(RHS);
3985 // Implement addition, subtraction, multiplication and division based on:
3986 // "Software for Doubled-Precision Floating-Point Computations",
3987 // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
//
// addImpl adds the double-double pairs (a, aa) and (c, cc) and stores the sum
// in Floats[0] (high part) and Floats[1] (low part). The statuses of all the
// component operations are OR-ed together and returned.
// NOTE(review): several declarations and initial assignments (z, zz, q,
// Status, the trailing parameter) are not visible in this excerpt.
3988 APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa,
3989 const APFloat &c, const APFloat &cc,
3993 Status |= z.add(c, RM);
3994 if (!z.isFinite()) {
3995 if (!z.isInfinity()) {
// The high-part sum is not finite and not an infinity: it becomes the result
// with a +0 low part.
3996 Floats[0] = std::move(z);
3997 Floats[1].makeZero(/* Neg = */ false);
3998 return (opStatus)Status;
// Remember which high part has the larger magnitude; it selects the order of
// the additions below.
4001 auto AComparedToC = a.compareAbsoluteValue(c);
4003 Status |= z.add(aa, RM);
4004 if (AComparedToC == APFloat::cmpGreaterThan) {
4005 // z = cc + aa + c + a;
4006 Status |= z.add(c, RM);
4007 Status |= z.add(a, RM);
4009 // z = cc + aa + a + c;
4010 Status |= z.add(a, RM);
4011 Status |= z.add(c, RM);
4013 if (!z.isFinite()) {
4014 Floats[0] = std::move(z);
4015 Floats[1].makeZero(/* Neg = */ false);
4016 return (opStatus)Status;
4020 Status |= zz.add(cc, RM);
4021 if (AComparedToC == APFloat::cmpGreaterThan) {
4022 // Floats[1] = a - z + c + zz;
4024 Status |= Floats[1].subtract(z, RM);
4025 Status |= Floats[1].add(c, RM);
4026 Status |= Floats[1].add(zz, RM);
4028 // Floats[1] = c - z + a + zz;
4030 Status |= Floats[1].subtract(z, RM);
4031 Status |= Floats[1].add(a, RM);
4032 Status |= Floats[1].add(zz, RM);
4037 Status |= q.subtract(z, RM);
4039 // zz = q + c + (a - (q + z)) + aa + cc;
4040 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
4042 Status |= zz.add(c, RM);
4043 Status |= q.add(z, RM);
4044 Status |= q.subtract(a, RM);
4046 Status |= zz.add(q, RM);
4047 Status |= zz.add(aa, RM);
4048 Status |= zz.add(cc, RM);
// A correction term of exactly +0 leaves z as the high part with a +0 low
// part.
4049 if (zz.isZero() && !zz.isNegative()) {
4050 Floats[0] = std::move(z);
4051 Floats[1].makeZero(/* Neg = */ false);
4055 Status |= Floats[0].add(zz, RM);
4056 if (!Floats[0].isFinite()) {
4057 Floats[1].makeZero(/* Neg = */ false);
4058 return (opStatus)Status;
// Low part = (z - Floats[0]) + zz.
4060 Floats[1] = std::move(z);
4061 Status |= Floats[1].subtract(Floats[0], RM);
4062 Status |= Floats[1].add(zz, RM);
4064 return (opStatus)Status;
/// Adds \p LHS and \p RHS into Out, testing the special categories (NaN,
/// zero, infinity) in that order before handing two normal operands to
/// addImpl.
// NOTE(review): the bodies of most special-case branches and the remaining
// parameters are not visible in this excerpt.
4067 APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS,
4068 const DoubleAPFloat &RHS,
4071 if (LHS.getCategory() == fcNaN) {
4075 if (RHS.getCategory() == fcNaN) {
4079 if (LHS.getCategory() == fcZero) {
4083 if (RHS.getCategory() == fcZero) {
// Infinities of opposite sign have no meaningful sum: the result is made a
// NaN (first argument false).
4087 if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity &&
4088 LHS.isNegative() != RHS.isNegative()) {
4089 Out.makeNaN(false, Out.isNegative(), nullptr);
4092 if (LHS.getCategory() == fcInfinity) {
4096 if (RHS.getCategory() == fcInfinity) {
4100 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal);
// Work on copies of the components before calling Out.addImpl, which writes
// to Out.Floats.
4102 APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]),
4104 assert(&A.getSemantics() == &semIEEEdouble);
4105 assert(&AA.getSemantics() == &semIEEEdouble);
4106 assert(&C.getSemantics() == &semIEEEdouble);
4107 assert(&CC.getSemantics() == &semIEEEdouble);
4108 assert(&Out.Floats[0].getSemantics() == &semIEEEdouble);
4109 assert(&Out.Floats[1].getSemantics() == &semIEEEdouble);
4110 return Out.addImpl(A, AA, C, CC, RM);
/// Adds \p RHS to this value in place by calling addWithSpecial with *this as
/// both the left operand and the output.
4113 APFloat::opStatus DoubleAPFloat::add(const DoubleAPFloat &RHS,
4115 return addWithSpecial(*this, RHS, *this, RM);
/// Subtracts \p RHS from this value; the visible step delegates to add().
// NOTE(review): only the call to add() is visible in this excerpt; any sign
// changes around it and the return statement are not shown -- confirm.
4118 APFloat::opStatus DoubleAPFloat::subtract(const DoubleAPFloat &RHS,
4121 auto Ret = add(RHS, RM);
/// Multiplies this value by \p RHS in place. Special categories are tested
/// first; two normal operands are multiplied component-wise and the statuses
/// of the component operations are OR-ed together and returned.
// NOTE(review): the declarations of Out, T, Tau, V, W, U and Status, and the
// bodies of most special-case branches, are not visible in this excerpt.
4126 APFloat::opStatus DoubleAPFloat::multiply(const DoubleAPFloat &RHS,
4127 APFloat::roundingMode RM) {
4128 const auto &LHS = *this;
4130 /* Interesting observation: For special categories, finding the lowest
4131 common ancestor of the following layered graph gives the correct
4140 e.g. NaN * NaN = NaN
4142 Normal * Zero = Zero
4145 if (LHS.getCategory() == fcNaN) {
4149 if (RHS.getCategory() == fcNaN) {
// Zero times infinity, in either order, produces a NaN.
4153 if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) ||
4154 (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) {
4155 Out.makeNaN(false, false, nullptr);
4158 if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) {
4162 if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) {
4166 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal &&
4167 "Special cases not handled exhaustively");
// A/B are this value's high/low components, C/D are RHS's.
4170 APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1];
4173 Status |= T.multiply(C, RM);
4174 if (!T.isFiniteNonZero()) {
4176 Floats[1].makeZero(/* Neg = */ false);
4177 return (opStatus)Status;
4180 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
4183 Status |= Tau.fusedMultiplyAdd(C, T, RM);
4188 Status |= V.multiply(D, RM);
4191 Status |= W.multiply(C, RM);
4192 Status |= V.add(W, RM);
4194 Status |= Tau.add(V, RM);
4198 Status |= U.add(Tau, RM);
4201 if (!U.isFinite()) {
4202 Floats[1].makeZero(/* Neg = */ false);
4204 // Floats[1] = (t - u) + tau
4205 Status |= T.subtract(U, RM);
4206 Status |= T.add(Tau, RM);
4209 return (opStatus)Status;
/// Divides this value by \p RHS in place. Both operands are reinterpreted via
/// their bit patterns as semPPCDoubleDoubleLegacy values, the division is done
/// there, and this object is rebuilt from the result's bits.
// NOTE(review): the capture of divide()'s status and the return statement are
// not visible in this excerpt.
4212 APFloat::opStatus DoubleAPFloat::divide(const DoubleAPFloat &RHS,
4213 APFloat::roundingMode RM) {
4214 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4215 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4217 Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM);
4218 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
/// Computes the remainder of this value with respect to \p RHS in place. Both
/// operands are reinterpreted via their bit patterns as
/// semPPCDoubleDoubleLegacy values, the operation is done there, and this
/// object is rebuilt from the result's bits.
// NOTE(review): the capture of remainder()'s status and the return statement
// are not visible in this excerpt.
4222 APFloat::opStatus DoubleAPFloat::remainder(const DoubleAPFloat &RHS) {
4223 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4224 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4226 Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4227 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
/// Computes this value modulo \p RHS in place. Both operands are reinterpreted
/// via their bit patterns as semPPCDoubleDoubleLegacy values, mod() is done
/// there (status kept in Ret), and this object is rebuilt from the result's
/// bits.
4231 APFloat::opStatus DoubleAPFloat::mod(const DoubleAPFloat &RHS) {
4232 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4233 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4234 auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4235 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
/// Computes this * Multiplicand + Addend in place. All three operands are
/// reinterpreted via their bit patterns as semPPCDoubleDoubleLegacy values,
/// the fused operation is done there (status kept in Ret), and this object is
/// rebuilt from the result's bits.
4240 DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand,
4241 const DoubleAPFloat &Addend,
4242 APFloat::roundingMode RM) {
4243 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4244 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4245 auto Ret = Tmp.fusedMultiplyAdd(
4246 APFloat(semPPCDoubleDoubleLegacy, Multiplicand.bitcastToAPInt()),
4247 APFloat(semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()), RM);
4248 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
/// Rounds this value to an integral value in place under \p RM. The value is
/// reinterpreted via its bit pattern as a semPPCDoubleDoubleLegacy value,
/// rounded there (status kept in Ret), and rebuilt from the result's bits.
4252 APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) {
4253 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4254 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4255 auto Ret = Tmp.roundToIntegral(RM);
4256 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
/// Flips the sign of both components, which negates the represented value.
4260 void DoubleAPFloat::changeSign() {
4261 Floats[0].changeSign();
4262 Floats[1].changeSign();
/// Compares the magnitudes of this value and \p RHS. The high components
/// decide unless they compare equal in magnitude; then the low components are
/// compared, taking into account whether each low part has the opposite sign
/// of its high part.
// NOTE(review): some return statements are not visible in this excerpt.
4266 DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat &RHS) const {
4267 auto Result = Floats[0].compareAbsoluteValue(RHS.Floats[0]);
4268 if (Result != cmpEqual)
4270 Result = Floats[1].compareAbsoluteValue(RHS.Floats[1]);
4271 if (Result == cmpLessThan || Result == cmpGreaterThan) {
// "Against" is true when the low component's sign differs from the high
// component's sign.
4272 auto Against = Floats[0].isNegative() ^ Floats[1].isNegative();
4273 auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative();
4274 if (Against && !RHSAgainst)
4276 if (!Against && RHSAgainst)
4277 return cmpGreaterThan;
4278 if (!Against && !RHSAgainst)
4280 if (Against && RHSAgainst)
// Swaps cmpLessThan and cmpGreaterThan: their sum minus one yields the other.
4281 return (cmpResult)(cmpLessThan + cmpGreaterThan - Result);
/// Returns the category of the first (high) component, which stands for the
/// category of the whole value.
4286 APFloat::fltCategory DoubleAPFloat::getCategory() const {
4287 return Floats[0].getCategory();
4290 bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); }
/// Makes this value an infinity of sign \p Neg: the first component becomes
/// the infinity and the second becomes +0.
4292 void DoubleAPFloat::makeInf(bool Neg) {
4293 Floats[0].makeInf(Neg);
4294 Floats[1].makeZero(/* Neg = */ false);
/// Makes this value a zero of sign \p Neg: the first component carries the
/// sign and the second becomes +0.
4297 void DoubleAPFloat::makeZero(bool Neg) {
4298 Floats[0].makeZero(Neg);
4299 Floats[1].makeZero(/* Neg = */ false);
/// Sets both components from fixed 64-bit patterns to form the largest value.
/// 0x7fefffffffffffff is the bit pattern of the largest finite IEEE double.
// NOTE(review): no use of Neg is visible in this excerpt; the meaning of the
// low-part constant 0x7c8ffffffffffffe is not derivable from here -- confirm
// both against the full file.
4302 void DoubleAPFloat::makeLargest(bool Neg) {
4303 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4304 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
4305 Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
/// Makes this value the smallest-magnitude number of sign \p Neg: the first
/// component is set via makeSmallest and the second becomes +0.
4310 void DoubleAPFloat::makeSmallest(bool Neg) {
4311 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4312 Floats[0].makeSmallest(Neg);
4313 Floats[1].makeZero(/* Neg = */ false);
/// Sets the first component from the bit pattern 0x0360000000000000 (biased
/// exponent field 0x036, zero fraction) and the second component to +0.
// NOTE(review): the condition guarding changeSign() -- presumably Neg -- is
// not visible in this excerpt; confirm.
4316 void DoubleAPFloat::makeSmallestNormalized(bool Neg) {
4317 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4318 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull));
4320 Floats[0].changeSign();
4321 Floats[1].makeZero(/* Neg = */ false);
/// Makes this value a NaN by forwarding \p SNaN, \p Neg and \p fill to the
/// first component's makeNaN; the second component becomes +0.
4324 void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) {
4325 Floats[0].makeNaN(SNaN, Neg, fill);
4326 Floats[1].makeZero(/* Neg = */ false);
/// Compares this value with \p RHS: the high components are compared first
/// and the low components break a tie.
// NOTE(review): the return for the non-equal case is not visible in this
// excerpt.
4329 APFloat::cmpResult DoubleAPFloat::compare(const DoubleAPFloat &RHS) const {
4330 auto Result = Floats[0].compare(RHS.Floats[0]);
4331 // |Float[0]| > |Float[1]|
4332 if (Result == APFloat::cmpEqual)
4333 return Floats[1].compare(RHS.Floats[1]);
/// Returns true only when both components are bitwise equal to the
/// corresponding components of \p RHS.
4337 bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat &RHS) const {
4338 return Floats[0].bitwiseIsEqual(RHS.Floats[0]) &&
4339 Floats[1].bitwiseIsEqual(RHS.Floats[1]);
/// Hashes a DoubleAPFloat: either a combination of both components' hashes or
/// a hash of the semantics pointer alone.
// NOTE(review): the condition selecting between the two returns is not
// visible in this excerpt -- presumably whether Arg.Floats is set; confirm.
4342 hash_code hash_value(const DoubleAPFloat &Arg) {
4344 return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1]));
4345 return hash_combine(Arg.Semantics);
/// Returns the 128-bit pattern of this value, built from two 64-bit words:
/// the bits of Floats[0] first, then the bits of Floats[1].
// NOTE(review): the declaration of Data is not visible in this excerpt.
4348 APInt DoubleAPFloat::bitcastToAPInt() const {
4349 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4351 Floats[0].bitcastToAPInt().getRawData()[0],
4352 Floats[1].bitcastToAPInt().getRawData()[0],
4354 return APInt(128, 2, Data);
/// Parses \p S into this value. The parse is done on a temporary
/// semPPCDoubleDoubleLegacy value (result kept in Ret) and this object is
/// rebuilt from the temporary's bit pattern.
4357 Expected<APFloat::opStatus> DoubleAPFloat::convertFromString(StringRef S,
4359 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4360 APFloat Tmp(semPPCDoubleDoubleLegacy);
4361 auto Ret = Tmp.convertFromString(S, RM);
4362 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
/// Steps this value to the adjacent representable value. The step is done on
/// a semPPCDoubleDoubleLegacy reinterpretation of the bits (status kept in
/// Ret) and this object is rebuilt from the result's bits.
/// \param nextDown forwarded to the legacy value's next().
4366 APFloat::opStatus DoubleAPFloat::next(bool nextDown) {
4367 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4368 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4369 auto Ret = Tmp.next(nextDown);
4370 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
/// Converts this value to an integer by reinterpreting its bits as a
/// semPPCDoubleDoubleLegacy value and forwarding every argument to that
/// value's convertToInteger.
4375 DoubleAPFloat::convertToInteger(MutableArrayRef<integerPart> Input,
4376 unsigned int Width, bool IsSigned,
4377 roundingMode RM, bool *IsExact) const {
4378 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4379 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
4380 .convertToInteger(Input, Width, IsSigned, RM, IsExact);
/// Sets this value from the integer \p Input. The conversion is done on a
/// temporary semPPCDoubleDoubleLegacy value (status kept in Ret) and this
/// object is rebuilt from the temporary's bit pattern.
4383 APFloat::opStatus DoubleAPFloat::convertFromAPInt(const APInt &Input,
4386 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4387 APFloat Tmp(semPPCDoubleDoubleLegacy);
4388 auto Ret = Tmp.convertFromAPInt(Input, IsSigned, RM);
4389 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
/// Sets this value from a sign-extended integer given as parts. The
/// conversion is done on a temporary semPPCDoubleDoubleLegacy value (status
/// kept in Ret) and this object is rebuilt from the temporary's bit pattern.
4394 DoubleAPFloat::convertFromSignExtendedInteger(const integerPart *Input,
4395 unsigned int InputSize,
4396 bool IsSigned, roundingMode RM) {
4397 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4398 APFloat Tmp(semPPCDoubleDoubleLegacy);
4399 auto Ret = Tmp.convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM);
4400 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
/// Sets this value from a zero-extended integer given as parts. The
/// conversion is done on a temporary semPPCDoubleDoubleLegacy value (status
/// kept in Ret) and this object is rebuilt from the temporary's bit pattern.
4405 DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart *Input,
4406 unsigned int InputSize,
4407 bool IsSigned, roundingMode RM) {
4408 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4409 APFloat Tmp(semPPCDoubleDoubleLegacy);
4410 auto Ret = Tmp.convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM);
4411 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
/// Writes a hexadecimal rendering of this value to \p DST by reinterpreting
/// the bits as a semPPCDoubleDoubleLegacy value and forwarding the arguments
/// to that value's convertToHexString, whose result is returned.
4415 unsigned int DoubleAPFloat::convertToHexString(char *DST,
4416 unsigned int HexDigits,
4418 roundingMode RM) const {
4419 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4420 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
4421 .convertToHexString(DST, HexDigits, UpperCase, RM);
/// Returns true when the value is in category fcNormal and either component
/// is denormal, or the high component does not equal the double-precision sum
/// of both components.
4424 bool DoubleAPFloat::isDenormal() const {
4425 return getCategory() == fcNormal &&
4426 (Floats[0].isDenormal() || Floats[1].isDenormal() ||
4427 // (double)(Hi + Lo) == Hi defines a normal number.
4428 Floats[0].compare(Floats[0] + Floats[1]) != cmpEqual);
/// Returns whether this value equals the smallest-magnitude value of the same
/// sign, checked by building that value in a copy and comparing.
// NOTE(review): the statement taken when the category is not fcNormal is not
// visible in this excerpt.
4431 bool DoubleAPFloat::isSmallest() const {
4432 if (getCategory() != fcNormal)
4434 DoubleAPFloat Tmp(*this);
4435 Tmp.makeSmallest(this->isNegative());
4436 return Tmp.compare(*this) == cmpEqual;
/// Returns whether this value equals the largest-magnitude value of the same
/// sign, checked by building that value in a copy and comparing.
// NOTE(review): the statement taken when the category is not fcNormal is not
// visible in this excerpt.
4439 bool DoubleAPFloat::isLargest() const {
4440 if (getCategory() != fcNormal)
4442 DoubleAPFloat Tmp(*this);
4443 Tmp.makeLargest(this->isNegative());
4444 return Tmp.compare(*this) == cmpEqual;
/// Returns true only when both components report isInteger().
4447 bool DoubleAPFloat::isInteger() const {
4448 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4449 return Floats[0].isInteger() && Floats[1].isInteger();
/// Appends a decimal rendering of this value to \p Str by reinterpreting the
/// bits as a semPPCDoubleDoubleLegacy value and forwarding all formatting
/// arguments to that value's toString.
4452 void DoubleAPFloat::toString(SmallVectorImpl<char> &Str,
4453 unsigned FormatPrecision,
4454 unsigned FormatMaxPadding,
4455 bool TruncateZero) const {
4456 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4457 APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
4458 .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero);
/// Tests whether this value has an exact reciprocal, working on a
/// semPPCDoubleDoubleLegacy reinterpretation of the bits. When a result is
/// wanted, the legacy inverse is converted back to semPPCDoubleDouble via its
/// bit pattern and stored in *inv.
// NOTE(review): the condition in front of the nullptr call -- presumably
// !inv -- and the final return are not visible in this excerpt.
4461 bool DoubleAPFloat::getExactInverse(APFloat *inv) const {
4462 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4463 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4465 return Tmp.getExactInverse(nullptr);
4466 APFloat Inv(semPPCDoubleDoubleLegacy);
4467 auto Ret = Tmp.getExactInverse(&Inv);
4468 *inv = APFloat(semPPCDoubleDouble, Inv.bitcastToAPInt());
/// Scales a double-double by 2^Exp by applying scalbn with the same \p Exp
/// and \p RM to each component and pairing the results.
4472 DoubleAPFloat scalbn(DoubleAPFloat Arg, int Exp, APFloat::roundingMode RM) {
4473 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4474 return DoubleAPFloat(semPPCDoubleDouble, scalbn(Arg.Floats[0], Exp, RM),
4475 scalbn(Arg.Floats[1], Exp, RM));
/// Splits a double-double into a fraction and a power of two.
/// \param Arg value to decompose.
/// \param Exp [out] exponent, as produced by frexp on the high component.
/// \param RM rounding mode forwarded to the component operations.
/// \returns a double-double made of the high component's fraction and the low
///          component, which is scaled by 2^-Exp only when Arg is fcNormal.
4478 DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
4479 APFloat::roundingMode RM) {
4480 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4481 APFloat First = frexp(Arg.Floats[0], Exp, RM);
4482 APFloat Second = Arg.Floats[1];
4483 if (Arg.getCategory() == APFloat::fcNormal)
4484 Second = scalbn(Second, -Exp, RM);
4485 return DoubleAPFloat(semPPCDoubleDouble, std::move(First), std::move(Second));
4488 } // End detail namespace
/// Builds the storage for \p Semantics from the IEEE value \p F. For an
/// IEEEFloat layout F is moved into the IEEE member with placement new; for a
/// DoubleAPFloat layout a double-double is built with F as the first
/// component and a default IEEE double as the second. Any other layout is
/// unreachable.
// NOTE(review): the placement-new target of the DoubleAPFloat branch and the
// returns after each branch are not visible in this excerpt.
4490 APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
4491 if (usesLayout<IEEEFloat>(Semantics)) {
4492 new (&IEEE) IEEEFloat(std::move(F));
4495 if (usesLayout<DoubleAPFloat>(Semantics)) {
// F's semantics are read before F is moved from in the call below.
4496 const fltSemantics& S = F.getSemantics();
4498 DoubleAPFloat(Semantics, APFloat(std::move(F), S),
4499 APFloat(semIEEEdouble));
4502 llvm_unreachable("Unexpected semantics");
/// Parses \p Str into this value by dispatching, via
/// APFLOAT_DISPATCH_ON_SEMANTICS, to the convertFromString of whichever
/// layout the current semantics use.
4505 Expected<APFloat::opStatus> APFloat::convertFromString(StringRef Str,
4507 APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str, RM));
/// Hashes an APFloat by delegating to the hash_value overload of the layout
/// its semantics use (IEEEFloat or DoubleAPFloat); any other layout is
/// unreachable.
4510 hash_code hash_value(const APFloat &Arg) {
4511 if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics()))
4512 return hash_value(Arg.U.IEEE);
4513 if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics()))
4514 return hash_value(Arg.U.Double);
4515 llvm_unreachable("Unexpected semantics");
/// Constructs a value of \p Semantics by parsing \p S with
/// rmNearestTiesToEven. A parse failure trips the assert; the error is then
/// consumed explicitly so that it does not go unhandled when asserts are
/// compiled out.
4518 APFloat::APFloat(const fltSemantics &Semantics, StringRef S)
4519 : APFloat(Semantics) {
4520 auto StatusOrErr = convertFromString(S, rmNearestTiesToEven);
4521 assert(StatusOrErr && "Invalid floating point representation");
4522 consumeError(StatusOrErr.takeError());
/// Converts this value in place to \p ToSemantics.
/// \param ToSemantics target semantics.
/// \param RM rounding mode for the conversion.
/// \param losesInfo forwarded to the underlying IEEE convert calls.
/// Layout pairs handled: IEEE -> IEEE directly; IEEE -> DoubleAPFloat via
/// semPPCDoubleDoubleLegacy and a bit-pattern rebuild; DoubleAPFloat -> IEEE
/// via getIEEE(). Any other pair is unreachable.
// NOTE(review): the body of the same-semantics early exit and the returns of
// Ret are not visible in this excerpt.
4525 APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics,
4526 roundingMode RM, bool *losesInfo) {
4527 if (&getSemantics() == &ToSemantics) {
4531 if (usesLayout<IEEEFloat>(getSemantics()) &&
4532 usesLayout<IEEEFloat>(ToSemantics))
4533 return U.IEEE.convert(ToSemantics, RM, losesInfo);
4534 if (usesLayout<IEEEFloat>(getSemantics()) &&
4535 usesLayout<DoubleAPFloat>(ToSemantics)) {
4536 assert(&ToSemantics == &semPPCDoubleDouble);
4537 auto Ret = U.IEEE.convert(semPPCDoubleDoubleLegacy, RM, losesInfo);
4538 *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt());
4541 if (usesLayout<DoubleAPFloat>(getSemantics()) &&
4542 usesLayout<IEEEFloat>(ToSemantics)) {
4543 auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo);
4544 *this = APFloat(std::move(getIEEE()), ToSemantics);
4547 llvm_unreachable("Unexpected semantics");
4550 APFloat APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE) {
4554 return APFloat(semIEEEhalf, APInt::getAllOnesValue(BitWidth));
4556 return APFloat(semIEEEsingle, APInt::getAllOnesValue(BitWidth));
4558 return APFloat(semIEEEdouble, APInt::getAllOnesValue(BitWidth));
4560 return APFloat(semX87DoubleExtended, APInt::getAllOnesValue(BitWidth));
4562 return APFloat(semIEEEquad, APInt::getAllOnesValue(BitWidth));
4564 llvm_unreachable("Unknown floating bit width");
4567 assert(BitWidth == 128);
4568 return APFloat(semPPCDoubleDouble, APInt::getAllOnesValue(BitWidth));
4572 void APFloat::print(raw_ostream &OS) const {
4573 SmallVector<char, 16> Buffer;
4575 OS << Buffer << "\n";
4578 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4579 LLVM_DUMP_METHOD void APFloat::dump() const { print(dbgs()); }
4582 void APFloat::Profile(FoldingSetNodeID &NID) const {
4583 NID.Add(bitcastToAPInt());
4586 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
4587 an APSInt, whose initial bit-width and signed-ness are used to determine the
4588 precision of the conversion.
4590 APFloat::opStatus APFloat::convertToInteger(APSInt &result,
4591 roundingMode rounding_mode,
4592 bool *isExact) const {
4593 unsigned bitWidth = result.getBitWidth();
4594 SmallVector<uint64_t, 4> parts(result.getNumWords());
4595 opStatus status = convertToInteger(parts, bitWidth, result.isSigned(),
4596 rounding_mode, isExact);
4597 // Keeps the original signed-ness.
4598 result = APInt(bitWidth, parts);
4602 } // End llvm namespace
4604 #undef APFLOAT_DISPATCH_ON_SEMANTICS