1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements a class to represent arbitrary precision floating
10 // point values and provide a variety of arithmetic operations on them.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/ADT/APFloat.h"
15 #include "llvm/ADT/APSInt.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/FoldingSet.h"
18 #include "llvm/ADT/Hashing.h"
19 #include "llvm/ADT/StringExtras.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Config/llvm-config.h"
22 #include "llvm/Support/Debug.h"
23 #include "llvm/Support/Error.h"
24 #include "llvm/Support/MathExtras.h"
25 #include "llvm/Support/raw_ostream.h"
29 #define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL) \
31 if (usesLayout<IEEEFloat>(getSemantics())) \
32 return U.IEEE.METHOD_CALL; \
33 if (usesLayout<DoubleAPFloat>(getSemantics())) \
34 return U.Double.METHOD_CALL; \
35 llvm_unreachable("Unexpected semantics"); \
40 /// A macro used to combine two fcCategory enums into one key which can be used
41 /// in a switch statement to classify how the interaction of two APFloat's
42 /// categories affects an operation.
44 /// TODO: If clang source code is ever allowed to use constexpr in its own
45 /// codebase, change this into a static inline function.
46 #define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
48 /* Assumed in hexadecimal significand parsing, and conversion to
49 hexadecimal strings. */
50 static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
53 /* Represents floating point arithmetic semantics. */
55 /* The largest E such that 2^E is representable; this matches the
56 definition of IEEE 754. */
57 APFloatBase::ExponentType maxExponent;
59 /* The smallest E such that 2^E is a normalized number; this
60 matches the definition of IEEE 754. */
61 APFloatBase::ExponentType minExponent;
63 /* Number of bits in the significand. This includes the integer
65 unsigned int precision;
67 /* Number of bits actually used in the semantics. */
68 unsigned int sizeInBits;
71 static const fltSemantics semIEEEhalf = {15, -14, 11, 16};
72 static const fltSemantics semBFloat = {127, -126, 8, 16};
73 static const fltSemantics semIEEEsingle = {127, -126, 24, 32};
74 static const fltSemantics semIEEEdouble = {1023, -1022, 53, 64};
75 static const fltSemantics semIEEEquad = {16383, -16382, 113, 128};
76 static const fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
77 static const fltSemantics semBogus = {0, 0, 0, 0};
79 /* The IBM double-double semantics. Such a number consists of a pair of IEEE
80 64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal,
81 (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo.
82 Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent
83 to each other, and two 11-bit exponents.
85 Note: we need to make the value different from semBogus as otherwise
86 an unsafe optimization may collapse both values to a single address,
87 and we heavily rely on them having distinct addresses. */
88 static const fltSemantics semPPCDoubleDouble = {-1, 0, 0, 0};
90 /* These are legacy semantics for the fallback, inaccrurate implementation of
91 IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the
92 operation. It's equivalent to having an IEEE number with consecutive 106
93 bits of mantissa and 11 bits of exponent.
95 It's not equivalent to IBM double-double. For example, a legit IBM
96 double-double, 1 + epsilon:
98 1 + epsilon = 1 + (1 >> 1076)
100 is not representable by a consecutive 106 bits of mantissa.
102 Currently, these semantics are used in the following way:
104 semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) ->
105 (64-bit APInt, 64-bit APInt) -> (128-bit APInt) ->
106 semPPCDoubleDoubleLegacy -> IEEE operations
108 We use bitcastToAPInt() to get the bit representation (in APInt) of the
109 underlying IEEEdouble, then use the APInt constructor to construct the
112 TODO: Implement all operations in semPPCDoubleDouble, and delete these
114 static const fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53,
117 const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) {
127 case S_x87DoubleExtended:
128 return x87DoubleExtended();
131 case S_PPCDoubleDouble:
132 return PPCDoubleDouble();
134 llvm_unreachable("Unrecognised floating semantics");
137 APFloatBase::Semantics
138 APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) {
139 if (&Sem == &llvm::APFloat::IEEEhalf())
141 else if (&Sem == &llvm::APFloat::BFloat())
143 else if (&Sem == &llvm::APFloat::IEEEsingle())
145 else if (&Sem == &llvm::APFloat::IEEEdouble())
147 else if (&Sem == &llvm::APFloat::x87DoubleExtended())
148 return S_x87DoubleExtended;
149 else if (&Sem == &llvm::APFloat::IEEEquad())
151 else if (&Sem == &llvm::APFloat::PPCDoubleDouble())
152 return S_PPCDoubleDouble;
154 llvm_unreachable("Unknown floating semantics");
157 const fltSemantics &APFloatBase::IEEEhalf() {
160 const fltSemantics &APFloatBase::BFloat() {
163 const fltSemantics &APFloatBase::IEEEsingle() {
164 return semIEEEsingle;
166 const fltSemantics &APFloatBase::IEEEdouble() {
167 return semIEEEdouble;
169 const fltSemantics &APFloatBase::IEEEquad() {
172 const fltSemantics &APFloatBase::x87DoubleExtended() {
173 return semX87DoubleExtended;
175 const fltSemantics &APFloatBase::Bogus() {
178 const fltSemantics &APFloatBase::PPCDoubleDouble() {
179 return semPPCDoubleDouble;
182 constexpr RoundingMode APFloatBase::rmNearestTiesToEven;
183 constexpr RoundingMode APFloatBase::rmTowardPositive;
184 constexpr RoundingMode APFloatBase::rmTowardNegative;
185 constexpr RoundingMode APFloatBase::rmTowardZero;
186 constexpr RoundingMode APFloatBase::rmNearestTiesToAway;
188 /* A tight upper bound on number of parts required to hold the value
191 power * 815 / (351 * integerPartWidth) + 1
193 However, whilst the result may require only this many parts,
194 because we are multiplying two values to get it, the
195 multiplication may require an extra part with the excess part
196 being zero (consider the trivial case of 1 * 1, tcFullMultiply
197 requires two parts to hold the single-part result). So we add an
198 extra one to guarantee enough space whilst multiplying. */
199 const unsigned int maxExponent = 16383;
200 const unsigned int maxPrecision = 113;
201 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
202 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth));
204 unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {
205 return semantics.precision;
207 APFloatBase::ExponentType
208 APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) {
209 return semantics.maxExponent;
211 APFloatBase::ExponentType
212 APFloatBase::semanticsMinExponent(const fltSemantics &semantics) {
213 return semantics.minExponent;
215 unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {
216 return semantics.sizeInBits;
219 unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) {
220 return Sem.sizeInBits;
223 /* A bunch of private, handy routines. */
225 static inline Error createError(const Twine &Err) {
226 return make_error<StringError>(Err, inconvertibleErrorCode());
229 static inline unsigned int
230 partCountForBits(unsigned int bits)
232 return ((bits) + APFloatBase::integerPartWidth - 1) / APFloatBase::integerPartWidth;
235 /* Returns 0U-9U. Return values >= 10U are not digits. */
236 static inline unsigned int
237 decDigitValue(unsigned int c)
242 /* Return the value of a decimal exponent of the form
245 If the exponent overflows, returns a large exponent with the
247 static Expected<int> readExponent(StringRef::iterator begin,
248 StringRef::iterator end) {
250 unsigned int absExponent;
251 const unsigned int overlargeExponent = 24000; /* FIXME. */
252 StringRef::iterator p = begin;
254 // Treat no exponent as 0 to match binutils
255 if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) {
259 isNegative = (*p == '-');
260 if (*p == '-' || *p == '+') {
263 return createError("Exponent has no digits");
266 absExponent = decDigitValue(*p++);
267 if (absExponent >= 10U)
268 return createError("Invalid character in exponent");
270 for (; p != end; ++p) {
273 value = decDigitValue(*p);
275 return createError("Invalid character in exponent");
277 absExponent = absExponent * 10U + value;
278 if (absExponent >= overlargeExponent) {
279 absExponent = overlargeExponent;
285 return -(int) absExponent;
287 return (int) absExponent;
290 /* This is ugly and needs cleaning up, but I don't immediately see
291 how whilst remaining safe. */
292 static Expected<int> totalExponent(StringRef::iterator p,
293 StringRef::iterator end,
294 int exponentAdjustment) {
295 int unsignedExponent;
296 bool negative, overflow;
300 return createError("Exponent has no digits");
302 negative = *p == '-';
303 if (*p == '-' || *p == '+') {
306 return createError("Exponent has no digits");
309 unsignedExponent = 0;
311 for (; p != end; ++p) {
314 value = decDigitValue(*p);
316 return createError("Invalid character in exponent");
318 unsignedExponent = unsignedExponent * 10 + value;
319 if (unsignedExponent > 32767) {
325 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
329 exponent = unsignedExponent;
331 exponent = -exponent;
332 exponent += exponentAdjustment;
333 if (exponent > 32767 || exponent < -32768)
338 exponent = negative ? -32768: 32767;
343 static Expected<StringRef::iterator>
344 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
345 StringRef::iterator *dot) {
346 StringRef::iterator p = begin;
348 while (p != end && *p == '0')
351 if (p != end && *p == '.') {
354 if (end - begin == 1)
355 return createError("Significand has no digits");
357 while (p != end && *p == '0')
364 /* Given a normal decimal floating point number of the form
368 where the decimal point and exponent are optional, fill out the
369 structure D. Exponent is appropriate if the significand is
370 treated as an integer, and normalizedExponent if the significand
371 is taken to have the decimal point after a single leading
374 If the value is zero, V->firstSigDigit points to a non-digit, and
375 the return exponent is zero.
378 const char *firstSigDigit;
379 const char *lastSigDigit;
381 int normalizedExponent;
384 static Error interpretDecimal(StringRef::iterator begin,
385 StringRef::iterator end, decimalInfo *D) {
386 StringRef::iterator dot = end;
388 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
390 return PtrOrErr.takeError();
391 StringRef::iterator p = *PtrOrErr;
393 D->firstSigDigit = p;
395 D->normalizedExponent = 0;
397 for (; p != end; ++p) {
400 return createError("String contains multiple dots");
405 if (decDigitValue(*p) >= 10U)
410 if (*p != 'e' && *p != 'E')
411 return createError("Invalid character in significand");
413 return createError("Significand has no digits");
414 if (dot != end && p - begin == 1)
415 return createError("Significand has no digits");
417 /* p points to the first non-digit in the string */
418 auto ExpOrErr = readExponent(p + 1, end);
420 return ExpOrErr.takeError();
421 D->exponent = *ExpOrErr;
423 /* Implied decimal point? */
428 /* If number is all zeroes accept any exponent. */
429 if (p != D->firstSigDigit) {
430 /* Drop insignificant trailing zeroes. */
435 while (p != begin && *p == '0');
436 while (p != begin && *p == '.');
439 /* Adjust the exponents for any decimal point. */
440 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
441 D->normalizedExponent = (D->exponent +
442 static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
443 - (dot > D->firstSigDigit && dot < p)));
447 return Error::success();
450 /* Return the trailing fraction of a hexadecimal number.
451 DIGITVALUE is the first hex digit of the fraction, P points to
453 static Expected<lostFraction>
454 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
455 unsigned int digitValue) {
456 unsigned int hexDigit;
458 /* If the first trailing digit isn't 0 or 8 we can work out the
459 fraction immediately. */
461 return lfMoreThanHalf;
462 else if (digitValue < 8 && digitValue > 0)
463 return lfLessThanHalf;
465 // Otherwise we need to find the first non-zero digit.
466 while (p != end && (*p == '0' || *p == '.'))
470 return createError("Invalid trailing hexadecimal fraction!");
472 hexDigit = hexDigitValue(*p);
474 /* If we ran off the end it is exactly zero or one-half, otherwise
477 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
479 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
482 /* Return the fraction lost were a bignum truncated losing the least
483 significant BITS bits. */
485 lostFractionThroughTruncation(const APFloatBase::integerPart *parts,
486 unsigned int partCount,
491 lsb = APInt::tcLSB(parts, partCount);
493 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
495 return lfExactlyZero;
497 return lfExactlyHalf;
498 if (bits <= partCount * APFloatBase::integerPartWidth &&
499 APInt::tcExtractBit(parts, bits - 1))
500 return lfMoreThanHalf;
502 return lfLessThanHalf;
505 /* Shift DST right BITS bits noting lost fraction. */
507 shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
509 lostFraction lost_fraction;
511 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
513 APInt::tcShiftRight(dst, parts, bits);
515 return lost_fraction;
518 /* Combine the effect of two lost fractions. */
520 combineLostFractions(lostFraction moreSignificant,
521 lostFraction lessSignificant)
523 if (lessSignificant != lfExactlyZero) {
524 if (moreSignificant == lfExactlyZero)
525 moreSignificant = lfLessThanHalf;
526 else if (moreSignificant == lfExactlyHalf)
527 moreSignificant = lfMoreThanHalf;
530 return moreSignificant;
533 /* The error from the true value, in half-ulps, on multiplying two
534 floating point numbers, which differ from the value they
535 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
536 than the returned value.
538 See "How to Read Floating Point Numbers Accurately" by William D
541 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
543 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
545 if (HUerr1 + HUerr2 == 0)
546 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
548 return inexactMultiply + 2 * (HUerr1 + HUerr2);
551 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
552 when the least significant BITS are truncated. BITS cannot be
554 static APFloatBase::integerPart
555 ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits,
557 unsigned int count, partBits;
558 APFloatBase::integerPart part, boundary;
563 count = bits / APFloatBase::integerPartWidth;
564 partBits = bits % APFloatBase::integerPartWidth + 1;
566 part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits));
569 boundary = (APFloatBase::integerPart) 1 << (partBits - 1);
574 if (part - boundary <= boundary - part)
575 return part - boundary;
577 return boundary - part;
580 if (part == boundary) {
583 return ~(APFloatBase::integerPart) 0; /* A lot. */
586 } else if (part == boundary - 1) {
589 return ~(APFloatBase::integerPart) 0; /* A lot. */
594 return ~(APFloatBase::integerPart) 0; /* A lot. */
597 /* Place pow(5, power) in DST, and return the number of parts used.
598 DST must be at least one part larger than size of the answer. */
600 powerOf5(APFloatBase::integerPart *dst, unsigned int power) {
601 static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
602 APFloatBase::integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
603 pow5s[0] = 78125 * 5;
605 unsigned int partsCount[16] = { 1 };
606 APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
608 assert(power <= maxExponent);
613 *p1 = firstEightPowers[power & 7];
619 for (unsigned int n = 0; power; power >>= 1, n++) {
624 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
626 pc = partsCount[n - 1];
627 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
629 if (pow5[pc - 1] == 0)
635 APFloatBase::integerPart *tmp;
637 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
639 if (p2[result - 1] == 0)
642 /* Now result is in p1 with partsCount parts and p2 is scratch
653 APInt::tcAssign(dst, p1, result);
658 /* Zero at the end to avoid modular arithmetic when adding one; used
659 when rounding up during hexadecimal output. */
660 static const char hexDigitsLower[] = "0123456789abcdef0";
661 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
662 static const char infinityL[] = "infinity";
663 static const char infinityU[] = "INFINITY";
664 static const char NaNL[] = "nan";
665 static const char NaNU[] = "NAN";
667 /* Write out an integerPart in hexadecimal, starting with the most
668 significant nibble. Write out exactly COUNT hexdigits, return
671 partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count,
672 const char *hexDigitChars)
674 unsigned int result = count;
676 assert(count != 0 && count <= APFloatBase::integerPartWidth / 4);
678 part >>= (APFloatBase::integerPartWidth - 4 * count);
680 dst[count] = hexDigitChars[part & 0xf];
687 /* Write out an unsigned decimal integer. */
689 writeUnsignedDecimal (char *dst, unsigned int n)
705 /* Write out a signed decimal integer. */
707 writeSignedDecimal (char *dst, int value)
711 dst = writeUnsignedDecimal(dst, -(unsigned) value);
713 dst = writeUnsignedDecimal(dst, value);
720 void IEEEFloat::initialize(const fltSemantics *ourSemantics) {
723 semantics = ourSemantics;
726 significand.parts = new integerPart[count];
729 void IEEEFloat::freeSignificand() {
731 delete [] significand.parts;
734 void IEEEFloat::assign(const IEEEFloat &rhs) {
735 assert(semantics == rhs.semantics);
738 category = rhs.category;
739 exponent = rhs.exponent;
740 if (isFiniteNonZero() || category == fcNaN)
741 copySignificand(rhs);
744 void IEEEFloat::copySignificand(const IEEEFloat &rhs) {
745 assert(isFiniteNonZero() || category == fcNaN);
746 assert(rhs.partCount() >= partCount());
748 APInt::tcAssign(significandParts(), rhs.significandParts(),
752 /* Make this number a NaN, with an arbitrary but deterministic value
753 for the significand. If double or longer, this is a signalling NaN,
754 which may not be ideal. If float, this is QNaN(0). */
755 void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
759 integerPart *significand = significandParts();
760 unsigned numParts = partCount();
762 // Set the significand bits to the fill.
763 if (!fill || fill->getNumWords() < numParts)
764 APInt::tcSet(significand, 0, numParts);
766 APInt::tcAssign(significand, fill->getRawData(),
767 std::min(fill->getNumWords(), numParts));
769 // Zero out the excess bits of the significand.
770 unsigned bitsToPreserve = semantics->precision - 1;
771 unsigned part = bitsToPreserve / 64;
772 bitsToPreserve %= 64;
773 significand[part] &= ((1ULL << bitsToPreserve) - 1);
774 for (part++; part != numParts; ++part)
775 significand[part] = 0;
778 unsigned QNaNBit = semantics->precision - 2;
781 // We always have to clear the QNaN bit to make it an SNaN.
782 APInt::tcClearBit(significand, QNaNBit);
784 // If there are no bits set in the payload, we have to set
785 // *something* to make it a NaN instead of an infinity;
786 // conventionally, this is the next bit down from the QNaN bit.
787 if (APInt::tcIsZero(significand, numParts))
788 APInt::tcSetBit(significand, QNaNBit - 1);
790 // We always have to set the QNaN bit to make it a QNaN.
791 APInt::tcSetBit(significand, QNaNBit);
794 // For x87 extended precision, we want to make a NaN, not a
795 // pseudo-NaN. Maybe we should expose the ability to make
797 if (semantics == &semX87DoubleExtended)
798 APInt::tcSetBit(significand, QNaNBit + 1);
801 IEEEFloat &IEEEFloat::operator=(const IEEEFloat &rhs) {
803 if (semantics != rhs.semantics) {
805 initialize(rhs.semantics);
813 IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) {
816 semantics = rhs.semantics;
817 significand = rhs.significand;
818 exponent = rhs.exponent;
819 category = rhs.category;
822 rhs.semantics = &semBogus;
826 bool IEEEFloat::isDenormal() const {
827 return isFiniteNonZero() && (exponent == semantics->minExponent) &&
828 (APInt::tcExtractBit(significandParts(),
829 semantics->precision - 1) == 0);
832 bool IEEEFloat::isSmallest() const {
833 // The smallest number by magnitude in our format will be the smallest
834 // denormal, i.e. the floating point number with exponent being minimum
835 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
836 return isFiniteNonZero() && exponent == semantics->minExponent &&
837 significandMSB() == 0;
840 bool IEEEFloat::isSignificandAllOnes() const {
841 // Test if the significand excluding the integral bit is all ones. This allows
842 // us to test for binade boundaries.
843 const integerPart *Parts = significandParts();
844 const unsigned PartCount = partCount();
845 for (unsigned i = 0; i < PartCount - 1; i++)
849 // Set the unused high bits to all ones when we compare.
850 const unsigned NumHighBits =
851 PartCount*integerPartWidth - semantics->precision + 1;
852 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to "
853 "fill than integerPartWidth");
854 const integerPart HighBitFill =
855 ~integerPart(0) << (integerPartWidth - NumHighBits);
856 if (~(Parts[PartCount - 1] | HighBitFill))
862 bool IEEEFloat::isSignificandAllZeros() const {
863 // Test if the significand excluding the integral bit is all zeros. This
864 // allows us to test for binade boundaries.
865 const integerPart *Parts = significandParts();
866 const unsigned PartCount = partCount();
868 for (unsigned i = 0; i < PartCount - 1; i++)
872 const unsigned NumHighBits =
873 PartCount*integerPartWidth - semantics->precision + 1;
874 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to "
875 "clear than integerPartWidth");
876 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
878 if (Parts[PartCount - 1] & HighBitMask)
884 bool IEEEFloat::isLargest() const {
885 // The largest number by magnitude in our format will be the floating point
886 // number with maximum exponent and with significand that is all ones.
887 return isFiniteNonZero() && exponent == semantics->maxExponent
888 && isSignificandAllOnes();
891 bool IEEEFloat::isInteger() const {
892 // This could be made more efficient; I'm going for obviously correct.
893 if (!isFinite()) return false;
894 IEEEFloat truncated = *this;
895 truncated.roundToIntegral(rmTowardZero);
896 return compare(truncated) == cmpEqual;
899 bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const {
902 if (semantics != rhs.semantics ||
903 category != rhs.category ||
906 if (category==fcZero || category==fcInfinity)
909 if (isFiniteNonZero() && exponent != rhs.exponent)
912 return std::equal(significandParts(), significandParts() + partCount(),
913 rhs.significandParts());
916 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) {
917 initialize(&ourSemantics);
921 exponent = ourSemantics.precision - 1;
922 significandParts()[0] = value;
923 normalize(rmNearestTiesToEven, lfExactlyZero);
926 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) {
927 initialize(&ourSemantics);
932 // Delegate to the previous constructor, because later copy constructor may
933 // actually inspects category, which can't be garbage.
934 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, uninitializedTag tag)
935 : IEEEFloat(ourSemantics) {}
937 IEEEFloat::IEEEFloat(const IEEEFloat &rhs) {
938 initialize(rhs.semantics);
942 IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) {
943 *this = std::move(rhs);
946 IEEEFloat::~IEEEFloat() { freeSignificand(); }
948 unsigned int IEEEFloat::partCount() const {
949 return partCountForBits(semantics->precision + 1);
952 const IEEEFloat::integerPart *IEEEFloat::significandParts() const {
953 return const_cast<IEEEFloat *>(this)->significandParts();
956 IEEEFloat::integerPart *IEEEFloat::significandParts() {
958 return significand.parts;
960 return &significand.part;
963 void IEEEFloat::zeroSignificand() {
964 APInt::tcSet(significandParts(), 0, partCount());
967 /* Increment an fcNormal floating point number's significand. */
968 void IEEEFloat::incrementSignificand() {
971 carry = APInt::tcIncrement(significandParts(), partCount());
973 /* Our callers should never cause us to overflow. */
978 /* Add the significand of the RHS. Returns the carry flag. */
979 IEEEFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) {
982 parts = significandParts();
984 assert(semantics == rhs.semantics);
985 assert(exponent == rhs.exponent);
987 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
990 /* Subtract the significand of the RHS with a borrow flag. Returns
992 IEEEFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
993 integerPart borrow) {
996 parts = significandParts();
998 assert(semantics == rhs.semantics);
999 assert(exponent == rhs.exponent);
1001 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
1005 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
1006 on to the full-precision result of the multiplication. Returns the
1008 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
1010 unsigned int omsb; // One, not zero, based MSB.
1011 unsigned int partsCount, newPartsCount, precision;
1012 integerPart *lhsSignificand;
1013 integerPart scratch[4];
1014 integerPart *fullSignificand;
1015 lostFraction lost_fraction;
1018 assert(semantics == rhs.semantics);
1020 precision = semantics->precision;
1022 // Allocate space for twice as many bits as the original significand, plus one
1023 // extra bit for the addition to overflow into.
1024 newPartsCount = partCountForBits(precision * 2 + 1);
1026 if (newPartsCount > 4)
1027 fullSignificand = new integerPart[newPartsCount];
1029 fullSignificand = scratch;
1031 lhsSignificand = significandParts();
1032 partsCount = partCount();
1034 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
1035 rhs.significandParts(), partsCount, partsCount);
1037 lost_fraction = lfExactlyZero;
1038 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1039 exponent += rhs.exponent;
1041 // Assume the operands involved in the multiplication are single-precision
1042 // FP, and the two multiplicants are:
1043 // *this = a23 . a22 ... a0 * 2^e1
1044 // rhs = b23 . b22 ... b0 * 2^e2
1045 // the result of multiplication is:
1046 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
1047 // Note that there are three significant bits at the left-hand side of the
1048 // radix point: two for the multiplication, and an overflow bit for the
1049 // addition (that will always be zero at this point). Move the radix point
1050 // toward left by two bits, and adjust exponent accordingly.
1053 if (addend.isNonZero()) {
1054 // The intermediate result of the multiplication has "2 * precision"
1055 // signicant bit; adjust the addend to be consistent with mul result.
1057 Significand savedSignificand = significand;
1058 const fltSemantics *savedSemantics = semantics;
1059 fltSemantics extendedSemantics;
1061 unsigned int extendedPrecision;
1063 // Normalize our MSB to one below the top bit to allow for overflow.
1064 extendedPrecision = 2 * precision + 1;
1065 if (omsb != extendedPrecision - 1) {
1066 assert(extendedPrecision > omsb);
1067 APInt::tcShiftLeft(fullSignificand, newPartsCount,
1068 (extendedPrecision - 1) - omsb);
1069 exponent -= (extendedPrecision - 1) - omsb;
1072 /* Create new semantics. */
1073 extendedSemantics = *semantics;
1074 extendedSemantics.precision = extendedPrecision;
1076 if (newPartsCount == 1)
1077 significand.part = fullSignificand[0];
1079 significand.parts = fullSignificand;
1080 semantics = &extendedSemantics;
1082 // Make a copy so we can convert it to the extended semantics.
1083 // Note that we cannot convert the addend directly, as the extendedSemantics
1084 // is a local variable (which we take a reference to).
1085 IEEEFloat extendedAddend(addend);
1086 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
1087 assert(status == opOK);
1090 // Shift the significand of the addend right by one bit. This guarantees
1091 // that the high bit of the significand is zero (same as fullSignificand),
1092 // so the addition will overflow (if it does overflow at all) into the top bit.
1093 lost_fraction = extendedAddend.shiftSignificandRight(1);
1094 assert(lost_fraction == lfExactlyZero &&
1095 "Lost precision while shifting addend for fused-multiply-add.");
1097 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
1099 /* Restore our state. */
1100 if (newPartsCount == 1)
1101 fullSignificand[0] = significand.part;
1102 significand = savedSignificand;
1103 semantics = savedSemantics;
1105 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1108 // Convert the result having "2 * precision" significant-bits back to the one
1109 // having "precision" significant-bits. First, move the radix point from
1110 // poision "2*precision - 1" to "precision - 1". The exponent need to be
1111 // adjusted by "2*precision - 1" - "precision - 1" = "precision".
1112 exponent -= precision + 1;
1114 // In case MSB resides at the left-hand side of radix point, shift the
1115 // mantissa right by some amount to make sure the MSB reside right before
1116 // the radix point (i.e. "MSB . rest-significant-bits").
1118 // Note that the result is not normalized when "omsb < precision". So, the
1119 // caller needs to call IEEEFloat::normalize() if normalized value is
1121 if (omsb > precision) {
1122 unsigned int bits, significantParts;
1125 bits = omsb - precision;
1126 significantParts = partCountForBits(omsb);
1127 lf = shiftRight(fullSignificand, significantParts, bits);
1128 lost_fraction = combineLostFractions(lf, lost_fraction);
1132 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
1134 if (newPartsCount > 4)
1135 delete [] fullSignificand;
1137 return lost_fraction;
1140 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) {
1141 return multiplySignificand(rhs, IEEEFloat(*semantics));
1144 /* Multiply the significands of LHS and RHS to DST. */
1145 lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
1146 unsigned int bit, i, partsCount;
1147 const integerPart *rhsSignificand;
1148 integerPart *lhsSignificand, *dividend, *divisor;
1149 integerPart scratch[4];
1150 lostFraction lost_fraction;
1152 assert(semantics == rhs.semantics);
1154 lhsSignificand = significandParts();
1155 rhsSignificand = rhs.significandParts();
1156 partsCount = partCount();
1159 dividend = new integerPart[partsCount * 2];
1163 divisor = dividend + partsCount;
1165 /* Copy the dividend and divisor as they will be modified in-place. */
1166 for (i = 0; i < partsCount; i++) {
1167 dividend[i] = lhsSignificand[i];
1168 divisor[i] = rhsSignificand[i];
1169 lhsSignificand[i] = 0;
1172 exponent -= rhs.exponent;
1174 unsigned int precision = semantics->precision;
1176 /* Normalize the divisor. */
1177 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1180 APInt::tcShiftLeft(divisor, partsCount, bit);
1183 /* Normalize the dividend. */
1184 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1187 APInt::tcShiftLeft(dividend, partsCount, bit);
1190 /* Ensure the dividend >= divisor initially for the loop below.
1191 Incidentally, this means that the division loop below is
1192 guaranteed to set the integer bit to one. */
1193 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1195 APInt::tcShiftLeft(dividend, partsCount, 1);
1196 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1199 /* Long division. */
1200 for (bit = precision; bit; bit -= 1) {
1201 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1202 APInt::tcSubtract(dividend, divisor, 0, partsCount);
1203 APInt::tcSetBit(lhsSignificand, bit - 1);
1206 APInt::tcShiftLeft(dividend, partsCount, 1);
1209 /* Figure out the lost fraction. */
1210 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1213 lost_fraction = lfMoreThanHalf;
1215 lost_fraction = lfExactlyHalf;
1216 else if (APInt::tcIsZero(dividend, partsCount))
1217 lost_fraction = lfExactlyZero;
1219 lost_fraction = lfLessThanHalf;
1224 return lost_fraction;
1227 unsigned int IEEEFloat::significandMSB() const {
1228 return APInt::tcMSB(significandParts(), partCount());
1231 unsigned int IEEEFloat::significandLSB() const {
1232 return APInt::tcLSB(significandParts(), partCount());
1235 /* Note that a zero result is NOT normalized to fcZero. */
1236 lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
1237 /* Our exponent should not overflow. */
1238 assert((ExponentType) (exponent + bits) >= exponent);
1242 return shiftRight(significandParts(), partCount(), bits);
1245 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
1246 void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
1247 assert(bits < semantics->precision);
1250 unsigned int partsCount = partCount();
1252 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1255 assert(!APInt::tcIsZero(significandParts(), partsCount));
1259 IEEEFloat::cmpResult
1260 IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const {
1263 assert(semantics == rhs.semantics);
1264 assert(isFiniteNonZero());
1265 assert(rhs.isFiniteNonZero());
1267 compare = exponent - rhs.exponent;
1269 /* If exponents are equal, do an unsigned bignum comparison of the
1272 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1276 return cmpGreaterThan;
1277 else if (compare < 0)
1283 /* Handle overflow. Sign is preserved. We either become infinity or
1284 the largest finite number. */
1285 IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
1287 if (rounding_mode == rmNearestTiesToEven ||
1288 rounding_mode == rmNearestTiesToAway ||
1289 (rounding_mode == rmTowardPositive && !sign) ||
1290 (rounding_mode == rmTowardNegative && sign)) {
1291 category = fcInfinity;
1292 return (opStatus) (opOverflow | opInexact);
1295 /* Otherwise we become the largest finite number. */
1296 category = fcNormal;
1297 exponent = semantics->maxExponent;
1298 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1299 semantics->precision);
1304 /* Returns TRUE if, when truncating the current number, with BIT the
1305 new LSB, with the given lost fraction and rounding mode, the result
1306 would need to be rounded away from zero (i.e., by increasing the
1307 signficand). This routine must work for fcZero of both signs, and
1308 fcNormal numbers. */
1309 bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,
1310 lostFraction lost_fraction,
1311 unsigned int bit) const {
1312 /* NaNs and infinities should not have lost fractions. */
1313 assert(isFiniteNonZero() || category == fcZero);
1315 /* Current callers never pass this so we don't handle it. */
1316 assert(lost_fraction != lfExactlyZero);
1318 switch (rounding_mode) {
1319 case rmNearestTiesToAway:
1320 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1322 case rmNearestTiesToEven:
1323 if (lost_fraction == lfMoreThanHalf)
1326 /* Our zeroes don't have a significand to test. */
1327 if (lost_fraction == lfExactlyHalf && category != fcZero)
1328 return APInt::tcExtractBit(significandParts(), bit);
1335 case rmTowardPositive:
1338 case rmTowardNegative:
1344 llvm_unreachable("Invalid rounding mode found");
1347 IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
1348 lostFraction lost_fraction) {
1349 unsigned int omsb; /* One, not zero, based MSB. */
1352 if (!isFiniteNonZero())
1355 /* Before rounding normalize the exponent of fcNormal numbers. */
1356 omsb = significandMSB() + 1;
1359 /* OMSB is numbered from 1. We want to place it in the integer
1360 bit numbered PRECISION if possible, with a compensating change in
1362 exponentChange = omsb - semantics->precision;
1364 /* If the resulting exponent is too high, overflow according to
1365 the rounding mode. */
1366 if (exponent + exponentChange > semantics->maxExponent)
1367 return handleOverflow(rounding_mode);
1369 /* Subnormal numbers have exponent minExponent, and their MSB
1370 is forced based on that. */
1371 if (exponent + exponentChange < semantics->minExponent)
1372 exponentChange = semantics->minExponent - exponent;
1374 /* Shifting left is easy as we don't lose precision. */
1375 if (exponentChange < 0) {
1376 assert(lost_fraction == lfExactlyZero);
1378 shiftSignificandLeft(-exponentChange);
1383 if (exponentChange > 0) {
1386 /* Shift right and capture any new lost fraction. */
1387 lf = shiftSignificandRight(exponentChange);
1389 lost_fraction = combineLostFractions(lf, lost_fraction);
1391 /* Keep OMSB up-to-date. */
1392 if (omsb > (unsigned) exponentChange)
1393 omsb -= exponentChange;
1399 /* Now round the number according to rounding_mode given the lost
1402 /* As specified in IEEE 754, since we do not trap we do not report
1403 underflow for exact results. */
1404 if (lost_fraction == lfExactlyZero) {
1405 /* Canonicalize zeroes. */
1412 /* Increment the significand if we're rounding away from zero. */
1413 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1415 exponent = semantics->minExponent;
1417 incrementSignificand();
1418 omsb = significandMSB() + 1;
1420 /* Did the significand increment overflow? */
1421 if (omsb == (unsigned) semantics->precision + 1) {
1422 /* Renormalize by incrementing the exponent and shifting our
1423 significand right one. However if we already have the
1424 maximum exponent we overflow to infinity. */
1425 if (exponent == semantics->maxExponent) {
1426 category = fcInfinity;
1428 return (opStatus) (opOverflow | opInexact);
1431 shiftSignificandRight(1);
1437 /* The normal case - we were and are not denormal, and any
1438 significand increment above didn't overflow. */
1439 if (omsb == semantics->precision)
1442 /* We have a non-zero denormal. */
1443 assert(omsb < semantics->precision);
1445 /* Canonicalize zeroes. */
1449 /* The fcZero case is a denormal that underflowed to zero. */
1450 return (opStatus) (opUnderflow | opInexact);
1453 IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
1455 switch (PackCategoriesIntoKey(category, rhs.category)) {
1457 llvm_unreachable(nullptr);
1459 case PackCategoriesIntoKey(fcZero, fcNaN):
1460 case PackCategoriesIntoKey(fcNormal, fcNaN):
1461 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1464 case PackCategoriesIntoKey(fcNaN, fcZero):
1465 case PackCategoriesIntoKey(fcNaN, fcNormal):
1466 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1467 case PackCategoriesIntoKey(fcNaN, fcNaN):
1468 if (isSignaling()) {
1472 return rhs.isSignaling() ? opInvalidOp : opOK;
1474 case PackCategoriesIntoKey(fcNormal, fcZero):
1475 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1476 case PackCategoriesIntoKey(fcInfinity, fcZero):
1479 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1480 case PackCategoriesIntoKey(fcZero, fcInfinity):
1481 category = fcInfinity;
1482 sign = rhs.sign ^ subtract;
1485 case PackCategoriesIntoKey(fcZero, fcNormal):
1487 sign = rhs.sign ^ subtract;
1490 case PackCategoriesIntoKey(fcZero, fcZero):
1491 /* Sign depends on rounding mode; handled by caller. */
1494 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1495 /* Differently signed infinities can only be validly
1497 if (((sign ^ rhs.sign)!=0) != subtract) {
1504 case PackCategoriesIntoKey(fcNormal, fcNormal):
1509 /* Add or subtract two normal numbers. */
1510 lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
1513 lostFraction lost_fraction;
1516 /* Determine if the operation on the absolute values is effectively
1517 an addition or subtraction. */
1518 subtract ^= static_cast<bool>(sign ^ rhs.sign);
1520 /* Are we bigger exponent-wise than the RHS? */
1521 bits = exponent - rhs.exponent;
1523 /* Subtraction is more subtle than one might naively expect. */
1525 IEEEFloat temp_rhs(rhs);
1528 lost_fraction = lfExactlyZero;
1529 else if (bits > 0) {
1530 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1531 shiftSignificandLeft(1);
1533 lost_fraction = shiftSignificandRight(-bits - 1);
1534 temp_rhs.shiftSignificandLeft(1);
1537 // Should we reverse the subtraction.
1538 if (compareAbsoluteValue(temp_rhs) == cmpLessThan) {
1539 carry = temp_rhs.subtractSignificand
1540 (*this, lost_fraction != lfExactlyZero);
1541 copySignificand(temp_rhs);
1544 carry = subtractSignificand
1545 (temp_rhs, lost_fraction != lfExactlyZero);
1548 /* Invert the lost fraction - it was on the RHS and
1550 if (lost_fraction == lfLessThanHalf)
1551 lost_fraction = lfMoreThanHalf;
1552 else if (lost_fraction == lfMoreThanHalf)
1553 lost_fraction = lfLessThanHalf;
1555 /* The code above is intended to ensure that no borrow is
1561 IEEEFloat temp_rhs(rhs);
1563 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1564 carry = addSignificand(temp_rhs);
1566 lost_fraction = shiftSignificandRight(-bits);
1567 carry = addSignificand(rhs);
1570 /* We have a guard bit; generating a carry cannot happen. */
1575 return lost_fraction;
1578 IEEEFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
1579 switch (PackCategoriesIntoKey(category, rhs.category)) {
1581 llvm_unreachable(nullptr);
1583 case PackCategoriesIntoKey(fcZero, fcNaN):
1584 case PackCategoriesIntoKey(fcNormal, fcNaN):
1585 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1589 case PackCategoriesIntoKey(fcNaN, fcZero):
1590 case PackCategoriesIntoKey(fcNaN, fcNormal):
1591 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1592 case PackCategoriesIntoKey(fcNaN, fcNaN):
1593 sign ^= rhs.sign; // restore the original sign
1594 if (isSignaling()) {
1598 return rhs.isSignaling() ? opInvalidOp : opOK;
1600 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1601 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1602 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1603 category = fcInfinity;
1606 case PackCategoriesIntoKey(fcZero, fcNormal):
1607 case PackCategoriesIntoKey(fcNormal, fcZero):
1608 case PackCategoriesIntoKey(fcZero, fcZero):
1612 case PackCategoriesIntoKey(fcZero, fcInfinity):
1613 case PackCategoriesIntoKey(fcInfinity, fcZero):
1617 case PackCategoriesIntoKey(fcNormal, fcNormal):
1622 IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
1623 switch (PackCategoriesIntoKey(category, rhs.category)) {
1625 llvm_unreachable(nullptr);
1627 case PackCategoriesIntoKey(fcZero, fcNaN):
1628 case PackCategoriesIntoKey(fcNormal, fcNaN):
1629 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1633 case PackCategoriesIntoKey(fcNaN, fcZero):
1634 case PackCategoriesIntoKey(fcNaN, fcNormal):
1635 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1636 case PackCategoriesIntoKey(fcNaN, fcNaN):
1637 sign ^= rhs.sign; // restore the original sign
1638 if (isSignaling()) {
1642 return rhs.isSignaling() ? opInvalidOp : opOK;
1644 case PackCategoriesIntoKey(fcInfinity, fcZero):
1645 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1646 case PackCategoriesIntoKey(fcZero, fcInfinity):
1647 case PackCategoriesIntoKey(fcZero, fcNormal):
1650 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1654 case PackCategoriesIntoKey(fcNormal, fcZero):
1655 category = fcInfinity;
1658 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1659 case PackCategoriesIntoKey(fcZero, fcZero):
1663 case PackCategoriesIntoKey(fcNormal, fcNormal):
1668 IEEEFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
1669 switch (PackCategoriesIntoKey(category, rhs.category)) {
1671 llvm_unreachable(nullptr);
1673 case PackCategoriesIntoKey(fcZero, fcNaN):
1674 case PackCategoriesIntoKey(fcNormal, fcNaN):
1675 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1678 case PackCategoriesIntoKey(fcNaN, fcZero):
1679 case PackCategoriesIntoKey(fcNaN, fcNormal):
1680 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1681 case PackCategoriesIntoKey(fcNaN, fcNaN):
1682 if (isSignaling()) {
1686 return rhs.isSignaling() ? opInvalidOp : opOK;
1688 case PackCategoriesIntoKey(fcZero, fcInfinity):
1689 case PackCategoriesIntoKey(fcZero, fcNormal):
1690 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1693 case PackCategoriesIntoKey(fcNormal, fcZero):
1694 case PackCategoriesIntoKey(fcInfinity, fcZero):
1695 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1696 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1697 case PackCategoriesIntoKey(fcZero, fcZero):
1701 case PackCategoriesIntoKey(fcNormal, fcNormal):
1706 IEEEFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) {
1707 switch (PackCategoriesIntoKey(category, rhs.category)) {
1709 llvm_unreachable(nullptr);
1711 case PackCategoriesIntoKey(fcZero, fcNaN):
1712 case PackCategoriesIntoKey(fcNormal, fcNaN):
1713 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1716 case PackCategoriesIntoKey(fcNaN, fcZero):
1717 case PackCategoriesIntoKey(fcNaN, fcNormal):
1718 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1719 case PackCategoriesIntoKey(fcNaN, fcNaN):
1720 if (isSignaling()) {
1724 return rhs.isSignaling() ? opInvalidOp : opOK;
1726 case PackCategoriesIntoKey(fcZero, fcInfinity):
1727 case PackCategoriesIntoKey(fcZero, fcNormal):
1728 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1731 case PackCategoriesIntoKey(fcNormal, fcZero):
1732 case PackCategoriesIntoKey(fcInfinity, fcZero):
1733 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1734 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1735 case PackCategoriesIntoKey(fcZero, fcZero):
1739 case PackCategoriesIntoKey(fcNormal, fcNormal):
1740 return opDivByZero; // fake status, indicating this is not a special case
1745 void IEEEFloat::changeSign() {
1746 /* Look mummy, this one's easy. */
1750 /* Normalized addition or subtraction. */
1751 IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
1752 roundingMode rounding_mode,
1756 fs = addOrSubtractSpecials(rhs, subtract);
1758 /* This return code means it was not a simple case. */
1759 if (fs == opDivByZero) {
1760 lostFraction lost_fraction;
1762 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1763 fs = normalize(rounding_mode, lost_fraction);
1765 /* Can only be zero if we lost no fraction. */
1766 assert(category != fcZero || lost_fraction == lfExactlyZero);
1769 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1770 positive zero unless rounding to minus infinity, except that
1771 adding two like-signed zeroes gives that zero. */
1772 if (category == fcZero) {
1773 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
1774 sign = (rounding_mode == rmTowardNegative);
1780 /* Normalized addition. */
1781 IEEEFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs,
1782 roundingMode rounding_mode) {
1783 return addOrSubtract(rhs, rounding_mode, false);
1786 /* Normalized subtraction. */
1787 IEEEFloat::opStatus IEEEFloat::subtract(const IEEEFloat &rhs,
1788 roundingMode rounding_mode) {
1789 return addOrSubtract(rhs, rounding_mode, true);
1792 /* Normalized multiply. */
1793 IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs,
1794 roundingMode rounding_mode) {
1798 fs = multiplySpecials(rhs);
1800 if (isFiniteNonZero()) {
1801 lostFraction lost_fraction = multiplySignificand(rhs);
1802 fs = normalize(rounding_mode, lost_fraction);
1803 if (lost_fraction != lfExactlyZero)
1804 fs = (opStatus) (fs | opInexact);
1810 /* Normalized divide. */
1811 IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs,
1812 roundingMode rounding_mode) {
1816 fs = divideSpecials(rhs);
1818 if (isFiniteNonZero()) {
1819 lostFraction lost_fraction = divideSignificand(rhs);
1820 fs = normalize(rounding_mode, lost_fraction);
1821 if (lost_fraction != lfExactlyZero)
1822 fs = (opStatus) (fs | opInexact);
1828 /* Normalized remainder. */
1829 IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) {
1831 unsigned int origSign = sign;
1833 // First handle the special cases.
1834 fs = remainderSpecials(rhs);
1835 if (fs != opDivByZero)
1840 // Make sure the current value is less than twice the denom. If the addition
1841 // did not succeed (an overflow has happened), which means that the finite
1842 // value we currently posses must be less than twice the denom (as we are
1843 // using the same semantics).
1845 if (P2.add(rhs, rmNearestTiesToEven) == opOK) {
1850 // Lets work with absolute numbers.
1856 // To calculate the remainder we use the following scheme.
1858 // The remainder is defained as follows:
1860 // remainder = numer - rquot * denom = x - r * p
1862 // Where r is the result of: x/p, rounded toward the nearest integral value
1863 // (with halfway cases rounded toward the even number).
1865 // Currently, (after x mod 2p):
1866 // r is the number of 2p's present inside x, which is inherently, an even
1869 // We may split the remaining calculation into 4 options:
1870 // - if x < 0.5p then we round to the nearest number with is 0, and are done.
1871 // - if x == 0.5p then we round to the nearest even number which is 0, and we
1872 // are done as well.
1873 // - if 0.5p < x < p then we round to nearest number which is 1, and we have
1874 // to subtract 1p at least once.
1875 // - if x >= p then we must subtract p at least once, as x must be a
1878 // By now, we were done, or we added 1 to r, which in turn, now an odd number.
1880 // We can now split the remaining calculation to the following 3 options:
1881 // - if x < 0.5p then we round to the nearest number with is 0, and are done.
1882 // - if x == 0.5p then we round to the nearest even number. As r is odd, we
1883 // must round up to the next even number. so we must subtract p once more.
1884 // - if x > 0.5p (and inherently x < p) then we must round r up to the next
1885 // integral, and subtract p once more.
1888 // Extend the semantics to prevent an overflow/underflow or inexact result.
1890 fltSemantics extendedSemantics = *semantics;
1891 extendedSemantics.maxExponent++;
1892 extendedSemantics.minExponent--;
1893 extendedSemantics.precision += 2;
1895 IEEEFloat VEx = *this;
1896 fs = VEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
1897 assert(fs == opOK && !losesInfo);
1899 fs = PEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
1900 assert(fs == opOK && !losesInfo);
1902 // It is simpler to work with 2x instead of 0.5p, and we do not need to lose
1904 fs = VEx.add(VEx, rmNearestTiesToEven);
1907 if (VEx.compare(PEx) == cmpGreaterThan) {
1908 fs = subtract(P, rmNearestTiesToEven);
1911 // Make VEx = this.add(this), but because we have different semantics, we do
1912 // not want to `convert` again, so we just subtract PEx twice (which equals
1913 // to the desired value).
1914 fs = VEx.subtract(PEx, rmNearestTiesToEven);
1916 fs = VEx.subtract(PEx, rmNearestTiesToEven);
1919 cmpResult result = VEx.compare(PEx);
1920 if (result == cmpGreaterThan || result == cmpEqual) {
1921 fs = subtract(P, rmNearestTiesToEven);
1927 sign = origSign; // IEEE754 requires this
1933 /* Normalized llvm frem (C fmod). */
1934 IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) {
1936 fs = modSpecials(rhs);
1937 unsigned int origSign = sign;
1939 while (isFiniteNonZero() && rhs.isFiniteNonZero() &&
1940 compareAbsoluteValue(rhs) != cmpLessThan) {
1941 IEEEFloat V = scalbn(rhs, ilogb(*this) - ilogb(rhs), rmNearestTiesToEven);
1942 if (compareAbsoluteValue(V) == cmpLessThan)
1943 V = scalbn(V, -1, rmNearestTiesToEven);
1946 fs = subtract(V, rmNearestTiesToEven);
1950 sign = origSign; // fmod requires this
1954 /* Normalized fused-multiply-add. */
1955 IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand,
1956 const IEEEFloat &addend,
1957 roundingMode rounding_mode) {
1960 /* Post-multiplication sign, before addition. */
1961 sign ^= multiplicand.sign;
1963 /* If and only if all arguments are normal do we need to do an
1964 extended-precision calculation. */
1965 if (isFiniteNonZero() &&
1966 multiplicand.isFiniteNonZero() &&
1967 addend.isFinite()) {
1968 lostFraction lost_fraction;
1970 lost_fraction = multiplySignificand(multiplicand, addend);
1971 fs = normalize(rounding_mode, lost_fraction);
1972 if (lost_fraction != lfExactlyZero)
1973 fs = (opStatus) (fs | opInexact);
1975 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1976 positive zero unless rounding to minus infinity, except that
1977 adding two like-signed zeroes gives that zero. */
1978 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign)
1979 sign = (rounding_mode == rmTowardNegative);
1981 fs = multiplySpecials(multiplicand);
1983 /* FS can only be opOK or opInvalidOp. There is no more work
1984 to do in the latter case. The IEEE-754R standard says it is
1985 implementation-defined in this case whether, if ADDEND is a
1986 quiet NaN, we raise invalid op; this implementation does so.
1988 If we need to do the addition we can do so with normal
1991 fs = addOrSubtract(addend, rounding_mode, false);
1997 /* Rounding-mode correct round to integral value. */
1998 IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) {
2002 // [IEEE Std 754-2008 6.1]:
2003 // The behavior of infinity in floating-point arithmetic is derived from the
2004 // limiting cases of real arithmetic with operands of arbitrarily
2005 // large magnitude, when such a limit exists.
2007 // Operations on infinite operands are usually exact and therefore signal no
2012 if (isSignaling()) {
2013 // [IEEE Std 754-2008 6.2]:
2014 // Under default exception handling, any operation signaling an invalid
2015 // operation exception and for which a floating-point result is to be
2016 // delivered shall deliver a quiet NaN.
2018 // [IEEE Std 754-2008 6.2]:
2019 // Signaling NaNs shall be reserved operands that, under default exception
2020 // handling, signal the invalid operation exception(see 7.2) for every
2021 // general-computational and signaling-computational operation except for
2022 // the conversions described in 5.12.
2025 // [IEEE Std 754-2008 6.2]:
2026 // For an operation with quiet NaN inputs, other than maximum and minimum
2027 // operations, if a floating-point result is to be delivered the result
2028 // shall be a quiet NaN which should be one of the input NaNs.
2030 // Every general-computational and quiet-computational operation involving
2031 // one or more input NaNs, none of them signaling, shall signal no
2032 // exception, except fusedMultiplyAdd might signal the invalid operation
2033 // exception(see 7.2).
2039 // [IEEE Std 754-2008 6.3]:
2040 // ... the sign of the result of conversions, the quantize operation, the
2041 // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is
2042 // the sign of the first or only operand.
2046 // If the exponent is large enough, we know that this value is already
2047 // integral, and the arithmetic below would potentially cause it to saturate
2048 // to +/-Inf. Bail out early instead.
2049 if (exponent+1 >= (int)semanticsPrecision(*semantics))
2052 // The algorithm here is quite simple: we add 2^(p-1), where p is the
2053 // precision of our format, and then subtract it back off again. The choice
2054 // of rounding modes for the addition/subtraction determines the rounding mode
2055 // for our integral rounding as well.
2056 // NOTE: When the input value is negative, we do subtraction followed by
2057 // addition instead.
2058 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
2059 IntegerConstant <<= semanticsPrecision(*semantics)-1;
2060 IEEEFloat MagicConstant(*semantics);
2061 fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
2062 rmNearestTiesToEven);
2064 MagicConstant.sign = sign;
2066 // Preserve the input sign so that we can handle the case of zero result
2068 bool inputSign = isNegative();
2070 fs = add(MagicConstant, rounding_mode);
2072 // Current value and 'MagicConstant' are both integers, so the result of the
2073 // subtraction is always exact according to Sterbenz' lemma.
2074 subtract(MagicConstant, rounding_mode);
2076 // Restore the input sign.
2077 if (inputSign != isNegative())
2084 /* Comparison requires normalized numbers. */
2085 IEEEFloat::cmpResult IEEEFloat::compare(const IEEEFloat &rhs) const {
2088 assert(semantics == rhs.semantics);
2090 switch (PackCategoriesIntoKey(category, rhs.category)) {
2092 llvm_unreachable(nullptr);
2094 case PackCategoriesIntoKey(fcNaN, fcZero):
2095 case PackCategoriesIntoKey(fcNaN, fcNormal):
2096 case PackCategoriesIntoKey(fcNaN, fcInfinity):
2097 case PackCategoriesIntoKey(fcNaN, fcNaN):
2098 case PackCategoriesIntoKey(fcZero, fcNaN):
2099 case PackCategoriesIntoKey(fcNormal, fcNaN):
2100 case PackCategoriesIntoKey(fcInfinity, fcNaN):
2101 return cmpUnordered;
2103 case PackCategoriesIntoKey(fcInfinity, fcNormal):
2104 case PackCategoriesIntoKey(fcInfinity, fcZero):
2105 case PackCategoriesIntoKey(fcNormal, fcZero):
2109 return cmpGreaterThan;
2111 case PackCategoriesIntoKey(fcNormal, fcInfinity):
2112 case PackCategoriesIntoKey(fcZero, fcInfinity):
2113 case PackCategoriesIntoKey(fcZero, fcNormal):
2115 return cmpGreaterThan;
2119 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
2120 if (sign == rhs.sign)
2125 return cmpGreaterThan;
2127 case PackCategoriesIntoKey(fcZero, fcZero):
2130 case PackCategoriesIntoKey(fcNormal, fcNormal):
2134 /* Two normal numbers. Do they have the same sign? */
2135 if (sign != rhs.sign) {
2137 result = cmpLessThan;
2139 result = cmpGreaterThan;
2141 /* Compare absolute values; invert result if negative. */
2142 result = compareAbsoluteValue(rhs);
2145 if (result == cmpLessThan)
2146 result = cmpGreaterThan;
2147 else if (result == cmpGreaterThan)
2148 result = cmpLessThan;
2155 /// IEEEFloat::convert - convert a value of one floating point type to another.
2156 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
2157 /// records whether the transformation lost information, i.e. whether
2158 /// converting the result back to the original type will produce the
2159 /// original value (this is almost the same as return value==fsOK, but there
2160 /// are edge cases where this is not so).
2162 IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
2163 roundingMode rounding_mode,
2165 lostFraction lostFraction;
2166 unsigned int newPartCount, oldPartCount;
2169 const fltSemantics &fromSemantics = *semantics;
2171 lostFraction = lfExactlyZero;
2172 newPartCount = partCountForBits(toSemantics.precision + 1);
2173 oldPartCount = partCount();
2174 shift = toSemantics.precision - fromSemantics.precision;
2176 bool X86SpecialNan = false;
2177 if (&fromSemantics == &semX87DoubleExtended &&
2178 &toSemantics != &semX87DoubleExtended && category == fcNaN &&
2179 (!(*significandParts() & 0x8000000000000000ULL) ||
2180 !(*significandParts() & 0x4000000000000000ULL))) {
2181 // x86 has some unusual NaNs which cannot be represented in any other
2182 // format; note them here.
2183 X86SpecialNan = true;
2186 // If this is a truncation of a denormal number, and the target semantics
2187 // has larger exponent range than the source semantics (this can happen
2188 // when truncating from PowerPC double-double to double format), the
2189 // right shift could lose result mantissa bits. Adjust exponent instead
2190 // of performing excessive shift.
2191 if (shift < 0 && isFiniteNonZero()) {
2192 int exponentChange = significandMSB() + 1 - fromSemantics.precision;
2193 if (exponent + exponentChange < toSemantics.minExponent)
2194 exponentChange = toSemantics.minExponent - exponent;
2195 if (exponentChange < shift)
2196 exponentChange = shift;
2197 if (exponentChange < 0) {
2198 shift -= exponentChange;
2199 exponent += exponentChange;
2203 // If this is a truncation, perform the shift before we narrow the storage.
2204 if (shift < 0 && (isFiniteNonZero() || category==fcNaN))
2205 lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
2207 // Fix the storage so it can hold to new value.
2208 if (newPartCount > oldPartCount) {
2209 // The new type requires more storage; make it available.
2210 integerPart *newParts;
2211 newParts = new integerPart[newPartCount];
2212 APInt::tcSet(newParts, 0, newPartCount);
2213 if (isFiniteNonZero() || category==fcNaN)
2214 APInt::tcAssign(newParts, significandParts(), oldPartCount);
2216 significand.parts = newParts;
2217 } else if (newPartCount == 1 && oldPartCount != 1) {
2218 // Switch to built-in storage for a single part.
2219 integerPart newPart = 0;
2220 if (isFiniteNonZero() || category==fcNaN)
2221 newPart = significandParts()[0];
2223 significand.part = newPart;
2226 // Now that we have the right storage, switch the semantics.
2227 semantics = &toSemantics;
2229 // If this is an extension, perform the shift now that the storage is
2231 if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
2232 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
2234 if (isFiniteNonZero()) {
2235 fs = normalize(rounding_mode, lostFraction);
2236 *losesInfo = (fs != opOK);
2237 } else if (category == fcNaN) {
2238 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2240 // For x87 extended precision, we want to make a NaN, not a special NaN if
2241 // the input wasn't special either.
2242 if (!X86SpecialNan && semantics == &semX87DoubleExtended)
2243 APInt::tcSetBit(significandParts(), semantics->precision - 1);
2245 // If we are truncating NaN, it is possible that we shifted out all of the
2246 // set bits in a signalling NaN payload. But NaN must remain NaN, so some
2247 // bit in the significand must be set (otherwise it is Inf).
2248 // This can only happen with sNaN. Set the 1st bit after the quiet bit,
2249 // so that we still have an sNaN.
2250 // FIXME: Set quiet and return opInvalidOp (on convert of any sNaN).
2251 // But this requires fixing LLVM to parse 32-bit hex FP or ignoring
2252 // conversions while parsing IR.
2253 if (APInt::tcIsZero(significandParts(), newPartCount)) {
2254 assert(shift < 0 && "Should not lose NaN payload on extend");
2255 assert(semantics->precision >= 3 && "Unexpectedly narrow significand");
2256 assert(*losesInfo && "Missing payload should have set lost info");
2257 APInt::tcSetBit(significandParts(), semantics->precision - 3);
2260 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
2261 // does not give you back the same bits. This is dubious, and we
2262 // don't currently do it. You're really supposed to get
2263 // an invalid operation signal at runtime, but nobody does that.
2273 /* Convert a floating point number to an integer according to the
2274 rounding mode. If the rounded integer value is out of range this
2275 returns an invalid operation exception and the contents of the
2276 destination parts are unspecified. If the rounded value is in
2277 range but the floating point number is not the exact integer, the C
2278 standard doesn't require an inexact exception to be raised. IEEE
2279 854 does require it so we do that.
2281 Note that for conversions to integer type the C standard requires
2282 round-to-zero to always be used. */
2283 IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
2284 MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned,
2285 roundingMode rounding_mode, bool *isExact) const {
2286 lostFraction lost_fraction;
2287 const integerPart *src;
2288 unsigned int dstPartsCount, truncatedBits;
2292 /* Handle the three special cases first. */
2293 if (category == fcInfinity || category == fcNaN)
2296 dstPartsCount = partCountForBits(width);
2297 assert(dstPartsCount <= parts.size() && "Integer too big");
2299 if (category == fcZero) {
2300 APInt::tcSet(parts.data(), 0, dstPartsCount);
2301 // Negative zero can't be represented as an int.
2306 src = significandParts();
2308 /* Step 1: place our absolute value, with any fraction truncated, in
2311 /* Our absolute value is less than one; truncate everything. */
2312 APInt::tcSet(parts.data(), 0, dstPartsCount);
2313 /* For exponent -1 the integer bit represents .5, look at that.
2314 For smaller exponents leftmost truncated bit is 0. */
2315 truncatedBits = semantics->precision -1U - exponent;
2317 /* We want the most significant (exponent + 1) bits; the rest are
2319 unsigned int bits = exponent + 1U;
2321 /* Hopelessly large in magnitude? */
2325 if (bits < semantics->precision) {
2326 /* We truncate (semantics->precision - bits) bits. */
2327 truncatedBits = semantics->precision - bits;
2328 APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits);
2330 /* We want at least as many bits as are available. */
2331 APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision,
2333 APInt::tcShiftLeft(parts.data(), dstPartsCount,
2334 bits - semantics->precision);
2339 /* Step 2: work out any lost fraction, and increment the absolute
2340 value if we would round away from zero. */
2341 if (truncatedBits) {
2342 lost_fraction = lostFractionThroughTruncation(src, partCount(),
2344 if (lost_fraction != lfExactlyZero &&
2345 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2346 if (APInt::tcIncrement(parts.data(), dstPartsCount))
2347 return opInvalidOp; /* Overflow. */
2350 lost_fraction = lfExactlyZero;
2353 /* Step 3: check if we fit in the destination. */
2354 unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1;
2358 /* Negative numbers cannot be represented as unsigned. */
2362 /* It takes omsb bits to represent the unsigned integer value.
2363 We lose a bit for the sign, but care is needed as the
2364 maximally negative integer is a special case. */
2365 if (omsb == width &&
2366 APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb)
2369 /* This case can happen because of rounding. */
2374 APInt::tcNegate (parts.data(), dstPartsCount);
2376 if (omsb >= width + !isSigned)
2380 if (lost_fraction == lfExactlyZero) {
2387 /* Same as convertToSignExtendedInteger, except we provide
2388 deterministic values in case of an invalid operation exception,
2389 namely zero for NaNs and the minimal or maximal value respectively
2390 for underflow or overflow.
2391 The *isExact output tells whether the result is exact, in the sense
2392 that converting it back to the original floating point type produces
2393 the original value. This is almost equivalent to result==opOK,
2394 except for negative zeroes.
2397 IEEEFloat::convertToInteger(MutableArrayRef<integerPart> parts,
2398 unsigned int width, bool isSigned,
2399 roundingMode rounding_mode, bool *isExact) const {
2402 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2405 if (fs == opInvalidOp) {
2406 unsigned int bits, dstPartsCount;
2408 dstPartsCount = partCountForBits(width);
2409 assert(dstPartsCount <= parts.size() && "Integer too big");
2411 if (category == fcNaN)
2416 bits = width - isSigned;
2418 APInt::tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits);
2419 if (sign && isSigned)
2420 APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1);
2426 /* Convert an unsigned integer SRC to a floating point number,
2427 rounding according to ROUNDING_MODE. The sign of the floating
2428 point number is not modified. */
2429 IEEEFloat::opStatus IEEEFloat::convertFromUnsignedParts(
2430 const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) {
2431 unsigned int omsb, precision, dstCount;
2433 lostFraction lost_fraction;
2435 category = fcNormal;
2436 omsb = APInt::tcMSB(src, srcCount) + 1;
2437 dst = significandParts();
2438 dstCount = partCount();
2439 precision = semantics->precision;
2441 /* We want the most significant PRECISION bits of SRC. There may not
2442 be that many; extract what we can. */
2443 if (precision <= omsb) {
2444 exponent = omsb - 1;
2445 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2447 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2449 exponent = precision - 1;
2450 lost_fraction = lfExactlyZero;
2451 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2454 return normalize(rounding_mode, lost_fraction);
2457 IEEEFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned,
2458 roundingMode rounding_mode) {
2459 unsigned int partCount = Val.getNumWords();
2463 if (isSigned && api.isNegative()) {
2468 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2471 /* Convert a two's complement integer SRC to a floating point number,
2472 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2473 integer is signed, in which case it must be sign-extended. */
2475 IEEEFloat::convertFromSignExtendedInteger(const integerPart *src,
2476 unsigned int srcCount, bool isSigned,
2477 roundingMode rounding_mode) {
2481 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2484 /* If we're signed and negative negate a copy. */
2486 copy = new integerPart[srcCount];
2487 APInt::tcAssign(copy, src, srcCount);
2488 APInt::tcNegate(copy, srcCount);
2489 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2493 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2499 /* FIXME: should this just take a const APInt reference? */
2501 IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2502 unsigned int width, bool isSigned,
2503 roundingMode rounding_mode) {
2504 unsigned int partCount = partCountForBits(width);
2505 APInt api = APInt(width, makeArrayRef(parts, partCount));
2508 if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2513 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2516 Expected<IEEEFloat::opStatus>
2517 IEEEFloat::convertFromHexadecimalString(StringRef s,
2518 roundingMode rounding_mode) {
2519 lostFraction lost_fraction = lfExactlyZero;
2521 category = fcNormal;
2525 integerPart *significand = significandParts();
2526 unsigned partsCount = partCount();
2527 unsigned bitPos = partsCount * integerPartWidth;
2528 bool computedTrailingFraction = false;
2530 // Skip leading zeroes and any (hexa)decimal point.
2531 StringRef::iterator begin = s.begin();
2532 StringRef::iterator end = s.end();
2533 StringRef::iterator dot;
2534 auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2536 return PtrOrErr.takeError();
2537 StringRef::iterator p = *PtrOrErr;
2538 StringRef::iterator firstSignificantDigit = p;
2541 integerPart hex_value;
2545 return createError("String contains multiple dots");
2550 hex_value = hexDigitValue(*p);
2551 if (hex_value == -1U)
2556 // Store the number while we have space.
2559 hex_value <<= bitPos % integerPartWidth;
2560 significand[bitPos / integerPartWidth] |= hex_value;
2561 } else if (!computedTrailingFraction) {
2562 auto FractOrErr = trailingHexadecimalFraction(p, end, hex_value);
2564 return FractOrErr.takeError();
2565 lost_fraction = *FractOrErr;
2566 computedTrailingFraction = true;
2570 /* Hex floats require an exponent but not a hexadecimal point. */
2572 return createError("Hex strings require an exponent");
2573 if (*p != 'p' && *p != 'P')
2574 return createError("Invalid character in significand");
2576 return createError("Significand has no digits");
2577 if (dot != end && p - begin == 1)
2578 return createError("Significand has no digits");
2580 /* Ignore the exponent if we are zero. */
2581 if (p != firstSignificantDigit) {
2584 /* Implicit hexadecimal point? */
2588 /* Calculate the exponent adjustment implicit in the number of
2589 significant digits. */
2590 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2591 if (expAdjustment < 0)
2593 expAdjustment = expAdjustment * 4 - 1;
2595 /* Adjust for writing the significand starting at the most
2596 significant nibble. */
2597 expAdjustment += semantics->precision;
2598 expAdjustment -= partsCount * integerPartWidth;
2600 /* Adjust for the given exponent. */
2601 auto ExpOrErr = totalExponent(p + 1, end, expAdjustment);
2603 return ExpOrErr.takeError();
2604 exponent = *ExpOrErr;
2607 return normalize(rounding_mode, lost_fraction);
2611 IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2612 unsigned sigPartCount, int exp,
2613 roundingMode rounding_mode) {
2614 unsigned int parts, pow5PartCount;
2615 fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
2616 integerPart pow5Parts[maxPowerOfFiveParts];
2619 isNearest = (rounding_mode == rmNearestTiesToEven ||
2620 rounding_mode == rmNearestTiesToAway);
2622 parts = partCountForBits(semantics->precision + 11);
2624 /* Calculate pow(5, abs(exp)). */
2625 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2627 for (;; parts *= 2) {
2628 opStatus sigStatus, powStatus;
2629 unsigned int excessPrecision, truncatedBits;
2631 calcSemantics.precision = parts * integerPartWidth - 1;
2632 excessPrecision = calcSemantics.precision - semantics->precision;
2633 truncatedBits = excessPrecision;
2635 IEEEFloat decSig(calcSemantics, uninitialized);
2636 decSig.makeZero(sign);
2637 IEEEFloat pow5(calcSemantics);
2639 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2640 rmNearestTiesToEven);
2641 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2642 rmNearestTiesToEven);
2643 /* Add exp, as 10^n = 5^n * 2^n. */
2644 decSig.exponent += exp;
2646 lostFraction calcLostFraction;
2647 integerPart HUerr, HUdistance;
2648 unsigned int powHUerr;
2651 /* multiplySignificand leaves the precision-th bit set to 1. */
2652 calcLostFraction = decSig.multiplySignificand(pow5);
2653 powHUerr = powStatus != opOK;
2655 calcLostFraction = decSig.divideSignificand(pow5);
2656 /* Denormal numbers have less precision. */
2657 if (decSig.exponent < semantics->minExponent) {
2658 excessPrecision += (semantics->minExponent - decSig.exponent);
2659 truncatedBits = excessPrecision;
2660 if (excessPrecision > calcSemantics.precision)
2661 excessPrecision = calcSemantics.precision;
2663 /* Extra half-ulp lost in reciprocal of exponent. */
2664 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2667 /* Both multiplySignificand and divideSignificand return the
2668 result with the integer bit set. */
2669 assert(APInt::tcExtractBit
2670 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2672 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2674 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2675 excessPrecision, isNearest);
2677 /* Are we guaranteed to round correctly if we truncate? */
2678 if (HUdistance >= HUerr) {
2679 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2680 calcSemantics.precision - excessPrecision,
2682 /* Take the exponent of decSig. If we tcExtract-ed less bits
2683 above we must adjust our exponent to compensate for the
2684 implicit right shift. */
2685 exponent = (decSig.exponent + semantics->precision
2686 - (calcSemantics.precision - excessPrecision));
2687 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2690 return normalize(rounding_mode, calcLostFraction);
2695 Expected<IEEEFloat::opStatus>
2696 IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
2700 /* Scan the text. */
2701 StringRef::iterator p = str.begin();
2702 if (Error Err = interpretDecimal(p, str.end(), &D))
2703 return std::move(Err);
2705 /* Handle the quick cases. First the case of no significant digits,
2706 i.e. zero, and then exponents that are obviously too large or too
2707 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2708 definitely overflows if
2710 (exp - 1) * L >= maxExponent
2712 and definitely underflows to zero where
2714 (exp + 1) * L <= minExponent - precision
2716 With integer arithmetic the tightest bounds for L are
2718 93/28 < L < 196/59 [ numerator <= 256 ]
2719 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2722 // Test if we have a zero number allowing for strings with no null terminators
2723 // and zero decimals with non-zero exponents.
2725 // We computed firstSigDigit by ignoring all zeros and dots. Thus if
2726 // D->firstSigDigit equals str.end(), every digit must be a zero and there can
2727 // be at most one dot. On the other hand, if we have a zero with a non-zero
2728 // exponent, then we know that D.firstSigDigit will be non-numeric.
2729 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
2733 /* Check whether the normalized exponent is high enough to overflow
2734 max during the log-rebasing in the max-exponent check below. */
2735 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2736 fs = handleOverflow(rounding_mode);
2738 /* If it wasn't, then it also wasn't high enough to overflow max
2739 during the log-rebasing in the min-exponent check. Check that it
2740 won't overflow min in either check, then perform the min-exponent
2742 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
2743 (D.normalizedExponent + 1) * 28738 <=
2744 8651 * (semantics->minExponent - (int) semantics->precision)) {
2745 /* Underflow to zero and round. */
2746 category = fcNormal;
2748 fs = normalize(rounding_mode, lfLessThanHalf);
2750 /* We can finally safely perform the max-exponent check. */
2751 } else if ((D.normalizedExponent - 1) * 42039
2752 >= 12655 * semantics->maxExponent) {
2753 /* Overflow and round. */
2754 fs = handleOverflow(rounding_mode);
2756 integerPart *decSignificand;
2757 unsigned int partCount;
2759 /* A tight upper bound on number of bits required to hold an
2760 N-digit decimal integer is N * 196 / 59. Allocate enough space
2761 to hold the full significand, and an extra part required by
2763 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2764 partCount = partCountForBits(1 + 196 * partCount / 59);
2765 decSignificand = new integerPart[partCount + 1];
2768 /* Convert to binary efficiently - we do almost all multiplication
2769 in an integerPart. When this would overflow do we do a single
2770 bignum multiplication, and then revert again to multiplication
2771 in an integerPart. */
2773 integerPart decValue, val, multiplier;
2781 if (p == str.end()) {
2785 decValue = decDigitValue(*p++);
2786 if (decValue >= 10U) {
2787 delete[] decSignificand;
2788 return createError("Invalid character in significand");
2791 val = val * 10 + decValue;
2792 /* The maximum number that can be multiplied by ten with any
2793 digit added without overflowing an integerPart. */
2794 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2796 /* Multiply out the current part. */
2797 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2798 partCount, partCount + 1, false);
2800 /* If we used another part (likely but not guaranteed), increase
2802 if (decSignificand[partCount])
2804 } while (p <= D.lastSigDigit);
2806 category = fcNormal;
2807 fs = roundSignificandWithExponent(decSignificand, partCount,
2808 D.exponent, rounding_mode);
2810 delete [] decSignificand;
2816 bool IEEEFloat::convertFromStringSpecials(StringRef str) {
2817 const size_t MIN_NAME_SIZE = 3;
2819 if (str.size() < MIN_NAME_SIZE)
2822 if (str.equals("inf") || str.equals("INFINITY") || str.equals("+Inf")) {
2827 bool IsNegative = str.front() == '-';
2829 str = str.drop_front();
2830 if (str.size() < MIN_NAME_SIZE)
2833 if (str.equals("inf") || str.equals("INFINITY") || str.equals("Inf")) {
2839 // If we have a 's' (or 'S') prefix, then this is a Signaling NaN.
2840 bool IsSignaling = str.front() == 's' || str.front() == 'S';
2842 str = str.drop_front();
2843 if (str.size() < MIN_NAME_SIZE)
2847 if (str.startswith("nan") || str.startswith("NaN")) {
2848 str = str.drop_front(3);
2850 // A NaN without payload.
2852 makeNaN(IsSignaling, IsNegative);
2856 // Allow the payload to be inside parentheses.
2857 if (str.front() == '(') {
2858 // Parentheses should be balanced (and not empty).
2859 if (str.size() <= 2 || str.back() != ')')
2862 str = str.slice(1, str.size() - 1);
2865 // Determine the payload number's radix.
2866 unsigned Radix = 10;
2867 if (str[0] == '0') {
2868 if (str.size() > 1 && tolower(str[1]) == 'x') {
2869 str = str.drop_front(2);
2875 // Parse the payload and make the NaN.
2877 if (!str.getAsInteger(Radix, Payload)) {
2878 makeNaN(IsSignaling, IsNegative, &Payload);
2886 Expected<IEEEFloat::opStatus>
2887 IEEEFloat::convertFromString(StringRef str, roundingMode rounding_mode) {
2889 return createError("Invalid string length");
2891 // Handle special cases.
2892 if (convertFromStringSpecials(str))
2895 /* Handle a leading minus sign. */
2896 StringRef::iterator p = str.begin();
2897 size_t slen = str.size();
2898 sign = *p == '-' ? 1 : 0;
2899 if (*p == '-' || *p == '+') {
2903 return createError("String has no digits");
2906 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2908 return createError("Invalid string");
2909 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
2913 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
2916 /* Write out a hexadecimal representation of the floating point value
2917 to DST, which must be of sufficient size, in the C99 form
2918 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2919 excluding the terminating NUL.
2921 If UPPERCASE, the output is in upper case, otherwise in lower case.
2923 HEXDIGITS digits appear altogether, rounding the value if
2924 necessary. If HEXDIGITS is 0, the minimal precision to display the
2925 number precisely is used instead. If nothing would appear after
2926 the decimal point it is suppressed.
2928 The decimal exponent is always printed and has at least one digit.
2929 Zero values display an exponent of zero. Infinities and NaNs
2930 appear as "infinity" or "nan" respectively.
2932 The above rules are as specified by C99. There is ambiguity about
2933 what the leading hexadecimal digit should be. This implementation
2934 uses whatever is necessary so that the exponent is displayed as
2935 stored. This implies the exponent will fall within the IEEE format
2936 range, and the leading hexadecimal digit will be 0 (for denormals),
2937 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2938 any other digits zero).
2940 unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits,
2942 roundingMode rounding_mode) const {
2951 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2952 dst += sizeof infinityL - 1;
2956 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2957 dst += sizeof NaNU - 1;
2962 *dst++ = upperCase ? 'X': 'x';
2964 if (hexDigits > 1) {
2966 memset (dst, '0', hexDigits - 1);
2967 dst += hexDigits - 1;
2969 *dst++ = upperCase ? 'P': 'p';
2974 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2980 return static_cast<unsigned int>(dst - p);
2983 /* Does the hard work of outputting the correctly rounded hexadecimal
2984 form of a normal floating point number with the specified number of
2985 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2986 digits necessary to print the value precisely is output. */
2987 char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2989 roundingMode rounding_mode) const {
2990 unsigned int count, valueBits, shift, partsCount, outputDigits;
2991 const char *hexDigitChars;
2992 const integerPart *significand;
2997 *dst++ = upperCase ? 'X': 'x';
3000 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
3002 significand = significandParts();
3003 partsCount = partCount();
3005 /* +3 because the first digit only uses the single integer bit, so
3006 we have 3 virtual zero most-significant-bits. */
3007 valueBits = semantics->precision + 3;
3008 shift = integerPartWidth - valueBits % integerPartWidth;
3010 /* The natural number of digits required ignoring trailing
3011 insignificant zeroes. */
3012 outputDigits = (valueBits - significandLSB () + 3) / 4;
3014 /* hexDigits of zero means use the required number for the
3015 precision. Otherwise, see if we are truncating. If we are,
3016 find out if we need to round away from zero. */
3018 if (hexDigits < outputDigits) {
3019 /* We are dropping non-zero bits, so need to check how to round.
3020 "bits" is the number of dropped bits. */
3022 lostFraction fraction;
3024 bits = valueBits - hexDigits * 4;
3025 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
3026 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
3028 outputDigits = hexDigits;
3031 /* Write the digits consecutively, and start writing in the location
3032 of the hexadecimal point. We move the most significant digit
3033 left and add the hexadecimal point later. */
3036 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
3038 while (outputDigits && count) {
3041 /* Put the most significant integerPartWidth bits in "part". */
3042 if (--count == partsCount)
3043 part = 0; /* An imaginary higher zero part. */
3045 part = significand[count] << shift;
3048 part |= significand[count - 1] >> (integerPartWidth - shift);
3050 /* Convert as much of "part" to hexdigits as we can. */
3051 unsigned int curDigits = integerPartWidth / 4;
3053 if (curDigits > outputDigits)
3054 curDigits = outputDigits;
3055 dst += partAsHex (dst, part, curDigits, hexDigitChars);
3056 outputDigits -= curDigits;
3062 /* Note that hexDigitChars has a trailing '0'. */
3065 *q = hexDigitChars[hexDigitValue (*q) + 1];
3066 } while (*q == '0');
3069 /* Add trailing zeroes. */
3070 memset (dst, '0', outputDigits);
3071 dst += outputDigits;
3074 /* Move the most significant digit to before the point, and if there
3075 is something after the decimal point add it. This must come
3076 after rounding above. */
3083 /* Finally output the exponent. */
3084 *dst++ = upperCase ? 'P': 'p';
3086 return writeSignedDecimal (dst, exponent);
3089 hash_code hash_value(const IEEEFloat &Arg) {
3090 if (!Arg.isFiniteNonZero())
3091 return hash_combine((uint8_t)Arg.category,
3092 // NaN has no sign, fix it at zero.
3093 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
3094 Arg.semantics->precision);
3096 // Normal floats need their exponent and significand hashed.
3097 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
3098 Arg.semantics->precision, Arg.exponent,
3100 Arg.significandParts(),
3101 Arg.significandParts() + Arg.partCount()));
3104 // Conversion from APFloat to/from host float/double. It may eventually be
3105 // possible to eliminate these and have everybody deal with APFloats, but that
3106 // will take a while. This approach will not easily extend to long double.
3107 // Current implementation requires integerPartWidth==64, which is correct at
3108 // the moment but could be made more general.
3110 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
3111 // the actual IEEE respresentations. We compensate for that here.
3113 APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
3114 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended);
3115 assert(partCount()==2);
3117 uint64_t myexponent, mysignificand;
3119 if (isFiniteNonZero()) {
3120 myexponent = exponent+16383; //bias
3121 mysignificand = significandParts()[0];
3122 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
3123 myexponent = 0; // denormal
3124 } else if (category==fcZero) {
3127 } else if (category==fcInfinity) {
3128 myexponent = 0x7fff;
3129 mysignificand = 0x8000000000000000ULL;
3131 assert(category == fcNaN && "Unknown category");
3132 myexponent = 0x7fff;
3133 mysignificand = significandParts()[0];
3137 words[0] = mysignificand;
3138 words[1] = ((uint64_t)(sign & 1) << 15) |
3139 (myexponent & 0x7fffLL);
3140 return APInt(80, words);
3143 APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const {
3144 assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy);
3145 assert(partCount()==2);
3151 // Convert number to double. To avoid spurious underflows, we re-
3152 // normalize against the "double" minExponent first, and only *then*
3153 // truncate the mantissa. The result of that second conversion
3154 // may be inexact, but should never underflow.
3155 // Declare fltSemantics before APFloat that uses it (and
3156 // saves pointer to it) to ensure correct destruction order.
3157 fltSemantics extendedSemantics = *semantics;
3158 extendedSemantics.minExponent = semIEEEdouble.minExponent;
3159 IEEEFloat extended(*this);
3160 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3161 assert(fs == opOK && !losesInfo);
3164 IEEEFloat u(extended);
3165 fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3166 assert(fs == opOK || fs == opInexact);
3168 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
3170 // If conversion was exact or resulted in a special case, we're done;
3171 // just set the second double to zero. Otherwise, re-convert back to
3172 // the extended format and compute the difference. This now should
3173 // convert exactly to double.
3174 if (u.isFiniteNonZero() && losesInfo) {
3175 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3176 assert(fs == opOK && !losesInfo);
3179 IEEEFloat v(extended);
3180 v.subtract(u, rmNearestTiesToEven);
3181 fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3182 assert(fs == opOK && !losesInfo);
3184 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
3189 return APInt(128, words);
3192 APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
3193 assert(semantics == (const llvm::fltSemantics*)&semIEEEquad);
3194 assert(partCount()==2);
3196 uint64_t myexponent, mysignificand, mysignificand2;
3198 if (isFiniteNonZero()) {
3199 myexponent = exponent+16383; //bias
3200 mysignificand = significandParts()[0];
3201 mysignificand2 = significandParts()[1];
3202 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
3203 myexponent = 0; // denormal
3204 } else if (category==fcZero) {
3206 mysignificand = mysignificand2 = 0;
3207 } else if (category==fcInfinity) {
3208 myexponent = 0x7fff;
3209 mysignificand = mysignificand2 = 0;
3211 assert(category == fcNaN && "Unknown category!");
3212 myexponent = 0x7fff;
3213 mysignificand = significandParts()[0];
3214 mysignificand2 = significandParts()[1];
3218 words[0] = mysignificand;
3219 words[1] = ((uint64_t)(sign & 1) << 63) |
3220 ((myexponent & 0x7fff) << 48) |
3221 (mysignificand2 & 0xffffffffffffLL);
3223 return APInt(128, words);
3226 APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
3227 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble);
3228 assert(partCount()==1);
3230 uint64_t myexponent, mysignificand;
3232 if (isFiniteNonZero()) {
3233 myexponent = exponent+1023; //bias
3234 mysignificand = *significandParts();
3235 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
3236 myexponent = 0; // denormal
3237 } else if (category==fcZero) {
3240 } else if (category==fcInfinity) {
3244 assert(category == fcNaN && "Unknown category!");
3246 mysignificand = *significandParts();
3249 return APInt(64, ((((uint64_t)(sign & 1) << 63) |
3250 ((myexponent & 0x7ff) << 52) |
3251 (mysignificand & 0xfffffffffffffLL))));
3254 APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
3255 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle);
3256 assert(partCount()==1);
3258 uint32_t myexponent, mysignificand;
3260 if (isFiniteNonZero()) {
3261 myexponent = exponent+127; //bias
3262 mysignificand = (uint32_t)*significandParts();
3263 if (myexponent == 1 && !(mysignificand & 0x800000))
3264 myexponent = 0; // denormal
3265 } else if (category==fcZero) {
3268 } else if (category==fcInfinity) {
3272 assert(category == fcNaN && "Unknown category!");
3274 mysignificand = (uint32_t)*significandParts();
3277 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
3278 (mysignificand & 0x7fffff)));
3281 APInt IEEEFloat::convertBFloatAPFloatToAPInt() const {
3282 assert(semantics == (const llvm::fltSemantics *)&semBFloat);
3283 assert(partCount() == 1);
3285 uint32_t myexponent, mysignificand;
3287 if (isFiniteNonZero()) {
3288 myexponent = exponent + 127; // bias
3289 mysignificand = (uint32_t)*significandParts();
3290 if (myexponent == 1 && !(mysignificand & 0x80))
3291 myexponent = 0; // denormal
3292 } else if (category == fcZero) {
3295 } else if (category == fcInfinity) {
3299 assert(category == fcNaN && "Unknown category!");
3301 mysignificand = (uint32_t)*significandParts();
3304 return APInt(16, (((sign & 1) << 15) | ((myexponent & 0xff) << 7) |
3305 (mysignificand & 0x7f)));
3308 APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
3309 assert(semantics == (const llvm::fltSemantics*)&semIEEEhalf);
3310 assert(partCount()==1);
3312 uint32_t myexponent, mysignificand;
3314 if (isFiniteNonZero()) {
3315 myexponent = exponent+15; //bias
3316 mysignificand = (uint32_t)*significandParts();
3317 if (myexponent == 1 && !(mysignificand & 0x400))
3318 myexponent = 0; // denormal
3319 } else if (category==fcZero) {
3322 } else if (category==fcInfinity) {
3326 assert(category == fcNaN && "Unknown category!");
3328 mysignificand = (uint32_t)*significandParts();
3331 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |
3332 (mysignificand & 0x3ff)));
3335 // This function creates an APInt that is just a bit map of the floating
3336 // point constant as it would appear in memory. It is not a conversion,
3337 // and treating the result as a normal integer is unlikely to be useful.
3339 APInt IEEEFloat::bitcastToAPInt() const {
3340 if (semantics == (const llvm::fltSemantics*)&semIEEEhalf)
3341 return convertHalfAPFloatToAPInt();
3343 if (semantics == (const llvm::fltSemantics *)&semBFloat)
3344 return convertBFloatAPFloatToAPInt();
3346 if (semantics == (const llvm::fltSemantics*)&semIEEEsingle)
3347 return convertFloatAPFloatToAPInt();
3349 if (semantics == (const llvm::fltSemantics*)&semIEEEdouble)
3350 return convertDoubleAPFloatToAPInt();
3352 if (semantics == (const llvm::fltSemantics*)&semIEEEquad)
3353 return convertQuadrupleAPFloatToAPInt();
3355 if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy)
3356 return convertPPCDoubleDoubleAPFloatToAPInt();
3358 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended &&
3360 return convertF80LongDoubleAPFloatToAPInt();
3363 float IEEEFloat::convertToFloat() const {
3364 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle &&
3365 "Float semantics are not IEEEsingle");
3366 APInt api = bitcastToAPInt();
3367 return api.bitsToFloat();
3370 double IEEEFloat::convertToDouble() const {
3371 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble &&
3372 "Float semantics are not IEEEdouble");
3373 APInt api = bitcastToAPInt();
3374 return api.bitsToDouble();
3377 /// Integer bit is explicit in this format. Intel hardware (387 and later)
3378 /// does not support these bit patterns:
3379 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3380 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3381 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3382 /// exponent = 0, integer bit 1 ("pseudodenormal")
3383 /// At the moment, the first three are treated as NaNs, the last one as Normal.
3384 void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
3385 assert(api.getBitWidth()==80);
3386 uint64_t i1 = api.getRawData()[0];
3387 uint64_t i2 = api.getRawData()[1];
3388 uint64_t myexponent = (i2 & 0x7fff);
3389 uint64_t mysignificand = i1;
3390 uint8_t myintegerbit = mysignificand >> 63;
3392 initialize(&semX87DoubleExtended);
3393 assert(partCount()==2);
3395 sign = static_cast<unsigned int>(i2>>15);
3396 if (myexponent == 0 && mysignificand == 0) {
3397 // exponent, significand meaningless
3399 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3400 // exponent, significand meaningless
3401 category = fcInfinity;
3402 } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) ||
3403 (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) {
3404 // exponent meaningless
3406 significandParts()[0] = mysignificand;
3407 significandParts()[1] = 0;
3409 category = fcNormal;
3410 exponent = myexponent - 16383;
3411 significandParts()[0] = mysignificand;
3412 significandParts()[1] = 0;
3413 if (myexponent==0) // denormal
3418 void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) {
3419 assert(api.getBitWidth()==128);
3420 uint64_t i1 = api.getRawData()[0];
3421 uint64_t i2 = api.getRawData()[1];
3425 // Get the first double and convert to our format.
3426 initFromDoubleAPInt(APInt(64, i1));
3427 fs = convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3428 assert(fs == opOK && !losesInfo);
3431 // Unless we have a special case, add in second double.
3432 if (isFiniteNonZero()) {
3433 IEEEFloat v(semIEEEdouble, APInt(64, i2));
3434 fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3435 assert(fs == opOK && !losesInfo);
3438 add(v, rmNearestTiesToEven);
3442 void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
3443 assert(api.getBitWidth()==128);
3444 uint64_t i1 = api.getRawData()[0];
3445 uint64_t i2 = api.getRawData()[1];
3446 uint64_t myexponent = (i2 >> 48) & 0x7fff;
3447 uint64_t mysignificand = i1;
3448 uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
3450 initialize(&semIEEEquad);
3451 assert(partCount()==2);
3453 sign = static_cast<unsigned int>(i2>>63);
3454 if (myexponent==0 &&
3455 (mysignificand==0 && mysignificand2==0)) {
3456 // exponent, significand meaningless
3458 } else if (myexponent==0x7fff &&
3459 (mysignificand==0 && mysignificand2==0)) {
3460 // exponent, significand meaningless
3461 category = fcInfinity;
3462 } else if (myexponent==0x7fff &&
3463 (mysignificand!=0 || mysignificand2 !=0)) {
3464 // exponent meaningless
3466 significandParts()[0] = mysignificand;
3467 significandParts()[1] = mysignificand2;
3469 category = fcNormal;
3470 exponent = myexponent - 16383;
3471 significandParts()[0] = mysignificand;
3472 significandParts()[1] = mysignificand2;
3473 if (myexponent==0) // denormal
3476 significandParts()[1] |= 0x1000000000000LL; // integer bit
3480 void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
3481 assert(api.getBitWidth()==64);
3482 uint64_t i = *api.getRawData();
3483 uint64_t myexponent = (i >> 52) & 0x7ff;
3484 uint64_t mysignificand = i & 0xfffffffffffffLL;
3486 initialize(&semIEEEdouble);
3487 assert(partCount()==1);
3489 sign = static_cast<unsigned int>(i>>63);
3490 if (myexponent==0 && mysignificand==0) {
3491 // exponent, significand meaningless
3493 } else if (myexponent==0x7ff && mysignificand==0) {
3494 // exponent, significand meaningless
3495 category = fcInfinity;
3496 } else if (myexponent==0x7ff && mysignificand!=0) {
3497 // exponent meaningless
3499 *significandParts() = mysignificand;
3501 category = fcNormal;
3502 exponent = myexponent - 1023;
3503 *significandParts() = mysignificand;
3504 if (myexponent==0) // denormal
3507 *significandParts() |= 0x10000000000000LL; // integer bit
3511 void IEEEFloat::initFromFloatAPInt(const APInt &api) {
3512 assert(api.getBitWidth()==32);
3513 uint32_t i = (uint32_t)*api.getRawData();
3514 uint32_t myexponent = (i >> 23) & 0xff;
3515 uint32_t mysignificand = i & 0x7fffff;
3517 initialize(&semIEEEsingle);
3518 assert(partCount()==1);
3521 if (myexponent==0 && mysignificand==0) {
3522 // exponent, significand meaningless
3524 } else if (myexponent==0xff && mysignificand==0) {
3525 // exponent, significand meaningless
3526 category = fcInfinity;
3527 } else if (myexponent==0xff && mysignificand!=0) {
3528 // sign, exponent, significand meaningless
3530 *significandParts() = mysignificand;
3532 category = fcNormal;
3533 exponent = myexponent - 127; //bias
3534 *significandParts() = mysignificand;
3535 if (myexponent==0) // denormal
3538 *significandParts() |= 0x800000; // integer bit
3542 void IEEEFloat::initFromBFloatAPInt(const APInt &api) {
3543 assert(api.getBitWidth() == 16);
3544 uint32_t i = (uint32_t)*api.getRawData();
3545 uint32_t myexponent = (i >> 7) & 0xff;
3546 uint32_t mysignificand = i & 0x7f;
3548 initialize(&semBFloat);
3549 assert(partCount() == 1);
3552 if (myexponent == 0 && mysignificand == 0) {
3553 // exponent, significand meaningless
3555 } else if (myexponent == 0xff && mysignificand == 0) {
3556 // exponent, significand meaningless
3557 category = fcInfinity;
3558 } else if (myexponent == 0xff && mysignificand != 0) {
3559 // sign, exponent, significand meaningless
3561 *significandParts() = mysignificand;
3563 category = fcNormal;
3564 exponent = myexponent - 127; // bias
3565 *significandParts() = mysignificand;
3566 if (myexponent == 0) // denormal
3569 *significandParts() |= 0x80; // integer bit
3573 void IEEEFloat::initFromHalfAPInt(const APInt &api) {
3574 assert(api.getBitWidth()==16);
3575 uint32_t i = (uint32_t)*api.getRawData();
3576 uint32_t myexponent = (i >> 10) & 0x1f;
3577 uint32_t mysignificand = i & 0x3ff;
3579 initialize(&semIEEEhalf);
3580 assert(partCount()==1);
3583 if (myexponent==0 && mysignificand==0) {
3584 // exponent, significand meaningless
3586 } else if (myexponent==0x1f && mysignificand==0) {
3587 // exponent, significand meaningless
3588 category = fcInfinity;
3589 } else if (myexponent==0x1f && mysignificand!=0) {
3590 // sign, exponent, significand meaningless
3592 *significandParts() = mysignificand;
3594 category = fcNormal;
3595 exponent = myexponent - 15; //bias
3596 *significandParts() = mysignificand;
3597 if (myexponent==0) // denormal
3600 *significandParts() |= 0x400; // integer bit
3604 /// Treat api as containing the bits of a floating point number. Currently
3605 /// we infer the floating point type from the size of the APInt. The
3606 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
3607 /// when the size is anything else).
3608 void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
3609 if (Sem == &semIEEEhalf)
3610 return initFromHalfAPInt(api);
3611 if (Sem == &semBFloat)
3612 return initFromBFloatAPInt(api);
3613 if (Sem == &semIEEEsingle)
3614 return initFromFloatAPInt(api);
3615 if (Sem == &semIEEEdouble)
3616 return initFromDoubleAPInt(api);
3617 if (Sem == &semX87DoubleExtended)
3618 return initFromF80LongDoubleAPInt(api);
3619 if (Sem == &semIEEEquad)
3620 return initFromQuadrupleAPInt(api);
3621 if (Sem == &semPPCDoubleDoubleLegacy)
3622 return initFromPPCDoubleDoubleAPInt(api);
3624 llvm_unreachable(nullptr);
3627 /// Make this number the largest magnitude normal number in the given
3629 void IEEEFloat::makeLargest(bool Negative) {
3630 // We want (in interchange format):
3631 // sign = {Negative}
3633 // significand = 1..1
3634 category = fcNormal;
3636 exponent = semantics->maxExponent;
3638 // Use memset to set all but the highest integerPart to all ones.
3639 integerPart *significand = significandParts();
3640 unsigned PartCount = partCount();
3641 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
3643 // Set the high integerPart especially setting all unused top bits for
3644 // internal consistency.
3645 const unsigned NumUnusedHighBits =
3646 PartCount*integerPartWidth - semantics->precision;
3647 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
3648 ? (~integerPart(0) >> NumUnusedHighBits)
3652 /// Make this number the smallest magnitude denormal number in the given
3654 void IEEEFloat::makeSmallest(bool Negative) {
3655 // We want (in interchange format):
3656 // sign = {Negative}
3658 // significand = 0..01
3659 category = fcNormal;
3661 exponent = semantics->minExponent;
3662 APInt::tcSet(significandParts(), 1, partCount());
3665 void IEEEFloat::makeSmallestNormalized(bool Negative) {
3666 // We want (in interchange format):
3667 // sign = {Negative}
3669 // significand = 10..0
3671 category = fcNormal;
3674 exponent = semantics->minExponent;
3675 significandParts()[partCountForBits(semantics->precision) - 1] |=
3676 (((integerPart)1) << ((semantics->precision - 1) % integerPartWidth));
3679 IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
3680 initFromAPInt(&Sem, API);
3683 IEEEFloat::IEEEFloat(float f) {
3684 initFromAPInt(&semIEEEsingle, APInt::floatToBits(f));
3687 IEEEFloat::IEEEFloat(double d) {
3688 initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d));
3692 void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
3693 Buffer.append(Str.begin(), Str.end());
3696 /// Removes data from the given significand until it is no more
3697 /// precise than is required for the desired precision.
3698 void AdjustToPrecision(APInt &significand,
3699 int &exp, unsigned FormatPrecision) {
3700 unsigned bits = significand.getActiveBits();
3702 // 196/59 is a very slight overestimate of lg_2(10).
3703 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
3705 if (bits <= bitsRequired) return;
3707 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3708 if (!tensRemovable) return;
3710 exp += tensRemovable;
3712 APInt divisor(significand.getBitWidth(), 1);
3713 APInt powten(significand.getBitWidth(), 10);
3715 if (tensRemovable & 1)
3717 tensRemovable >>= 1;
3718 if (!tensRemovable) break;
3722 significand = significand.udiv(divisor);
3724 // Truncate the significand down to its active bit count.
3725 significand = significand.trunc(significand.getActiveBits());
3729 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
3730 int &exp, unsigned FormatPrecision) {
3731 unsigned N = buffer.size();
3732 if (N <= FormatPrecision) return;
3734 // The most significant figures are the last ones in the buffer.
3735 unsigned FirstSignificant = N - FormatPrecision;
3738 // FIXME: this probably shouldn't use 'round half up'.
3740 // Rounding down is just a truncation, except we also want to drop
3741 // trailing zeros from the new result.
3742 if (buffer[FirstSignificant - 1] < '5') {
3743 while (FirstSignificant < N && buffer[FirstSignificant] == '0')
3746 exp += FirstSignificant;
3747 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3751 // Rounding up requires a decimal add-with-carry. If we continue
3752 // the carry, the newly-introduced zeros will just be truncated.
3753 for (unsigned I = FirstSignificant; I != N; ++I) {
3754 if (buffer[I] == '9') {
3762 // If we carried through, we have exactly one digit of precision.
3763 if (FirstSignificant == N) {
3764 exp += FirstSignificant;
3766 buffer.push_back('1');
3770 exp += FirstSignificant;
3771 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3775 void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
3776 unsigned FormatMaxPadding, bool TruncateZero) const {
3780 return append(Str, "-Inf");
3782 return append(Str, "+Inf");
3784 case fcNaN: return append(Str, "NaN");
3790 if (!FormatMaxPadding) {
3792 append(Str, "0.0E+0");
3795 if (FormatPrecision > 1)
3796 Str.append(FormatPrecision - 1, '0');
3797 append(Str, "e+00");
3810 // Decompose the number into an APInt and an exponent.
3811 int exp = exponent - ((int) semantics->precision - 1);
3812 APInt significand(semantics->precision,
3813 makeArrayRef(significandParts(),
3814 partCountForBits(semantics->precision)));
3816 // Set FormatPrecision if zero. We want to do this before we
3817 // truncate trailing zeros, as those are part of the precision.
3818 if (!FormatPrecision) {
3819 // We use enough digits so the number can be round-tripped back to an
3820 // APFloat. The formula comes from "How to Print Floating-Point Numbers
3821 // Accurately" by Steele and White.
3822 // FIXME: Using a formula based purely on the precision is conservative;
3823 // we can print fewer digits depending on the actual value being printed.
3825 // FormatPrecision = 2 + floor(significandBits / lg_2(10))
3826 FormatPrecision = 2 + semantics->precision * 59 / 196;
3829 // Ignore trailing binary zeros.
3830 int trailingZeros = significand.countTrailingZeros();
3831 exp += trailingZeros;
3832 significand.lshrInPlace(trailingZeros);
3834 // Change the exponent from 2^e to 10^e.
3837 } else if (exp > 0) {
3839 significand = significand.zext(semantics->precision + exp);
3840 significand <<= exp;
3842 } else { /* exp < 0 */
3845 // We transform this using the identity:
3846 // (N)(2^-e) == (N)(5^e)(10^-e)
3847 // This means we have to multiply N (the significand) by 5^e.
3848 // To avoid overflow, we have to operate on numbers large
3849 // enough to store N * 5^e:
3850 // log2(N * 5^e) == log2(N) + e * log2(5)
3851 // <= semantics->precision + e * 137 / 59
3852 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
3854 unsigned precision = semantics->precision + (137 * texp + 136) / 59;
3856 // Multiply significand by 5^e.
3857 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
3858 significand = significand.zext(precision);
3859 APInt five_to_the_i(precision, 5);
3861 if (texp & 1) significand *= five_to_the_i;
3865 five_to_the_i *= five_to_the_i;
3869 AdjustToPrecision(significand, exp, FormatPrecision);
3871 SmallVector<char, 256> buffer;
3874 unsigned precision = significand.getBitWidth();
3875 APInt ten(precision, 10);
3876 APInt digit(precision, 0);
3878 bool inTrail = true;
3879 while (significand != 0) {
3880 // digit <- significand % 10
3881 // significand <- significand / 10
3882 APInt::udivrem(significand, ten, significand, digit);
3884 unsigned d = digit.getZExtValue();
3886 // Drop trailing zeros.
3887 if (inTrail && !d) exp++;
3889 buffer.push_back((char) ('0' + d));
3894 assert(!buffer.empty() && "no characters in buffer!");
3896 // Drop down to FormatPrecision.
3897 // TODO: don't do more precise calculations above than are required.
3898 AdjustToPrecision(buffer, exp, FormatPrecision);
3900 unsigned NDigits = buffer.size();
3902 // Check whether we should use scientific notation.
3903 bool FormatScientific;
3904 if (!FormatMaxPadding)
3905 FormatScientific = true;
3910 // But we shouldn't make the number look more precise than it is.
3911 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
3912 NDigits + (unsigned) exp > FormatPrecision);
3914 // Power of the most significant digit.
3915 int MSD = exp + (int) (NDigits - 1);
3918 FormatScientific = false;
3920 // 765e-5 == 0.00765
3922 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
3927 // Scientific formatting is pretty straightforward.
3928 if (FormatScientific) {
3929 exp += (NDigits - 1);
3931 Str.push_back(buffer[NDigits-1]);
3933 if (NDigits == 1 && TruncateZero)
3936 for (unsigned I = 1; I != NDigits; ++I)
3937 Str.push_back(buffer[NDigits-1-I]);
3938 // Fill with zeros up to FormatPrecision.
3939 if (!TruncateZero && FormatPrecision > NDigits - 1)
3940 Str.append(FormatPrecision - NDigits + 1, '0');
3941 // For !TruncateZero we use lower 'e'.
3942 Str.push_back(TruncateZero ? 'E' : 'e');
3944 Str.push_back(exp >= 0 ? '+' : '-');
3945 if (exp < 0) exp = -exp;
3946 SmallVector<char, 6> expbuf;
3948 expbuf.push_back((char) ('0' + (exp % 10)));
3951 // Exponent always at least two digits if we do not truncate zeros.
3952 if (!TruncateZero && expbuf.size() < 2)
3953 expbuf.push_back('0');
3954 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
3955 Str.push_back(expbuf[E-1-I]);
3959 // Non-scientific, positive exponents.
3961 for (unsigned I = 0; I != NDigits; ++I)
3962 Str.push_back(buffer[NDigits-1-I]);
3963 for (unsigned I = 0; I != (unsigned) exp; ++I)
3968 // Non-scientific, negative exponents.
3970 // The number of digits to the left of the decimal point.
3971 int NWholeDigits = exp + (int) NDigits;
3974 if (NWholeDigits > 0) {
3975 for (; I != (unsigned) NWholeDigits; ++I)
3976 Str.push_back(buffer[NDigits-I-1]);
3979 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
3983 for (unsigned Z = 1; Z != NZeros; ++Z)
3987 for (; I != NDigits; ++I)
3988 Str.push_back(buffer[NDigits-I-1]);
3991 bool IEEEFloat::getExactInverse(APFloat *inv) const {
3992 // Special floats and denormals have no exact inverse.
3993 if (!isFiniteNonZero())
3996 // Check that the number is a power of two by making sure that only the
3997 // integer bit is set in the significand.
3998 if (significandLSB() != semantics->precision - 1)
4002 IEEEFloat reciprocal(*semantics, 1ULL);
4003 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
4006 // Avoid multiplication with a denormal, it is not safe on all platforms and
4007 // may be slower than a normal division.
4008 if (reciprocal.isDenormal())
4011 assert(reciprocal.isFiniteNonZero() &&
4012 reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
4015 *inv = APFloat(reciprocal, *semantics);
4020 bool IEEEFloat::isSignaling() const {
4024 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
4025 // first bit of the trailing significand being 0.
4026 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
4029 /// IEEE-754R 2008 5.3.1: nextUp/nextDown.
4031 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
4032 /// appropriate sign switching before/after the computation.
4033 IEEEFloat::opStatus IEEEFloat::next(bool nextDown) {
4034 // If we are performing nextDown, swap sign so we have -x.
4038 // Compute nextUp(x)
4039 opStatus result = opOK;
4041 // Handle each float category separately.
4044 // nextUp(+inf) = +inf
4047 // nextUp(-inf) = -getLargest()
4051 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
4052 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
4053 // change the payload.
4054 if (isSignaling()) {
4055 result = opInvalidOp;
4056 // For consistency, propagate the sign of the sNaN to the qNaN.
4057 makeNaN(false, isNegative(), nullptr);
4061 // nextUp(pm 0) = +getSmallest()
4062 makeSmallest(false);
4065 // nextUp(-getSmallest()) = -0
4066 if (isSmallest() && isNegative()) {
4067 APInt::tcSet(significandParts(), 0, partCount());
4073 // nextUp(getLargest()) == INFINITY
4074 if (isLargest() && !isNegative()) {
4075 APInt::tcSet(significandParts(), 0, partCount());
4076 category = fcInfinity;
4077 exponent = semantics->maxExponent + 1;
4081 // nextUp(normal) == normal + inc.
4083 // If we are negative, we need to decrement the significand.
4085 // We only cross a binade boundary that requires adjusting the exponent
4087 // 1. exponent != semantics->minExponent. This implies we are not in the
4088 // smallest binade or are dealing with denormals.
4089 // 2. Our significand excluding the integral bit is all zeros.
4090 bool WillCrossBinadeBoundary =
4091 exponent != semantics->minExponent && isSignificandAllZeros();
4093 // Decrement the significand.
4095 // We always do this since:
4096 // 1. If we are dealing with a non-binade decrement, by definition we
4097 // just decrement the significand.
4098 // 2. If we are dealing with a normal -> normal binade decrement, since
4099 // we have an explicit integral bit the fact that all bits but the
4100 // integral bit are zero implies that subtracting one will yield a
4101 // significand with 0 integral bit and 1 in all other spots. Thus we
4102 // must just adjust the exponent and set the integral bit to 1.
4103 // 3. If we are dealing with a normal -> denormal binade decrement,
4104 // since we set the integral bit to 0 when we represent denormals, we
4105 // just decrement the significand.
4106 integerPart *Parts = significandParts();
4107 APInt::tcDecrement(Parts, partCount());
4109 if (WillCrossBinadeBoundary) {
4110 // Our result is a normal number. Do the following:
4111 // 1. Set the integral bit to 1.
4112 // 2. Decrement the exponent.
4113 APInt::tcSetBit(Parts, semantics->precision - 1);
4117 // If we are positive, we need to increment the significand.
4119 // We only cross a binade boundary that requires adjusting the exponent if
4120 // the input is not a denormal and all of said input's significand bits
4121 // are set. If all of said conditions are true: clear the significand, set
4122 // the integral bit to 1, and increment the exponent. If we have a
4123 // denormal always increment since moving denormals and the numbers in the
4124 // smallest normal binade have the same exponent in our representation.
4125 bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes();
4127 if (WillCrossBinadeBoundary) {
4128 integerPart *Parts = significandParts();
4129 APInt::tcSet(Parts, 0, partCount());
4130 APInt::tcSetBit(Parts, semantics->precision - 1);
4131 assert(exponent != semantics->maxExponent &&
4132 "We can not increment an exponent beyond the maxExponent allowed"
4133 " by the given floating point semantics.");
4136 incrementSignificand();
4142 // If we are performing nextDown, swap sign so we have -nextUp(-x)
4149 void IEEEFloat::makeInf(bool Negative) {
4150 category = fcInfinity;
4152 exponent = semantics->maxExponent + 1;
4153 APInt::tcSet(significandParts(), 0, partCount());
4156 void IEEEFloat::makeZero(bool Negative) {
4159 exponent = semantics->minExponent-1;
4160 APInt::tcSet(significandParts(), 0, partCount());
4163 void IEEEFloat::makeQuiet() {
4165 APInt::tcSetBit(significandParts(), semantics->precision - 2);
4168 int ilogb(const IEEEFloat &Arg) {
4170 return IEEEFloat::IEK_NaN;
4172 return IEEEFloat::IEK_Zero;
4173 if (Arg.isInfinity())
4174 return IEEEFloat::IEK_Inf;
4175 if (!Arg.isDenormal())
4176 return Arg.exponent;
4178 IEEEFloat Normalized(Arg);
4179 int SignificandBits = Arg.getSemantics().precision - 1;
4181 Normalized.exponent += SignificandBits;
4182 Normalized.normalize(IEEEFloat::rmNearestTiesToEven, lfExactlyZero);
4183 return Normalized.exponent - SignificandBits;
4186 IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode RoundingMode) {
4187 auto MaxExp = X.getSemantics().maxExponent;
4188 auto MinExp = X.getSemantics().minExponent;
4190 // If Exp is wildly out-of-scale, simply adding it to X.exponent will
4191 // overflow; clamp it to a safe range before adding, but ensure that the range
4192 // is large enough that the clamp does not change the result. The range we
4193 // need to support is the difference between the largest possible exponent and
4194 // the normalized exponent of half the smallest denormal.
4196 int SignificandBits = X.getSemantics().precision - 1;
4197 int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;
4199 // Clamp to one past the range ends to let normalize handle overlflow.
4200 X.exponent += std::min(std::max(Exp, -MaxIncrement - 1), MaxIncrement);
4201 X.normalize(RoundingMode, lfExactlyZero);
4207 IEEEFloat frexp(const IEEEFloat &Val, int &Exp, IEEEFloat::roundingMode RM) {
4210 // Quiet signalling nans.
4211 if (Exp == IEEEFloat::IEK_NaN) {
4212 IEEEFloat Quiet(Val);
4217 if (Exp == IEEEFloat::IEK_Inf)
4220 // 1 is added because frexp is defined to return a normalized fraction in
4221 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
4222 Exp = Exp == IEEEFloat::IEK_Zero ? 0 : Exp + 1;
4223 return scalbn(Val, -Exp, RM);
4226 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S)
4228 Floats(new APFloat[2]{APFloat(semIEEEdouble), APFloat(semIEEEdouble)}) {
4229 assert(Semantics == &semPPCDoubleDouble);
4232 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag)
4234 Floats(new APFloat[2]{APFloat(semIEEEdouble, uninitialized),
4235 APFloat(semIEEEdouble, uninitialized)}) {
4236 assert(Semantics == &semPPCDoubleDouble);
4239 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I)
4240 : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I),
4241 APFloat(semIEEEdouble)}) {
4242 assert(Semantics == &semPPCDoubleDouble);
4245 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I)
4247 Floats(new APFloat[2]{
4248 APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])),
4249 APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
4250 assert(Semantics == &semPPCDoubleDouble);
4253 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First,
4256 Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
4257 assert(Semantics == &semPPCDoubleDouble);
4258 assert(&Floats[0].getSemantics() == &semIEEEdouble);
4259 assert(&Floats[1].getSemantics() == &semIEEEdouble);
4262 DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS)
4263 : Semantics(RHS.Semantics),
4264 Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]),
4265 APFloat(RHS.Floats[1])}
4267 assert(Semantics == &semPPCDoubleDouble);
4270 DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS)
4271 : Semantics(RHS.Semantics), Floats(std::move(RHS.Floats)) {
4272 RHS.Semantics = &semBogus;
4273 assert(Semantics == &semPPCDoubleDouble);
4276 DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) {
4277 if (Semantics == RHS.Semantics && RHS.Floats) {
4278 Floats[0] = RHS.Floats[0];
4279 Floats[1] = RHS.Floats[1];
4280 } else if (this != &RHS) {
4281 this->~DoubleAPFloat();
4282 new (this) DoubleAPFloat(RHS);
4287 // Implement addition, subtraction, multiplication and division based on:
4288 // "Software for Doubled-Precision Floating-Point Computations",
4289 // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
4290 APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa,
4291 const APFloat &c, const APFloat &cc,
4295 Status |= z.add(c, RM);
4296 if (!z.isFinite()) {
4297 if (!z.isInfinity()) {
4298 Floats[0] = std::move(z);
4299 Floats[1].makeZero(/* Neg = */ false);
4300 return (opStatus)Status;
4303 auto AComparedToC = a.compareAbsoluteValue(c);
4305 Status |= z.add(aa, RM);
4306 if (AComparedToC == APFloat::cmpGreaterThan) {
4307 // z = cc + aa + c + a;
4308 Status |= z.add(c, RM);
4309 Status |= z.add(a, RM);
4311 // z = cc + aa + a + c;
4312 Status |= z.add(a, RM);
4313 Status |= z.add(c, RM);
4315 if (!z.isFinite()) {
4316 Floats[0] = std::move(z);
4317 Floats[1].makeZero(/* Neg = */ false);
4318 return (opStatus)Status;
4322 Status |= zz.add(cc, RM);
4323 if (AComparedToC == APFloat::cmpGreaterThan) {
4324 // Floats[1] = a - z + c + zz;
4326 Status |= Floats[1].subtract(z, RM);
4327 Status |= Floats[1].add(c, RM);
4328 Status |= Floats[1].add(zz, RM);
4330 // Floats[1] = c - z + a + zz;
4332 Status |= Floats[1].subtract(z, RM);
4333 Status |= Floats[1].add(a, RM);
4334 Status |= Floats[1].add(zz, RM);
4339 Status |= q.subtract(z, RM);
4341 // zz = q + c + (a - (q + z)) + aa + cc;
4342 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
4344 Status |= zz.add(c, RM);
4345 Status |= q.add(z, RM);
4346 Status |= q.subtract(a, RM);
4348 Status |= zz.add(q, RM);
4349 Status |= zz.add(aa, RM);
4350 Status |= zz.add(cc, RM);
4351 if (zz.isZero() && !zz.isNegative()) {
4352 Floats[0] = std::move(z);
4353 Floats[1].makeZero(/* Neg = */ false);
4357 Status |= Floats[0].add(zz, RM);
4358 if (!Floats[0].isFinite()) {
4359 Floats[1].makeZero(/* Neg = */ false);
4360 return (opStatus)Status;
4362 Floats[1] = std::move(z);
4363 Status |= Floats[1].subtract(Floats[0], RM);
4364 Status |= Floats[1].add(zz, RM);
4366 return (opStatus)Status;
4369 APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS,
4370 const DoubleAPFloat &RHS,
4373 if (LHS.getCategory() == fcNaN) {
4377 if (RHS.getCategory() == fcNaN) {
4381 if (LHS.getCategory() == fcZero) {
4385 if (RHS.getCategory() == fcZero) {
4389 if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity &&
4390 LHS.isNegative() != RHS.isNegative()) {
4391 Out.makeNaN(false, Out.isNegative(), nullptr);
4394 if (LHS.getCategory() == fcInfinity) {
4398 if (RHS.getCategory() == fcInfinity) {
4402 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal);
4404 APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]),
4406 assert(&A.getSemantics() == &semIEEEdouble);
4407 assert(&AA.getSemantics() == &semIEEEdouble);
4408 assert(&C.getSemantics() == &semIEEEdouble);
4409 assert(&CC.getSemantics() == &semIEEEdouble);
4410 assert(&Out.Floats[0].getSemantics() == &semIEEEdouble);
4411 assert(&Out.Floats[1].getSemantics() == &semIEEEdouble);
4412 return Out.addImpl(A, AA, C, CC, RM);
4415 APFloat::opStatus DoubleAPFloat::add(const DoubleAPFloat &RHS,
4417 return addWithSpecial(*this, RHS, *this, RM);
4420 APFloat::opStatus DoubleAPFloat::subtract(const DoubleAPFloat &RHS,
4423 auto Ret = add(RHS, RM);
4428 APFloat::opStatus DoubleAPFloat::multiply(const DoubleAPFloat &RHS,
4429 APFloat::roundingMode RM) {
4430 const auto &LHS = *this;
4432 /* Interesting observation: For special categories, finding the lowest
4433 common ancestor of the following layered graph gives the correct
4442 e.g. NaN * NaN = NaN
4444 Normal * Zero = Zero
4447 if (LHS.getCategory() == fcNaN) {
4451 if (RHS.getCategory() == fcNaN) {
4455 if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) ||
4456 (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) {
4457 Out.makeNaN(false, false, nullptr);
4460 if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) {
4464 if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) {
4468 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal &&
4469 "Special cases not handled exhaustively");
4472 APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1];
4475 Status |= T.multiply(C, RM);
4476 if (!T.isFiniteNonZero()) {
4478 Floats[1].makeZero(/* Neg = */ false);
4479 return (opStatus)Status;
4482 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
4485 Status |= Tau.fusedMultiplyAdd(C, T, RM);
4490 Status |= V.multiply(D, RM);
4493 Status |= W.multiply(C, RM);
4494 Status |= V.add(W, RM);
4496 Status |= Tau.add(V, RM);
4500 Status |= U.add(Tau, RM);
4503 if (!U.isFinite()) {
4504 Floats[1].makeZero(/* Neg = */ false);
4506 // Floats[1] = (t - u) + tau
4507 Status |= T.subtract(U, RM);
4508 Status |= T.add(Tau, RM);
4511 return (opStatus)Status;
4514 APFloat::opStatus DoubleAPFloat::divide(const DoubleAPFloat &RHS,
4515 APFloat::roundingMode RM) {
4516 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4517 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4519 Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM);
4520 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4524 APFloat::opStatus DoubleAPFloat::remainder(const DoubleAPFloat &RHS) {
4525 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4526 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4528 Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4529 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4533 APFloat::opStatus DoubleAPFloat::mod(const DoubleAPFloat &RHS) {
4534 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4535 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4536 auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4537 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4542 DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand,
4543 const DoubleAPFloat &Addend,
4544 APFloat::roundingMode RM) {
4545 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4546 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4547 auto Ret = Tmp.fusedMultiplyAdd(
4548 APFloat(semPPCDoubleDoubleLegacy, Multiplicand.bitcastToAPInt()),
4549 APFloat(semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()), RM);
4550 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4554 APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) {
4555 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4556 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4557 auto Ret = Tmp.roundToIntegral(RM);
4558 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4562 void DoubleAPFloat::changeSign() {
4563 Floats[0].changeSign();
4564 Floats[1].changeSign();
4568 DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat &RHS) const {
4569 auto Result = Floats[0].compareAbsoluteValue(RHS.Floats[0]);
4570 if (Result != cmpEqual)
4572 Result = Floats[1].compareAbsoluteValue(RHS.Floats[1]);
4573 if (Result == cmpLessThan || Result == cmpGreaterThan) {
4574 auto Against = Floats[0].isNegative() ^ Floats[1].isNegative();
4575 auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative();
4576 if (Against && !RHSAgainst)
4578 if (!Against && RHSAgainst)
4579 return cmpGreaterThan;
4580 if (!Against && !RHSAgainst)
4582 if (Against && RHSAgainst)
4583 return (cmpResult)(cmpLessThan + cmpGreaterThan - Result);
4588 APFloat::fltCategory DoubleAPFloat::getCategory() const {
4589 return Floats[0].getCategory();
4592 bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); }
4594 void DoubleAPFloat::makeInf(bool Neg) {
4595 Floats[0].makeInf(Neg);
4596 Floats[1].makeZero(/* Neg = */ false);
4599 void DoubleAPFloat::makeZero(bool Neg) {
4600 Floats[0].makeZero(Neg);
4601 Floats[1].makeZero(/* Neg = */ false);
4604 void DoubleAPFloat::makeLargest(bool Neg) {
4605 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4606 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
4607 Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
4612 void DoubleAPFloat::makeSmallest(bool Neg) {
4613 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4614 Floats[0].makeSmallest(Neg);
4615 Floats[1].makeZero(/* Neg = */ false);
4618 void DoubleAPFloat::makeSmallestNormalized(bool Neg) {
4619 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4620 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull));
4622 Floats[0].changeSign();
4623 Floats[1].makeZero(/* Neg = */ false);
4626 void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) {
4627 Floats[0].makeNaN(SNaN, Neg, fill);
4628 Floats[1].makeZero(/* Neg = */ false);
4631 APFloat::cmpResult DoubleAPFloat::compare(const DoubleAPFloat &RHS) const {
4632 auto Result = Floats[0].compare(RHS.Floats[0]);
4633 // |Float[0]| > |Float[1]|
4634 if (Result == APFloat::cmpEqual)
4635 return Floats[1].compare(RHS.Floats[1]);
4639 bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat &RHS) const {
4640 return Floats[0].bitwiseIsEqual(RHS.Floats[0]) &&
4641 Floats[1].bitwiseIsEqual(RHS.Floats[1]);
4644 hash_code hash_value(const DoubleAPFloat &Arg) {
4646 return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1]));
4647 return hash_combine(Arg.Semantics);
4650 APInt DoubleAPFloat::bitcastToAPInt() const {
4651 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4653 Floats[0].bitcastToAPInt().getRawData()[0],
4654 Floats[1].bitcastToAPInt().getRawData()[0],
4656 return APInt(128, 2, Data);
4659 Expected<APFloat::opStatus> DoubleAPFloat::convertFromString(StringRef S,
4661 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4662 APFloat Tmp(semPPCDoubleDoubleLegacy);
4663 auto Ret = Tmp.convertFromString(S, RM);
4664 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4668 APFloat::opStatus DoubleAPFloat::next(bool nextDown) {
4669 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4670 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4671 auto Ret = Tmp.next(nextDown);
4672 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4677 DoubleAPFloat::convertToInteger(MutableArrayRef<integerPart> Input,
4678 unsigned int Width, bool IsSigned,
4679 roundingMode RM, bool *IsExact) const {
4680 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4681 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
4682 .convertToInteger(Input, Width, IsSigned, RM, IsExact);
4685 APFloat::opStatus DoubleAPFloat::convertFromAPInt(const APInt &Input,
4688 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4689 APFloat Tmp(semPPCDoubleDoubleLegacy);
4690 auto Ret = Tmp.convertFromAPInt(Input, IsSigned, RM);
4691 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4696 DoubleAPFloat::convertFromSignExtendedInteger(const integerPart *Input,
4697 unsigned int InputSize,
4698 bool IsSigned, roundingMode RM) {
4699 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4700 APFloat Tmp(semPPCDoubleDoubleLegacy);
4701 auto Ret = Tmp.convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM);
4702 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4707 DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart *Input,
4708 unsigned int InputSize,
4709 bool IsSigned, roundingMode RM) {
4710 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4711 APFloat Tmp(semPPCDoubleDoubleLegacy);
4712 auto Ret = Tmp.convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM);
4713 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4717 unsigned int DoubleAPFloat::convertToHexString(char *DST,
4718 unsigned int HexDigits,
4720 roundingMode RM) const {
4721 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4722 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
4723 .convertToHexString(DST, HexDigits, UpperCase, RM);
4726 bool DoubleAPFloat::isDenormal() const {
4727 return getCategory() == fcNormal &&
4728 (Floats[0].isDenormal() || Floats[1].isDenormal() ||
4729 // (double)(Hi + Lo) == Hi defines a normal number.
4730 Floats[0] != Floats[0] + Floats[1]);
4733 bool DoubleAPFloat::isSmallest() const {
4734 if (getCategory() != fcNormal)
4736 DoubleAPFloat Tmp(*this);
4737 Tmp.makeSmallest(this->isNegative());
4738 return Tmp.compare(*this) == cmpEqual;
4741 bool DoubleAPFloat::isLargest() const {
4742 if (getCategory() != fcNormal)
4744 DoubleAPFloat Tmp(*this);
4745 Tmp.makeLargest(this->isNegative());
4746 return Tmp.compare(*this) == cmpEqual;
4749 bool DoubleAPFloat::isInteger() const {
4750 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4751 return Floats[0].isInteger() && Floats[1].isInteger();
4754 void DoubleAPFloat::toString(SmallVectorImpl<char> &Str,
4755 unsigned FormatPrecision,
4756 unsigned FormatMaxPadding,
4757 bool TruncateZero) const {
4758 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4759 APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
4760 .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero);
4763 bool DoubleAPFloat::getExactInverse(APFloat *inv) const {
4764 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4765 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4767 return Tmp.getExactInverse(nullptr);
4768 APFloat Inv(semPPCDoubleDoubleLegacy);
4769 auto Ret = Tmp.getExactInverse(&Inv);
4770 *inv = APFloat(semPPCDoubleDouble, Inv.bitcastToAPInt());
4774 DoubleAPFloat scalbn(DoubleAPFloat Arg, int Exp, APFloat::roundingMode RM) {
4775 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4776 return DoubleAPFloat(semPPCDoubleDouble, scalbn(Arg.Floats[0], Exp, RM),
4777 scalbn(Arg.Floats[1], Exp, RM));
4780 DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
4781 APFloat::roundingMode RM) {
4782 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4783 APFloat First = frexp(Arg.Floats[0], Exp, RM);
4784 APFloat Second = Arg.Floats[1];
4785 if (Arg.getCategory() == APFloat::fcNormal)
4786 Second = scalbn(Second, -Exp, RM);
4787 return DoubleAPFloat(semPPCDoubleDouble, std::move(First), std::move(Second));
4790 } // End detail namespace
4792 APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
4793 if (usesLayout<IEEEFloat>(Semantics)) {
4794 new (&IEEE) IEEEFloat(std::move(F));
4797 if (usesLayout<DoubleAPFloat>(Semantics)) {
4798 const fltSemantics& S = F.getSemantics();
4800 DoubleAPFloat(Semantics, APFloat(std::move(F), S),
4801 APFloat(semIEEEdouble));
4804 llvm_unreachable("Unexpected semantics");
4807 Expected<APFloat::opStatus> APFloat::convertFromString(StringRef Str,
4809 APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str, RM));
4812 hash_code hash_value(const APFloat &Arg) {
4813 if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics()))
4814 return hash_value(Arg.U.IEEE);
4815 if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics()))
4816 return hash_value(Arg.U.Double);
4817 llvm_unreachable("Unexpected semantics");
4820 APFloat::APFloat(const fltSemantics &Semantics, StringRef S)
4821 : APFloat(Semantics) {
4822 auto StatusOrErr = convertFromString(S, rmNearestTiesToEven);
4823 assert(StatusOrErr && "Invalid floating point representation");
4824 consumeError(StatusOrErr.takeError());
4827 APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics,
4828 roundingMode RM, bool *losesInfo) {
4829 if (&getSemantics() == &ToSemantics) {
4833 if (usesLayout<IEEEFloat>(getSemantics()) &&
4834 usesLayout<IEEEFloat>(ToSemantics))
4835 return U.IEEE.convert(ToSemantics, RM, losesInfo);
4836 if (usesLayout<IEEEFloat>(getSemantics()) &&
4837 usesLayout<DoubleAPFloat>(ToSemantics)) {
4838 assert(&ToSemantics == &semPPCDoubleDouble);
4839 auto Ret = U.IEEE.convert(semPPCDoubleDoubleLegacy, RM, losesInfo);
4840 *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt());
4843 if (usesLayout<DoubleAPFloat>(getSemantics()) &&
4844 usesLayout<IEEEFloat>(ToSemantics)) {
4845 auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo);
4846 *this = APFloat(std::move(getIEEE()), ToSemantics);
4849 llvm_unreachable("Unexpected semantics");
4852 APFloat APFloat::getAllOnesValue(const fltSemantics &Semantics,
4853 unsigned BitWidth) {
4854 return APFloat(Semantics, APInt::getAllOnesValue(BitWidth));
4857 void APFloat::print(raw_ostream &OS) const {
4858 SmallVector<char, 16> Buffer;
4860 OS << Buffer << "\n";
4863 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4864 LLVM_DUMP_METHOD void APFloat::dump() const { print(dbgs()); }
4867 void APFloat::Profile(FoldingSetNodeID &NID) const {
4868 NID.Add(bitcastToAPInt());
4871 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
4872 an APSInt, whose initial bit-width and signed-ness are used to determine the
4873 precision of the conversion.
4875 APFloat::opStatus APFloat::convertToInteger(APSInt &result,
4876 roundingMode rounding_mode,
4877 bool *isExact) const {
4878 unsigned bitWidth = result.getBitWidth();
4879 SmallVector<uint64_t, 4> parts(result.getNumWords());
4880 opStatus status = convertToInteger(parts, bitWidth, result.isSigned(),
4881 rounding_mode, isExact);
4882 // Keeps the original signed-ness.
4883 result = APInt(bitWidth, parts);
4887 } // End llvm namespace
4889 #undef APFLOAT_DISPATCH_ON_SEMANTICS