1 //===-- APInt.cpp - Implement APInt class ---------------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a class to represent arbitrary precision integer
11 // constant values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/ADT/APInt.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/FoldingSet.h"
18 #include "llvm/ADT/Hashing.h"
19 #include "llvm/ADT/Optional.h"
20 #include "llvm/ADT/SmallString.h"
21 #include "llvm/ADT/StringRef.h"
22 #include "llvm/ADT/bit.h"
23 #include "llvm/Config/llvm-config.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/ErrorHandling.h"
26 #include "llvm/Support/MathExtras.h"
27 #include "llvm/Support/raw_ostream.h"
34 #define DEBUG_TYPE "apint"
36 /// A utility function for allocating memory, checking for allocation failures,
37 /// and ensuring the contents are zeroed.
38 inline static uint64_t* getClearedMemory(unsigned numWords) {
39 uint64_t *result = new uint64_t[numWords];
40 memset(result, 0, numWords * sizeof(uint64_t));
44 /// A utility function for allocating memory and checking for allocation
45 /// failure. The content is not zeroed.
46 inline static uint64_t* getMemory(unsigned numWords) {
47 return new uint64_t[numWords];
50 /// A utility function that converts a character to a digit.
51 inline static unsigned getDigit(char cdigit, uint8_t radix) {
54 if (radix == 16 || radix == 36) {
78 void APInt::initSlowCase(uint64_t val, bool isSigned) {
79 U.pVal = getClearedMemory(getNumWords());
81 if (isSigned && int64_t(val) < 0)
82 for (unsigned i = 1; i < getNumWords(); ++i)
83 U.pVal[i] = WORDTYPE_MAX;
87 void APInt::initSlowCase(const APInt& that) {
88 U.pVal = getMemory(getNumWords());
89 memcpy(U.pVal, that.U.pVal, getNumWords() * APINT_WORD_SIZE);
92 void APInt::initFromArray(ArrayRef<uint64_t> bigVal) {
93 assert(BitWidth && "Bitwidth too small");
94 assert(bigVal.data() && "Null pointer detected!");
98 // Get memory, cleared to 0
99 U.pVal = getClearedMemory(getNumWords());
100 // Calculate the number of words to copy
101 unsigned words = std::min<unsigned>(bigVal.size(), getNumWords());
102 // Copy the words from bigVal to pVal
103 memcpy(U.pVal, bigVal.data(), words * APINT_WORD_SIZE);
105 // Make sure unused high bits are cleared
109 APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal)
110 : BitWidth(numBits) {
111 initFromArray(bigVal);
114 APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
115 : BitWidth(numBits) {
116 initFromArray(makeArrayRef(bigVal, numWords));
119 APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
120 : BitWidth(numbits) {
121 assert(BitWidth && "Bitwidth too small");
122 fromString(numbits, Str, radix);
125 void APInt::reallocate(unsigned NewBitWidth) {
126 // If the number of words is the same we can just change the width and stop.
127 if (getNumWords() == getNumWords(NewBitWidth)) {
128 BitWidth = NewBitWidth;
132 // If we have an allocation, delete it.
137 BitWidth = NewBitWidth;
139 // If we are supposed to have an allocation, create it.
141 U.pVal = getMemory(getNumWords());
144 void APInt::AssignSlowCase(const APInt& RHS) {
145 // Don't do anything for X = X
149 // Adjust the bit width and handle allocations as necessary.
150 reallocate(RHS.getBitWidth());
156 memcpy(U.pVal, RHS.U.pVal, getNumWords() * APINT_WORD_SIZE);
159 /// This method 'profiles' an APInt for use with FoldingSet.
160 void APInt::Profile(FoldingSetNodeID& ID) const {
161 ID.AddInteger(BitWidth);
163 if (isSingleWord()) {
164 ID.AddInteger(U.VAL);
168 unsigned NumWords = getNumWords();
169 for (unsigned i = 0; i < NumWords; ++i)
170 ID.AddInteger(U.pVal[i]);
173 /// Prefix increment operator. Increments the APInt by one.
174 APInt& APInt::operator++() {
178 tcIncrement(U.pVal, getNumWords());
179 return clearUnusedBits();
182 /// Prefix decrement operator. Decrements the APInt by one.
183 APInt& APInt::operator--() {
187 tcDecrement(U.pVal, getNumWords());
188 return clearUnusedBits();
191 /// Adds the RHS APint to this APInt.
192 /// @returns this, after addition of RHS.
193 /// Addition assignment operator.
194 APInt& APInt::operator+=(const APInt& RHS) {
195 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
199 tcAdd(U.pVal, RHS.U.pVal, 0, getNumWords());
200 return clearUnusedBits();
203 APInt& APInt::operator+=(uint64_t RHS) {
207 tcAddPart(U.pVal, RHS, getNumWords());
208 return clearUnusedBits();
211 /// Subtracts the RHS APInt from this APInt
212 /// @returns this, after subtraction
213 /// Subtraction assignment operator.
214 APInt& APInt::operator-=(const APInt& RHS) {
215 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
219 tcSubtract(U.pVal, RHS.U.pVal, 0, getNumWords());
220 return clearUnusedBits();
223 APInt& APInt::operator-=(uint64_t RHS) {
227 tcSubtractPart(U.pVal, RHS, getNumWords());
228 return clearUnusedBits();
231 APInt APInt::operator*(const APInt& RHS) const {
232 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
234 return APInt(BitWidth, U.VAL * RHS.U.VAL);
236 APInt Result(getMemory(getNumWords()), getBitWidth());
238 tcMultiply(Result.U.pVal, U.pVal, RHS.U.pVal, getNumWords());
240 Result.clearUnusedBits();
244 void APInt::AndAssignSlowCase(const APInt& RHS) {
245 tcAnd(U.pVal, RHS.U.pVal, getNumWords());
248 void APInt::OrAssignSlowCase(const APInt& RHS) {
249 tcOr(U.pVal, RHS.U.pVal, getNumWords());
252 void APInt::XorAssignSlowCase(const APInt& RHS) {
253 tcXor(U.pVal, RHS.U.pVal, getNumWords());
256 APInt& APInt::operator*=(const APInt& RHS) {
257 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
262 APInt& APInt::operator*=(uint64_t RHS) {
263 if (isSingleWord()) {
266 unsigned NumWords = getNumWords();
267 tcMultiplyPart(U.pVal, U.pVal, RHS, 0, NumWords, NumWords, false);
269 return clearUnusedBits();
272 bool APInt::EqualSlowCase(const APInt& RHS) const {
273 return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal);
276 int APInt::compare(const APInt& RHS) const {
277 assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
279 return U.VAL < RHS.U.VAL ? -1 : U.VAL > RHS.U.VAL;
281 return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
284 int APInt::compareSigned(const APInt& RHS) const {
285 assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
286 if (isSingleWord()) {
287 int64_t lhsSext = SignExtend64(U.VAL, BitWidth);
288 int64_t rhsSext = SignExtend64(RHS.U.VAL, BitWidth);
289 return lhsSext < rhsSext ? -1 : lhsSext > rhsSext;
292 bool lhsNeg = isNegative();
293 bool rhsNeg = RHS.isNegative();
295 // If the sign bits don't match, then (LHS < RHS) if LHS is negative
296 if (lhsNeg != rhsNeg)
297 return lhsNeg ? -1 : 1;
299 // Otherwise we can just use an unsigned comparison, because even negative
300 // numbers compare correctly this way if both have the same signed-ness.
301 return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
304 void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
305 unsigned loWord = whichWord(loBit);
306 unsigned hiWord = whichWord(hiBit);
308 // Create an initial mask for the low word with zeros below loBit.
309 uint64_t loMask = WORDTYPE_MAX << whichBit(loBit);
311 // If hiBit is not aligned, we need a high mask.
312 unsigned hiShiftAmt = whichBit(hiBit);
313 if (hiShiftAmt != 0) {
314 // Create a high mask with zeros above hiBit.
315 uint64_t hiMask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt);
316 // If loWord and hiWord are equal, then we combine the masks. Otherwise,
317 // set the bits in hiWord.
318 if (hiWord == loWord)
321 U.pVal[hiWord] |= hiMask;
323 // Apply the mask to the low word.
324 U.pVal[loWord] |= loMask;
326 // Fill any words between loWord and hiWord with all ones.
327 for (unsigned word = loWord + 1; word < hiWord; ++word)
328 U.pVal[word] = WORDTYPE_MAX;
331 /// Toggle every bit to its opposite value.
332 void APInt::flipAllBitsSlowCase() {
333 tcComplement(U.pVal, getNumWords());
337 /// Toggle a given bit to its opposite value whose position is given
338 /// as "bitPosition".
339 /// Toggles a given bit to its opposite value.
340 void APInt::flipBit(unsigned bitPosition) {
341 assert(bitPosition < BitWidth && "Out of the bit-width range!");
342 if ((*this)[bitPosition]) clearBit(bitPosition);
343 else setBit(bitPosition);
346 void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
347 unsigned subBitWidth = subBits.getBitWidth();
348 assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth &&
349 "Illegal bit insertion");
351 // Insertion is a direct copy.
352 if (subBitWidth == BitWidth) {
357 // Single word result can be done as a direct bitmask.
358 if (isSingleWord()) {
359 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
360 U.VAL &= ~(mask << bitPosition);
361 U.VAL |= (subBits.U.VAL << bitPosition);
365 unsigned loBit = whichBit(bitPosition);
366 unsigned loWord = whichWord(bitPosition);
367 unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1);
369 // Insertion within a single word can be done as a direct bitmask.
370 if (loWord == hi1Word) {
371 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
372 U.pVal[loWord] &= ~(mask << loBit);
373 U.pVal[loWord] |= (subBits.U.VAL << loBit);
377 // Insert on word boundaries.
379 // Direct copy whole words.
380 unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD;
381 memcpy(U.pVal + loWord, subBits.getRawData(),
382 numWholeSubWords * APINT_WORD_SIZE);
384 // Mask+insert remaining bits.
385 unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD;
386 if (remainingBits != 0) {
387 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - remainingBits);
388 U.pVal[hi1Word] &= ~mask;
389 U.pVal[hi1Word] |= subBits.getWord(subBitWidth - 1);
394 // General case - set/clear individual bits in dst based on src.
395 // TODO - there is scope for optimization here, but at the moment this code
396 // path is barely used so prefer readability over performance.
397 for (unsigned i = 0; i != subBitWidth; ++i) {
399 setBit(bitPosition + i);
401 clearBit(bitPosition + i);
405 APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const {
406 assert(numBits > 0 && "Can't extract zero bits");
407 assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
408 "Illegal bit extraction");
411 return APInt(numBits, U.VAL >> bitPosition);
413 unsigned loBit = whichBit(bitPosition);
414 unsigned loWord = whichWord(bitPosition);
415 unsigned hiWord = whichWord(bitPosition + numBits - 1);
417 // Single word result extracting bits from a single word source.
418 if (loWord == hiWord)
419 return APInt(numBits, U.pVal[loWord] >> loBit);
421 // Extracting bits that start on a source word boundary can be done
422 // as a fast memory copy.
424 return APInt(numBits, makeArrayRef(U.pVal + loWord, 1 + hiWord - loWord));
426 // General case - shift + copy source words directly into place.
427 APInt Result(numBits, 0);
428 unsigned NumSrcWords = getNumWords();
429 unsigned NumDstWords = Result.getNumWords();
431 uint64_t *DestPtr = Result.isSingleWord() ? &Result.U.VAL : Result.U.pVal;
432 for (unsigned word = 0; word < NumDstWords; ++word) {
433 uint64_t w0 = U.pVal[loWord + word];
435 (loWord + word + 1) < NumSrcWords ? U.pVal[loWord + word + 1] : 0;
436 DestPtr[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit));
439 return Result.clearUnusedBits();
442 unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
443 assert(!str.empty() && "Invalid string length");
444 assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
446 "Radix should be 2, 8, 10, 16, or 36!");
448 size_t slen = str.size();
450 // Each computation below needs to know if it's negative.
451 StringRef::iterator p = str.begin();
452 unsigned isNegative = *p == '-';
453 if (*p == '-' || *p == '+') {
456 assert(slen && "String is only a sign, needs a value.");
459 // For radixes of power-of-two values, the bits required is accurately and
462 return slen + isNegative;
464 return slen * 3 + isNegative;
466 return slen * 4 + isNegative;
470 // This is grossly inefficient but accurate. We could probably do something
471 // with a computation of roughly slen*64/20 and then adjust by the value of
472 // the first few digits. But, I'm not sure how accurate that could be.
474 // Compute a sufficient number of bits that is always large enough but might
475 // be too large. This avoids the assertion in the constructor. This
476 // calculation doesn't work appropriately for the numbers 0-9, so just use 4
477 // bits in that case.
479 = radix == 10? (slen == 1 ? 4 : slen * 64/18)
480 : (slen == 1 ? 7 : slen * 16/3);
482 // Convert to the actual binary value.
483 APInt tmp(sufficient, StringRef(p, slen), radix);
485 // Compute how many bits are required. If the log is infinite, assume we need
487 unsigned log = tmp.logBase2();
488 if (log == (unsigned)-1) {
489 return isNegative + 1;
491 return isNegative + log + 1;
495 hash_code llvm::hash_value(const APInt &Arg) {
496 if (Arg.isSingleWord())
497 return hash_combine(Arg.U.VAL);
499 return hash_combine_range(Arg.U.pVal, Arg.U.pVal + Arg.getNumWords());
502 bool APInt::isSplat(unsigned SplatSizeInBits) const {
503 assert(getBitWidth() % SplatSizeInBits == 0 &&
504 "SplatSizeInBits must divide width!");
505 // We can check that all parts of an integer are equal by making use of a
506 // little trick: rotate and check if it's still the same value.
507 return *this == rotl(SplatSizeInBits);
510 /// This function returns the high "numBits" bits of this APInt.
511 APInt APInt::getHiBits(unsigned numBits) const {
512 return this->lshr(BitWidth - numBits);
515 /// This function returns the low "numBits" bits of this APInt.
516 APInt APInt::getLoBits(unsigned numBits) const {
517 APInt Result(getLowBitsSet(BitWidth, numBits));
522 /// Return a value containing V broadcasted over NewLen bits.
523 APInt APInt::getSplat(unsigned NewLen, const APInt &V) {
524 assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!");
526 APInt Val = V.zextOrSelf(NewLen);
527 for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1)
533 unsigned APInt::countLeadingZerosSlowCase() const {
535 for (int i = getNumWords()-1; i >= 0; --i) {
536 uint64_t V = U.pVal[i];
538 Count += APINT_BITS_PER_WORD;
540 Count += llvm::countLeadingZeros(V);
544 // Adjust for unused bits in the most significant word (they are zero).
545 unsigned Mod = BitWidth % APINT_BITS_PER_WORD;
546 Count -= Mod > 0 ? APINT_BITS_PER_WORD - Mod : 0;
550 unsigned APInt::countLeadingOnesSlowCase() const {
551 unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
554 highWordBits = APINT_BITS_PER_WORD;
557 shift = APINT_BITS_PER_WORD - highWordBits;
559 int i = getNumWords() - 1;
560 unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift);
561 if (Count == highWordBits) {
562 for (i--; i >= 0; --i) {
563 if (U.pVal[i] == WORDTYPE_MAX)
564 Count += APINT_BITS_PER_WORD;
566 Count += llvm::countLeadingOnes(U.pVal[i]);
574 unsigned APInt::countTrailingZerosSlowCase() const {
577 for (; i < getNumWords() && U.pVal[i] == 0; ++i)
578 Count += APINT_BITS_PER_WORD;
579 if (i < getNumWords())
580 Count += llvm::countTrailingZeros(U.pVal[i]);
581 return std::min(Count, BitWidth);
584 unsigned APInt::countTrailingOnesSlowCase() const {
587 for (; i < getNumWords() && U.pVal[i] == WORDTYPE_MAX; ++i)
588 Count += APINT_BITS_PER_WORD;
589 if (i < getNumWords())
590 Count += llvm::countTrailingOnes(U.pVal[i]);
591 assert(Count <= BitWidth);
595 unsigned APInt::countPopulationSlowCase() const {
597 for (unsigned i = 0; i < getNumWords(); ++i)
598 Count += llvm::countPopulation(U.pVal[i]);
602 bool APInt::intersectsSlowCase(const APInt &RHS) const {
603 for (unsigned i = 0, e = getNumWords(); i != e; ++i)
604 if ((U.pVal[i] & RHS.U.pVal[i]) != 0)
610 bool APInt::isSubsetOfSlowCase(const APInt &RHS) const {
611 for (unsigned i = 0, e = getNumWords(); i != e; ++i)
612 if ((U.pVal[i] & ~RHS.U.pVal[i]) != 0)
618 APInt APInt::byteSwap() const {
619 assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
621 return APInt(BitWidth, ByteSwap_16(uint16_t(U.VAL)));
623 return APInt(BitWidth, ByteSwap_32(unsigned(U.VAL)));
624 if (BitWidth == 48) {
625 unsigned Tmp1 = unsigned(U.VAL >> 16);
626 Tmp1 = ByteSwap_32(Tmp1);
627 uint16_t Tmp2 = uint16_t(U.VAL);
628 Tmp2 = ByteSwap_16(Tmp2);
629 return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1);
632 return APInt(BitWidth, ByteSwap_64(U.VAL));
634 APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0);
635 for (unsigned I = 0, N = getNumWords(); I != N; ++I)
636 Result.U.pVal[I] = ByteSwap_64(U.pVal[N - I - 1]);
637 if (Result.BitWidth != BitWidth) {
638 Result.lshrInPlace(Result.BitWidth - BitWidth);
639 Result.BitWidth = BitWidth;
644 APInt APInt::reverseBits() const {
647 return APInt(BitWidth, llvm::reverseBits<uint64_t>(U.VAL));
649 return APInt(BitWidth, llvm::reverseBits<uint32_t>(U.VAL));
651 return APInt(BitWidth, llvm::reverseBits<uint16_t>(U.VAL));
653 return APInt(BitWidth, llvm::reverseBits<uint8_t>(U.VAL));
659 APInt Reversed(BitWidth, 0);
660 unsigned S = BitWidth;
662 for (; Val != 0; Val.lshrInPlace(1)) {
672 APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) {
673 // Fast-path a common case.
674 if (A == B) return A;
676 // Corner cases: if either operand is zero, the other is the gcd.
680 // Count common powers of 2 and remove all other powers of 2.
683 unsigned Pow2_A = A.countTrailingZeros();
684 unsigned Pow2_B = B.countTrailingZeros();
685 if (Pow2_A > Pow2_B) {
686 A.lshrInPlace(Pow2_A - Pow2_B);
688 } else if (Pow2_B > Pow2_A) {
689 B.lshrInPlace(Pow2_B - Pow2_A);
696 // Both operands are odd multiples of 2^Pow_2:
698 // gcd(a, b) = gcd(|a - b| / 2^i, min(a, b))
700 // This is a modified version of Stein's algorithm, taking advantage of
701 // efficient countTrailingZeros().
705 A.lshrInPlace(A.countTrailingZeros() - Pow2);
708 B.lshrInPlace(B.countTrailingZeros() - Pow2);
715 APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
716 uint64_t I = bit_cast<uint64_t>(Double);
718 // Get the sign bit from the highest order bit
719 bool isNeg = I >> 63;
721 // Get the 11-bit exponent and adjust for the 1023 bit bias
722 int64_t exp = ((I >> 52) & 0x7ff) - 1023;
724 // If the exponent is negative, the value is < 0 so just return 0.
726 return APInt(width, 0u);
728 // Extract the mantissa by clearing the top 12 bits (sign + exponent).
729 uint64_t mantissa = (I & (~0ULL >> 12)) | 1ULL << 52;
731 // If the exponent doesn't shift all bits out of the mantissa
733 return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
734 APInt(width, mantissa >> (52 - exp));
736 // If the client didn't provide enough bits for us to shift the mantissa into
737 // then the result is undefined, just return 0
738 if (width <= exp - 52)
739 return APInt(width, 0);
741 // Otherwise, we have to shift the mantissa bits up to the right location
742 APInt Tmp(width, mantissa);
743 Tmp <<= (unsigned)exp - 52;
744 return isNeg ? -Tmp : Tmp;
747 /// This function converts this APInt to a double.
748 /// The layout for double is as following (IEEE Standard 754):
749 /// --------------------------------------
750 /// | Sign Exponent Fraction Bias |
751 /// |-------------------------------------- |
752 /// | 1[63] 11[62-52] 52[51-00] 1023 |
753 /// --------------------------------------
754 double APInt::roundToDouble(bool isSigned) const {
756 // Handle the simple case where the value is contained in one uint64_t.
757 // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
758 if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
760 int64_t sext = SignExtend64(getWord(0), BitWidth);
763 return double(getWord(0));
766 // Determine if the value is negative.
767 bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
769 // Construct the absolute value if we're negative.
770 APInt Tmp(isNeg ? -(*this) : (*this));
772 // Figure out how many bits we're using.
773 unsigned n = Tmp.getActiveBits();
775 // The exponent (without bias normalization) is just the number of bits
776 // we are using. Note that the sign bit is gone since we constructed the
780 // Return infinity for exponent overflow
782 if (!isSigned || !isNeg)
783 return std::numeric_limits<double>::infinity();
785 return -std::numeric_limits<double>::infinity();
787 exp += 1023; // Increment for 1023 bias
789 // Number of bits in mantissa is 52. To obtain the mantissa value, we must
790 // extract the high 52 bits from the correct words in pVal.
792 unsigned hiWord = whichWord(n-1);
794 mantissa = Tmp.U.pVal[0];
796 mantissa >>= n - 52; // shift down, we want the top 52 bits.
798 assert(hiWord > 0 && "huh?");
799 uint64_t hibits = Tmp.U.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
800 uint64_t lobits = Tmp.U.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
801 mantissa = hibits | lobits;
804 // The leading bit of mantissa is implicit, so get rid of it.
805 uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
806 uint64_t I = sign | (exp << 52) | mantissa;
807 return bit_cast<double>(I);
810 // Truncate to new width.
811 APInt APInt::trunc(unsigned width) const {
812 assert(width < BitWidth && "Invalid APInt Truncate request");
813 assert(width && "Can't truncate to 0 bits");
815 if (width <= APINT_BITS_PER_WORD)
816 return APInt(width, getRawData()[0]);
818 APInt Result(getMemory(getNumWords(width)), width);
822 for (i = 0; i != width / APINT_BITS_PER_WORD; i++)
823 Result.U.pVal[i] = U.pVal[i];
825 // Truncate and copy any partial word.
826 unsigned bits = (0 - width) % APINT_BITS_PER_WORD;
828 Result.U.pVal[i] = U.pVal[i] << bits >> bits;
833 // Sign extend to a new width.
834 APInt APInt::sext(unsigned Width) const {
835 assert(Width > BitWidth && "Invalid APInt SignExtend request");
837 if (Width <= APINT_BITS_PER_WORD)
838 return APInt(Width, SignExtend64(U.VAL, BitWidth));
840 APInt Result(getMemory(getNumWords(Width)), Width);
843 std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
845 // Sign extend the last word since there may be unused bits in the input.
846 Result.U.pVal[getNumWords() - 1] =
847 SignExtend64(Result.U.pVal[getNumWords() - 1],
848 ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
850 // Fill with sign bits.
851 std::memset(Result.U.pVal + getNumWords(), isNegative() ? -1 : 0,
852 (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
853 Result.clearUnusedBits();
857 // Zero extend to a new width.
858 APInt APInt::zext(unsigned width) const {
859 assert(width > BitWidth && "Invalid APInt ZeroExtend request");
861 if (width <= APINT_BITS_PER_WORD)
862 return APInt(width, U.VAL);
864 APInt Result(getMemory(getNumWords(width)), width);
867 std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
869 // Zero remaining words.
870 std::memset(Result.U.pVal + getNumWords(), 0,
871 (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
876 APInt APInt::zextOrTrunc(unsigned width) const {
877 if (BitWidth < width)
879 if (BitWidth > width)
884 APInt APInt::sextOrTrunc(unsigned width) const {
885 if (BitWidth < width)
887 if (BitWidth > width)
892 APInt APInt::zextOrSelf(unsigned width) const {
893 if (BitWidth < width)
898 APInt APInt::sextOrSelf(unsigned width) const {
899 if (BitWidth < width)
904 /// Arithmetic right-shift this APInt by shiftAmt.
905 /// Arithmetic right-shift function.
906 void APInt::ashrInPlace(const APInt &shiftAmt) {
907 ashrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
910 /// Arithmetic right-shift this APInt by shiftAmt.
911 /// Arithmetic right-shift function.
912 void APInt::ashrSlowCase(unsigned ShiftAmt) {
913 // Don't bother performing a no-op shift.
917 // Save the original sign bit for later.
918 bool Negative = isNegative();
920 // WordShift is the inter-part shift; BitShift is intra-part shift.
921 unsigned WordShift = ShiftAmt / APINT_BITS_PER_WORD;
922 unsigned BitShift = ShiftAmt % APINT_BITS_PER_WORD;
924 unsigned WordsToMove = getNumWords() - WordShift;
925 if (WordsToMove != 0) {
926 // Sign extend the last word to fill in the unused bits.
927 U.pVal[getNumWords() - 1] = SignExtend64(
928 U.pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
930 // Fastpath for moving by whole words.
932 std::memmove(U.pVal, U.pVal + WordShift, WordsToMove * APINT_WORD_SIZE);
934 // Move the words containing significant bits.
935 for (unsigned i = 0; i != WordsToMove - 1; ++i)
936 U.pVal[i] = (U.pVal[i + WordShift] >> BitShift) |
937 (U.pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift));
939 // Handle the last word which has no high bits to copy.
940 U.pVal[WordsToMove - 1] = U.pVal[WordShift + WordsToMove - 1] >> BitShift;
941 // Sign extend one more time.
942 U.pVal[WordsToMove - 1] =
943 SignExtend64(U.pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift);
947 // Fill in the remainder based on the original sign.
948 std::memset(U.pVal + WordsToMove, Negative ? -1 : 0,
949 WordShift * APINT_WORD_SIZE);
953 /// Logical right-shift this APInt by shiftAmt.
954 /// Logical right-shift function.
955 void APInt::lshrInPlace(const APInt &shiftAmt) {
956 lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
959 /// Logical right-shift this APInt by shiftAmt.
960 /// Logical right-shift function.
961 void APInt::lshrSlowCase(unsigned ShiftAmt) {
962 tcShiftRight(U.pVal, getNumWords(), ShiftAmt);
965 /// Left-shift this APInt by shiftAmt.
966 /// Left-shift function.
967 APInt &APInt::operator<<=(const APInt &shiftAmt) {
968 // It's undefined behavior in C to shift by BitWidth or greater.
969 *this <<= (unsigned)shiftAmt.getLimitedValue(BitWidth);
973 void APInt::shlSlowCase(unsigned ShiftAmt) {
974 tcShiftLeft(U.pVal, getNumWords(), ShiftAmt);
978 // Calculate the rotate amount modulo the bit width.
979 static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) {
980 unsigned rotBitWidth = rotateAmt.getBitWidth();
981 APInt rot = rotateAmt;
982 if (rotBitWidth < BitWidth) {
983 // Extend the rotate APInt, so that the urem doesn't divide by 0.
984 // e.g. APInt(1, 32) would give APInt(1, 0).
985 rot = rotateAmt.zext(BitWidth);
987 rot = rot.urem(APInt(rot.getBitWidth(), BitWidth));
988 return rot.getLimitedValue(BitWidth);
991 APInt APInt::rotl(const APInt &rotateAmt) const {
992 return rotl(rotateModulo(BitWidth, rotateAmt));
995 APInt APInt::rotl(unsigned rotateAmt) const {
996 rotateAmt %= BitWidth;
999 return shl(rotateAmt) | lshr(BitWidth - rotateAmt);
1002 APInt APInt::rotr(const APInt &rotateAmt) const {
1003 return rotr(rotateModulo(BitWidth, rotateAmt));
1006 APInt APInt::rotr(unsigned rotateAmt) const {
1007 rotateAmt %= BitWidth;
1010 return lshr(rotateAmt) | shl(BitWidth - rotateAmt);
1013 // Square Root - this method computes and returns the square root of "this".
1014 // Three mechanisms are used for computation. For small values (<= 5 bits),
1015 // a table lookup is done. This gets some performance for common cases. For
1016 // values using less than 52 bits, the value is converted to double and then
1017 // the libc sqrt function is called. The result is rounded and then converted
1018 // back to a uint64_t which is then used to construct the result. Finally,
1019 // the Babylonian method for computing square roots is used.
1020 APInt APInt::sqrt() const {
1022 // Determine the magnitude of the value.
1023 unsigned magnitude = getActiveBits();
1025 // Use a fast table for some small values. This also gets rid of some
1026 // rounding errors in libc sqrt for small values.
1027 if (magnitude <= 5) {
1028 static const uint8_t results[32] = {
1031 /* 3- 6 */ 2, 2, 2, 2,
1032 /* 7-12 */ 3, 3, 3, 3, 3, 3,
1033 /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1034 /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1037 return APInt(BitWidth, results[ (isSingleWord() ? U.VAL : U.pVal[0]) ]);
1040 // If the magnitude of the value fits in less than 52 bits (the precision of
1041 // an IEEE double precision floating point value), then we can use the
1042 // libc sqrt function which will probably use a hardware sqrt computation.
1043 // This should be faster than the algorithm below.
1044 if (magnitude < 52) {
1045 return APInt(BitWidth,
1046 uint64_t(::round(::sqrt(double(isSingleWord() ? U.VAL
1050 // Okay, all the short cuts are exhausted. We must compute it. The following
1051 // is a classical Babylonian method for computing the square root. This code
1052 // was adapted to APInt from a wikipedia article on such computations.
1053 // See http://www.wikipedia.org/ and go to the page named
1054 // Calculate_an_integer_square_root.
1055 unsigned nbits = BitWidth, i = 4;
1056 APInt testy(BitWidth, 16);
1057 APInt x_old(BitWidth, 1);
1058 APInt x_new(BitWidth, 0);
1059 APInt two(BitWidth, 2);
1061 // Select a good starting value using binary logarithms.
1062 for (;; i += 2, testy = testy.shl(2))
1063 if (i >= nbits || this->ule(testy)) {
1064 x_old = x_old.shl(i / 2);
1068 // Use the Babylonian method to arrive at the integer square root:
1070 x_new = (this->udiv(x_old) + x_old).udiv(two);
1071 if (x_old.ule(x_new))
1076 // Make sure we return the closest approximation
1077 // NOTE: The rounding calculation below is correct. It will produce an
1078 // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1079 // determined to be a rounding issue with pari/gp as it begins to use a
1080 // floating point representation after 192 bits. There are no discrepancies
1081 // between this algorithm and pari/gp for bit widths < 192 bits.
1082 APInt square(x_old * x_old);
1083 APInt nextSquare((x_old + 1) * (x_old +1));
1084 if (this->ult(square))
1086 assert(this->ule(nextSquare) && "Error in APInt::sqrt computation");
1087 APInt midpoint((nextSquare - square).udiv(two));
1088 APInt offset(*this - square);
1089 if (offset.ult(midpoint))
1094 /// Computes the multiplicative inverse of this APInt for a given modulo. The
1095 /// iterative extended Euclidean algorithm is used to solve for this value,
1096 /// however we simplify it to speed up calculating only the inverse, and take
1097 /// advantage of div+rem calculations. We also use some tricks to avoid copying
1098 /// (potentially large) APInts around.
1099 APInt APInt::multiplicativeInverse(const APInt& modulo) const {
1100 assert(ult(modulo) && "This APInt must be smaller than the modulo");
1102 // Using the properties listed at the following web page (accessed 06/21/08):
1103 // http://www.numbertheory.org/php/euclid.html
1104 // (especially the properties numbered 3, 4 and 9) it can be proved that
1105 // BitWidth bits suffice for all the computations in the algorithm implemented
1106 // below. More precisely, this number of bits suffice if the multiplicative
1107 // inverse exists, but may not suffice for the general extended Euclidean
1110 APInt r[2] = { modulo, *this };
1111 APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
1112 APInt q(BitWidth, 0);
1115 for (i = 0; r[i^1] != 0; i ^= 1) {
1116 // An overview of the math without the confusing bit-flipping:
1117 // q = r[i-2] / r[i-1]
1118 // r[i] = r[i-2] % r[i-1]
1119 // t[i] = t[i-2] - t[i-1] * q
1120 udivrem(r[i], r[i^1], q, r[i]);
1124 // If this APInt and the modulo are not coprime, there is no multiplicative
1125 // inverse, so return 0. We check this by looking at the next-to-last
1126 // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1129 return APInt(BitWidth, 0);
1131 // The next-to-last t is the multiplicative inverse. However, we are
1132 // interested in a positive inverse. Calculate a positive one from a negative
1133 // one if necessary. A simple addition of the modulo suffices because
1134 // abs(t[i]) is known to be less than *this/2 (see the link above).
1135 if (t[i].isNegative())
1138 return std::move(t[i]);
1141 /// Calculate the magic numbers required to implement a signed integer division
1142 /// by a constant as a sequence of multiplies, adds and shifts. Requires that
1143 /// the divisor not be 0, 1, or -1. Taken from "Hacker's Delight", Henry S.
1144 /// Warren, Jr., chapter 10.
1145 APInt::ms APInt::magic() const {
1146 const APInt& d = *this;
1148 APInt ad, anc, delta, q1, r1, q2, r2, t;
1149 APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1153 t = signedMin + (d.lshr(d.getBitWidth() - 1));
1154 anc = t - 1 - t.urem(ad); // absolute value of nc
1155 p = d.getBitWidth() - 1; // initialize p
1156 q1 = signedMin.udiv(anc); // initialize q1 = 2p/abs(nc)
1157 r1 = signedMin - q1*anc; // initialize r1 = rem(2p,abs(nc))
1158 q2 = signedMin.udiv(ad); // initialize q2 = 2p/abs(d)
1159 r2 = signedMin - q2*ad; // initialize r2 = rem(2p,abs(d))
1162 q1 = q1<<1; // update q1 = 2p/abs(nc)
1163 r1 = r1<<1; // update r1 = rem(2p/abs(nc))
1164 if (r1.uge(anc)) { // must be unsigned comparison
1168 q2 = q2<<1; // update q2 = 2p/abs(d)
1169 r2 = r2<<1; // update r2 = rem(2p/abs(d))
1170 if (r2.uge(ad)) { // must be unsigned comparison
1175 } while (q1.ult(delta) || (q1 == delta && r1 == 0));
1178 if (d.isNegative()) mag.m = -mag.m; // resulting magic number
1179 mag.s = p - d.getBitWidth(); // resulting shift
1183 /// Calculate the magic numbers required to implement an unsigned integer
1184 /// division by a constant as a sequence of multiplies, adds and shifts.
1185 /// Requires that the divisor not be 0. Taken from "Hacker's Delight", Henry
1186 /// S. Warren, Jr., chapter 10.
1187 /// LeadingZeros can be used to simplify the calculation if the upper bits
1188 /// of the divided value are known zero.
1189 APInt::mu APInt::magicu(unsigned LeadingZeros) const {
1190 const APInt& d = *this;
1192 APInt nc, delta, q1, r1, q2, r2;
1194 magu.a = 0; // initialize "add" indicator
1195 APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros);
1196 APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1197 APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
1199 nc = allOnes - (allOnes - d).urem(d);
1200 p = d.getBitWidth() - 1; // initialize p
1201 q1 = signedMin.udiv(nc); // initialize q1 = 2p/nc
1202 r1 = signedMin - q1*nc; // initialize r1 = rem(2p,nc)
1203 q2 = signedMax.udiv(d); // initialize q2 = (2p-1)/d
1204 r2 = signedMax - q2*d; // initialize r2 = rem((2p-1),d)
1207 if (r1.uge(nc - r1)) {
1208 q1 = q1 + q1 + 1; // update q1
1209 r1 = r1 + r1 - nc; // update r1
1212 q1 = q1+q1; // update q1
1213 r1 = r1+r1; // update r1
1215 if ((r2 + 1).uge(d - r2)) {
1216 if (q2.uge(signedMax)) magu.a = 1;
1217 q2 = q2+q2 + 1; // update q2
1218 r2 = r2+r2 + 1 - d; // update r2
1221 if (q2.uge(signedMin)) magu.a = 1;
1222 q2 = q2+q2; // update q2
1223 r2 = r2+r2 + 1; // update r2
1226 } while (p < d.getBitWidth()*2 &&
1227 (q1.ult(delta) || (q1 == delta && r1 == 0)));
1228 magu.m = q2 + 1; // resulting magic number
1229 magu.s = p - d.getBitWidth(); // resulting shift
1233 /// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1234 /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1235 /// variables here have the same names as in the algorithm. Comments explain
1236 /// the algorithm and any deviation from it.
1237 static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
1238 unsigned m, unsigned n) {
1239 assert(u && "Must provide dividend");
1240 assert(v && "Must provide divisor");
1241 assert(q && "Must provide quotient");
1242 assert(u != v && u != q && v != q && "Must use different memory");
1243 assert(n>1 && "n must be > 1");
1245 // b denotes the base of the number system. In our case b is 2^32.
1246 const uint64_t b = uint64_t(1) << 32;
1248 // The DEBUG macros here tend to be spam in the debug output if you're not
1249 // debugging this code. Disable them unless KNUTH_DEBUG is defined.
1251 #define DEBUG_KNUTH(X) LLVM_DEBUG(X)
1253 #define DEBUG_KNUTH(X) do {} while(false)
1256 DEBUG_KNUTH(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
1257 DEBUG_KNUTH(dbgs() << "KnuthDiv: original:");
1258 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1259 DEBUG_KNUTH(dbgs() << " by");
1260 DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1261 DEBUG_KNUTH(dbgs() << '\n');
1262 // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1263 // u and v by d. Note that we have taken Knuth's advice here to use a power
1264 // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1265 // 2 allows us to shift instead of multiply and it is easy to determine the
1266 // shift amount from the leading zeros. We are basically normalizing the u
1267 // and v so that its high bits are shifted to the top of v's range without
1268 // overflow. Note that this can require an extra word in u so that u must
1269 // be of length m+n+1.
1270 unsigned shift = countLeadingZeros(v[n-1]);
1271 uint32_t v_carry = 0;
1272 uint32_t u_carry = 0;
1274 for (unsigned i = 0; i < m+n; ++i) {
1275 uint32_t u_tmp = u[i] >> (32 - shift);
1276 u[i] = (u[i] << shift) | u_carry;
1279 for (unsigned i = 0; i < n; ++i) {
1280 uint32_t v_tmp = v[i] >> (32 - shift);
1281 v[i] = (v[i] << shift) | v_carry;
1287 DEBUG_KNUTH(dbgs() << "KnuthDiv: normal:");
1288 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1289 DEBUG_KNUTH(dbgs() << " by");
1290 DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1291 DEBUG_KNUTH(dbgs() << '\n');
1293 // D2. [Initialize j.] Set j to m. This is the loop counter over the places.
1296 DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
1297 // D3. [Calculate q'.].
1298 // Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1299 // Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1300 // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1301 // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test
1302 // on v[n-2] determines at high speed most of the cases in which the trial
1303 // value qp is one too large, and it eliminates all cases where qp is two
1305 uint64_t dividend = Make_64(u[j+n], u[j+n-1]);
1306 DEBUG_KNUTH(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
1307 uint64_t qp = dividend / v[n-1];
1308 uint64_t rp = dividend % v[n-1];
1309 if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
1312 if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
1315 DEBUG_KNUTH(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
1317 // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1318 // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1319 // consists of a simple multiplication by a one-place number, combined with
1321 // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1322 // this step is actually negative, (u[j+n]...u[j]) should be left as the
1323 // true value plus b**(n+1), namely as the b's complement of
1324 // the true value, and a "borrow" to the left should be remembered.
1326 for (unsigned i = 0; i < n; ++i) {
1327 uint64_t p = uint64_t(qp) * uint64_t(v[i]);
1328 int64_t subres = int64_t(u[j+i]) - borrow - Lo_32(p);
1329 u[j+i] = Lo_32(subres);
1330 borrow = Hi_32(p) - Hi_32(subres);
1331 DEBUG_KNUTH(dbgs() << "KnuthDiv: u[j+i] = " << u[j + i]
1332 << ", borrow = " << borrow << '\n');
1334 bool isNeg = u[j+n] < borrow;
1335 u[j+n] -= Lo_32(borrow);
1337 DEBUG_KNUTH(dbgs() << "KnuthDiv: after subtraction:");
1338 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1339 DEBUG_KNUTH(dbgs() << '\n');
1341 // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1342 // negative, go to step D6; otherwise go on to step D7.
1345 // D6. [Add back]. The probability that this step is necessary is very
1346 // small, on the order of only 2/b. Make sure that test data accounts for
1347 // this possibility. Decrease q[j] by 1
1349 // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1350 // A carry will occur to the left of u[j+n], and it should be ignored
1351 // since it cancels with the borrow that occurred in D4.
1353 for (unsigned i = 0; i < n; i++) {
1354 uint32_t limit = std::min(u[j+i],v[i]);
1355 u[j+i] += v[i] + carry;
1356 carry = u[j+i] < limit || (carry && u[j+i] == limit);
1360 DEBUG_KNUTH(dbgs() << "KnuthDiv: after correction:");
1361 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1362 DEBUG_KNUTH(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
1364 // D7. [Loop on j.] Decrease j by one. Now if j >= 0, go back to D3.
1367 DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient:");
1368 DEBUG_KNUTH(for (int i = m; i >= 0; i--) dbgs() << " " << q[i]);
1369 DEBUG_KNUTH(dbgs() << '\n');
1371 // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1372 // remainder may be obtained by dividing u[...] by d. If r is non-null we
1373 // compute the remainder (urem uses this).
1375 // The value d is expressed by the "shift" value above since we avoided
1376 // multiplication by d by using a shift left. So, all we have to do is
1377 // shift right here.
1380 DEBUG_KNUTH(dbgs() << "KnuthDiv: remainder:");
1381 for (int i = n-1; i >= 0; i--) {
1382 r[i] = (u[i] >> shift) | carry;
1383 carry = u[i] << (32 - shift);
1384 DEBUG_KNUTH(dbgs() << " " << r[i]);
1387 for (int i = n-1; i >= 0; i--) {
1389 DEBUG_KNUTH(dbgs() << " " << r[i]);
1392 DEBUG_KNUTH(dbgs() << '\n');
1394 DEBUG_KNUTH(dbgs() << '\n');
1397 void APInt::divide(const WordType *LHS, unsigned lhsWords, const WordType *RHS,
1398 unsigned rhsWords, WordType *Quotient, WordType *Remainder) {
1399 assert(lhsWords >= rhsWords && "Fractional result");
1401 // First, compose the values into an array of 32-bit words instead of
1402 // 64-bit words. This is a necessity of both the "short division" algorithm
1403 // and the Knuth "classical algorithm" which requires there to be native
1404 // operations for +, -, and * on an m bit value with an m*2 bit result. We
1405 // can't use 64-bit operands here because we don't have native results of
1406 // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1407 // work on large-endian machines.
1408 unsigned n = rhsWords * 2;
1409 unsigned m = (lhsWords * 2) - n;
1411 // Allocate space for the temporary values we need either on the stack, if
1412 // it will fit, or on the heap if it won't.
1413 uint32_t SPACE[128];
1414 uint32_t *U = nullptr;
1415 uint32_t *V = nullptr;
1416 uint32_t *Q = nullptr;
1417 uint32_t *R = nullptr;
1418 if ((Remainder?4:3)*n+2*m+1 <= 128) {
1421 Q = &SPACE[(m+n+1) + n];
1423 R = &SPACE[(m+n+1) + n + (m+n)];
1425 U = new uint32_t[m + n + 1];
1426 V = new uint32_t[n];
1427 Q = new uint32_t[m+n];
1429 R = new uint32_t[n];
1432 // Initialize the dividend
1433 memset(U, 0, (m+n+1)*sizeof(uint32_t));
1434 for (unsigned i = 0; i < lhsWords; ++i) {
1435 uint64_t tmp = LHS[i];
1436 U[i * 2] = Lo_32(tmp);
1437 U[i * 2 + 1] = Hi_32(tmp);
1439 U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
1441 // Initialize the divisor
1442 memset(V, 0, (n)*sizeof(uint32_t));
1443 for (unsigned i = 0; i < rhsWords; ++i) {
1444 uint64_t tmp = RHS[i];
1445 V[i * 2] = Lo_32(tmp);
1446 V[i * 2 + 1] = Hi_32(tmp);
1449 // initialize the quotient and remainder
1450 memset(Q, 0, (m+n) * sizeof(uint32_t));
1452 memset(R, 0, n * sizeof(uint32_t));
1454 // Now, adjust m and n for the Knuth division. n is the number of words in
1455 // the divisor. m is the number of words by which the dividend exceeds the
1456 // divisor (i.e. m+n is the length of the dividend). These sizes must not
1457 // contain any zero words or the Knuth algorithm fails.
1458 for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
1462 for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
1465 // If we're left with only a single word for the divisor, Knuth doesn't work
1466 // so we implement the short division algorithm here. This is much simpler
1467 // and faster because we are certain that we can divide a 64-bit quantity
1468 // by a 32-bit quantity at hardware speed and short division is simply a
1469 // series of such operations. This is just like doing short division but we
1470 // are using base 2^32 instead of base 10.
1471 assert(n != 0 && "Divide by zero?");
1473 uint32_t divisor = V[0];
1474 uint32_t remainder = 0;
1475 for (int i = m; i >= 0; i--) {
1476 uint64_t partial_dividend = Make_64(remainder, U[i]);
1477 if (partial_dividend == 0) {
1480 } else if (partial_dividend < divisor) {
1482 remainder = Lo_32(partial_dividend);
1483 } else if (partial_dividend == divisor) {
1487 Q[i] = Lo_32(partial_dividend / divisor);
1488 remainder = Lo_32(partial_dividend - (Q[i] * divisor));
1494 // Now we're ready to invoke the Knuth classical divide algorithm. In this
1496 KnuthDiv(U, V, Q, R, m, n);
1499 // If the caller wants the quotient
1501 for (unsigned i = 0; i < lhsWords; ++i)
1502 Quotient[i] = Make_64(Q[i*2+1], Q[i*2]);
1505 // If the caller wants the remainder
1507 for (unsigned i = 0; i < rhsWords; ++i)
1508 Remainder[i] = Make_64(R[i*2+1], R[i*2]);
1511 // Clean up the memory we allocated.
1512 if (U != &SPACE[0]) {
1520 APInt APInt::udiv(const APInt &RHS) const {
1521 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1523 // First, deal with the easy case
1524 if (isSingleWord()) {
1525 assert(RHS.U.VAL != 0 && "Divide by zero?");
1526 return APInt(BitWidth, U.VAL / RHS.U.VAL);
1529 // Get some facts about the LHS and RHS number of bits and words
1530 unsigned lhsWords = getNumWords(getActiveBits());
1531 unsigned rhsBits = RHS.getActiveBits();
1532 unsigned rhsWords = getNumWords(rhsBits);
1533 assert(rhsWords && "Divided by zero???");
1535 // Deal with some degenerate cases
1538 return APInt(BitWidth, 0);
1542 if (lhsWords < rhsWords || this->ult(RHS))
1543 // X / Y ===> 0, iff X < Y
1544 return APInt(BitWidth, 0);
1547 return APInt(BitWidth, 1);
1548 if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1549 // All high words are zero, just use native divide
1550 return APInt(BitWidth, this->U.pVal[0] / RHS.U.pVal[0]);
1552 // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1553 APInt Quotient(BitWidth, 0); // to hold result.
1554 divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, nullptr);
1558 APInt APInt::udiv(uint64_t RHS) const {
1559 assert(RHS != 0 && "Divide by zero?");
1561 // First, deal with the easy case
1563 return APInt(BitWidth, U.VAL / RHS);
1565 // Get some facts about the LHS words.
1566 unsigned lhsWords = getNumWords(getActiveBits());
1568 // Deal with some degenerate cases
1571 return APInt(BitWidth, 0);
1576 // X / Y ===> 0, iff X < Y
1577 return APInt(BitWidth, 0);
1580 return APInt(BitWidth, 1);
1581 if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1582 // All high words are zero, just use native divide
1583 return APInt(BitWidth, this->U.pVal[0] / RHS);
1585 // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1586 APInt Quotient(BitWidth, 0); // to hold result.
1587 divide(U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, nullptr);
1591 APInt APInt::sdiv(const APInt &RHS) const {
1593 if (RHS.isNegative())
1594 return (-(*this)).udiv(-RHS);
1595 return -((-(*this)).udiv(RHS));
1597 if (RHS.isNegative())
1598 return -(this->udiv(-RHS));
1599 return this->udiv(RHS);
1602 APInt APInt::sdiv(int64_t RHS) const {
1605 return (-(*this)).udiv(-RHS);
1606 return -((-(*this)).udiv(RHS));
1609 return -(this->udiv(-RHS));
1610 return this->udiv(RHS);
1613 APInt APInt::urem(const APInt &RHS) const {
1614 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1615 if (isSingleWord()) {
1616 assert(RHS.U.VAL != 0 && "Remainder by zero?");
1617 return APInt(BitWidth, U.VAL % RHS.U.VAL);
1620 // Get some facts about the LHS
1621 unsigned lhsWords = getNumWords(getActiveBits());
1623 // Get some facts about the RHS
1624 unsigned rhsBits = RHS.getActiveBits();
1625 unsigned rhsWords = getNumWords(rhsBits);
1626 assert(rhsWords && "Performing remainder operation by zero ???");
1628 // Check the degenerate cases
1631 return APInt(BitWidth, 0);
1634 return APInt(BitWidth, 0);
1635 if (lhsWords < rhsWords || this->ult(RHS))
1636 // X % Y ===> X, iff X < Y
1640 return APInt(BitWidth, 0);
1642 // All high words are zero, just use native remainder
1643 return APInt(BitWidth, U.pVal[0] % RHS.U.pVal[0]);
1645 // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1646 APInt Remainder(BitWidth, 0);
1647 divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, nullptr, Remainder.U.pVal);
1651 uint64_t APInt::urem(uint64_t RHS) const {
1652 assert(RHS != 0 && "Remainder by zero?");
1657 // Get some facts about the LHS
1658 unsigned lhsWords = getNumWords(getActiveBits());
1660 // Check the degenerate cases
1668 // X % Y ===> X, iff X < Y
1669 return getZExtValue();
1674 // All high words are zero, just use native remainder
1675 return U.pVal[0] % RHS;
1677 // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1679 divide(U.pVal, lhsWords, &RHS, 1, nullptr, &Remainder);
1683 APInt APInt::srem(const APInt &RHS) const {
1685 if (RHS.isNegative())
1686 return -((-(*this)).urem(-RHS));
1687 return -((-(*this)).urem(RHS));
1689 if (RHS.isNegative())
1690 return this->urem(-RHS);
1691 return this->urem(RHS);
1694 int64_t APInt::srem(int64_t RHS) const {
1697 return -((-(*this)).urem(-RHS));
1698 return -((-(*this)).urem(RHS));
1701 return this->urem(-RHS);
1702 return this->urem(RHS);
1705 void APInt::udivrem(const APInt &LHS, const APInt &RHS,
1706 APInt &Quotient, APInt &Remainder) {
1707 assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same");
1708 unsigned BitWidth = LHS.BitWidth;
1710 // First, deal with the easy case
1711 if (LHS.isSingleWord()) {
1712 assert(RHS.U.VAL != 0 && "Divide by zero?");
1713 uint64_t QuotVal = LHS.U.VAL / RHS.U.VAL;
1714 uint64_t RemVal = LHS.U.VAL % RHS.U.VAL;
1715 Quotient = APInt(BitWidth, QuotVal);
1716 Remainder = APInt(BitWidth, RemVal);
1720 // Get some size facts about the dividend and divisor
1721 unsigned lhsWords = getNumWords(LHS.getActiveBits());
1722 unsigned rhsBits = RHS.getActiveBits();
1723 unsigned rhsWords = getNumWords(rhsBits);
1724 assert(rhsWords && "Performing divrem operation by zero ???");
1726 // Check the degenerate cases
1727 if (lhsWords == 0) {
1728 Quotient = APInt(BitWidth, 0); // 0 / Y ===> 0
1729 Remainder = APInt(BitWidth, 0); // 0 % Y ===> 0
1734 Quotient = LHS; // X / 1 ===> X
1735 Remainder = APInt(BitWidth, 0); // X % 1 ===> 0
1738 if (lhsWords < rhsWords || LHS.ult(RHS)) {
1739 Remainder = LHS; // X % Y ===> X, iff X < Y
1740 Quotient = APInt(BitWidth, 0); // X / Y ===> 0, iff X < Y
1745 Quotient = APInt(BitWidth, 1); // X / X ===> 1
1746 Remainder = APInt(BitWidth, 0); // X % X ===> 0;
1750 // Make sure there is enough space to hold the results.
1751 // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1752 // change the size. This is necessary if Quotient or Remainder is aliased
1754 Quotient.reallocate(BitWidth);
1755 Remainder.reallocate(BitWidth);
1757 if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1758 // There is only one word to consider so use the native versions.
1759 uint64_t lhsValue = LHS.U.pVal[0];
1760 uint64_t rhsValue = RHS.U.pVal[0];
1761 Quotient = lhsValue / rhsValue;
1762 Remainder = lhsValue % rhsValue;
1766 // Okay, lets do it the long way
1767 divide(LHS.U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal,
1769 // Clear the rest of the Quotient and Remainder.
1770 std::memset(Quotient.U.pVal + lhsWords, 0,
1771 (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1772 std::memset(Remainder.U.pVal + rhsWords, 0,
1773 (getNumWords(BitWidth) - rhsWords) * APINT_WORD_SIZE);
1776 void APInt::udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient,
1777 uint64_t &Remainder) {
1778 assert(RHS != 0 && "Divide by zero?");
1779 unsigned BitWidth = LHS.BitWidth;
1781 // First, deal with the easy case
1782 if (LHS.isSingleWord()) {
1783 uint64_t QuotVal = LHS.U.VAL / RHS;
1784 Remainder = LHS.U.VAL % RHS;
1785 Quotient = APInt(BitWidth, QuotVal);
1789 // Get some size facts about the dividend and divisor
1790 unsigned lhsWords = getNumWords(LHS.getActiveBits());
1792 // Check the degenerate cases
1793 if (lhsWords == 0) {
1794 Quotient = APInt(BitWidth, 0); // 0 / Y ===> 0
1795 Remainder = 0; // 0 % Y ===> 0
1800 Quotient = LHS; // X / 1 ===> X
1801 Remainder = 0; // X % 1 ===> 0
1806 Remainder = LHS.getZExtValue(); // X % Y ===> X, iff X < Y
1807 Quotient = APInt(BitWidth, 0); // X / Y ===> 0, iff X < Y
1812 Quotient = APInt(BitWidth, 1); // X / X ===> 1
1813 Remainder = 0; // X % X ===> 0;
1817 // Make sure there is enough space to hold the results.
1818 // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1819 // change the size. This is necessary if Quotient is aliased with LHS.
1820 Quotient.reallocate(BitWidth);
1822 if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1823 // There is only one word to consider so use the native versions.
1824 uint64_t lhsValue = LHS.U.pVal[0];
1825 Quotient = lhsValue / RHS;
1826 Remainder = lhsValue % RHS;
1830 // Okay, lets do it the long way
1831 divide(LHS.U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, &Remainder);
1832 // Clear the rest of the Quotient.
1833 std::memset(Quotient.U.pVal + lhsWords, 0,
1834 (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1837 void APInt::sdivrem(const APInt &LHS, const APInt &RHS,
1838 APInt &Quotient, APInt &Remainder) {
1839 if (LHS.isNegative()) {
1840 if (RHS.isNegative())
1841 APInt::udivrem(-LHS, -RHS, Quotient, Remainder);
1843 APInt::udivrem(-LHS, RHS, Quotient, Remainder);
1847 } else if (RHS.isNegative()) {
1848 APInt::udivrem(LHS, -RHS, Quotient, Remainder);
1851 APInt::udivrem(LHS, RHS, Quotient, Remainder);
1855 void APInt::sdivrem(const APInt &LHS, int64_t RHS,
1856 APInt &Quotient, int64_t &Remainder) {
1857 uint64_t R = Remainder;
1858 if (LHS.isNegative()) {
1860 APInt::udivrem(-LHS, -RHS, Quotient, R);
1862 APInt::udivrem(-LHS, RHS, Quotient, R);
1866 } else if (RHS < 0) {
1867 APInt::udivrem(LHS, -RHS, Quotient, R);
1870 APInt::udivrem(LHS, RHS, Quotient, R);
1875 APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const {
1876 APInt Res = *this+RHS;
1877 Overflow = isNonNegative() == RHS.isNonNegative() &&
1878 Res.isNonNegative() != isNonNegative();
1882 APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const {
1883 APInt Res = *this+RHS;
1884 Overflow = Res.ult(RHS);
1888 APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const {
1889 APInt Res = *this - RHS;
1890 Overflow = isNonNegative() != RHS.isNonNegative() &&
1891 Res.isNonNegative() != isNonNegative();
1895 APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const {
1896 APInt Res = *this-RHS;
1897 Overflow = Res.ugt(*this);
1901 APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {
1902 // MININT/-1 --> overflow.
1903 Overflow = isMinSignedValue() && RHS.isAllOnesValue();
1907 APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
1908 APInt Res = *this * RHS;
1910 if (*this != 0 && RHS != 0)
1911 Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS;
1917 APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
1918 APInt Res = *this * RHS;
1920 if (*this != 0 && RHS != 0)
1921 Overflow = Res.udiv(RHS) != *this || Res.udiv(*this) != RHS;
1927 APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const {
1928 Overflow = ShAmt.uge(getBitWidth());
1930 return APInt(BitWidth, 0);
1932 if (isNonNegative()) // Don't allow sign change.
1933 Overflow = ShAmt.uge(countLeadingZeros());
1935 Overflow = ShAmt.uge(countLeadingOnes());
1937 return *this << ShAmt;
1940 APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const {
1941 Overflow = ShAmt.uge(getBitWidth());
1943 return APInt(BitWidth, 0);
1945 Overflow = ShAmt.ugt(countLeadingZeros());
1947 return *this << ShAmt;
1950 APInt APInt::sadd_sat(const APInt &RHS) const {
1952 APInt Res = sadd_ov(RHS, Overflow);
1956 return isNegative() ? APInt::getSignedMinValue(BitWidth)
1957 : APInt::getSignedMaxValue(BitWidth);
1960 APInt APInt::uadd_sat(const APInt &RHS) const {
1962 APInt Res = uadd_ov(RHS, Overflow);
1966 return APInt::getMaxValue(BitWidth);
1969 APInt APInt::ssub_sat(const APInt &RHS) const {
1971 APInt Res = ssub_ov(RHS, Overflow);
1975 return isNegative() ? APInt::getSignedMinValue(BitWidth)
1976 : APInt::getSignedMaxValue(BitWidth);
1979 APInt APInt::usub_sat(const APInt &RHS) const {
1981 APInt Res = usub_ov(RHS, Overflow);
1985 return APInt(BitWidth, 0);
1989 void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
1990 // Check our assumptions here
1991 assert(!str.empty() && "Invalid string length");
1992 assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
1994 "Radix should be 2, 8, 10, 16, or 36!");
1996 StringRef::iterator p = str.begin();
1997 size_t slen = str.size();
1998 bool isNeg = *p == '-';
1999 if (*p == '-' || *p == '+') {
2002 assert(slen && "String is only a sign, needs a value.");
2004 assert((slen <= numbits || radix != 2) && "Insufficient bit width");
2005 assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
2006 assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
2007 assert((((slen-1)*64)/22 <= numbits || radix != 10) &&
2008 "Insufficient bit width");
2010 // Allocate memory if needed
2014 U.pVal = getClearedMemory(getNumWords());
2016 // Figure out if we can shift instead of multiply
2017 unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
2019 // Enter digit traversal loop
2020 for (StringRef::iterator e = str.end(); p != e; ++p) {
2021 unsigned digit = getDigit(*p, radix);
2022 assert(digit < radix && "Invalid character in digit string");
2024 // Shift or multiply the value by the radix
2032 // Add in the digit we just interpreted
2035 // If its negative, put it in two's complement form
2040 void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
2041 bool Signed, bool formatAsCLiteral) const {
2042 assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 ||
2044 "Radix should be 2, 8, 10, 16, or 36!");
2046 const char *Prefix = "";
2047 if (formatAsCLiteral) {
2050 // Binary literals are a non-standard extension added in gcc 4.3:
2051 // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
2063 llvm_unreachable("Invalid radix!");
2067 // First, check for a zero value and just short circuit the logic below.
2070 Str.push_back(*Prefix);
2077 static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
2079 if (isSingleWord()) {
2081 char *BufPtr = std::end(Buffer);
2087 int64_t I = getSExtValue();
2097 Str.push_back(*Prefix);
2102 *--BufPtr = Digits[N % Radix];
2105 Str.append(BufPtr, std::end(Buffer));
2111 if (Signed && isNegative()) {
2112 // They want to print the signed version and it is a negative value
2113 // Flip the bits and add one to turn it into the equivalent positive
2114 // value and put a '-' in the result.
2120 Str.push_back(*Prefix);
2124 // We insert the digits backward, then reverse them to get the right order.
2125 unsigned StartDig = Str.size();
2127 // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2128 // because the number of bits per digit (1, 3 and 4 respectively) divides
2129 // equally. We just shift until the value is zero.
2130 if (Radix == 2 || Radix == 8 || Radix == 16) {
2131 // Just shift tmp right for each digit width until it becomes zero
2132 unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
2133 unsigned MaskAmt = Radix - 1;
2135 while (Tmp.getBoolValue()) {
2136 unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
2137 Str.push_back(Digits[Digit]);
2138 Tmp.lshrInPlace(ShiftAmt);
2141 while (Tmp.getBoolValue()) {
2143 udivrem(Tmp, Radix, Tmp, Digit);
2144 assert(Digit < Radix && "divide failed");
2145 Str.push_back(Digits[Digit]);
2149 // Reverse the digits before returning.
2150 std::reverse(Str.begin()+StartDig, Str.end());
2153 /// Returns the APInt as a std::string. Note that this is an inefficient method.
2154 /// It is better to pass in a SmallVector/SmallString to the methods above.
2155 std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
2157 toString(S, Radix, Signed, /* formatAsCLiteral = */false);
2161 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2162 LLVM_DUMP_METHOD void APInt::dump() const {
2163 SmallString<40> S, U;
2164 this->toStringUnsigned(U);
2165 this->toStringSigned(S);
2166 dbgs() << "APInt(" << BitWidth << "b, "
2167 << U << "u " << S << "s)\n";
2171 void APInt::print(raw_ostream &OS, bool isSigned) const {
2173 this->toString(S, 10, isSigned, /* formatAsCLiteral = */false);
2177 // This implements a variety of operations on a representation of
2178 // arbitrary precision, two's-complement, bignum integer values.
2180 // Assumed by lowHalf, highHalf, partMSB and partLSB. A fairly safe
2181 // and unrestricting assumption.
2182 static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0,
2183 "Part width must be divisible by 2!");
2185 /* Some handy functions local to this file. */
2187 /* Returns the integer part with the least significant BITS set.
2188 BITS cannot be zero. */
2189 static inline APInt::WordType lowBitMask(unsigned bits) {
2190 assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD);
2192 return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits);
2195 /* Returns the value of the lower half of PART. */
2196 static inline APInt::WordType lowHalf(APInt::WordType part) {
2197 return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2);
2200 /* Returns the value of the upper half of PART. */
2201 static inline APInt::WordType highHalf(APInt::WordType part) {
2202 return part >> (APInt::APINT_BITS_PER_WORD / 2);
2205 /* Returns the bit number of the most significant set bit of a part.
2206 If the input number has no bits set -1U is returned. */
2207 static unsigned partMSB(APInt::WordType value) {
2208 return findLastSet(value, ZB_Max);
2211 /* Returns the bit number of the least significant set bit of a
2212 part. If the input number has no bits set -1U is returned. */
2213 static unsigned partLSB(APInt::WordType value) {
2214 return findFirstSet(value, ZB_Max);
2217 /* Sets the least significant part of a bignum to the input value, and
2218 zeroes out higher parts. */
2219 void APInt::tcSet(WordType *dst, WordType part, unsigned parts) {
2223 for (unsigned i = 1; i < parts; i++)
2227 /* Assign one bignum to another. */
2228 void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) {
2229 for (unsigned i = 0; i < parts; i++)
2233 /* Returns true if a bignum is zero, false otherwise. */
2234 bool APInt::tcIsZero(const WordType *src, unsigned parts) {
2235 for (unsigned i = 0; i < parts; i++)
2242 /* Extract the given bit of a bignum; returns 0 or 1. */
2243 int APInt::tcExtractBit(const WordType *parts, unsigned bit) {
2244 return (parts[whichWord(bit)] & maskBit(bit)) != 0;
2247 /* Set the given bit of a bignum. */
2248 void APInt::tcSetBit(WordType *parts, unsigned bit) {
2249 parts[whichWord(bit)] |= maskBit(bit);
2252 /* Clears the given bit of a bignum. */
2253 void APInt::tcClearBit(WordType *parts, unsigned bit) {
2254 parts[whichWord(bit)] &= ~maskBit(bit);
2257 /* Returns the bit number of the least significant set bit of a
2258 number. If the input number has no bits set -1U is returned. */
2259 unsigned APInt::tcLSB(const WordType *parts, unsigned n) {
2260 for (unsigned i = 0; i < n; i++) {
2261 if (parts[i] != 0) {
2262 unsigned lsb = partLSB(parts[i]);
2264 return lsb + i * APINT_BITS_PER_WORD;
2271 /* Returns the bit number of the most significant set bit of a number.
2272 If the input number has no bits set -1U is returned. */
2273 unsigned APInt::tcMSB(const WordType *parts, unsigned n) {
2277 if (parts[n] != 0) {
2278 unsigned msb = partMSB(parts[n]);
2280 return msb + n * APINT_BITS_PER_WORD;
2287 /* Copy the bit vector of width srcBITS from SRC, starting at bit
2288 srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
2289 the least significant bit of DST. All high bits above srcBITS in
2290 DST are zero-filled. */
2292 APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src,
2293 unsigned srcBits, unsigned srcLSB) {
2294 unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
2295 assert(dstParts <= dstCount);
2297 unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD;
2298 tcAssign (dst, src + firstSrcPart, dstParts);
2300 unsigned shift = srcLSB % APINT_BITS_PER_WORD;
2301 tcShiftRight (dst, dstParts, shift);
2303 /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC
2304 in DST. If this is less that srcBits, append the rest, else
2305 clear the high bits. */
2306 unsigned n = dstParts * APINT_BITS_PER_WORD - shift;
2308 WordType mask = lowBitMask (srcBits - n);
2309 dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
2310 << n % APINT_BITS_PER_WORD);
2311 } else if (n > srcBits) {
2312 if (srcBits % APINT_BITS_PER_WORD)
2313 dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD);
2316 /* Clear high parts. */
2317 while (dstParts < dstCount)
2318 dst[dstParts++] = 0;
2321 /* DST += RHS + C where C is zero or one. Returns the carry flag. */
2322 APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs,
2323 WordType c, unsigned parts) {
2326 for (unsigned i = 0; i < parts; i++) {
2327 WordType l = dst[i];
2329 dst[i] += rhs[i] + 1;
2340 /// This function adds a single "word" integer, src, to the multiple
2341 /// "word" integer array, dst[]. dst[] is modified to reflect the addition and
2342 /// 1 is returned if there is a carry out, otherwise 0 is returned.
2343 /// @returns the carry of the addition.
2344 APInt::WordType APInt::tcAddPart(WordType *dst, WordType src,
2346 for (unsigned i = 0; i < parts; ++i) {
2349 return 0; // No need to carry so exit early.
2350 src = 1; // Carry one to next digit.
2356 /* DST -= RHS + C where C is zero or one. Returns the carry flag. */
2357 APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs,
2358 WordType c, unsigned parts) {
2361 for (unsigned i = 0; i < parts; i++) {
2362 WordType l = dst[i];
2364 dst[i] -= rhs[i] + 1;
2375 /// This function subtracts a single "word" (64-bit word), src, from
2376 /// the multi-word integer array, dst[], propagating the borrowed 1 value until
2377 /// no further borrowing is needed or it runs out of "words" in dst. The result
2378 /// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not
2379 /// exhausted. In other words, if src > dst then this function returns 1,
2381 /// @returns the borrow out of the subtraction
2382 APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src,
2384 for (unsigned i = 0; i < parts; ++i) {
2385 WordType Dst = dst[i];
2388 return 0; // No need to borrow so exit early.
2389 src = 1; // We have to "borrow 1" from next "word"
2395 /* Negate a bignum in-place. */
2396 void APInt::tcNegate(WordType *dst, unsigned parts) {
2397 tcComplement(dst, parts);
2398 tcIncrement(dst, parts);
2401 /* DST += SRC * MULTIPLIER + CARRY if add is true
2402 DST = SRC * MULTIPLIER + CARRY if add is false
2404 Requires 0 <= DSTPARTS <= SRCPARTS + 1. If DST overlaps SRC
2405 they must start at the same point, i.e. DST == SRC.
2407 If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2408 returned. Otherwise DST is filled with the least significant
2409 DSTPARTS parts of the result, and if all of the omitted higher
2410 parts were zero return zero, otherwise overflow occurred and
2412 int APInt::tcMultiplyPart(WordType *dst, const WordType *src,
2413 WordType multiplier, WordType carry,
2414 unsigned srcParts, unsigned dstParts,
2416 /* Otherwise our writes of DST kill our later reads of SRC. */
2417 assert(dst <= src || dst >= src + srcParts);
2418 assert(dstParts <= srcParts + 1);
2420 /* N loops; minimum of dstParts and srcParts. */
2421 unsigned n = std::min(dstParts, srcParts);
2423 for (unsigned i = 0; i < n; i++) {
2424 WordType low, mid, high, srcPart;
2426 /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2428 This cannot overflow, because
2430 (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2432 which is less than n^2. */
2436 if (multiplier == 0 || srcPart == 0) {
2440 low = lowHalf(srcPart) * lowHalf(multiplier);
2441 high = highHalf(srcPart) * highHalf(multiplier);
2443 mid = lowHalf(srcPart) * highHalf(multiplier);
2444 high += highHalf(mid);
2445 mid <<= APINT_BITS_PER_WORD / 2;
2446 if (low + mid < low)
2450 mid = highHalf(srcPart) * lowHalf(multiplier);
2451 high += highHalf(mid);
2452 mid <<= APINT_BITS_PER_WORD / 2;
2453 if (low + mid < low)
2457 /* Now add carry. */
2458 if (low + carry < low)
2464 /* And now DST[i], and store the new low part there. */
2465 if (low + dst[i] < low)
2474 if (srcParts < dstParts) {
2475 /* Full multiplication, there is no overflow. */
2476 assert(srcParts + 1 == dstParts);
2477 dst[srcParts] = carry;
2481 /* We overflowed if there is carry. */
2485 /* We would overflow if any significant unwritten parts would be
2486 non-zero. This is true if any remaining src parts are non-zero
2487 and the multiplier is non-zero. */
2489 for (unsigned i = dstParts; i < srcParts; i++)
2493 /* We fitted in the narrow destination. */
2497 /* DST = LHS * RHS, where DST has the same width as the operands and
2498 is filled with the least significant parts of the result. Returns
2499 one if overflow occurred, otherwise zero. DST must be disjoint
2500 from both operands. */
2501 int APInt::tcMultiply(WordType *dst, const WordType *lhs,
2502 const WordType *rhs, unsigned parts) {
2503 assert(dst != lhs && dst != rhs);
2506 tcSet(dst, 0, parts);
2508 for (unsigned i = 0; i < parts; i++)
2509 overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
2515 /// DST = LHS * RHS, where DST has width the sum of the widths of the
2516 /// operands. No overflow occurs. DST must be disjoint from both operands.
2517 void APInt::tcFullMultiply(WordType *dst, const WordType *lhs,
2518 const WordType *rhs, unsigned lhsParts,
2519 unsigned rhsParts) {
2520 /* Put the narrower number on the LHS for less loops below. */
2521 if (lhsParts > rhsParts)
2522 return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
2524 assert(dst != lhs && dst != rhs);
2526 tcSet(dst, 0, rhsParts);
2528 for (unsigned i = 0; i < lhsParts; i++)
2529 tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true);
2532 /* If RHS is zero LHS and REMAINDER are left unchanged, return one.
2533 Otherwise set LHS to LHS / RHS with the fractional part discarded,
2534 set REMAINDER to the remainder, return zero. i.e.
2536 OLD_LHS = RHS * LHS + REMAINDER
2538 SCRATCH is a bignum of the same size as the operands and result for
2539 use by the routine; its contents need not be initialized and are
2540 destroyed. LHS, REMAINDER and SCRATCH must be distinct.
2542 int APInt::tcDivide(WordType *lhs, const WordType *rhs,
2543 WordType *remainder, WordType *srhs,
2545 assert(lhs != remainder && lhs != srhs && remainder != srhs);
2547 unsigned shiftCount = tcMSB(rhs, parts) + 1;
2548 if (shiftCount == 0)
2551 shiftCount = parts * APINT_BITS_PER_WORD - shiftCount;
2552 unsigned n = shiftCount / APINT_BITS_PER_WORD;
2553 WordType mask = (WordType) 1 << (shiftCount % APINT_BITS_PER_WORD);
2555 tcAssign(srhs, rhs, parts);
2556 tcShiftLeft(srhs, parts, shiftCount);
2557 tcAssign(remainder, lhs, parts);
2558 tcSet(lhs, 0, parts);
2560 /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
2563 int compare = tcCompare(remainder, srhs, parts);
2565 tcSubtract(remainder, srhs, 0, parts);
2569 if (shiftCount == 0)
2572 tcShiftRight(srhs, parts, 1);
2573 if ((mask >>= 1) == 0) {
2574 mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1);
2582 /// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are
2583 /// no restrictions on Count.
2584 void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) {
2585 // Don't bother performing a no-op shift.
2589 // WordShift is the inter-part shift; BitShift is the intra-part shift.
2590 unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2591 unsigned BitShift = Count % APINT_BITS_PER_WORD;
2593 // Fastpath for moving by whole words.
2594 if (BitShift == 0) {
2595 std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE);
2597 while (Words-- > WordShift) {
2598 Dst[Words] = Dst[Words - WordShift] << BitShift;
2599 if (Words > WordShift)
2601 Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift);
2605 // Fill in the remainder with 0s.
2606 std::memset(Dst, 0, WordShift * APINT_WORD_SIZE);
2609 /// Shift a bignum right Count bits in-place. Shifted in bits are zero. There
2610 /// are no restrictions on Count.
2611 void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) {
2612 // Don't bother performing a no-op shift.
2616 // WordShift is the inter-part shift; BitShift is the intra-part shift.
2617 unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2618 unsigned BitShift = Count % APINT_BITS_PER_WORD;
2620 unsigned WordsToMove = Words - WordShift;
2621 // Fastpath for moving by whole words.
2622 if (BitShift == 0) {
2623 std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE);
2625 for (unsigned i = 0; i != WordsToMove; ++i) {
2626 Dst[i] = Dst[i + WordShift] >> BitShift;
2627 if (i + 1 != WordsToMove)
2628 Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift);
2632 // Fill in the remainder with 0s.
2633 std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE);
2636 /* Bitwise and of two bignums. */
2637 void APInt::tcAnd(WordType *dst, const WordType *rhs, unsigned parts) {
2638 for (unsigned i = 0; i < parts; i++)
2642 /* Bitwise inclusive or of two bignums. */
2643 void APInt::tcOr(WordType *dst, const WordType *rhs, unsigned parts) {
2644 for (unsigned i = 0; i < parts; i++)
2648 /* Bitwise exclusive or of two bignums. */
2649 void APInt::tcXor(WordType *dst, const WordType *rhs, unsigned parts) {
2650 for (unsigned i = 0; i < parts; i++)
2654 /* Complement a bignum in-place. */
2655 void APInt::tcComplement(WordType *dst, unsigned parts) {
2656 for (unsigned i = 0; i < parts; i++)
2660 /* Comparison (unsigned) of two bignums. */
2661 int APInt::tcCompare(const WordType *lhs, const WordType *rhs,
2665 if (lhs[parts] != rhs[parts])
2666 return (lhs[parts] > rhs[parts]) ? 1 : -1;
2672 /* Set the least significant BITS bits of a bignum, clear the
2674 void APInt::tcSetLeastSignificantBits(WordType *dst, unsigned parts,
2677 while (bits > APINT_BITS_PER_WORD) {
2678 dst[i++] = ~(WordType) 0;
2679 bits -= APINT_BITS_PER_WORD;
2683 dst[i++] = ~(WordType) 0 >> (APINT_BITS_PER_WORD - bits);
2689 APInt llvm::APIntOps::RoundingUDiv(const APInt &A, const APInt &B,
2690 APInt::Rounding RM) {
2691 // Currently udivrem always rounds down.
2693 case APInt::Rounding::DOWN:
2694 case APInt::Rounding::TOWARD_ZERO:
2696 case APInt::Rounding::UP: {
2698 APInt::udivrem(A, B, Quo, Rem);
2704 llvm_unreachable("Unknown APInt::Rounding enum");
2707 APInt llvm::APIntOps::RoundingSDiv(const APInt &A, const APInt &B,
2708 APInt::Rounding RM) {
2710 case APInt::Rounding::DOWN:
2711 case APInt::Rounding::UP: {
2713 APInt::sdivrem(A, B, Quo, Rem);
2716 // This algorithm deals with arbitrary rounding mode used by sdivrem.
2717 // We want to check whether the non-integer part of the mathematical value
2718 // is negative or not. If the non-integer part is negative, we need to round
2719 // down from Quo; otherwise, if it's positive or 0, we return Quo, as it's
2720 // already rounded down.
2721 if (RM == APInt::Rounding::DOWN) {
2722 if (Rem.isNegative() != B.isNegative())
2726 if (Rem.isNegative() != B.isNegative())
2730 // Currently sdiv rounds twards zero.
2731 case APInt::Rounding::TOWARD_ZERO:
2734 llvm_unreachable("Unknown APInt::Rounding enum");
2738 llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
2739 unsigned RangeWidth) {
2740 unsigned CoeffWidth = A.getBitWidth();
2741 assert(CoeffWidth == B.getBitWidth() && CoeffWidth == C.getBitWidth());
2742 assert(RangeWidth <= CoeffWidth &&
2743 "Value range width should be less than coefficient width");
2744 assert(RangeWidth > 1 && "Value range bit width should be > 1");
2746 LLVM_DEBUG(dbgs() << __func__ << ": solving " << A << "x^2 + " << B
2747 << "x + " << C << ", rw:" << RangeWidth << '\n');
2749 // Identify 0 as a (non)solution immediately.
2750 if (C.sextOrTrunc(RangeWidth).isNullValue() ) {
2751 LLVM_DEBUG(dbgs() << __func__ << ": zero solution\n");
2752 return APInt(CoeffWidth, 0);
2755 // The result of APInt arithmetic has the same bit width as the operands,
2756 // so it can actually lose high bits. A product of two n-bit integers needs
2757 // 2n-1 bits to represent the full value.
2758 // The operation done below (on quadratic coefficients) that can produce
2759 // the largest value is the evaluation of the equation during bisection,
2760 // which needs 3 times the bitwidth of the coefficient, so the total number
2761 // of required bits is 3n.
2763 // The purpose of this extension is to simulate the set Z of all integers,
2764 // where n+1 > n for all n in Z. In Z it makes sense to talk about positive
2765 // and negative numbers (not so much in a modulo arithmetic). The method
2766 // used to solve the equation is based on the standard formula for real
2767 // numbers, and uses the concepts of "positive" and "negative" with their
2770 A = A.sext(CoeffWidth);
2771 B = B.sext(CoeffWidth);
2772 C = C.sext(CoeffWidth);
2774 // Make A > 0 for simplicity. Negate cannot overflow at this point because
2775 // the bit width has increased.
2776 if (A.isNegative()) {
2782 // Solving an equation q(x) = 0 with coefficients in modular arithmetic
2783 // is really solving a set of equations q(x) = kR for k = 0, 1, 2, ...,
2784 // and R = 2^BitWidth.
2785 // Since we're trying not only to find exact solutions, but also values
2786 // that "wrap around", such a set will always have a solution, i.e. an x
2787 // that satisfies at least one of the equations, or such that |q(x)|
2788 // exceeds kR, while |q(x-1)| for the same k does not.
2790 // We need to find a value k, such that Ax^2 + Bx + C = kR will have a
2791 // positive solution n (in the above sense), and also such that the n
2792 // will be the least among all solutions corresponding to k = 0, 1, ...
2793 // (more precisely, the least element in the set
2794 // { n(k) | k is such that a solution n(k) exists }).
2796 // Consider the parabola (over real numbers) that corresponds to the
2797 // quadratic equation. Since A > 0, the arms of the parabola will point
2798 // up. Picking different values of k will shift it up and down by R.
2800 // We want to shift the parabola in such a way as to reduce the problem
2801 // of solving q(x) = kR to solving shifted_q(x) = 0.
2802 // (The interesting solutions are the ceilings of the real number
2804 APInt R = APInt::getOneBitSet(CoeffWidth, RangeWidth);
2809 auto RoundUp = [] (const APInt &V, const APInt &A) -> APInt {
2810 assert(A.isStrictlyPositive());
2811 APInt T = V.abs().urem(A);
2812 if (T.isNullValue())
2814 return V.isNegative() ? V+T : V+(A-T);
2817 // The vertex of the parabola is at -B/2A, but since A > 0, it's negative
2818 // iff B is positive.
2819 if (B.isNonNegative()) {
2820 // If B >= 0, the vertex it at a negative location (or at 0), so in
2821 // order to have a non-negative solution we need to pick k that makes
2822 // C-kR negative. To satisfy all the requirements for the solution
2823 // that we are looking for, it needs to be closest to 0 of all k.
2825 if (C.isStrictlyPositive())
2827 // Pick the greater solution.
2830 // If B < 0, the vertex is at a positive location. For any solution
2831 // to exist, the discriminant must be non-negative. This means that
2832 // C-kR <= B^2/4A is a necessary condition for k, i.e. there is a
2833 // lower bound on values of k: kR >= C - B^2/4A.
2834 APInt LowkR = C - SqrB.udiv(2*TwoA); // udiv because all values > 0.
2835 // Round LowkR up (towards +inf) to the nearest kR.
2836 LowkR = RoundUp(LowkR, R);
2838 // If there exists k meeting the condition above, and such that
2839 // C-kR > 0, there will be two positive real number solutions of
2840 // q(x) = kR. Out of all such values of k, pick the one that makes
2841 // C-kR closest to 0, (i.e. pick maximum k such that C-kR > 0).
2842 // In other words, find maximum k such that LowkR <= kR < C.
2844 // If LowkR < C, then such a k is guaranteed to exist because
2845 // LowkR itself is a multiple of R.
2846 C -= -RoundUp(-C, R); // C = C - RoundDown(C, R)
2847 // Pick the smaller solution.
2850 // If C-kR < 0 for all potential k's, it means that one solution
2851 // will be negative, while the other will be positive. The positive
2852 // solution will shift towards 0 if the parabola is moved up.
2853 // Pick the kR closest to the lower bound (i.e. make C-kR closest
2854 // to 0, or in other words, out of all parabolas that have solutions,
2855 // pick the one that is the farthest "up").
2856 // Since LowkR is itself a multiple of R, simply take C-LowkR.
2858 // Pick the greater solution.
2863 LLVM_DEBUG(dbgs() << __func__ << ": updated coefficients " << A << "x^2 + "
2864 << B << "x + " << C << ", rw:" << RangeWidth << '\n');
2866 APInt D = SqrB - 4*A*C;
2867 assert(D.isNonNegative() && "Negative discriminant");
2868 APInt SQ = D.sqrt();
2871 bool InexactSQ = Q != D;
2872 // The calculated SQ may actually be greater than the exact (non-integer)
2873 // value. If that's the case, decremement SQ to get a value that is lower.
2880 // SQ is rounded down (i.e SQ * SQ <= D), so the roots may be inexact.
2881 // When using the quadratic formula directly, the calculated low root
2882 // may be greater than the exact one, since we would be subtracting SQ.
2883 // To make sure that the calculated root is not greater than the exact
2884 // one, subtract SQ+1 when calculating the low root (for inexact value
2887 APInt::sdivrem(-B - (SQ+InexactSQ), TwoA, X, Rem);
2889 APInt::sdivrem(-B + SQ, TwoA, X, Rem);
2891 // The updated coefficients should be such that the (exact) solution is
2892 // positive. Since APInt division rounds towards 0, the calculated one
2893 // can be 0, but cannot be negative.
2894 assert(X.isNonNegative() && "Solution should be non-negative");
2896 if (!InexactSQ && Rem.isNullValue()) {
2897 LLVM_DEBUG(dbgs() << __func__ << ": solution (root): " << X << '\n');
2901 assert((SQ*SQ).sle(D) && "SQ = |_sqrt(D)_|, so SQ*SQ <= D");
2902 // The exact value of the square root of D should be between SQ and SQ+1.
2903 // This implies that the solution should be between that corresponding to
2904 // SQ (i.e. X) and that corresponding to SQ+1.
2906 // The calculated X cannot be greater than the exact (real) solution.
2907 // Actually it must be strictly less than the exact solution, while
2908 // X+1 will be greater than or equal to it.
2910 APInt VX = (A*X + B)*X + C;
2911 APInt VY = VX + TwoA*X + A + B;
2912 bool SignChange = VX.isNegative() != VY.isNegative() ||
2913 VX.isNullValue() != VY.isNullValue();
2914 // If the sign did not change between X and X+1, X is not a valid solution.
2915 // This could happen when the actual (exact) roots don't have an integer
2916 // between them, so they would both be contained between X and X+1.
2918 LLVM_DEBUG(dbgs() << __func__ << ": no valid solution\n");
2923 LLVM_DEBUG(dbgs() << __func__ << ": solution (wrap): " << X << '\n');