1 //===-- APInt.cpp - Implement APInt class ---------------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a class to represent arbitrary precision integer
11 // constant values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/ADT/APInt.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/FoldingSet.h"
18 #include "llvm/ADT/Hashing.h"
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Support/Debug.h"
22 #include "llvm/Support/ErrorHandling.h"
23 #include "llvm/Support/MathExtras.h"
24 #include "llvm/Support/raw_ostream.h"
31 #define DEBUG_TYPE "apint"
33 /// A utility function for allocating memory, checking for allocation failures,
34 /// and ensuring the contents are zeroed.
35 inline static uint64_t* getClearedMemory(unsigned numWords) {
36 uint64_t * result = new uint64_t[numWords];
37 assert(result && "APInt memory allocation fails!");
38 memset(result, 0, numWords * sizeof(uint64_t));
42 /// A utility function for allocating memory and checking for allocation
43 /// failure. The content is not zeroed.
44 inline static uint64_t* getMemory(unsigned numWords) {
45 uint64_t * result = new uint64_t[numWords];
46 assert(result && "APInt memory allocation fails!");
50 /// A utility function that converts a character to a digit.
51 inline static unsigned getDigit(char cdigit, uint8_t radix) {
54 if (radix == 16 || radix == 36) {
78 void APInt::initSlowCase(uint64_t val, bool isSigned) {
80 pVal = getClearedMemory(getNumWords());
82 if (isSigned && int64_t(val) < 0)
83 for (unsigned i = 1; i < getNumWords(); ++i)
88 void APInt::initSlowCase(const APInt& that) {
90 pVal = getMemory(getNumWords());
91 memcpy(pVal, that.pVal, getNumWords() * APINT_WORD_SIZE);
94 void APInt::initFromArray(ArrayRef<uint64_t> bigVal) {
95 assert(BitWidth && "Bitwidth too small");
96 assert(bigVal.data() && "Null pointer detected!");
100 // Get memory, cleared to 0
102 pVal = getClearedMemory(getNumWords());
103 // Calculate the number of words to copy
104 unsigned words = std::min<unsigned>(bigVal.size(), getNumWords());
105 // Copy the words from bigVal to pVal
106 memcpy(pVal, bigVal.data(), words * APINT_WORD_SIZE);
108 // Make sure unused high bits are cleared
112 APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal)
113 : BitWidth(numBits) {
114 initFromArray(bigVal);
117 APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
118 : BitWidth(numBits) {
119 initFromArray(makeArrayRef(bigVal, numWords));
122 APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
123 : VAL(0), BitWidth(numbits) {
124 assert(BitWidth && "Bitwidth too small");
125 fromString(numbits, Str, radix);
128 void APInt::AssignSlowCase(const APInt& RHS) {
129 // Don't do anything for X = X
133 if (BitWidth == RHS.getBitWidth()) {
134 // assume same bit-width single-word case is already handled
135 assert(!isSingleWord());
136 memcpy(pVal, RHS.pVal, getNumWords() * APINT_WORD_SIZE);
140 if (isSingleWord()) {
141 // assume case where both are single words is already handled
142 assert(!RHS.isSingleWord());
144 pVal = getMemory(RHS.getNumWords());
145 memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
146 } else if (getNumWords() == RHS.getNumWords())
147 memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
148 else if (RHS.isSingleWord()) {
153 pVal = getMemory(RHS.getNumWords());
154 memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
156 BitWidth = RHS.BitWidth;
160 /// This method 'profiles' an APInt for use with FoldingSet.
161 void APInt::Profile(FoldingSetNodeID& ID) const {
162 ID.AddInteger(BitWidth);
164 if (isSingleWord()) {
169 unsigned NumWords = getNumWords();
170 for (unsigned i = 0; i < NumWords; ++i)
171 ID.AddInteger(pVal[i]);
174 /// @brief Prefix increment operator. Increments the APInt by one.
175 APInt& APInt::operator++() {
179 tcIncrement(pVal, getNumWords());
180 return clearUnusedBits();
183 /// @brief Prefix decrement operator. Decrements the APInt by one.
184 APInt& APInt::operator--() {
188 tcDecrement(pVal, getNumWords());
189 return clearUnusedBits();
192 /// Adds the RHS APint to this APInt.
193 /// @returns this, after addition of RHS.
194 /// @brief Addition assignment operator.
195 APInt& APInt::operator+=(const APInt& RHS) {
196 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
200 tcAdd(pVal, RHS.pVal, 0, getNumWords());
201 return clearUnusedBits();
204 APInt& APInt::operator+=(uint64_t RHS) {
208 tcAddPart(pVal, RHS, getNumWords());
209 return clearUnusedBits();
212 /// Subtracts the RHS APInt from this APInt
213 /// @returns this, after subtraction
214 /// @brief Subtraction assignment operator.
215 APInt& APInt::operator-=(const APInt& RHS) {
216 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
220 tcSubtract(pVal, RHS.pVal, 0, getNumWords());
221 return clearUnusedBits();
224 APInt& APInt::operator-=(uint64_t RHS) {
228 tcSubtractPart(pVal, RHS, getNumWords());
229 return clearUnusedBits();
232 /// Multiplies an integer array, x, by a uint64_t integer and places the result
234 /// @returns the carry out of the multiplication.
235 /// @brief Multiply a multi-digit APInt by a single digit (64-bit) integer.
236 static uint64_t mul_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) {
237 // Split y into high 32-bit part (hy) and low 32-bit part (ly)
238 uint64_t ly = y & 0xffffffffULL, hy = y >> 32;
241 // For each digit of x.
242 for (unsigned i = 0; i < len; ++i) {
243 // Split x into high and low words
244 uint64_t lx = x[i] & 0xffffffffULL;
245 uint64_t hx = x[i] >> 32;
246 // hasCarry - A flag to indicate if there is a carry to the next digit.
247 // hasCarry == 0, no carry
248 // hasCarry == 1, has carry
249 // hasCarry == 2, no carry and the calculation result == 0.
250 uint8_t hasCarry = 0;
251 dest[i] = carry + lx * ly;
252 // Determine if the add above introduces carry.
253 hasCarry = (dest[i] < carry) ? 1 : 0;
254 carry = hx * ly + (dest[i] >> 32) + (hasCarry ? (1ULL << 32) : 0);
255 // The upper limit of carry can be (2^32 - 1)(2^32 - 1) +
256 // (2^32 - 1) + 2^32 = 2^64.
257 hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0);
259 carry += (lx * hy) & 0xffffffffULL;
260 dest[i] = (carry << 32) | (dest[i] & 0xffffffffULL);
261 carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0) +
262 (carry >> 32) + ((lx * hy) >> 32) + hx * hy;
267 /// Multiplies integer array x by integer array y and stores the result into
268 /// the integer array dest. Note that dest's size must be >= xlen + ylen.
269 /// @brief Generalized multiplication of integer arrays.
270 static void mul(uint64_t dest[], uint64_t x[], unsigned xlen, uint64_t y[],
272 dest[xlen] = mul_1(dest, x, xlen, y[0]);
273 for (unsigned i = 1; i < ylen; ++i) {
274 uint64_t ly = y[i] & 0xffffffffULL, hy = y[i] >> 32;
275 uint64_t carry = 0, lx = 0, hx = 0;
276 for (unsigned j = 0; j < xlen; ++j) {
277 lx = x[j] & 0xffffffffULL;
279 // hasCarry - A flag to indicate if has carry.
280 // hasCarry == 0, no carry
281 // hasCarry == 1, has carry
282 // hasCarry == 2, no carry and the calculation result == 0.
283 uint8_t hasCarry = 0;
284 uint64_t resul = carry + lx * ly;
285 hasCarry = (resul < carry) ? 1 : 0;
286 carry = (hasCarry ? (1ULL << 32) : 0) + hx * ly + (resul >> 32);
287 hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0);
289 carry += (lx * hy) & 0xffffffffULL;
290 resul = (carry << 32) | (resul & 0xffffffffULL);
292 carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0)+
293 (carry >> 32) + (dest[i+j] < resul ? 1 : 0) +
294 ((lx * hy) >> 32) + hx * hy;
296 dest[i+xlen] = carry;
300 APInt& APInt::operator*=(const APInt& RHS) {
301 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
302 if (isSingleWord()) {
308 // Get some bit facts about LHS and check for zero
309 unsigned lhsBits = getActiveBits();
310 unsigned lhsWords = !lhsBits ? 0 : whichWord(lhsBits - 1) + 1;
315 // Get some bit facts about RHS and check for zero
316 unsigned rhsBits = RHS.getActiveBits();
317 unsigned rhsWords = !rhsBits ? 0 : whichWord(rhsBits - 1) + 1;
324 // Allocate space for the result
325 unsigned destWords = rhsWords + lhsWords;
326 uint64_t *dest = getMemory(destWords);
328 // Perform the long multiply
329 mul(dest, pVal, lhsWords, RHS.pVal, rhsWords);
331 // Copy result back into *this
333 unsigned wordsToCopy = destWords >= getNumWords() ? getNumWords() : destWords;
334 memcpy(pVal, dest, wordsToCopy * APINT_WORD_SIZE);
337 // delete dest array and return
342 void APInt::AndAssignSlowCase(const APInt& RHS) {
343 tcAnd(pVal, RHS.pVal, getNumWords());
346 void APInt::OrAssignSlowCase(const APInt& RHS) {
347 tcOr(pVal, RHS.pVal, getNumWords());
350 void APInt::XorAssignSlowCase(const APInt& RHS) {
351 tcXor(pVal, RHS.pVal, getNumWords());
354 APInt APInt::operator*(const APInt& RHS) const {
355 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
357 return APInt(BitWidth, VAL * RHS.VAL);
363 bool APInt::EqualSlowCase(const APInt& RHS) const {
364 return std::equal(pVal, pVal + getNumWords(), RHS.pVal);
367 int APInt::compare(const APInt& RHS) const {
368 assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
370 return VAL < RHS.VAL ? -1 : VAL > RHS.VAL;
372 return tcCompare(pVal, RHS.pVal, getNumWords());
375 int APInt::compareSigned(const APInt& RHS) const {
376 assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
377 if (isSingleWord()) {
378 int64_t lhsSext = SignExtend64(VAL, BitWidth);
379 int64_t rhsSext = SignExtend64(RHS.VAL, BitWidth);
380 return lhsSext < rhsSext ? -1 : lhsSext > rhsSext;
383 bool lhsNeg = isNegative();
384 bool rhsNeg = RHS.isNegative();
386 // If the sign bits don't match, then (LHS < RHS) if LHS is negative
387 if (lhsNeg != rhsNeg)
388 return lhsNeg ? -1 : 1;
390 // Otherwise we can just use an unsigned comparison, because even negative
391 // numbers compare correctly this way if both have the same signed-ness.
392 return tcCompare(pVal, RHS.pVal, getNumWords());
395 void APInt::setBit(unsigned bitPosition) {
397 VAL |= maskBit(bitPosition);
399 pVal[whichWord(bitPosition)] |= maskBit(bitPosition);
402 void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
403 unsigned loWord = whichWord(loBit);
404 unsigned hiWord = whichWord(hiBit);
406 // Create an initial mask for the low word with zeros below loBit.
407 uint64_t loMask = WORD_MAX << whichBit(loBit);
409 // If hiBit is not aligned, we need a high mask.
410 unsigned hiShiftAmt = whichBit(hiBit);
411 if (hiShiftAmt != 0) {
412 // Create a high mask with zeros above hiBit.
413 uint64_t hiMask = WORD_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt);
414 // If loWord and hiWord are equal, then we combine the masks. Otherwise,
415 // set the bits in hiWord.
416 if (hiWord == loWord)
419 pVal[hiWord] |= hiMask;
421 // Apply the mask to the low word.
422 pVal[loWord] |= loMask;
424 // Fill any words between loWord and hiWord with all ones.
425 for (unsigned word = loWord + 1; word < hiWord; ++word)
426 pVal[word] = WORD_MAX;
429 /// Set the given bit to 0 whose position is given as "bitPosition".
430 /// @brief Set a given bit to 0.
431 void APInt::clearBit(unsigned bitPosition) {
433 VAL &= ~maskBit(bitPosition);
435 pVal[whichWord(bitPosition)] &= ~maskBit(bitPosition);
438 /// @brief Toggle every bit to its opposite value.
439 void APInt::flipAllBitsSlowCase() {
440 tcComplement(pVal, getNumWords());
444 /// Toggle a given bit to its opposite value whose position is given
445 /// as "bitPosition".
446 /// @brief Toggles a given bit to its opposite value.
447 void APInt::flipBit(unsigned bitPosition) {
448 assert(bitPosition < BitWidth && "Out of the bit-width range!");
449 if ((*this)[bitPosition]) clearBit(bitPosition);
450 else setBit(bitPosition);
453 void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
454 unsigned subBitWidth = subBits.getBitWidth();
455 assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth &&
456 "Illegal bit insertion");
458 // Insertion is a direct copy.
459 if (subBitWidth == BitWidth) {
464 // Single word result can be done as a direct bitmask.
465 if (isSingleWord()) {
466 uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
467 VAL &= ~(mask << bitPosition);
468 VAL |= (subBits.VAL << bitPosition);
472 unsigned loBit = whichBit(bitPosition);
473 unsigned loWord = whichWord(bitPosition);
474 unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1);
476 // Insertion within a single word can be done as a direct bitmask.
477 if (loWord == hi1Word) {
478 uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
479 pVal[loWord] &= ~(mask << loBit);
480 pVal[loWord] |= (subBits.VAL << loBit);
484 // Insert on word boundaries.
486 // Direct copy whole words.
487 unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD;
488 memcpy(pVal + loWord, subBits.getRawData(),
489 numWholeSubWords * APINT_WORD_SIZE);
491 // Mask+insert remaining bits.
492 unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD;
493 if (remainingBits != 0) {
494 uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - remainingBits);
495 pVal[hi1Word] &= ~mask;
496 pVal[hi1Word] |= subBits.getWord(subBitWidth - 1);
501 // General case - set/clear individual bits in dst based on src.
502 // TODO - there is scope for optimization here, but at the moment this code
503 // path is barely used so prefer readability over performance.
504 for (unsigned i = 0; i != subBitWidth; ++i) {
506 setBit(bitPosition + i);
508 clearBit(bitPosition + i);
512 APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const {
513 assert(numBits > 0 && "Can't extract zero bits");
514 assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
515 "Illegal bit extraction");
518 return APInt(numBits, VAL >> bitPosition);
520 unsigned loBit = whichBit(bitPosition);
521 unsigned loWord = whichWord(bitPosition);
522 unsigned hiWord = whichWord(bitPosition + numBits - 1);
524 // Single word result extracting bits from a single word source.
525 if (loWord == hiWord)
526 return APInt(numBits, pVal[loWord] >> loBit);
528 // Extracting bits that start on a source word boundary can be done
529 // as a fast memory copy.
531 return APInt(numBits, makeArrayRef(pVal + loWord, 1 + hiWord - loWord));
533 // General case - shift + copy source words directly into place.
534 APInt Result(numBits, 0);
535 unsigned NumSrcWords = getNumWords();
536 unsigned NumDstWords = Result.getNumWords();
538 for (unsigned word = 0; word < NumDstWords; ++word) {
539 uint64_t w0 = pVal[loWord + word];
541 (loWord + word + 1) < NumSrcWords ? pVal[loWord + word + 1] : 0;
542 Result.pVal[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit));
545 return Result.clearUnusedBits();
548 unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
549 assert(!str.empty() && "Invalid string length");
550 assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
552 "Radix should be 2, 8, 10, 16, or 36!");
554 size_t slen = str.size();
556 // Each computation below needs to know if it's negative.
557 StringRef::iterator p = str.begin();
558 unsigned isNegative = *p == '-';
559 if (*p == '-' || *p == '+') {
562 assert(slen && "String is only a sign, needs a value.");
565 // For radixes of power-of-two values, the bits required is accurately and
568 return slen + isNegative;
570 return slen * 3 + isNegative;
572 return slen * 4 + isNegative;
576 // This is grossly inefficient but accurate. We could probably do something
577 // with a computation of roughly slen*64/20 and then adjust by the value of
578 // the first few digits. But, I'm not sure how accurate that could be.
580 // Compute a sufficient number of bits that is always large enough but might
581 // be too large. This avoids the assertion in the constructor. This
582 // calculation doesn't work appropriately for the numbers 0-9, so just use 4
583 // bits in that case.
585 = radix == 10? (slen == 1 ? 4 : slen * 64/18)
586 : (slen == 1 ? 7 : slen * 16/3);
588 // Convert to the actual binary value.
589 APInt tmp(sufficient, StringRef(p, slen), radix);
591 // Compute how many bits are required. If the log is infinite, assume we need
593 unsigned log = tmp.logBase2();
594 if (log == (unsigned)-1) {
595 return isNegative + 1;
597 return isNegative + log + 1;
601 hash_code llvm::hash_value(const APInt &Arg) {
602 if (Arg.isSingleWord())
603 return hash_combine(Arg.VAL);
605 return hash_combine_range(Arg.pVal, Arg.pVal + Arg.getNumWords());
608 bool APInt::isSplat(unsigned SplatSizeInBits) const {
609 assert(getBitWidth() % SplatSizeInBits == 0 &&
610 "SplatSizeInBits must divide width!");
611 // We can check that all parts of an integer are equal by making use of a
612 // little trick: rotate and check if it's still the same value.
613 return *this == rotl(SplatSizeInBits);
616 /// This function returns the high "numBits" bits of this APInt.
617 APInt APInt::getHiBits(unsigned numBits) const {
618 return this->lshr(BitWidth - numBits);
621 /// This function returns the low "numBits" bits of this APInt.
622 APInt APInt::getLoBits(unsigned numBits) const {
623 APInt Result(getLowBitsSet(BitWidth, numBits));
628 unsigned APInt::countLeadingZerosSlowCase() const {
630 for (int i = getNumWords()-1; i >= 0; --i) {
631 uint64_t V = pVal[i];
633 Count += APINT_BITS_PER_WORD;
635 Count += llvm::countLeadingZeros(V);
639 // Adjust for unused bits in the most significant word (they are zero).
640 unsigned Mod = BitWidth % APINT_BITS_PER_WORD;
641 Count -= Mod > 0 ? APINT_BITS_PER_WORD - Mod : 0;
645 unsigned APInt::countLeadingOnes() const {
647 return llvm::countLeadingOnes(VAL << (APINT_BITS_PER_WORD - BitWidth));
649 unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
652 highWordBits = APINT_BITS_PER_WORD;
655 shift = APINT_BITS_PER_WORD - highWordBits;
657 int i = getNumWords() - 1;
658 unsigned Count = llvm::countLeadingOnes(pVal[i] << shift);
659 if (Count == highWordBits) {
660 for (i--; i >= 0; --i) {
661 if (pVal[i] == WORD_MAX)
662 Count += APINT_BITS_PER_WORD;
664 Count += llvm::countLeadingOnes(pVal[i]);
672 unsigned APInt::countTrailingZeros() const {
674 return std::min(unsigned(llvm::countTrailingZeros(VAL)), BitWidth);
677 for (; i < getNumWords() && pVal[i] == 0; ++i)
678 Count += APINT_BITS_PER_WORD;
679 if (i < getNumWords())
680 Count += llvm::countTrailingZeros(pVal[i]);
681 return std::min(Count, BitWidth);
684 unsigned APInt::countTrailingOnesSlowCase() const {
687 for (; i < getNumWords() && pVal[i] == WORD_MAX; ++i)
688 Count += APINT_BITS_PER_WORD;
689 if (i < getNumWords())
690 Count += llvm::countTrailingOnes(pVal[i]);
691 assert(Count <= BitWidth);
695 unsigned APInt::countPopulationSlowCase() const {
697 for (unsigned i = 0; i < getNumWords(); ++i)
698 Count += llvm::countPopulation(pVal[i]);
702 bool APInt::intersectsSlowCase(const APInt &RHS) const {
703 for (unsigned i = 0, e = getNumWords(); i != e; ++i)
704 if ((pVal[i] & RHS.pVal[i]) != 0)
710 bool APInt::isSubsetOfSlowCase(const APInt &RHS) const {
711 for (unsigned i = 0, e = getNumWords(); i != e; ++i)
712 if ((pVal[i] & ~RHS.pVal[i]) != 0)
718 APInt APInt::byteSwap() const {
719 assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
721 return APInt(BitWidth, ByteSwap_16(uint16_t(VAL)));
723 return APInt(BitWidth, ByteSwap_32(unsigned(VAL)));
724 if (BitWidth == 48) {
725 unsigned Tmp1 = unsigned(VAL >> 16);
726 Tmp1 = ByteSwap_32(Tmp1);
727 uint16_t Tmp2 = uint16_t(VAL);
728 Tmp2 = ByteSwap_16(Tmp2);
729 return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1);
732 return APInt(BitWidth, ByteSwap_64(VAL));
734 APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0);
735 for (unsigned I = 0, N = getNumWords(); I != N; ++I)
736 Result.pVal[I] = ByteSwap_64(pVal[N - I - 1]);
737 if (Result.BitWidth != BitWidth) {
738 Result.lshrInPlace(Result.BitWidth - BitWidth);
739 Result.BitWidth = BitWidth;
744 APInt APInt::reverseBits() const {
747 return APInt(BitWidth, llvm::reverseBits<uint64_t>(VAL));
749 return APInt(BitWidth, llvm::reverseBits<uint32_t>(VAL));
751 return APInt(BitWidth, llvm::reverseBits<uint16_t>(VAL));
753 return APInt(BitWidth, llvm::reverseBits<uint8_t>(VAL));
759 APInt Reversed(BitWidth, 0);
760 unsigned S = BitWidth;
762 for (; Val != 0; Val.lshrInPlace(1)) {
772 APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) {
773 // Fast-path a common case.
774 if (A == B) return A;
776 // Corner cases: if either operand is zero, the other is the gcd.
780 // Count common powers of 2 and remove all other powers of 2.
783 unsigned Pow2_A = A.countTrailingZeros();
784 unsigned Pow2_B = B.countTrailingZeros();
785 if (Pow2_A > Pow2_B) {
786 A.lshrInPlace(Pow2_A - Pow2_B);
788 } else if (Pow2_B > Pow2_A) {
789 B.lshrInPlace(Pow2_B - Pow2_A);
796 // Both operands are odd multiples of 2^Pow_2:
798 // gcd(a, b) = gcd(|a - b| / 2^i, min(a, b))
800 // This is a modified version of Stein's algorithm, taking advantage of
801 // efficient countTrailingZeros().
805 A.lshrInPlace(A.countTrailingZeros() - Pow2);
808 B.lshrInPlace(B.countTrailingZeros() - Pow2);
815 APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
822 // Get the sign bit from the highest order bit
823 bool isNeg = T.I >> 63;
825 // Get the 11-bit exponent and adjust for the 1023 bit bias
826 int64_t exp = ((T.I >> 52) & 0x7ff) - 1023;
828 // If the exponent is negative, the value is < 0 so just return 0.
830 return APInt(width, 0u);
832 // Extract the mantissa by clearing the top 12 bits (sign + exponent).
833 uint64_t mantissa = (T.I & (~0ULL >> 12)) | 1ULL << 52;
835 // If the exponent doesn't shift all bits out of the mantissa
837 return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
838 APInt(width, mantissa >> (52 - exp));
840 // If the client didn't provide enough bits for us to shift the mantissa into
841 // then the result is undefined, just return 0
842 if (width <= exp - 52)
843 return APInt(width, 0);
845 // Otherwise, we have to shift the mantissa bits up to the right location
846 APInt Tmp(width, mantissa);
847 Tmp = Tmp.shl((unsigned)exp - 52);
848 return isNeg ? -Tmp : Tmp;
851 /// This function converts this APInt to a double.
852 /// The layout for double is as following (IEEE Standard 754):
853 /// --------------------------------------
854 /// | Sign Exponent Fraction Bias |
855 /// |-------------------------------------- |
856 /// | 1[63] 11[62-52] 52[51-00] 1023 |
857 /// --------------------------------------
858 double APInt::roundToDouble(bool isSigned) const {
860 // Handle the simple case where the value is contained in one uint64_t.
861 // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
862 if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
864 int64_t sext = SignExtend64(getWord(0), BitWidth);
867 return double(getWord(0));
870 // Determine if the value is negative.
871 bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
873 // Construct the absolute value if we're negative.
874 APInt Tmp(isNeg ? -(*this) : (*this));
876 // Figure out how many bits we're using.
877 unsigned n = Tmp.getActiveBits();
879 // The exponent (without bias normalization) is just the number of bits
880 // we are using. Note that the sign bit is gone since we constructed the
884 // Return infinity for exponent overflow
886 if (!isSigned || !isNeg)
887 return std::numeric_limits<double>::infinity();
889 return -std::numeric_limits<double>::infinity();
891 exp += 1023; // Increment for 1023 bias
893 // Number of bits in mantissa is 52. To obtain the mantissa value, we must
894 // extract the high 52 bits from the correct words in pVal.
896 unsigned hiWord = whichWord(n-1);
898 mantissa = Tmp.pVal[0];
900 mantissa >>= n - 52; // shift down, we want the top 52 bits.
902 assert(hiWord > 0 && "huh?");
903 uint64_t hibits = Tmp.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
904 uint64_t lobits = Tmp.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
905 mantissa = hibits | lobits;
908 // The leading bit of mantissa is implicit, so get rid of it.
909 uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
914 T.I = sign | (exp << 52) | mantissa;
918 // Truncate to new width.
919 APInt APInt::trunc(unsigned width) const {
920 assert(width < BitWidth && "Invalid APInt Truncate request");
921 assert(width && "Can't truncate to 0 bits");
923 if (width <= APINT_BITS_PER_WORD)
924 return APInt(width, getRawData()[0]);
926 APInt Result(getMemory(getNumWords(width)), width);
930 for (i = 0; i != width / APINT_BITS_PER_WORD; i++)
931 Result.pVal[i] = pVal[i];
933 // Truncate and copy any partial word.
934 unsigned bits = (0 - width) % APINT_BITS_PER_WORD;
936 Result.pVal[i] = pVal[i] << bits >> bits;
941 // Sign extend to a new width.
942 APInt APInt::sext(unsigned Width) const {
943 assert(Width > BitWidth && "Invalid APInt SignExtend request");
945 if (Width <= APINT_BITS_PER_WORD)
946 return APInt(Width, SignExtend64(VAL, BitWidth));
948 APInt Result(getMemory(getNumWords(Width)), Width);
951 std::memcpy(Result.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
953 // Sign extend the last word since there may be unused bits in the input.
954 Result.pVal[getNumWords() - 1] =
955 SignExtend64(Result.pVal[getNumWords() - 1],
956 ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
958 // Fill with sign bits.
959 std::memset(Result.pVal + getNumWords(), isNegative() ? -1 : 0,
960 (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
961 Result.clearUnusedBits();
965 // Zero extend to a new width.
966 APInt APInt::zext(unsigned width) const {
967 assert(width > BitWidth && "Invalid APInt ZeroExtend request");
969 if (width <= APINT_BITS_PER_WORD)
970 return APInt(width, VAL);
972 APInt Result(getMemory(getNumWords(width)), width);
975 std::memcpy(Result.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
977 // Zero remaining words.
978 std::memset(Result.pVal + getNumWords(), 0,
979 (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
984 APInt APInt::zextOrTrunc(unsigned width) const {
985 if (BitWidth < width)
987 if (BitWidth > width)
992 APInt APInt::sextOrTrunc(unsigned width) const {
993 if (BitWidth < width)
995 if (BitWidth > width)
1000 APInt APInt::zextOrSelf(unsigned width) const {
1001 if (BitWidth < width)
1006 APInt APInt::sextOrSelf(unsigned width) const {
1007 if (BitWidth < width)
1012 /// Arithmetic right-shift this APInt by shiftAmt.
1013 /// @brief Arithmetic right-shift function.
1014 void APInt::ashrInPlace(const APInt &shiftAmt) {
1015 ashrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
1018 /// Arithmetic right-shift this APInt by shiftAmt.
1019 /// @brief Arithmetic right-shift function.
1020 void APInt::ashrSlowCase(unsigned ShiftAmt) {
1021 // Don't bother performing a no-op shift.
1025 // Save the original sign bit for later.
1026 bool Negative = isNegative();
1028 // WordShift is the inter-part shift; BitShift is is intra-part shift.
1029 unsigned WordShift = ShiftAmt / APINT_BITS_PER_WORD;
1030 unsigned BitShift = ShiftAmt % APINT_BITS_PER_WORD;
1032 unsigned WordsToMove = getNumWords() - WordShift;
1033 if (WordsToMove != 0) {
1034 // Sign extend the last word to fill in the unused bits.
1035 pVal[getNumWords() - 1] = SignExtend64(
1036 pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
1038 // Fastpath for moving by whole words.
1039 if (BitShift == 0) {
1040 std::memmove(pVal, pVal + WordShift, WordsToMove * APINT_WORD_SIZE);
1042 // Move the words containing significant bits.
1043 for (unsigned i = 0; i != WordsToMove - 1; ++i)
1044 pVal[i] = (pVal[i + WordShift] >> BitShift) |
1045 (pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift));
1047 // Handle the last word which has no high bits to copy.
1048 pVal[WordsToMove - 1] = pVal[WordShift + WordsToMove - 1] >> BitShift;
1049 // Sign extend one more time.
1050 pVal[WordsToMove - 1] =
1051 SignExtend64(pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift);
1055 // Fill in the remainder based on the original sign.
1056 std::memset(pVal + WordsToMove, Negative ? -1 : 0,
1057 WordShift * APINT_WORD_SIZE);
1061 /// Logical right-shift this APInt by shiftAmt.
1062 /// @brief Logical right-shift function.
1063 void APInt::lshrInPlace(const APInt &shiftAmt) {
1064 lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
1067 /// Logical right-shift this APInt by shiftAmt.
1068 /// @brief Logical right-shift function.
1069 void APInt::lshrSlowCase(unsigned ShiftAmt) {
1070 tcShiftRight(pVal, getNumWords(), ShiftAmt);
1073 /// Left-shift this APInt by shiftAmt.
1074 /// @brief Left-shift function.
1075 APInt APInt::shl(const APInt &shiftAmt) const {
1076 // It's undefined behavior in C to shift by BitWidth or greater.
1077 return shl((unsigned)shiftAmt.getLimitedValue(BitWidth));
1080 void APInt::shlSlowCase(unsigned ShiftAmt) {
1081 tcShiftLeft(pVal, getNumWords(), ShiftAmt);
1085 // Calculate the rotate amount modulo the bit width.
1086 static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) {
1087 unsigned rotBitWidth = rotateAmt.getBitWidth();
1088 APInt rot = rotateAmt;
1089 if (rotBitWidth < BitWidth) {
1090 // Extend the rotate APInt, so that the urem doesn't divide by 0.
1091 // e.g. APInt(1, 32) would give APInt(1, 0).
1092 rot = rotateAmt.zext(BitWidth);
1094 rot = rot.urem(APInt(rot.getBitWidth(), BitWidth));
1095 return rot.getLimitedValue(BitWidth);
1098 APInt APInt::rotl(const APInt &rotateAmt) const {
1099 return rotl(rotateModulo(BitWidth, rotateAmt));
1102 APInt APInt::rotl(unsigned rotateAmt) const {
1103 rotateAmt %= BitWidth;
1106 return shl(rotateAmt) | lshr(BitWidth - rotateAmt);
1109 APInt APInt::rotr(const APInt &rotateAmt) const {
1110 return rotr(rotateModulo(BitWidth, rotateAmt));
1113 APInt APInt::rotr(unsigned rotateAmt) const {
1114 rotateAmt %= BitWidth;
1117 return lshr(rotateAmt) | shl(BitWidth - rotateAmt);
1120 // Square Root - this method computes and returns the square root of "this".
1121 // Three mechanisms are used for computation. For small values (<= 5 bits),
1122 // a table lookup is done. This gets some performance for common cases. For
1123 // values using less than 52 bits, the value is converted to double and then
1124 // the libc sqrt function is called. The result is rounded and then converted
1125 // back to a uint64_t which is then used to construct the result. Finally,
1126 // the Babylonian method for computing square roots is used.
1127 APInt APInt::sqrt() const {
1129 // Determine the magnitude of the value.
1130 unsigned magnitude = getActiveBits();
1132 // Use a fast table for some small values. This also gets rid of some
1133 // rounding errors in libc sqrt for small values.
1134 if (magnitude <= 5) {
1135 static const uint8_t results[32] = {
1138 /* 3- 6 */ 2, 2, 2, 2,
1139 /* 7-12 */ 3, 3, 3, 3, 3, 3,
1140 /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1141 /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1144 return APInt(BitWidth, results[ (isSingleWord() ? VAL : pVal[0]) ]);
1147 // If the magnitude of the value fits in less than 52 bits (the precision of
1148 // an IEEE double precision floating point value), then we can use the
1149 // libc sqrt function which will probably use a hardware sqrt computation.
1150 // This should be faster than the algorithm below.
1151 if (magnitude < 52) {
1152 return APInt(BitWidth,
1153 uint64_t(::round(::sqrt(double(isSingleWord()?VAL:pVal[0])))));
1156 // Okay, all the short cuts are exhausted. We must compute it. The following
1157 // is a classical Babylonian method for computing the square root. This code
1158 // was adapted to APInt from a wikipedia article on such computations.
1159 // See http://www.wikipedia.org/ and go to the page named
1160 // Calculate_an_integer_square_root.
1161 unsigned nbits = BitWidth, i = 4;
1162 APInt testy(BitWidth, 16);
1163 APInt x_old(BitWidth, 1);
1164 APInt x_new(BitWidth, 0);
1165 APInt two(BitWidth, 2);
1167 // Select a good starting value using binary logarithms.
1168 for (;; i += 2, testy = testy.shl(2))
1169 if (i >= nbits || this->ule(testy)) {
1170 x_old = x_old.shl(i / 2);
1174 // Use the Babylonian method to arrive at the integer square root:
1176 x_new = (this->udiv(x_old) + x_old).udiv(two);
1177 if (x_old.ule(x_new))
1182 // Make sure we return the closest approximation
1183 // NOTE: The rounding calculation below is correct. It will produce an
1184 // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1185 // determined to be a rounding issue with pari/gp as it begins to use a
1186 // floating point representation after 192 bits. There are no discrepancies
1187 // between this algorithm and pari/gp for bit widths < 192 bits.
1188 APInt square(x_old * x_old);
1189 APInt nextSquare((x_old + 1) * (x_old +1));
1190 if (this->ult(square))
1192 assert(this->ule(nextSquare) && "Error in APInt::sqrt computation");
1193 APInt midpoint((nextSquare - square).udiv(two));
1194 APInt offset(*this - square);
1195 if (offset.ult(midpoint))
1200 /// Computes the multiplicative inverse of this APInt for a given modulo. The
1201 /// iterative extended Euclidean algorithm is used to solve for this value,
1202 /// however we simplify it to speed up calculating only the inverse, and take
1203 /// advantage of div+rem calculations. We also use some tricks to avoid copying
1204 /// (potentially large) APInts around.
1205 APInt APInt::multiplicativeInverse(const APInt& modulo) const {
1206 assert(ult(modulo) && "This APInt must be smaller than the modulo");
1208 // Using the properties listed at the following web page (accessed 06/21/08):
1209 // http://www.numbertheory.org/php/euclid.html
1210 // (especially the properties numbered 3, 4 and 9) it can be proved that
1211 // BitWidth bits suffice for all the computations in the algorithm implemented
1212 // below. More precisely, this number of bits suffice if the multiplicative
1213 // inverse exists, but may not suffice for the general extended Euclidean
1216 APInt r[2] = { modulo, *this };
1217 APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
1218 APInt q(BitWidth, 0);
1221 for (i = 0; r[i^1] != 0; i ^= 1) {
1222 // An overview of the math without the confusing bit-flipping:
1223 // q = r[i-2] / r[i-1]
1224 // r[i] = r[i-2] % r[i-1]
1225 // t[i] = t[i-2] - t[i-1] * q
1226 udivrem(r[i], r[i^1], q, r[i]);
1230 // If this APInt and the modulo are not coprime, there is no multiplicative
1231 // inverse, so return 0. We check this by looking at the next-to-last
1232 // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1235 return APInt(BitWidth, 0);
1237 // The next-to-last t is the multiplicative inverse. However, we are
1238 // interested in a positive inverse. Calcuate a positive one from a negative
1239 // one if necessary. A simple addition of the modulo suffices because
1240 // abs(t[i]) is known to be less than *this/2 (see the link above).
1241 return t[i].isNegative() ? t[i] + modulo : t[i];
1244 /// Calculate the magic numbers required to implement a signed integer division
1245 /// by a constant as a sequence of multiplies, adds and shifts. Requires that
1246 /// the divisor not be 0, 1, or -1. Taken from "Hacker's Delight", Henry S.
1247 /// Warren, Jr., chapter 10.
1248 APInt::ms APInt::magic() const {
1249 const APInt& d = *this;
1251 APInt ad, anc, delta, q1, r1, q2, r2, t;
1252 APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1256 t = signedMin + (d.lshr(d.getBitWidth() - 1));
1257 anc = t - 1 - t.urem(ad); // absolute value of nc
1258 p = d.getBitWidth() - 1; // initialize p
1259 q1 = signedMin.udiv(anc); // initialize q1 = 2p/abs(nc)
1260 r1 = signedMin - q1*anc; // initialize r1 = rem(2p,abs(nc))
1261 q2 = signedMin.udiv(ad); // initialize q2 = 2p/abs(d)
1262 r2 = signedMin - q2*ad; // initialize r2 = rem(2p,abs(d))
1265 q1 = q1<<1; // update q1 = 2p/abs(nc)
1266 r1 = r1<<1; // update r1 = rem(2p/abs(nc))
1267 if (r1.uge(anc)) { // must be unsigned comparison
1271 q2 = q2<<1; // update q2 = 2p/abs(d)
1272 r2 = r2<<1; // update r2 = rem(2p/abs(d))
1273 if (r2.uge(ad)) { // must be unsigned comparison
1278 } while (q1.ult(delta) || (q1 == delta && r1 == 0));
1281 if (d.isNegative()) mag.m = -mag.m; // resulting magic number
1282 mag.s = p - d.getBitWidth(); // resulting shift
1286 /// Calculate the magic numbers required to implement an unsigned integer
1287 /// division by a constant as a sequence of multiplies, adds and shifts.
1288 /// Requires that the divisor not be 0. Taken from "Hacker's Delight", Henry
1289 /// S. Warren, Jr., chapter 10.
1290 /// LeadingZeros can be used to simplify the calculation if the upper bits
1291 /// of the divided value are known zero.
1292 APInt::mu APInt::magicu(unsigned LeadingZeros) const {
1293 const APInt& d = *this;
1295 APInt nc, delta, q1, r1, q2, r2;
1297 magu.a = 0; // initialize "add" indicator
1298 APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros);
1299 APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1300 APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
1302 nc = allOnes - (allOnes - d).urem(d);
1303 p = d.getBitWidth() - 1; // initialize p
1304 q1 = signedMin.udiv(nc); // initialize q1 = 2p/nc
1305 r1 = signedMin - q1*nc; // initialize r1 = rem(2p,nc)
1306 q2 = signedMax.udiv(d); // initialize q2 = (2p-1)/d
1307 r2 = signedMax - q2*d; // initialize r2 = rem((2p-1),d)
1310 if (r1.uge(nc - r1)) {
1311 q1 = q1 + q1 + 1; // update q1
1312 r1 = r1 + r1 - nc; // update r1
1315 q1 = q1+q1; // update q1
1316 r1 = r1+r1; // update r1
1318 if ((r2 + 1).uge(d - r2)) {
1319 if (q2.uge(signedMax)) magu.a = 1;
1320 q2 = q2+q2 + 1; // update q2
1321 r2 = r2+r2 + 1 - d; // update r2
1324 if (q2.uge(signedMin)) magu.a = 1;
1325 q2 = q2+q2; // update q2
1326 r2 = r2+r2 + 1; // update r2
1329 } while (p < d.getBitWidth()*2 &&
1330 (q1.ult(delta) || (q1 == delta && r1 == 0)));
1331 magu.m = q2 + 1; // resulting magic number
1332 magu.s = p - d.getBitWidth(); // resulting shift
1336 /// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1337 /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1338 /// variables here have the same names as in the algorithm. Comments explain
1339 /// the algorithm and any deviation from it.
1340 static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r,
1341 unsigned m, unsigned n) {
1342 assert(u && "Must provide dividend");
1343 assert(v && "Must provide divisor");
1344 assert(q && "Must provide quotient");
1345 assert(u != v && u != q && v != q && "Must use different memory");
1346 assert(n>1 && "n must be > 1");
1348 // b denotes the base of the number system. In our case b is 2^32.
1349 const uint64_t b = uint64_t(1) << 32;
1351 DEBUG(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
1352 DEBUG(dbgs() << "KnuthDiv: original:");
1353 DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1354 DEBUG(dbgs() << " by");
1355 DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]);
1356 DEBUG(dbgs() << '\n');
1357 // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1358 // u and v by d. Note that we have taken Knuth's advice here to use a power
1359 // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1360 // 2 allows us to shift instead of multiply and it is easy to determine the
1361 // shift amount from the leading zeros. We are basically normalizing the u
1362 // and v so that its high bits are shifted to the top of v's range without
1363 // overflow. Note that this can require an extra word in u so that u must
1364 // be of length m+n+1.
1365 unsigned shift = countLeadingZeros(v[n-1]);
1366 unsigned v_carry = 0;
1367 unsigned u_carry = 0;
1369 for (unsigned i = 0; i < m+n; ++i) {
1370 unsigned u_tmp = u[i] >> (32 - shift);
1371 u[i] = (u[i] << shift) | u_carry;
1374 for (unsigned i = 0; i < n; ++i) {
1375 unsigned v_tmp = v[i] >> (32 - shift);
1376 v[i] = (v[i] << shift) | v_carry;
1382 DEBUG(dbgs() << "KnuthDiv: normal:");
1383 DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1384 DEBUG(dbgs() << " by");
1385 DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]);
1386 DEBUG(dbgs() << '\n');
1388 // D2. [Initialize j.] Set j to m. This is the loop counter over the places.
1391 DEBUG(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
1392 // D3. [Calculate q'.].
1393 // Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1394 // Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1395 // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1396 // qp by 1, inrease rp by v[n-1], and repeat this test if rp < b. The test
1397 // on v[n-2] determines at high speed most of the cases in which the trial
1398 // value qp is one too large, and it eliminates all cases where qp is two
1400 uint64_t dividend = ((uint64_t(u[j+n]) << 32) + u[j+n-1]);
1401 DEBUG(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
1402 uint64_t qp = dividend / v[n-1];
1403 uint64_t rp = dividend % v[n-1];
1404 if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
1407 if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
1410 DEBUG(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
1412 // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1413 // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1414 // consists of a simple multiplication by a one-place number, combined with
1416 // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1417 // this step is actually negative, (u[j+n]...u[j]) should be left as the
1418 // true value plus b**(n+1), namely as the b's complement of
1419 // the true value, and a "borrow" to the left should be remembered.
1421 for (unsigned i = 0; i < n; ++i) {
1422 uint64_t p = uint64_t(qp) * uint64_t(v[i]);
1423 int64_t subres = int64_t(u[j+i]) - borrow - (unsigned)p;
1424 u[j+i] = (unsigned)subres;
1425 borrow = (p >> 32) - (subres >> 32);
1426 DEBUG(dbgs() << "KnuthDiv: u[j+i] = " << u[j+i]
1427 << ", borrow = " << borrow << '\n');
1429 bool isNeg = u[j+n] < borrow;
1430 u[j+n] -= (unsigned)borrow;
1432 DEBUG(dbgs() << "KnuthDiv: after subtraction:");
1433 DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1434 DEBUG(dbgs() << '\n');
1436 // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1437 // negative, go to step D6; otherwise go on to step D7.
1438 q[j] = (unsigned)qp;
1440 // D6. [Add back]. The probability that this step is necessary is very
1441 // small, on the order of only 2/b. Make sure that test data accounts for
1442 // this possibility. Decrease q[j] by 1
1444 // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1445 // A carry will occur to the left of u[j+n], and it should be ignored
1446 // since it cancels with the borrow that occurred in D4.
1448 for (unsigned i = 0; i < n; i++) {
1449 unsigned limit = std::min(u[j+i],v[i]);
1450 u[j+i] += v[i] + carry;
1451 carry = u[j+i] < limit || (carry && u[j+i] == limit);
1455 DEBUG(dbgs() << "KnuthDiv: after correction:");
1456 DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1457 DEBUG(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
1459 // D7. [Loop on j.] Decrease j by one. Now if j >= 0, go back to D3.
1462 DEBUG(dbgs() << "KnuthDiv: quotient:");
1463 DEBUG(for (int i = m; i >=0; i--) dbgs() <<" " << q[i]);
1464 DEBUG(dbgs() << '\n');
1466 // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1467 // remainder may be obtained by dividing u[...] by d. If r is non-null we
1468 // compute the remainder (urem uses this).
1470 // The value d is expressed by the "shift" value above since we avoided
1471 // multiplication by d by using a shift left. So, all we have to do is
1472 // shift right here.
1475 DEBUG(dbgs() << "KnuthDiv: remainder:");
1476 for (int i = n-1; i >= 0; i--) {
1477 r[i] = (u[i] >> shift) | carry;
1478 carry = u[i] << (32 - shift);
1479 DEBUG(dbgs() << " " << r[i]);
1482 for (int i = n-1; i >= 0; i--) {
1484 DEBUG(dbgs() << " " << r[i]);
1487 DEBUG(dbgs() << '\n');
1489 DEBUG(dbgs() << '\n');
1492 void APInt::divide(const APInt &LHS, unsigned lhsWords, const APInt &RHS,
1493 unsigned rhsWords, APInt *Quotient, APInt *Remainder) {
1494 assert(lhsWords >= rhsWords && "Fractional result");
1496 // First, compose the values into an array of 32-bit words instead of
1497 // 64-bit words. This is a necessity of both the "short division" algorithm
1498 // and the Knuth "classical algorithm" which requires there to be native
1499 // operations for +, -, and * on an m bit value with an m*2 bit result. We
1500 // can't use 64-bit operands here because we don't have native results of
1501 // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1502 // work on large-endian machines.
1503 uint64_t mask = ~0ull >> (sizeof(unsigned)*CHAR_BIT);
1504 unsigned n = rhsWords * 2;
1505 unsigned m = (lhsWords * 2) - n;
1507 // Allocate space for the temporary values we need either on the stack, if
1508 // it will fit, or on the heap if it won't.
1509 unsigned SPACE[128];
1510 unsigned *U = nullptr;
1511 unsigned *V = nullptr;
1512 unsigned *Q = nullptr;
1513 unsigned *R = nullptr;
1514 if ((Remainder?4:3)*n+2*m+1 <= 128) {
1517 Q = &SPACE[(m+n+1) + n];
1519 R = &SPACE[(m+n+1) + n + (m+n)];
1521 U = new unsigned[m + n + 1];
1522 V = new unsigned[n];
1523 Q = new unsigned[m+n];
1525 R = new unsigned[n];
1528 // Initialize the dividend
1529 memset(U, 0, (m+n+1)*sizeof(unsigned));
1530 for (unsigned i = 0; i < lhsWords; ++i) {
1531 uint64_t tmp = (LHS.getNumWords() == 1 ? LHS.VAL : LHS.pVal[i]);
1532 U[i * 2] = (unsigned)(tmp & mask);
1533 U[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT));
1535 U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
1537 // Initialize the divisor
1538 memset(V, 0, (n)*sizeof(unsigned));
1539 for (unsigned i = 0; i < rhsWords; ++i) {
1540 uint64_t tmp = (RHS.getNumWords() == 1 ? RHS.VAL : RHS.pVal[i]);
1541 V[i * 2] = (unsigned)(tmp & mask);
1542 V[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT));
1545 // initialize the quotient and remainder
1546 memset(Q, 0, (m+n) * sizeof(unsigned));
1548 memset(R, 0, n * sizeof(unsigned));
1550 // Now, adjust m and n for the Knuth division. n is the number of words in
1551 // the divisor. m is the number of words by which the dividend exceeds the
1552 // divisor (i.e. m+n is the length of the dividend). These sizes must not
1553 // contain any zero words or the Knuth algorithm fails.
1554 for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
1558 for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
1561 // If we're left with only a single word for the divisor, Knuth doesn't work
1562 // so we implement the short division algorithm here. This is much simpler
1563 // and faster because we are certain that we can divide a 64-bit quantity
1564 // by a 32-bit quantity at hardware speed and short division is simply a
1565 // series of such operations. This is just like doing short division but we
1566 // are using base 2^32 instead of base 10.
1567 assert(n != 0 && "Divide by zero?");
1569 unsigned divisor = V[0];
1570 unsigned remainder = 0;
1571 for (int i = m+n-1; i >= 0; i--) {
1572 uint64_t partial_dividend = uint64_t(remainder) << 32 | U[i];
1573 if (partial_dividend == 0) {
1576 } else if (partial_dividend < divisor) {
1578 remainder = (unsigned)partial_dividend;
1579 } else if (partial_dividend == divisor) {
1583 Q[i] = (unsigned)(partial_dividend / divisor);
1584 remainder = (unsigned)(partial_dividend - (Q[i] * divisor));
1590 // Now we're ready to invoke the Knuth classical divide algorithm. In this
1592 KnuthDiv(U, V, Q, R, m, n);
1595 // If the caller wants the quotient
1597 // Set up the Quotient value's memory.
1598 if (Quotient->BitWidth != LHS.BitWidth) {
1599 if (Quotient->isSingleWord())
1602 delete [] Quotient->pVal;
1603 Quotient->BitWidth = LHS.BitWidth;
1604 if (!Quotient->isSingleWord())
1605 Quotient->pVal = getClearedMemory(Quotient->getNumWords());
1607 Quotient->clearAllBits();
1609 // The quotient is in Q. Reconstitute the quotient into Quotient's low
1611 // This case is currently dead as all users of divide() handle trivial cases
1613 if (lhsWords == 1) {
1615 uint64_t(Q[0]) | (uint64_t(Q[1]) << (APINT_BITS_PER_WORD / 2));
1616 if (Quotient->isSingleWord())
1617 Quotient->VAL = tmp;
1619 Quotient->pVal[0] = tmp;
1621 assert(!Quotient->isSingleWord() && "Quotient APInt not large enough");
1622 for (unsigned i = 0; i < lhsWords; ++i)
1624 uint64_t(Q[i*2]) | (uint64_t(Q[i*2+1]) << (APINT_BITS_PER_WORD / 2));
1628 // If the caller wants the remainder
1630 // Set up the Remainder value's memory.
1631 if (Remainder->BitWidth != RHS.BitWidth) {
1632 if (Remainder->isSingleWord())
1635 delete [] Remainder->pVal;
1636 Remainder->BitWidth = RHS.BitWidth;
1637 if (!Remainder->isSingleWord())
1638 Remainder->pVal = getClearedMemory(Remainder->getNumWords());
1640 Remainder->clearAllBits();
1642 // The remainder is in R. Reconstitute the remainder into Remainder's low
1644 if (rhsWords == 1) {
1646 uint64_t(R[0]) | (uint64_t(R[1]) << (APINT_BITS_PER_WORD / 2));
1647 if (Remainder->isSingleWord())
1648 Remainder->VAL = tmp;
1650 Remainder->pVal[0] = tmp;
1652 assert(!Remainder->isSingleWord() && "Remainder APInt not large enough");
1653 for (unsigned i = 0; i < rhsWords; ++i)
1654 Remainder->pVal[i] =
1655 uint64_t(R[i*2]) | (uint64_t(R[i*2+1]) << (APINT_BITS_PER_WORD / 2));
1659 // Clean up the memory we allocated.
1660 if (U != &SPACE[0]) {
1668 APInt APInt::udiv(const APInt& RHS) const {
1669 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1671 // First, deal with the easy case
1672 if (isSingleWord()) {
1673 assert(RHS.VAL != 0 && "Divide by zero?");
1674 return APInt(BitWidth, VAL / RHS.VAL);
1677 // Get some facts about the LHS and RHS number of bits and words
1678 unsigned rhsBits = RHS.getActiveBits();
1679 unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1680 assert(rhsWords && "Divided by zero???");
1681 unsigned lhsBits = this->getActiveBits();
1682 unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1);
1684 // Deal with some degenerate cases
1687 return APInt(BitWidth, 0);
1688 else if (lhsWords < rhsWords || this->ult(RHS)) {
1689 // X / Y ===> 0, iff X < Y
1690 return APInt(BitWidth, 0);
1691 } else if (*this == RHS) {
1693 return APInt(BitWidth, 1);
1694 } else if (lhsWords == 1 && rhsWords == 1) {
1695 // All high words are zero, just use native divide
1696 return APInt(BitWidth, this->pVal[0] / RHS.pVal[0]);
1699 // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1700 APInt Quotient(1,0); // to hold result.
1701 divide(*this, lhsWords, RHS, rhsWords, &Quotient, nullptr);
1705 APInt APInt::sdiv(const APInt &RHS) const {
1707 if (RHS.isNegative())
1708 return (-(*this)).udiv(-RHS);
1709 return -((-(*this)).udiv(RHS));
1711 if (RHS.isNegative())
1712 return -(this->udiv(-RHS));
1713 return this->udiv(RHS);
1716 APInt APInt::urem(const APInt& RHS) const {
1717 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1718 if (isSingleWord()) {
1719 assert(RHS.VAL != 0 && "Remainder by zero?");
1720 return APInt(BitWidth, VAL % RHS.VAL);
1723 // Get some facts about the LHS
1724 unsigned lhsBits = getActiveBits();
1725 unsigned lhsWords = !lhsBits ? 0 : (whichWord(lhsBits - 1) + 1);
1727 // Get some facts about the RHS
1728 unsigned rhsBits = RHS.getActiveBits();
1729 unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1730 assert(rhsWords && "Performing remainder operation by zero ???");
1732 // Check the degenerate cases
1733 if (lhsWords == 0) {
1735 return APInt(BitWidth, 0);
1736 } else if (lhsWords < rhsWords || this->ult(RHS)) {
1737 // X % Y ===> X, iff X < Y
1739 } else if (*this == RHS) {
1741 return APInt(BitWidth, 0);
1742 } else if (lhsWords == 1) {
1743 // All high words are zero, just use native remainder
1744 return APInt(BitWidth, pVal[0] % RHS.pVal[0]);
1747 // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1748 APInt Remainder(1,0);
1749 divide(*this, lhsWords, RHS, rhsWords, nullptr, &Remainder);
1753 APInt APInt::srem(const APInt &RHS) const {
1755 if (RHS.isNegative())
1756 return -((-(*this)).urem(-RHS));
1757 return -((-(*this)).urem(RHS));
1759 if (RHS.isNegative())
1760 return this->urem(-RHS);
1761 return this->urem(RHS);
1764 void APInt::udivrem(const APInt &LHS, const APInt &RHS,
1765 APInt &Quotient, APInt &Remainder) {
1766 assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same");
1768 // First, deal with the easy case
1769 if (LHS.isSingleWord()) {
1770 assert(RHS.VAL != 0 && "Divide by zero?");
1771 uint64_t QuotVal = LHS.VAL / RHS.VAL;
1772 uint64_t RemVal = LHS.VAL % RHS.VAL;
1773 Quotient = APInt(LHS.BitWidth, QuotVal);
1774 Remainder = APInt(LHS.BitWidth, RemVal);
1778 // Get some size facts about the dividend and divisor
1779 unsigned lhsBits = LHS.getActiveBits();
1780 unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1);
1781 unsigned rhsBits = RHS.getActiveBits();
1782 unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1784 // Check the degenerate cases
1785 if (lhsWords == 0) {
1786 Quotient = 0; // 0 / Y ===> 0
1787 Remainder = 0; // 0 % Y ===> 0
1791 if (lhsWords < rhsWords || LHS.ult(RHS)) {
1792 Remainder = LHS; // X % Y ===> X, iff X < Y
1793 Quotient = 0; // X / Y ===> 0, iff X < Y
1798 Quotient = 1; // X / X ===> 1
1799 Remainder = 0; // X % X ===> 0;
1803 if (lhsWords == 1 && rhsWords == 1) {
1804 // There is only one word to consider so use the native versions.
1805 uint64_t lhsValue = LHS.isSingleWord() ? LHS.VAL : LHS.pVal[0];
1806 uint64_t rhsValue = RHS.isSingleWord() ? RHS.VAL : RHS.pVal[0];
1807 Quotient = APInt(LHS.getBitWidth(), lhsValue / rhsValue);
1808 Remainder = APInt(LHS.getBitWidth(), lhsValue % rhsValue);
1812 // Okay, lets do it the long way
1813 divide(LHS, lhsWords, RHS, rhsWords, &Quotient, &Remainder);
1816 void APInt::sdivrem(const APInt &LHS, const APInt &RHS,
1817 APInt &Quotient, APInt &Remainder) {
1818 if (LHS.isNegative()) {
1819 if (RHS.isNegative())
1820 APInt::udivrem(-LHS, -RHS, Quotient, Remainder);
1822 APInt::udivrem(-LHS, RHS, Quotient, Remainder);
1823 Quotient = -Quotient;
1825 Remainder = -Remainder;
1826 } else if (RHS.isNegative()) {
1827 APInt::udivrem(LHS, -RHS, Quotient, Remainder);
1828 Quotient = -Quotient;
1830 APInt::udivrem(LHS, RHS, Quotient, Remainder);
1834 APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const {
1835 APInt Res = *this+RHS;
1836 Overflow = isNonNegative() == RHS.isNonNegative() &&
1837 Res.isNonNegative() != isNonNegative();
1841 APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const {
1842 APInt Res = *this+RHS;
1843 Overflow = Res.ult(RHS);
1847 APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const {
1848 APInt Res = *this - RHS;
1849 Overflow = isNonNegative() != RHS.isNonNegative() &&
1850 Res.isNonNegative() != isNonNegative();
1854 APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const {
1855 APInt Res = *this-RHS;
1856 Overflow = Res.ugt(*this);
1860 APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {
1861 // MININT/-1 --> overflow.
1862 Overflow = isMinSignedValue() && RHS.isAllOnesValue();
1866 APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
1867 APInt Res = *this * RHS;
1869 if (*this != 0 && RHS != 0)
1870 Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS;
1876 APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
1877 APInt Res = *this * RHS;
1879 if (*this != 0 && RHS != 0)
1880 Overflow = Res.udiv(RHS) != *this || Res.udiv(*this) != RHS;
1886 APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const {
1887 Overflow = ShAmt.uge(getBitWidth());
1889 return APInt(BitWidth, 0);
1891 if (isNonNegative()) // Don't allow sign change.
1892 Overflow = ShAmt.uge(countLeadingZeros());
1894 Overflow = ShAmt.uge(countLeadingOnes());
1896 return *this << ShAmt;
1899 APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const {
1900 Overflow = ShAmt.uge(getBitWidth());
1902 return APInt(BitWidth, 0);
1904 Overflow = ShAmt.ugt(countLeadingZeros());
1906 return *this << ShAmt;
1912 void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
1913 // Check our assumptions here
1914 assert(!str.empty() && "Invalid string length");
1915 assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
1917 "Radix should be 2, 8, 10, 16, or 36!");
1919 StringRef::iterator p = str.begin();
1920 size_t slen = str.size();
1921 bool isNeg = *p == '-';
1922 if (*p == '-' || *p == '+') {
1925 assert(slen && "String is only a sign, needs a value.");
1927 assert((slen <= numbits || radix != 2) && "Insufficient bit width");
1928 assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
1929 assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
1930 assert((((slen-1)*64)/22 <= numbits || radix != 10) &&
1931 "Insufficient bit width");
1934 if (!isSingleWord())
1935 pVal = getClearedMemory(getNumWords());
1937 // Figure out if we can shift instead of multiply
1938 unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
1940 // Set up an APInt for the radix multiplier outside the loop so we don't
1941 // constantly construct/destruct it.
1942 APInt apradix(getBitWidth(), radix);
1944 // Enter digit traversal loop
1945 for (StringRef::iterator e = str.end(); p != e; ++p) {
1946 unsigned digit = getDigit(*p, radix);
1947 assert(digit < radix && "Invalid character in digit string");
1949 // Shift or multiply the value by the radix
1957 // Add in the digit we just interpreted
1960 // If its negative, put it in two's complement form
1963 this->flipAllBits();
1967 void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
1968 bool Signed, bool formatAsCLiteral) const {
1969 assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 ||
1971 "Radix should be 2, 8, 10, 16, or 36!");
1973 const char *Prefix = "";
1974 if (formatAsCLiteral) {
1977 // Binary literals are a non-standard extension added in gcc 4.3:
1978 // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
1990 llvm_unreachable("Invalid radix!");
1994 // First, check for a zero value and just short circuit the logic below.
1997 Str.push_back(*Prefix);
2004 static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
2006 if (isSingleWord()) {
2008 char *BufPtr = Buffer+65;
2014 int64_t I = getSExtValue();
2024 Str.push_back(*Prefix);
2029 *--BufPtr = Digits[N % Radix];
2032 Str.append(BufPtr, Buffer+65);
2038 if (Signed && isNegative()) {
2039 // They want to print the signed version and it is a negative value
2040 // Flip the bits and add one to turn it into the equivalent positive
2041 // value and put a '-' in the result.
2048 Str.push_back(*Prefix);
2052 // We insert the digits backward, then reverse them to get the right order.
2053 unsigned StartDig = Str.size();
2055 // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2056 // because the number of bits per digit (1, 3 and 4 respectively) divides
2057 // equally. We just shift until the value is zero.
2058 if (Radix == 2 || Radix == 8 || Radix == 16) {
2059 // Just shift tmp right for each digit width until it becomes zero
2060 unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
2061 unsigned MaskAmt = Radix - 1;
2064 unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
2065 Str.push_back(Digits[Digit]);
2066 Tmp.lshrInPlace(ShiftAmt);
2069 APInt divisor(Radix == 10? 4 : 8, Radix);
2071 APInt APdigit(1, 0);
2072 APInt tmp2(Tmp.getBitWidth(), 0);
2073 divide(Tmp, Tmp.getNumWords(), divisor, divisor.getNumWords(), &tmp2,
2075 unsigned Digit = (unsigned)APdigit.getZExtValue();
2076 assert(Digit < Radix && "divide failed");
2077 Str.push_back(Digits[Digit]);
2082 // Reverse the digits before returning.
2083 std::reverse(Str.begin()+StartDig, Str.end());
2086 /// Returns the APInt as a std::string. Note that this is an inefficient method.
2087 /// It is better to pass in a SmallVector/SmallString to the methods above.
2088 std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
2090 toString(S, Radix, Signed, /* formatAsCLiteral = */false);
2094 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2095 LLVM_DUMP_METHOD void APInt::dump() const {
2096 SmallString<40> S, U;
2097 this->toStringUnsigned(U);
2098 this->toStringSigned(S);
2099 dbgs() << "APInt(" << BitWidth << "b, "
2100 << U << "u " << S << "s)\n";
2104 void APInt::print(raw_ostream &OS, bool isSigned) const {
2106 this->toString(S, 10, isSigned, /* formatAsCLiteral = */false);
2110 // This implements a variety of operations on a representation of
2111 // arbitrary precision, two's-complement, bignum integer values.
2113 // Assumed by lowHalf, highHalf, partMSB and partLSB. A fairly safe
2114 // and unrestricting assumption.
2115 static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0,
2116 "Part width must be divisible by 2!");
2118 /* Some handy functions local to this file. */
2120 /* Returns the integer part with the least significant BITS set.
2121 BITS cannot be zero. */
2122 static inline APInt::WordType lowBitMask(unsigned bits) {
2123 assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD);
2125 return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits);
2128 /* Returns the value of the lower half of PART. */
2129 static inline APInt::WordType lowHalf(APInt::WordType part) {
2130 return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2);
2133 /* Returns the value of the upper half of PART. */
2134 static inline APInt::WordType highHalf(APInt::WordType part) {
2135 return part >> (APInt::APINT_BITS_PER_WORD / 2);
2138 /* Returns the bit number of the most significant set bit of a part.
2139 If the input number has no bits set -1U is returned. */
2140 static unsigned partMSB(APInt::WordType value) {
2141 return findLastSet(value, ZB_Max);
2144 /* Returns the bit number of the least significant set bit of a
2145 part. If the input number has no bits set -1U is returned. */
2146 static unsigned partLSB(APInt::WordType value) {
2147 return findFirstSet(value, ZB_Max);
2150 /* Sets the least significant part of a bignum to the input value, and
2151 zeroes out higher parts. */
2152 void APInt::tcSet(WordType *dst, WordType part, unsigned parts) {
2156 for (unsigned i = 1; i < parts; i++)
2160 /* Assign one bignum to another. */
2161 void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) {
2162 for (unsigned i = 0; i < parts; i++)
2166 /* Returns true if a bignum is zero, false otherwise. */
2167 bool APInt::tcIsZero(const WordType *src, unsigned parts) {
2168 for (unsigned i = 0; i < parts; i++)
2175 /* Extract the given bit of a bignum; returns 0 or 1. */
2176 int APInt::tcExtractBit(const WordType *parts, unsigned bit) {
2177 return (parts[whichWord(bit)] & maskBit(bit)) != 0;
2180 /* Set the given bit of a bignum. */
2181 void APInt::tcSetBit(WordType *parts, unsigned bit) {
2182 parts[whichWord(bit)] |= maskBit(bit);
2185 /* Clears the given bit of a bignum. */
2186 void APInt::tcClearBit(WordType *parts, unsigned bit) {
2187 parts[whichWord(bit)] &= ~maskBit(bit);
2190 /* Returns the bit number of the least significant set bit of a
2191 number. If the input number has no bits set -1U is returned. */
2192 unsigned APInt::tcLSB(const WordType *parts, unsigned n) {
2193 for (unsigned i = 0; i < n; i++) {
2194 if (parts[i] != 0) {
2195 unsigned lsb = partLSB(parts[i]);
2197 return lsb + i * APINT_BITS_PER_WORD;
2204 /* Returns the bit number of the most significant set bit of a number.
2205 If the input number has no bits set -1U is returned. */
2206 unsigned APInt::tcMSB(const WordType *parts, unsigned n) {
2210 if (parts[n] != 0) {
2211 unsigned msb = partMSB(parts[n]);
2213 return msb + n * APINT_BITS_PER_WORD;
2220 /* Copy the bit vector of width srcBITS from SRC, starting at bit
2221 srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
2222 the least significant bit of DST. All high bits above srcBITS in
2223 DST are zero-filled. */
2225 APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src,
2226 unsigned srcBits, unsigned srcLSB) {
2227 unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
2228 assert(dstParts <= dstCount);
2230 unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD;
2231 tcAssign (dst, src + firstSrcPart, dstParts);
2233 unsigned shift = srcLSB % APINT_BITS_PER_WORD;
2234 tcShiftRight (dst, dstParts, shift);
2236 /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC
2237 in DST. If this is less that srcBits, append the rest, else
2238 clear the high bits. */
2239 unsigned n = dstParts * APINT_BITS_PER_WORD - shift;
2241 WordType mask = lowBitMask (srcBits - n);
2242 dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
2243 << n % APINT_BITS_PER_WORD);
2244 } else if (n > srcBits) {
2245 if (srcBits % APINT_BITS_PER_WORD)
2246 dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD);
2249 /* Clear high parts. */
2250 while (dstParts < dstCount)
2251 dst[dstParts++] = 0;
2254 /* DST += RHS + C where C is zero or one. Returns the carry flag. */
2255 APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs,
2256 WordType c, unsigned parts) {
2259 for (unsigned i = 0; i < parts; i++) {
2260 WordType l = dst[i];
2262 dst[i] += rhs[i] + 1;
2273 /// This function adds a single "word" integer, src, to the multiple
2274 /// "word" integer array, dst[]. dst[] is modified to reflect the addition and
2275 /// 1 is returned if there is a carry out, otherwise 0 is returned.
2276 /// @returns the carry of the addition.
2277 APInt::WordType APInt::tcAddPart(WordType *dst, WordType src,
2279 for (unsigned i = 0; i < parts; ++i) {
2282 return 0; // No need to carry so exit early.
2283 src = 1; // Carry one to next digit.
2289 /* DST -= RHS + C where C is zero or one. Returns the carry flag. */
2290 APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs,
2291 WordType c, unsigned parts) {
2294 for (unsigned i = 0; i < parts; i++) {
2295 WordType l = dst[i];
2297 dst[i] -= rhs[i] + 1;
2308 /// This function subtracts a single "word" (64-bit word), src, from
2309 /// the multi-word integer array, dst[], propagating the borrowed 1 value until
2310 /// no further borrowing is needed or it runs out of "words" in dst. The result
2311 /// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not
2312 /// exhausted. In other words, if src > dst then this function returns 1,
2314 /// @returns the borrow out of the subtraction
2315 APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src,
2317 for (unsigned i = 0; i < parts; ++i) {
2318 WordType Dst = dst[i];
2321 return 0; // No need to borrow so exit early.
2322 src = 1; // We have to "borrow 1" from next "word"
2328 /* Negate a bignum in-place. */
2329 void APInt::tcNegate(WordType *dst, unsigned parts) {
2330 tcComplement(dst, parts);
2331 tcIncrement(dst, parts);
2334 /* DST += SRC * MULTIPLIER + CARRY if add is true
2335 DST = SRC * MULTIPLIER + CARRY if add is false
2337 Requires 0 <= DSTPARTS <= SRCPARTS + 1. If DST overlaps SRC
2338 they must start at the same point, i.e. DST == SRC.
2340 If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2341 returned. Otherwise DST is filled with the least significant
2342 DSTPARTS parts of the result, and if all of the omitted higher
2343 parts were zero return zero, otherwise overflow occurred and
2345 int APInt::tcMultiplyPart(WordType *dst, const WordType *src,
2346 WordType multiplier, WordType carry,
2347 unsigned srcParts, unsigned dstParts,
2349 /* Otherwise our writes of DST kill our later reads of SRC. */
2350 assert(dst <= src || dst >= src + srcParts);
2351 assert(dstParts <= srcParts + 1);
2353 /* N loops; minimum of dstParts and srcParts. */
2354 unsigned n = dstParts < srcParts ? dstParts: srcParts;
2357 for (i = 0; i < n; i++) {
2358 WordType low, mid, high, srcPart;
2360 /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2362 This cannot overflow, because
2364 (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2366 which is less than n^2. */
2370 if (multiplier == 0 || srcPart == 0) {
2374 low = lowHalf(srcPart) * lowHalf(multiplier);
2375 high = highHalf(srcPart) * highHalf(multiplier);
2377 mid = lowHalf(srcPart) * highHalf(multiplier);
2378 high += highHalf(mid);
2379 mid <<= APINT_BITS_PER_WORD / 2;
2380 if (low + mid < low)
2384 mid = highHalf(srcPart) * lowHalf(multiplier);
2385 high += highHalf(mid);
2386 mid <<= APINT_BITS_PER_WORD / 2;
2387 if (low + mid < low)
2391 /* Now add carry. */
2392 if (low + carry < low)
2398 /* And now DST[i], and store the new low part there. */
2399 if (low + dst[i] < low)
2409 /* Full multiplication, there is no overflow. */
2410 assert(i + 1 == dstParts);
2414 /* We overflowed if there is carry. */
2418 /* We would overflow if any significant unwritten parts would be
2419 non-zero. This is true if any remaining src parts are non-zero
2420 and the multiplier is non-zero. */
2422 for (; i < srcParts; i++)
2426 /* We fitted in the narrow destination. */
2431 /* DST = LHS * RHS, where DST has the same width as the operands and
2432 is filled with the least significant parts of the result. Returns
2433 one if overflow occurred, otherwise zero. DST must be disjoint
2434 from both operands. */
2435 int APInt::tcMultiply(WordType *dst, const WordType *lhs,
2436 const WordType *rhs, unsigned parts) {
2437 assert(dst != lhs && dst != rhs);
2440 tcSet(dst, 0, parts);
2442 for (unsigned i = 0; i < parts; i++)
2443 overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
2449 /* DST = LHS * RHS, where DST has width the sum of the widths of the
2450 operands. No overflow occurs. DST must be disjoint from both
2451 operands. Returns the number of parts required to hold the
2453 unsigned APInt::tcFullMultiply(WordType *dst, const WordType *lhs,
2454 const WordType *rhs, unsigned lhsParts,
2455 unsigned rhsParts) {
2456 /* Put the narrower number on the LHS for less loops below. */
2457 if (lhsParts > rhsParts) {
2458 return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
2460 assert(dst != lhs && dst != rhs);
2462 tcSet(dst, 0, rhsParts);
2464 for (unsigned i = 0; i < lhsParts; i++)
2465 tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true);
2467 unsigned n = lhsParts + rhsParts;
2469 return n - (dst[n - 1] == 0);
2473 /* If RHS is zero LHS and REMAINDER are left unchanged, return one.
2474 Otherwise set LHS to LHS / RHS with the fractional part discarded,
2475 set REMAINDER to the remainder, return zero. i.e.
2477 OLD_LHS = RHS * LHS + REMAINDER
2479 SCRATCH is a bignum of the same size as the operands and result for
2480 use by the routine; its contents need not be initialized and are
2481 destroyed. LHS, REMAINDER and SCRATCH must be distinct.
2483 int APInt::tcDivide(WordType *lhs, const WordType *rhs,
2484 WordType *remainder, WordType *srhs,
2486 assert(lhs != remainder && lhs != srhs && remainder != srhs);
2488 unsigned shiftCount = tcMSB(rhs, parts) + 1;
2489 if (shiftCount == 0)
2492 shiftCount = parts * APINT_BITS_PER_WORD - shiftCount;
2493 unsigned n = shiftCount / APINT_BITS_PER_WORD;
2494 WordType mask = (WordType) 1 << (shiftCount % APINT_BITS_PER_WORD);
2496 tcAssign(srhs, rhs, parts);
2497 tcShiftLeft(srhs, parts, shiftCount);
2498 tcAssign(remainder, lhs, parts);
2499 tcSet(lhs, 0, parts);
2501 /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
2506 compare = tcCompare(remainder, srhs, parts);
2508 tcSubtract(remainder, srhs, 0, parts);
2512 if (shiftCount == 0)
2515 tcShiftRight(srhs, parts, 1);
2516 if ((mask >>= 1) == 0) {
2517 mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1);
2525 /// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are
2526 /// no restrictions on Count.
2527 void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) {
2528 // Don't bother performing a no-op shift.
2532 // WordShift is the inter-part shift; BitShift is the intra-part shift.
2533 unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2534 unsigned BitShift = Count % APINT_BITS_PER_WORD;
2536 // Fastpath for moving by whole words.
2537 if (BitShift == 0) {
2538 std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE);
2540 while (Words-- > WordShift) {
2541 Dst[Words] = Dst[Words - WordShift] << BitShift;
2542 if (Words > WordShift)
2544 Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift);
2548 // Fill in the remainder with 0s.
2549 std::memset(Dst, 0, WordShift * APINT_WORD_SIZE);
2552 /// Shift a bignum right Count bits in-place. Shifted in bits are zero. There
2553 /// are no restrictions on Count.
2554 void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) {
2555 // Don't bother performing a no-op shift.
2559 // WordShift is the inter-part shift; BitShift is the intra-part shift.
2560 unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2561 unsigned BitShift = Count % APINT_BITS_PER_WORD;
2563 unsigned WordsToMove = Words - WordShift;
2564 // Fastpath for moving by whole words.
2565 if (BitShift == 0) {
2566 std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE);
2568 for (unsigned i = 0; i != WordsToMove; ++i) {
2569 Dst[i] = Dst[i + WordShift] >> BitShift;
2570 if (i + 1 != WordsToMove)
2571 Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift);
2575 // Fill in the remainder with 0s.
2576 std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE);
2579 /* Bitwise and of two bignums. */
2580 void APInt::tcAnd(WordType *dst, const WordType *rhs, unsigned parts) {
2581 for (unsigned i = 0; i < parts; i++)
2585 /* Bitwise inclusive or of two bignums. */
2586 void APInt::tcOr(WordType *dst, const WordType *rhs, unsigned parts) {
2587 for (unsigned i = 0; i < parts; i++)
2591 /* Bitwise exclusive or of two bignums. */
2592 void APInt::tcXor(WordType *dst, const WordType *rhs, unsigned parts) {
2593 for (unsigned i = 0; i < parts; i++)
2597 /* Complement a bignum in-place. */
2598 void APInt::tcComplement(WordType *dst, unsigned parts) {
2599 for (unsigned i = 0; i < parts; i++)
2603 /* Comparison (unsigned) of two bignums. */
2604 int APInt::tcCompare(const WordType *lhs, const WordType *rhs,
2608 if (lhs[parts] != rhs[parts])
2609 return (lhs[parts] > rhs[parts]) ? 1 : -1;
2615 /* Set the least significant BITS bits of a bignum, clear the
2617 void APInt::tcSetLeastSignificantBits(WordType *dst, unsigned parts,
2620 while (bits > APINT_BITS_PER_WORD) {
2621 dst[i++] = ~(WordType) 0;
2622 bits -= APINT_BITS_PER_WORD;
2626 dst[i++] = ~(WordType) 0 >> (APINT_BITS_PER_WORD - bits);