lib/Support/APInt.cpp

   1 //===-- APInt.cpp - Implement APInt class ---------------------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements a class to represent arbitrary precision integer
  11 // constant values and provide a variety of arithmetic operations on them.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #include "llvm/ADT/APInt.h"
  16 #include "llvm/ADT/ArrayRef.h"
  17 #include "llvm/ADT/FoldingSet.h"
  18 #include "llvm/ADT/Hashing.h"
  19 #include "llvm/ADT/SmallString.h"
  20 #include "llvm/ADT/StringRef.h"
  21 #include "llvm/Support/Debug.h"
  22 #include "llvm/Support/ErrorHandling.h"
  23 #include "llvm/Support/MathExtras.h"
  24 #include "llvm/Support/raw_ostream.h"
  25 #include <climits>
  26 #include <cmath>
  27 #include <cstdlib>
  28 #include <cstring>
  29 using namespace llvm;
  30
  31 #define DEBUG_TYPE "apint"
  32
  33 /// A utility function for allocating memory, checking for allocation failures,
  34 /// and ensuring the contents are zeroed.
  35 inline static uint64_t* getClearedMemory(unsigned numWords) {
  36   uint64_t * result = new uint64_t[numWords];
  37   assert(result && "APInt memory allocation fails!");
  38   memset(result, 0, numWords * sizeof(uint64_t));
  39   return result;
  40 }
  41
  42 /// A utility function for allocating memory and checking for allocation
  43 /// failure.  The content is not zeroed.
  44 inline static uint64_t* getMemory(unsigned numWords) {
  45   uint64_t * result = new uint64_t[numWords];
  46   assert(result && "APInt memory allocation fails!");
  47   return result;
  48 }
  49
  50 /// A utility function that converts a character to a digit.
  51 inline static unsigned getDigit(char cdigit, uint8_t radix) {
  52   unsigned r;
  53
  54   if (radix == 16 || radix == 36) {
  55     r = cdigit - '0';
  56     if (r <= 9)
  57       return r;
  58
  59     r = cdigit - 'A';
  60     if (r <= radix - 11U)
  61       return r + 10;
  62
  63     r = cdigit - 'a';
  64     if (r <= radix - 11U)
  65       return r + 10;
  66
  67     radix = 10;
  68   }
  69
  70   r = cdigit - '0';
  71   if (r < radix)
  72     return r;
  73
  74   return -1U;
  75 }
  76
  77
  78 void APInt::initSlowCase(uint64_t val, bool isSigned) {
  79   U.pVal = getClearedMemory(getNumWords());
  80   U.pVal[0] = val;
  81   if (isSigned && int64_t(val) < 0)
  82     for (unsigned i = 1; i < getNumWords(); ++i)
  83       U.pVal[i] = WORD_MAX;
  84   clearUnusedBits();
  85 }
  86
  87 void APInt::initSlowCase(const APInt& that) {
  88   U.pVal = getMemory(getNumWords());
  89   memcpy(U.pVal, that.U.pVal, getNumWords() * APINT_WORD_SIZE);
  90 }
  91
  92 void APInt::initFromArray(ArrayRef<uint64_t> bigVal) {
  93   assert(BitWidth && "Bitwidth too small");
  94   assert(bigVal.data() && "Null pointer detected!");
  95   if (isSingleWord())
  96     U.VAL = bigVal[0];
  97   else {
  98     // Get memory, cleared to 0
  99     U.pVal = getClearedMemory(getNumWords());
 100     // Calculate the number of words to copy
 101     unsigned words = std::min<unsigned>(bigVal.size(), getNumWords());
 102     // Copy the words from bigVal to pVal
 103     memcpy(U.pVal, bigVal.data(), words * APINT_WORD_SIZE);
 104   }
 105   // Make sure unused high bits are cleared
 106   clearUnusedBits();
 107 }
 108
 109 APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal)
 110   : BitWidth(numBits) {
 111   initFromArray(bigVal);
 112 }
 113
 114 APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
 115   : BitWidth(numBits) {
 116   initFromArray(makeArrayRef(bigVal, numWords));
 117 }
 118
 119 APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
 120   : BitWidth(numbits) {
 121   assert(BitWidth && "Bitwidth too small");
 122   fromString(numbits, Str, radix);
 123 }
 124
 125 void APInt::reallocate(unsigned NewBitWidth) {
 126   // If the number of words is the same we can just change the width and stop.
 127   if (getNumWords() == getNumWords(NewBitWidth)) {
 128     BitWidth = NewBitWidth;
 129     return;
 130   }
 131
 132   // If we have an allocation, delete it.
 133   if (!isSingleWord())
 134     delete [] U.pVal;
 135
 136   // Update BitWidth.
 137   BitWidth = NewBitWidth;
 138
 139   // If we are supposed to have an allocation, create it.
 140   if (!isSingleWord())
 141     U.pVal = getMemory(getNumWords());
 142 }
 143
 144 void APInt::AssignSlowCase(const APInt& RHS) {
 145   // Don't do anything for X = X
 146   if (this == &RHS)
 147     return;
 148
 149   // Adjust the bit width and handle allocations as necessary.
 150   reallocate(RHS.getBitWidth());
 151
 152   // Copy the data.
 153   if (isSingleWord())
 154     U.VAL = RHS.U.VAL;
 155   else
 156     memcpy(U.pVal, RHS.U.pVal, getNumWords() * APINT_WORD_SIZE);
 157 }
 158
 159 /// This method 'profiles' an APInt for use with FoldingSet.
 160 void APInt::Profile(FoldingSetNodeID& ID) const {
 161   ID.AddInteger(BitWidth);
 162
 163   if (isSingleWord()) {
 164     ID.AddInteger(U.VAL);
 165     return;
 166   }
 167
 168   unsigned NumWords = getNumWords();
 169   for (unsigned i = 0; i < NumWords; ++i)
 170     ID.AddInteger(U.pVal[i]);
 171 }
 172
 173 /// @brief Prefix increment operator. Increments the APInt by one.
 174 APInt& APInt::operator++() {
 175   if (isSingleWord())
 176     ++U.VAL;
 177   else
 178     tcIncrement(U.pVal, getNumWords());
 179   return clearUnusedBits();
 180 }
 181
 182 /// @brief Prefix decrement operator. Decrements the APInt by one.
 183 APInt& APInt::operator--() {
 184   if (isSingleWord())
 185     --U.VAL;
 186   else
 187     tcDecrement(U.pVal, getNumWords());
 188   return clearUnusedBits();
 189 }
 190
 191 /// Adds the RHS APint to this APInt.
 192 /// @returns this, after addition of RHS.
 193 /// @brief Addition assignment operator.
 194 APInt& APInt::operator+=(const APInt& RHS) {
 195   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 196   if (isSingleWord())
 197     U.VAL += RHS.U.VAL;
 198   else
 199     tcAdd(U.pVal, RHS.U.pVal, 0, getNumWords());
 200   return clearUnusedBits();
 201 }
 202
 203 APInt& APInt::operator+=(uint64_t RHS) {
 204   if (isSingleWord())
 205     U.VAL += RHS;
 206   else
 207     tcAddPart(U.pVal, RHS, getNumWords());
 208   return clearUnusedBits();
 209 }
 210
 211 /// Subtracts the RHS APInt from this APInt
 212 /// @returns this, after subtraction
 213 /// @brief Subtraction assignment operator.
 214 APInt& APInt::operator-=(const APInt& RHS) {
 215   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 216   if (isSingleWord())
 217     U.VAL -= RHS.U.VAL;
 218   else
 219     tcSubtract(U.pVal, RHS.U.pVal, 0, getNumWords());
 220   return clearUnusedBits();
 221 }
 222
 223 APInt& APInt::operator-=(uint64_t RHS) {
 224   if (isSingleWord())
 225     U.VAL -= RHS;
 226   else
 227     tcSubtractPart(U.pVal, RHS, getNumWords());
 228   return clearUnusedBits();
 229 }
 230
 231 APInt APInt::operator*(const APInt& RHS) const {
 232   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 233   if (isSingleWord())
 234     return APInt(BitWidth, U.VAL * RHS.U.VAL);
 235
 236   APInt Result(getMemory(getNumWords()), getBitWidth());
 237
 238   tcMultiply(Result.U.pVal, U.pVal, RHS.U.pVal, getNumWords());
 239
 240   Result.clearUnusedBits();
 241   return Result;
 242 }
 243
 244 void APInt::AndAssignSlowCase(const APInt& RHS) {
 245   tcAnd(U.pVal, RHS.U.pVal, getNumWords());
 246 }
 247
 248 void APInt::OrAssignSlowCase(const APInt& RHS) {
 249   tcOr(U.pVal, RHS.U.pVal, getNumWords());
 250 }
 251
 252 void APInt::XorAssignSlowCase(const APInt& RHS) {
 253   tcXor(U.pVal, RHS.U.pVal, getNumWords());
 254 }
 255
 256 APInt& APInt::operator*=(const APInt& RHS) {
 257   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 258   *this = *this * RHS;
 259   return *this;
 260 }
 261
 262 APInt& APInt::operator*=(uint64_t RHS) {
 263   if (isSingleWord()) {
 264     U.VAL *= RHS;
 265   } else {
 266     unsigned NumWords = getNumWords();
 267     tcMultiplyPart(U.pVal, U.pVal, RHS, 0, NumWords, NumWords, false);
 268   }
 269   return clearUnusedBits();
 270 }
 271
 272 bool APInt::EqualSlowCase(const APInt& RHS) const {
 273   return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal);
 274 }
 275
 276 int APInt::compare(const APInt& RHS) const {
 277   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
 278   if (isSingleWord())
 279     return U.VAL < RHS.U.VAL ? -1 : U.VAL > RHS.U.VAL;
 280
 281   return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
 282 }
 283
 284 int APInt::compareSigned(const APInt& RHS) const {
 285   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
 286   if (isSingleWord()) {
 287     int64_t lhsSext = SignExtend64(U.VAL, BitWidth);
 288     int64_t rhsSext = SignExtend64(RHS.U.VAL, BitWidth);
 289     return lhsSext < rhsSext ? -1 : lhsSext > rhsSext;
 290   }
 291
 292   bool lhsNeg = isNegative();
 293   bool rhsNeg = RHS.isNegative();
 294
 295   // If the sign bits don't match, then (LHS < RHS) if LHS is negative
 296   if (lhsNeg != rhsNeg)
 297     return lhsNeg ? -1 : 1;
 298
 299   // Otherwise we can just use an unsigned comparison, because even negative
 300   // numbers compare correctly this way if both have the same signed-ness.
 301   return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
 302 }
 303
 304 void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
 305   unsigned loWord = whichWord(loBit);
 306   unsigned hiWord = whichWord(hiBit);
 307
 308   // Create an initial mask for the low word with zeros below loBit.
 309   uint64_t loMask = WORD_MAX << whichBit(loBit);
 310
 311   // If hiBit is not aligned, we need a high mask.
 312   unsigned hiShiftAmt = whichBit(hiBit);
 313   if (hiShiftAmt != 0) {
 314     // Create a high mask with zeros above hiBit.
 315     uint64_t hiMask = WORD_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt);
 316     // If loWord and hiWord are equal, then we combine the masks. Otherwise,
 317     // set the bits in hiWord.
 318     if (hiWord == loWord)
 319       loMask &= hiMask;
 320     else
 321       U.pVal[hiWord] |= hiMask;
 322   }
 323   // Apply the mask to the low word.
 324   U.pVal[loWord] |= loMask;
 325
 326   // Fill any words between loWord and hiWord with all ones.
 327   for (unsigned word = loWord + 1; word < hiWord; ++word)
 328     U.pVal[word] = WORD_MAX;
 329 }
 330
 331 /// @brief Toggle every bit to its opposite value.
 332 void APInt::flipAllBitsSlowCase() {
 333   tcComplement(U.pVal, getNumWords());
 334   clearUnusedBits();
 335 }
 336
 337 /// Toggle a given bit to its opposite value whose position is given
 338 /// as "bitPosition".
 339 /// @brief Toggles a given bit to its opposite value.
 340 void APInt::flipBit(unsigned bitPosition) {
 341   assert(bitPosition < BitWidth && "Out of the bit-width range!");
 342   if ((*this)[bitPosition]) clearBit(bitPosition);
 343   else setBit(bitPosition);
 344 }
 345
 346 void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
 347   unsigned subBitWidth = subBits.getBitWidth();
 348   assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth &&
 349          "Illegal bit insertion");
 350
 351   // Insertion is a direct copy.
 352   if (subBitWidth == BitWidth) {
 353     *this = subBits;
 354     return;
 355   }
 356
 357   // Single word result can be done as a direct bitmask.
 358   if (isSingleWord()) {
 359     uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
 360     U.VAL &= ~(mask << bitPosition);
 361     U.VAL |= (subBits.U.VAL << bitPosition);
 362     return;
 363   }
 364
 365   unsigned loBit = whichBit(bitPosition);
 366   unsigned loWord = whichWord(bitPosition);
 367   unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1);
 368
 369   // Insertion within a single word can be done as a direct bitmask.
 370   if (loWord == hi1Word) {
 371     uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
 372     U.pVal[loWord] &= ~(mask << loBit);
 373     U.pVal[loWord] |= (subBits.U.VAL << loBit);
 374     return;
 375   }
 376
 377   // Insert on word boundaries.
 378   if (loBit == 0) {
 379     // Direct copy whole words.
 380     unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD;
 381     memcpy(U.pVal + loWord, subBits.getRawData(),
 382            numWholeSubWords * APINT_WORD_SIZE);
 383
 384     // Mask+insert remaining bits.
 385     unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD;
 386     if (remainingBits != 0) {
 387       uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - remainingBits);
 388       U.pVal[hi1Word] &= ~mask;
 389       U.pVal[hi1Word] |= subBits.getWord(subBitWidth - 1);
 390     }
 391     return;
 392   }
 393
 394   // General case - set/clear individual bits in dst based on src.
 395   // TODO - there is scope for optimization here, but at the moment this code
 396   // path is barely used so prefer readability over performance.
 397   for (unsigned i = 0; i != subBitWidth; ++i) {
 398     if (subBits[i])
 399       setBit(bitPosition + i);
 400     else
 401       clearBit(bitPosition + i);
 402   }
 403 }
 404
 405 APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const {
 406   assert(numBits > 0 && "Can't extract zero bits");
 407   assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
 408          "Illegal bit extraction");
 409
 410   if (isSingleWord())
 411     return APInt(numBits, U.VAL >> bitPosition);
 412
 413   unsigned loBit = whichBit(bitPosition);
 414   unsigned loWord = whichWord(bitPosition);
 415   unsigned hiWord = whichWord(bitPosition + numBits - 1);
 416
 417   // Single word result extracting bits from a single word source.
 418   if (loWord == hiWord)
 419     return APInt(numBits, U.pVal[loWord] >> loBit);
 420
 421   // Extracting bits that start on a source word boundary can be done
 422   // as a fast memory copy.
 423   if (loBit == 0)
 424     return APInt(numBits, makeArrayRef(U.pVal + loWord, 1 + hiWord - loWord));
 425
 426   // General case - shift + copy source words directly into place.
 427   APInt Result(numBits, 0);
 428   unsigned NumSrcWords = getNumWords();
 429   unsigned NumDstWords = Result.getNumWords();
 430
 431   for (unsigned word = 0; word < NumDstWords; ++word) {
 432     uint64_t w0 = U.pVal[loWord + word];
 433     uint64_t w1 =
 434         (loWord + word + 1) < NumSrcWords ? U.pVal[loWord + word + 1] : 0;
 435     Result.U.pVal[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit));
 436   }
 437
 438   return Result.clearUnusedBits();
 439 }
 440
 441 unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
 442   assert(!str.empty() && "Invalid string length");
 443   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
 444           radix == 36) &&
 445          "Radix should be 2, 8, 10, 16, or 36!");
 446
 447   size_t slen = str.size();
 448
 449   // Each computation below needs to know if it's negative.
 450   StringRef::iterator p = str.begin();
 451   unsigned isNegative = *p == '-';
 452   if (*p == '-' || *p == '+') {
 453     p++;
 454     slen--;
 455     assert(slen && "String is only a sign, needs a value.");
 456   }
 457
 458   // For radixes of power-of-two values, the bits required is accurately and
 459   // easily computed
 460   if (radix == 2)
 461     return slen + isNegative;
 462   if (radix == 8)
 463     return slen * 3 + isNegative;
 464   if (radix == 16)
 465     return slen * 4 + isNegative;
 466
 467   // FIXME: base 36
 468
 469   // This is grossly inefficient but accurate. We could probably do something
 470   // with a computation of roughly slen*64/20 and then adjust by the value of
 471   // the first few digits. But, I'm not sure how accurate that could be.
 472
 473   // Compute a sufficient number of bits that is always large enough but might
 474   // be too large. This avoids the assertion in the constructor. This
 475   // calculation doesn't work appropriately for the numbers 0-9, so just use 4
 476   // bits in that case.
 477   unsigned sufficient
 478     = radix == 10? (slen == 1 ? 4 : slen * 64/18)
 479                  : (slen == 1 ? 7 : slen * 16/3);
 480
 481   // Convert to the actual binary value.
 482   APInt tmp(sufficient, StringRef(p, slen), radix);
 483
 484   // Compute how many bits are required. If the log is infinite, assume we need
 485   // just bit.
 486   unsigned log = tmp.logBase2();
 487   if (log == (unsigned)-1) {
 488     return isNegative + 1;
 489   } else {
 490     return isNegative + log + 1;
 491   }
 492 }
 493
 494 hash_code llvm::hash_value(const APInt &Arg) {
 495   if (Arg.isSingleWord())
 496     return hash_combine(Arg.U.VAL);
 497
 498   return hash_combine_range(Arg.U.pVal, Arg.U.pVal + Arg.getNumWords());
 499 }
 500
 501 bool APInt::isSplat(unsigned SplatSizeInBits) const {
 502   assert(getBitWidth() % SplatSizeInBits == 0 &&
 503          "SplatSizeInBits must divide width!");
 504   // We can check that all parts of an integer are equal by making use of a
 505   // little trick: rotate and check if it's still the same value.
 506   return *this == rotl(SplatSizeInBits);
 507 }
 508
 509 /// This function returns the high "numBits" bits of this APInt.
 510 APInt APInt::getHiBits(unsigned numBits) const {
 511   return this->lshr(BitWidth - numBits);
 512 }
 513
 514 /// This function returns the low "numBits" bits of this APInt.
 515 APInt APInt::getLoBits(unsigned numBits) const {
 516   APInt Result(getLowBitsSet(BitWidth, numBits));
 517   Result &= *this;
 518   return Result;
 519 }
 520
 521 /// Return a value containing V broadcasted over NewLen bits.
 522 APInt APInt::getSplat(unsigned NewLen, const APInt &V) {
 523   assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!");
 524
 525   APInt Val = V.zextOrSelf(NewLen);
 526   for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1)
 527     Val |= Val << I;
 528
 529   return Val;
 530 }
 531
 532 unsigned APInt::countLeadingZerosSlowCase() const {
 533   unsigned Count = 0;
 534   for (int i = getNumWords()-1; i >= 0; --i) {
 535     uint64_t V = U.pVal[i];
 536     if (V == 0)
 537       Count += APINT_BITS_PER_WORD;
 538     else {
 539       Count += llvm::countLeadingZeros(V);
 540       break;
 541     }
 542   }
 543   // Adjust for unused bits in the most significant word (they are zero).
 544   unsigned Mod = BitWidth % APINT_BITS_PER_WORD;
 545   Count -= Mod > 0 ? APINT_BITS_PER_WORD - Mod : 0;
 546   return Count;
 547 }
 548
 549 unsigned APInt::countLeadingOnesSlowCase() const {
 550   unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
 551   unsigned shift;
 552   if (!highWordBits) {
 553     highWordBits = APINT_BITS_PER_WORD;
 554     shift = 0;
 555   } else {
 556     shift = APINT_BITS_PER_WORD - highWordBits;
 557   }
 558   int i = getNumWords() - 1;
 559   unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift);
 560   if (Count == highWordBits) {
 561     for (i--; i >= 0; --i) {
 562       if (U.pVal[i] == WORD_MAX)
 563         Count += APINT_BITS_PER_WORD;
 564       else {
 565         Count += llvm::countLeadingOnes(U.pVal[i]);
 566         break;
 567       }
 568     }
 569   }
 570   return Count;
 571 }
 572
 573 unsigned APInt::countTrailingZerosSlowCase() const {
 574   unsigned Count = 0;
 575   unsigned i = 0;
 576   for (; i < getNumWords() && U.pVal[i] == 0; ++i)
 577     Count += APINT_BITS_PER_WORD;
 578   if (i < getNumWords())
 579     Count += llvm::countTrailingZeros(U.pVal[i]);
 580   return std::min(Count, BitWidth);
 581 }
 582
 583 unsigned APInt::countTrailingOnesSlowCase() const {
 584   unsigned Count = 0;
 585   unsigned i = 0;
 586   for (; i < getNumWords() && U.pVal[i] == WORD_MAX; ++i)
 587     Count += APINT_BITS_PER_WORD;
 588   if (i < getNumWords())
 589     Count += llvm::countTrailingOnes(U.pVal[i]);
 590   assert(Count <= BitWidth);
 591   return Count;
 592 }
 593
 594 unsigned APInt::countPopulationSlowCase() const {
 595   unsigned Count = 0;
 596   for (unsigned i = 0; i < getNumWords(); ++i)
 597     Count += llvm::countPopulation(U.pVal[i]);
 598   return Count;
 599 }
 600
 601 bool APInt::intersectsSlowCase(const APInt &RHS) const {
 602   for (unsigned i = 0, e = getNumWords(); i != e; ++i)
 603     if ((U.pVal[i] & RHS.U.pVal[i]) != 0)
 604       return true;
 605
 606   return false;
 607 }
 608
 609 bool APInt::isSubsetOfSlowCase(const APInt &RHS) const {
 610   for (unsigned i = 0, e = getNumWords(); i != e; ++i)
 611     if ((U.pVal[i] & ~RHS.U.pVal[i]) != 0)
 612       return false;
 613
 614   return true;
 615 }
 616
 617 APInt APInt::byteSwap() const {
 618   assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
 619   if (BitWidth == 16)
 620     return APInt(BitWidth, ByteSwap_16(uint16_t(U.VAL)));
 621   if (BitWidth == 32)
 622     return APInt(BitWidth, ByteSwap_32(unsigned(U.VAL)));
 623   if (BitWidth == 48) {
 624     unsigned Tmp1 = unsigned(U.VAL >> 16);
 625     Tmp1 = ByteSwap_32(Tmp1);
 626     uint16_t Tmp2 = uint16_t(U.VAL);
 627     Tmp2 = ByteSwap_16(Tmp2);
 628     return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1);
 629   }
 630   if (BitWidth == 64)
 631     return APInt(BitWidth, ByteSwap_64(U.VAL));
 632
 633   APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0);
 634   for (unsigned I = 0, N = getNumWords(); I != N; ++I)
 635     Result.U.pVal[I] = ByteSwap_64(U.pVal[N - I - 1]);
 636   if (Result.BitWidth != BitWidth) {
 637     Result.lshrInPlace(Result.BitWidth - BitWidth);
 638     Result.BitWidth = BitWidth;
 639   }
 640   return Result;
 641 }
 642
 643 APInt APInt::reverseBits() const {
 644   switch (BitWidth) {
 645   case 64:
 646     return APInt(BitWidth, llvm::reverseBits<uint64_t>(U.VAL));
 647   case 32:
 648     return APInt(BitWidth, llvm::reverseBits<uint32_t>(U.VAL));
 649   case 16:
 650     return APInt(BitWidth, llvm::reverseBits<uint16_t>(U.VAL));
 651   case 8:
 652     return APInt(BitWidth, llvm::reverseBits<uint8_t>(U.VAL));
 653   default:
 654     break;
 655   }
 656
 657   APInt Val(*this);
 658   APInt Reversed(BitWidth, 0);
 659   unsigned S = BitWidth;
 660
 661   for (; Val != 0; Val.lshrInPlace(1)) {
 662     Reversed <<= 1;
 663     Reversed |= Val[0];
 664     --S;
 665   }
 666
 667   Reversed <<= S;
 668   return Reversed;
 669 }
 670
 671 APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) {
 672   // Fast-path a common case.
 673   if (A == B) return A;
 674
 675   // Corner cases: if either operand is zero, the other is the gcd.
 676   if (!A) return B;
 677   if (!B) return A;
 678
 679   // Count common powers of 2 and remove all other powers of 2.
 680   unsigned Pow2;
 681   {
 682     unsigned Pow2_A = A.countTrailingZeros();
 683     unsigned Pow2_B = B.countTrailingZeros();
 684     if (Pow2_A > Pow2_B) {
 685       A.lshrInPlace(Pow2_A - Pow2_B);
 686       Pow2 = Pow2_B;
 687     } else if (Pow2_B > Pow2_A) {
 688       B.lshrInPlace(Pow2_B - Pow2_A);
 689       Pow2 = Pow2_A;
 690     } else {
 691       Pow2 = Pow2_A;
 692     }
 693   }
 694
 695   // Both operands are odd multiples of 2^Pow_2:
 696   //
 697   //   gcd(a, b) = gcd(|a - b| / 2^i, min(a, b))
 698   //
 699   // This is a modified version of Stein's algorithm, taking advantage of
 700   // efficient countTrailingZeros().
 701   while (A != B) {
 702     if (A.ugt(B)) {
 703       A -= B;
 704       A.lshrInPlace(A.countTrailingZeros() - Pow2);
 705     } else {
 706       B -= A;
 707       B.lshrInPlace(B.countTrailingZeros() - Pow2);
 708     }
 709   }
 710
 711   return A;
 712 }
 713
 714 APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
 715   union {
 716     double D;
 717     uint64_t I;
 718   } T;
 719   T.D = Double;
 720
 721   // Get the sign bit from the highest order bit
 722   bool isNeg = T.I >> 63;
 723
 724   // Get the 11-bit exponent and adjust for the 1023 bit bias
 725   int64_t exp = ((T.I >> 52) & 0x7ff) - 1023;
 726
 727   // If the exponent is negative, the value is < 0 so just return 0.
 728   if (exp < 0)
 729     return APInt(width, 0u);
 730
 731   // Extract the mantissa by clearing the top 12 bits (sign + exponent).
 732   uint64_t mantissa = (T.I & (~0ULL >> 12)) | 1ULL << 52;
 733
 734   // If the exponent doesn't shift all bits out of the mantissa
 735   if (exp < 52)
 736     return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
 737                     APInt(width, mantissa >> (52 - exp));
 738
 739   // If the client didn't provide enough bits for us to shift the mantissa into
 740   // then the result is undefined, just return 0
 741   if (width <= exp - 52)
 742     return APInt(width, 0);
 743
 744   // Otherwise, we have to shift the mantissa bits up to the right location
 745   APInt Tmp(width, mantissa);
 746   Tmp <<= (unsigned)exp - 52;
 747   return isNeg ? -Tmp : Tmp;
 748 }
 749
 750 /// This function converts this APInt to a double.
 751 /// The layout for double is as following (IEEE Standard 754):
 752 ///  --------------------------------------
 753 /// |  Sign    Exponent    Fraction    Bias |
 754 /// |-------------------------------------- |
 755 /// |  1[63]   11[62-52]   52[51-00]   1023 |
 756 ///  --------------------------------------
 757 double APInt::roundToDouble(bool isSigned) const {
 758
 759   // Handle the simple case where the value is contained in one uint64_t.
 760   // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
 761   if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
 762     if (isSigned) {
 763       int64_t sext = SignExtend64(getWord(0), BitWidth);
 764       return double(sext);
 765     } else
 766       return double(getWord(0));
 767   }
 768
 769   // Determine if the value is negative.
 770   bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
 771
 772   // Construct the absolute value if we're negative.
 773   APInt Tmp(isNeg ? -(*this) : (*this));
 774
 775   // Figure out how many bits we're using.
 776   unsigned n = Tmp.getActiveBits();
 777
 778   // The exponent (without bias normalization) is just the number of bits
 779   // we are using. Note that the sign bit is gone since we constructed the
 780   // absolute value.
 781   uint64_t exp = n;
 782
 783   // Return infinity for exponent overflow
 784   if (exp > 1023) {
 785     if (!isSigned || !isNeg)
 786       return std::numeric_limits<double>::infinity();
 787     else
 788       return -std::numeric_limits<double>::infinity();
 789   }
 790   exp += 1023; // Increment for 1023 bias
 791
 792   // Number of bits in mantissa is 52. To obtain the mantissa value, we must
 793   // extract the high 52 bits from the correct words in pVal.
 794   uint64_t mantissa;
 795   unsigned hiWord = whichWord(n-1);
 796   if (hiWord == 0) {
 797     mantissa = Tmp.U.pVal[0];
 798     if (n > 52)
 799       mantissa >>= n - 52; // shift down, we want the top 52 bits.
 800   } else {
 801     assert(hiWord > 0 && "huh?");
 802     uint64_t hibits = Tmp.U.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
 803     uint64_t lobits = Tmp.U.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
 804     mantissa = hibits | lobits;
 805   }
 806
 807   // The leading bit of mantissa is implicit, so get rid of it.
 808   uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
 809   union {
 810     double D;
 811     uint64_t I;
 812   } T;
 813   T.I = sign | (exp << 52) | mantissa;
 814   return T.D;
 815 }
 816
 817 // Truncate to new width.
 818 APInt APInt::trunc(unsigned width) const {
 819   assert(width < BitWidth && "Invalid APInt Truncate request");
 820   assert(width && "Can't truncate to 0 bits");
 821
 822   if (width <= APINT_BITS_PER_WORD)
 823     return APInt(width, getRawData()[0]);
 824
 825   APInt Result(getMemory(getNumWords(width)), width);
 826
 827   // Copy full words.
 828   unsigned i;
 829   for (i = 0; i != width / APINT_BITS_PER_WORD; i++)
 830     Result.U.pVal[i] = U.pVal[i];
 831
 832   // Truncate and copy any partial word.
 833   unsigned bits = (0 - width) % APINT_BITS_PER_WORD;
 834   if (bits != 0)
 835     Result.U.pVal[i] = U.pVal[i] << bits >> bits;
 836
 837   return Result;
 838 }
 839
 840 // Sign extend to a new width.
 841 APInt APInt::sext(unsigned Width) const {
 842   assert(Width > BitWidth && "Invalid APInt SignExtend request");
 843
 844   if (Width <= APINT_BITS_PER_WORD)
 845     return APInt(Width, SignExtend64(U.VAL, BitWidth));
 846
 847   APInt Result(getMemory(getNumWords(Width)), Width);
 848
 849   // Copy words.
 850   std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
 851
 852   // Sign extend the last word since there may be unused bits in the input.
 853   Result.U.pVal[getNumWords() - 1] =
 854       SignExtend64(Result.U.pVal[getNumWords() - 1],
 855                    ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
 856
 857   // Fill with sign bits.
 858   std::memset(Result.U.pVal + getNumWords(), isNegative() ? -1 : 0,
 859               (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
 860   Result.clearUnusedBits();
 861   return Result;
 862 }
 863
 864 //  Zero extend to a new width.
 865 APInt APInt::zext(unsigned width) const {
 866   assert(width > BitWidth && "Invalid APInt ZeroExtend request");
 867
 868   if (width <= APINT_BITS_PER_WORD)
 869     return APInt(width, U.VAL);
 870
 871   APInt Result(getMemory(getNumWords(width)), width);
 872
 873   // Copy words.
 874   std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
 875
 876   // Zero remaining words.
 877   std::memset(Result.U.pVal + getNumWords(), 0,
 878               (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
 879
 880   return Result;
 881 }
 882
 883 APInt APInt::zextOrTrunc(unsigned width) const {
 884   if (BitWidth < width)
 885     return zext(width);
 886   if (BitWidth > width)
 887     return trunc(width);
 888   return *this;
 889 }
 890
 891 APInt APInt::sextOrTrunc(unsigned width) const {
 892   if (BitWidth < width)
 893     return sext(width);
 894   if (BitWidth > width)
 895     return trunc(width);
 896   return *this;
 897 }
 898
 899 APInt APInt::zextOrSelf(unsigned width) const {
 900   if (BitWidth < width)
 901     return zext(width);
 902   return *this;
 903 }
 904
 905 APInt APInt::sextOrSelf(unsigned width) const {
 906   if (BitWidth < width)
 907     return sext(width);
 908   return *this;
 909 }
 910
 911 /// Arithmetic right-shift this APInt by shiftAmt.
 912 /// @brief Arithmetic right-shift function.
 913 void APInt::ashrInPlace(const APInt &shiftAmt) {
 914   ashrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
 915 }
 916
 917 /// Arithmetic right-shift this APInt by shiftAmt.
 918 /// @brief Arithmetic right-shift function.
 919 void APInt::ashrSlowCase(unsigned ShiftAmt) {
 920   // Don't bother performing a no-op shift.
 921   if (!ShiftAmt)
 922     return;
 923
 924   // Save the original sign bit for later.
 925   bool Negative = isNegative();
 926
 927   // WordShift is the inter-part shift; BitShift is is intra-part shift.
 928   unsigned WordShift = ShiftAmt / APINT_BITS_PER_WORD;
 929   unsigned BitShift = ShiftAmt % APINT_BITS_PER_WORD;
 930
 931   unsigned WordsToMove = getNumWords() - WordShift;
 932   if (WordsToMove != 0) {
 933     // Sign extend the last word to fill in the unused bits.
 934     U.pVal[getNumWords() - 1] = SignExtend64(
 935         U.pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
 936
 937     // Fastpath for moving by whole words.
 938     if (BitShift == 0) {
 939       std::memmove(U.pVal, U.pVal + WordShift, WordsToMove * APINT_WORD_SIZE);
 940     } else {
 941       // Move the words containing significant bits.
 942       for (unsigned i = 0; i != WordsToMove - 1; ++i)
 943         U.pVal[i] = (U.pVal[i + WordShift] >> BitShift) |
 944                     (U.pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift));
 945
 946       // Handle the last word which has no high bits to copy.
 947       U.pVal[WordsToMove - 1] = U.pVal[WordShift + WordsToMove - 1] >> BitShift;
 948       // Sign extend one more time.
 949       U.pVal[WordsToMove - 1] =
 950           SignExtend64(U.pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift);
 951     }
 952   }
 953
 954   // Fill in the remainder based on the original sign.
 955   std::memset(U.pVal + WordsToMove, Negative ? -1 : 0,
 956               WordShift * APINT_WORD_SIZE);
 957   clearUnusedBits();
 958 }
 959
 960 /// Logical right-shift this APInt by shiftAmt.
 961 /// @brief Logical right-shift function.
 962 void APInt::lshrInPlace(const APInt &shiftAmt) {
 963   lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
 964 }
 965
 966 /// Logical right-shift this APInt by shiftAmt.
 967 /// @brief Logical right-shift function.
 968 void APInt::lshrSlowCase(unsigned ShiftAmt) {
 969   tcShiftRight(U.pVal, getNumWords(), ShiftAmt);
 970 }
 971
 972 /// Left-shift this APInt by shiftAmt.
 973 /// @brief Left-shift function.
 974 APInt &APInt::operator<<=(const APInt &shiftAmt) {
 975   // It's undefined behavior in C to shift by BitWidth or greater.
 976   *this <<= (unsigned)shiftAmt.getLimitedValue(BitWidth);
 977   return *this;
 978 }
 979
 980 void APInt::shlSlowCase(unsigned ShiftAmt) {
 981   tcShiftLeft(U.pVal, getNumWords(), ShiftAmt);
 982   clearUnusedBits();
 983 }
 984
 985 // Calculate the rotate amount modulo the bit width.
 986 static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) {
 987   unsigned rotBitWidth = rotateAmt.getBitWidth();
 988   APInt rot = rotateAmt;
 989   if (rotBitWidth < BitWidth) {
 990     // Extend the rotate APInt, so that the urem doesn't divide by 0.
 991     // e.g. APInt(1, 32) would give APInt(1, 0).
 992     rot = rotateAmt.zext(BitWidth);
 993   }
 994   rot = rot.urem(APInt(rot.getBitWidth(), BitWidth));
 995   return rot.getLimitedValue(BitWidth);
 996 }
 997
 998 APInt APInt::rotl(const APInt &rotateAmt) const {
 999   return rotl(rotateModulo(BitWidth, rotateAmt));
1000 }
1001
1002 APInt APInt::rotl(unsigned rotateAmt) const {
1003   rotateAmt %= BitWidth;
1004   if (rotateAmt == 0)
1005     return *this;
1006   return shl(rotateAmt) | lshr(BitWidth - rotateAmt);
1007 }
1008
1009 APInt APInt::rotr(const APInt &rotateAmt) const {
1010   return rotr(rotateModulo(BitWidth, rotateAmt));
1011 }
1012
1013 APInt APInt::rotr(unsigned rotateAmt) const {
1014   rotateAmt %= BitWidth;
1015   if (rotateAmt == 0)
1016     return *this;
1017   return lshr(rotateAmt) | shl(BitWidth - rotateAmt);
1018 }
1019
1020 // Square Root - this method computes and returns the square root of "this".
1021 // Three mechanisms are used for computation. For small values (<= 5 bits),
1022 // a table lookup is done. This gets some performance for common cases. For
1023 // values using less than 52 bits, the value is converted to double and then
1024 // the libc sqrt function is called. The result is rounded and then converted
1025 // back to a uint64_t which is then used to construct the result. Finally,
1026 // the Babylonian method for computing square roots is used.
1027 APInt APInt::sqrt() const {
1028
1029   // Determine the magnitude of the value.
1030   unsigned magnitude = getActiveBits();
1031
1032   // Use a fast table for some small values. This also gets rid of some
1033   // rounding errors in libc sqrt for small values.
1034   if (magnitude <= 5) {
1035     static const uint8_t results[32] = {
1036       /*     0 */ 0,
1037       /*  1- 2 */ 1, 1,
1038       /*  3- 6 */ 2, 2, 2, 2,
1039       /*  7-12 */ 3, 3, 3, 3, 3, 3,
1040       /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1041       /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1042       /*    31 */ 6
1043     };
1044     return APInt(BitWidth, results[ (isSingleWord() ? U.VAL : U.pVal[0]) ]);
1045   }
1046
1047   // If the magnitude of the value fits in less than 52 bits (the precision of
1048   // an IEEE double precision floating point value), then we can use the
1049   // libc sqrt function which will probably use a hardware sqrt computation.
1050   // This should be faster than the algorithm below.
1051   if (magnitude < 52) {
1052     return APInt(BitWidth,
1053                  uint64_t(::round(::sqrt(double(isSingleWord() ? U.VAL
1054                                                                : U.pVal[0])))));
1055   }
1056
1057   // Okay, all the short cuts are exhausted. We must compute it. The following
1058   // is a classical Babylonian method for computing the square root. This code
1059   // was adapted to APInt from a wikipedia article on such computations.
1060   // See http://www.wikipedia.org/ and go to the page named
1061   // Calculate_an_integer_square_root.
1062   unsigned nbits = BitWidth, i = 4;
1063   APInt testy(BitWidth, 16);
1064   APInt x_old(BitWidth, 1);
1065   APInt x_new(BitWidth, 0);
1066   APInt two(BitWidth, 2);
1067
1068   // Select a good starting value using binary logarithms.
1069   for (;; i += 2, testy = testy.shl(2))
1070     if (i >= nbits || this->ule(testy)) {
1071       x_old = x_old.shl(i / 2);
1072       break;
1073     }
1074
1075   // Use the Babylonian method to arrive at the integer square root:
1076   for (;;) {
1077     x_new = (this->udiv(x_old) + x_old).udiv(two);
1078     if (x_old.ule(x_new))
1079       break;
1080     x_old = x_new;
1081   }
1082
1083   // Make sure we return the closest approximation
1084   // NOTE: The rounding calculation below is correct. It will produce an
1085   // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1086   // determined to be a rounding issue with pari/gp as it begins to use a
1087   // floating point representation after 192 bits. There are no discrepancies
1088   // between this algorithm and pari/gp for bit widths < 192 bits.
1089   APInt square(x_old * x_old);
1090   APInt nextSquare((x_old + 1) * (x_old +1));
1091   if (this->ult(square))
1092     return x_old;
1093   assert(this->ule(nextSquare) && "Error in APInt::sqrt computation");
1094   APInt midpoint((nextSquare - square).udiv(two));
1095   APInt offset(*this - square);
1096   if (offset.ult(midpoint))
1097     return x_old;
1098   return x_old + 1;
1099 }
1100
1101 /// Computes the multiplicative inverse of this APInt for a given modulo. The
1102 /// iterative extended Euclidean algorithm is used to solve for this value,
1103 /// however we simplify it to speed up calculating only the inverse, and take
1104 /// advantage of div+rem calculations. We also use some tricks to avoid copying
1105 /// (potentially large) APInts around.
1106 APInt APInt::multiplicativeInverse(const APInt& modulo) const {
1107   assert(ult(modulo) && "This APInt must be smaller than the modulo");
1108
1109   // Using the properties listed at the following web page (accessed 06/21/08):
1110   //   http://www.numbertheory.org/php/euclid.html
1111   // (especially the properties numbered 3, 4 and 9) it can be proved that
1112   // BitWidth bits suffice for all the computations in the algorithm implemented
1113   // below. More precisely, this number of bits suffice if the multiplicative
1114   // inverse exists, but may not suffice for the general extended Euclidean
1115   // algorithm.
1116
1117   APInt r[2] = { modulo, *this };
1118   APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
1119   APInt q(BitWidth, 0);
1120
1121   unsigned i;
1122   for (i = 0; r[i^1] != 0; i ^= 1) {
1123     // An overview of the math without the confusing bit-flipping:
1124     // q = r[i-2] / r[i-1]
1125     // r[i] = r[i-2] % r[i-1]
1126     // t[i] = t[i-2] - t[i-1] * q
1127     udivrem(r[i], r[i^1], q, r[i]);
1128     t[i] -= t[i^1] * q;
1129   }
1130
1131   // If this APInt and the modulo are not coprime, there is no multiplicative
1132   // inverse, so return 0. We check this by looking at the next-to-last
1133   // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1134   // algorithm.
1135   if (r[i] != 1)
1136     return APInt(BitWidth, 0);
1137
1138   // The next-to-last t is the multiplicative inverse.  However, we are
1139   // interested in a positive inverse. Calculate a positive one from a negative
1140   // one if necessary. A simple addition of the modulo suffices because
1141   // abs(t[i]) is known to be less than *this/2 (see the link above).
1142   if (t[i].isNegative())
1143     t[i] += modulo;
1144
1145   return std::move(t[i]);
1146 }
1147
1148 /// Calculate the magic numbers required to implement a signed integer division
1149 /// by a constant as a sequence of multiplies, adds and shifts.  Requires that
1150 /// the divisor not be 0, 1, or -1.  Taken from "Hacker's Delight", Henry S.
1151 /// Warren, Jr., chapter 10.
1152 APInt::ms APInt::magic() const {
1153   const APInt& d = *this;
1154   unsigned p;
1155   APInt ad, anc, delta, q1, r1, q2, r2, t;
1156   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1157   struct ms mag;
1158
1159   ad = d.abs();
1160   t = signedMin + (d.lshr(d.getBitWidth() - 1));
1161   anc = t - 1 - t.urem(ad);   // absolute value of nc
1162   p = d.getBitWidth() - 1;    // initialize p
1163   q1 = signedMin.udiv(anc);   // initialize q1 = 2p/abs(nc)
1164   r1 = signedMin - q1*anc;    // initialize r1 = rem(2p,abs(nc))
1165   q2 = signedMin.udiv(ad);    // initialize q2 = 2p/abs(d)
1166   r2 = signedMin - q2*ad;     // initialize r2 = rem(2p,abs(d))
1167   do {
1168     p = p + 1;
1169     q1 = q1<<1;          // update q1 = 2p/abs(nc)
1170     r1 = r1<<1;          // update r1 = rem(2p/abs(nc))
1171     if (r1.uge(anc)) {  // must be unsigned comparison
1172       q1 = q1 + 1;
1173       r1 = r1 - anc;
1174     }
1175     q2 = q2<<1;          // update q2 = 2p/abs(d)
1176     r2 = r2<<1;          // update r2 = rem(2p/abs(d))
1177     if (r2.uge(ad)) {   // must be unsigned comparison
1178       q2 = q2 + 1;
1179       r2 = r2 - ad;
1180     }
1181     delta = ad - r2;
1182   } while (q1.ult(delta) || (q1 == delta && r1 == 0));
1183
1184   mag.m = q2 + 1;
1185   if (d.isNegative()) mag.m = -mag.m;   // resulting magic number
1186   mag.s = p - d.getBitWidth();          // resulting shift
1187   return mag;
1188 }
1189
1190 /// Calculate the magic numbers required to implement an unsigned integer
1191 /// division by a constant as a sequence of multiplies, adds and shifts.
1192 /// Requires that the divisor not be 0.  Taken from "Hacker's Delight", Henry
1193 /// S. Warren, Jr., chapter 10.
1194 /// LeadingZeros can be used to simplify the calculation if the upper bits
1195 /// of the divided value are known zero.
1196 APInt::mu APInt::magicu(unsigned LeadingZeros) const {
1197   const APInt& d = *this;
1198   unsigned p;
1199   APInt nc, delta, q1, r1, q2, r2;
1200   struct mu magu;
1201   magu.a = 0;               // initialize "add" indicator
1202   APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros);
1203   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1204   APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
1205
1206   nc = allOnes - (allOnes - d).urem(d);
1207   p = d.getBitWidth() - 1;  // initialize p
1208   q1 = signedMin.udiv(nc);  // initialize q1 = 2p/nc
1209   r1 = signedMin - q1*nc;   // initialize r1 = rem(2p,nc)
1210   q2 = signedMax.udiv(d);   // initialize q2 = (2p-1)/d
1211   r2 = signedMax - q2*d;    // initialize r2 = rem((2p-1),d)
1212   do {
1213     p = p + 1;
1214     if (r1.uge(nc - r1)) {
1215       q1 = q1 + q1 + 1;  // update q1
1216       r1 = r1 + r1 - nc; // update r1
1217     }
1218     else {
1219       q1 = q1+q1; // update q1
1220       r1 = r1+r1; // update r1
1221     }
1222     if ((r2 + 1).uge(d - r2)) {
1223       if (q2.uge(signedMax)) magu.a = 1;
1224       q2 = q2+q2 + 1;     // update q2
1225       r2 = r2+r2 + 1 - d; // update r2
1226     }
1227     else {
1228       if (q2.uge(signedMin)) magu.a = 1;
1229       q2 = q2+q2;     // update q2
1230       r2 = r2+r2 + 1; // update r2
1231     }
1232     delta = d - 1 - r2;
1233   } while (p < d.getBitWidth()*2 &&
1234            (q1.ult(delta) || (q1 == delta && r1 == 0)));
1235   magu.m = q2 + 1; // resulting magic number
1236   magu.s = p - d.getBitWidth();  // resulting shift
1237   return magu;
1238 }
1239
1240 /// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1241 /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1242 /// variables here have the same names as in the algorithm. Comments explain
1243 /// the algorithm and any deviation from it.
1244 static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
1245                      unsigned m, unsigned n) {
1246   assert(u && "Must provide dividend");
1247   assert(v && "Must provide divisor");
1248   assert(q && "Must provide quotient");
1249   assert(u != v && u != q && v != q && "Must use different memory");
1250   assert(n>1 && "n must be > 1");
1251
1252   // b denotes the base of the number system. In our case b is 2^32.
1253   const uint64_t b = uint64_t(1) << 32;
1254
1255 // The DEBUG macros here tend to be spam in the debug output if you're not
1256 // debugging this code. Disable them unless KNUTH_DEBUG is defined.
1257 #pragma push_macro("DEBUG")
1258 #ifndef KNUTH_DEBUG
1259 #undef DEBUG
1260 #define DEBUG(X) do {} while (false)
1261 #endif
1262
1263   DEBUG(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
1264   DEBUG(dbgs() << "KnuthDiv: original:");
1265   DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1266   DEBUG(dbgs() << " by");
1267   DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]);
1268   DEBUG(dbgs() << '\n');
1269   // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1270   // u and v by d. Note that we have taken Knuth's advice here to use a power
1271   // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1272   // 2 allows us to shift instead of multiply and it is easy to determine the
1273   // shift amount from the leading zeros.  We are basically normalizing the u
1274   // and v so that its high bits are shifted to the top of v's range without
1275   // overflow. Note that this can require an extra word in u so that u must
1276   // be of length m+n+1.
1277   unsigned shift = countLeadingZeros(v[n-1]);
1278   uint32_t v_carry = 0;
1279   uint32_t u_carry = 0;
1280   if (shift) {
1281     for (unsigned i = 0; i < m+n; ++i) {
1282       uint32_t u_tmp = u[i] >> (32 - shift);
1283       u[i] = (u[i] << shift) | u_carry;
1284       u_carry = u_tmp;
1285     }
1286     for (unsigned i = 0; i < n; ++i) {
1287       uint32_t v_tmp = v[i] >> (32 - shift);
1288       v[i] = (v[i] << shift) | v_carry;
1289       v_carry = v_tmp;
1290     }
1291   }
1292   u[m+n] = u_carry;
1293
1294   DEBUG(dbgs() << "KnuthDiv:   normal:");
1295   DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1296   DEBUG(dbgs() << " by");
1297   DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]);
1298   DEBUG(dbgs() << '\n');
1299
1300   // D2. [Initialize j.]  Set j to m. This is the loop counter over the places.
1301   int j = m;
1302   do {
1303     DEBUG(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
1304     // D3. [Calculate q'.].
1305     //     Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1306     //     Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1307     // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1308     // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test
1309     // on v[n-2] determines at high speed most of the cases in which the trial
1310     // value qp is one too large, and it eliminates all cases where qp is two
1311     // too large.
1312     uint64_t dividend = Make_64(u[j+n], u[j+n-1]);
1313     DEBUG(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
1314     uint64_t qp = dividend / v[n-1];
1315     uint64_t rp = dividend % v[n-1];
1316     if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
1317       qp--;
1318       rp += v[n-1];
1319       if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
1320         qp--;
1321     }
1322     DEBUG(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
1323
1324     // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1325     // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1326     // consists of a simple multiplication by a one-place number, combined with
1327     // a subtraction.
1328     // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1329     // this step is actually negative, (u[j+n]...u[j]) should be left as the
1330     // true value plus b**(n+1), namely as the b's complement of
1331     // the true value, and a "borrow" to the left should be remembered.
1332     int64_t borrow = 0;
1333     for (unsigned i = 0; i < n; ++i) {
1334       uint64_t p = uint64_t(qp) * uint64_t(v[i]);
1335       int64_t subres = int64_t(u[j+i]) - borrow - Lo_32(p);
1336       u[j+i] = Lo_32(subres);
1337       borrow = Hi_32(p) - Hi_32(subres);
1338       DEBUG(dbgs() << "KnuthDiv: u[j+i] = " << u[j+i]
1339                    << ", borrow = " << borrow << '\n');
1340     }
1341     bool isNeg = u[j+n] < borrow;
1342     u[j+n] -= Lo_32(borrow);
1343
1344     DEBUG(dbgs() << "KnuthDiv: after subtraction:");
1345     DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1346     DEBUG(dbgs() << '\n');
1347
1348     // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1349     // negative, go to step D6; otherwise go on to step D7.
1350     q[j] = Lo_32(qp);
1351     if (isNeg) {
1352       // D6. [Add back]. The probability that this step is necessary is very
1353       // small, on the order of only 2/b. Make sure that test data accounts for
1354       // this possibility. Decrease q[j] by 1
1355       q[j]--;
1356       // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1357       // A carry will occur to the left of u[j+n], and it should be ignored
1358       // since it cancels with the borrow that occurred in D4.
1359       bool carry = false;
1360       for (unsigned i = 0; i < n; i++) {
1361         uint32_t limit = std::min(u[j+i],v[i]);
1362         u[j+i] += v[i] + carry;
1363         carry = u[j+i] < limit || (carry && u[j+i] == limit);
1364       }
1365       u[j+n] += carry;
1366     }
1367     DEBUG(dbgs() << "KnuthDiv: after correction:");
1368     DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1369     DEBUG(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
1370
1371   // D7. [Loop on j.]  Decrease j by one. Now if j >= 0, go back to D3.
1372   } while (--j >= 0);
1373
1374   DEBUG(dbgs() << "KnuthDiv: quotient:");
1375   DEBUG(for (int i = m; i >=0; i--) dbgs() <<" " << q[i]);
1376   DEBUG(dbgs() << '\n');
1377
1378   // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1379   // remainder may be obtained by dividing u[...] by d. If r is non-null we
1380   // compute the remainder (urem uses this).
1381   if (r) {
1382     // The value d is expressed by the "shift" value above since we avoided
1383     // multiplication by d by using a shift left. So, all we have to do is
1384     // shift right here.
1385     if (shift) {
1386       uint32_t carry = 0;
1387       DEBUG(dbgs() << "KnuthDiv: remainder:");
1388       for (int i = n-1; i >= 0; i--) {
1389         r[i] = (u[i] >> shift) | carry;
1390         carry = u[i] << (32 - shift);
1391         DEBUG(dbgs() << " " << r[i]);
1392       }
1393     } else {
1394       for (int i = n-1; i >= 0; i--) {
1395         r[i] = u[i];
1396         DEBUG(dbgs() << " " << r[i]);
1397       }
1398     }
1399     DEBUG(dbgs() << '\n');
1400   }
1401   DEBUG(dbgs() << '\n');
1402
1403 #pragma pop_macro("DEBUG")
1404 }
1405
1406 void APInt::divide(const WordType *LHS, unsigned lhsWords, const WordType *RHS,
1407                    unsigned rhsWords, WordType *Quotient, WordType *Remainder) {
1408   assert(lhsWords >= rhsWords && "Fractional result");
1409
1410   // First, compose the values into an array of 32-bit words instead of
1411   // 64-bit words. This is a necessity of both the "short division" algorithm
1412   // and the Knuth "classical algorithm" which requires there to be native
1413   // operations for +, -, and * on an m bit value with an m*2 bit result. We
1414   // can't use 64-bit operands here because we don't have native results of
1415   // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1416   // work on large-endian machines.
1417   unsigned n = rhsWords * 2;
1418   unsigned m = (lhsWords * 2) - n;
1419
1420   // Allocate space for the temporary values we need either on the stack, if
1421   // it will fit, or on the heap if it won't.
1422   uint32_t SPACE[128];
1423   uint32_t *U = nullptr;
1424   uint32_t *V = nullptr;
1425   uint32_t *Q = nullptr;
1426   uint32_t *R = nullptr;
1427   if ((Remainder?4:3)*n+2*m+1 <= 128) {
1428     U = &SPACE[0];
1429     V = &SPACE[m+n+1];
1430     Q = &SPACE[(m+n+1) + n];
1431     if (Remainder)
1432       R = &SPACE[(m+n+1) + n + (m+n)];
1433   } else {
1434     U = new uint32_t[m + n + 1];
1435     V = new uint32_t[n];
1436     Q = new uint32_t[m+n];
1437     if (Remainder)
1438       R = new uint32_t[n];
1439   }
1440
1441   // Initialize the dividend
1442   memset(U, 0, (m+n+1)*sizeof(uint32_t));
1443   for (unsigned i = 0; i < lhsWords; ++i) {
1444     uint64_t tmp = LHS[i];
1445     U[i * 2] = Lo_32(tmp);
1446     U[i * 2 + 1] = Hi_32(tmp);
1447   }
1448   U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
1449
1450   // Initialize the divisor
1451   memset(V, 0, (n)*sizeof(uint32_t));
1452   for (unsigned i = 0; i < rhsWords; ++i) {
1453     uint64_t tmp = RHS[i];
1454     V[i * 2] = Lo_32(tmp);
1455     V[i * 2 + 1] = Hi_32(tmp);
1456   }
1457
1458   // initialize the quotient and remainder
1459   memset(Q, 0, (m+n) * sizeof(uint32_t));
1460   if (Remainder)
1461     memset(R, 0, n * sizeof(uint32_t));
1462
1463   // Now, adjust m and n for the Knuth division. n is the number of words in
1464   // the divisor. m is the number of words by which the dividend exceeds the
1465   // divisor (i.e. m+n is the length of the dividend). These sizes must not
1466   // contain any zero words or the Knuth algorithm fails.
1467   for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
1468     n--;
1469     m++;
1470   }
1471   for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
1472     m--;
1473
1474   // If we're left with only a single word for the divisor, Knuth doesn't work
1475   // so we implement the short division algorithm here. This is much simpler
1476   // and faster because we are certain that we can divide a 64-bit quantity
1477   // by a 32-bit quantity at hardware speed and short division is simply a
1478   // series of such operations. This is just like doing short division but we
1479   // are using base 2^32 instead of base 10.
1480   assert(n != 0 && "Divide by zero?");
1481   if (n == 1) {
1482     uint32_t divisor = V[0];
1483     uint32_t remainder = 0;
1484     for (int i = m; i >= 0; i--) {
1485       uint64_t partial_dividend = Make_64(remainder, U[i]);
1486       if (partial_dividend == 0) {
1487         Q[i] = 0;
1488         remainder = 0;
1489       } else if (partial_dividend < divisor) {
1490         Q[i] = 0;
1491         remainder = Lo_32(partial_dividend);
1492       } else if (partial_dividend == divisor) {
1493         Q[i] = 1;
1494         remainder = 0;
1495       } else {
1496         Q[i] = Lo_32(partial_dividend / divisor);
1497         remainder = Lo_32(partial_dividend - (Q[i] * divisor));
1498       }
1499     }
1500     if (R)
1501       R[0] = remainder;
1502   } else {
1503     // Now we're ready to invoke the Knuth classical divide algorithm. In this
1504     // case n > 1.
1505     KnuthDiv(U, V, Q, R, m, n);
1506   }
1507
1508   // If the caller wants the quotient
1509   if (Quotient) {
1510     for (unsigned i = 0; i < lhsWords; ++i)
1511       Quotient[i] = Make_64(Q[i*2+1], Q[i*2]);
1512   }
1513
1514   // If the caller wants the remainder
1515   if (Remainder) {
1516     for (unsigned i = 0; i < rhsWords; ++i)
1517       Remainder[i] = Make_64(R[i*2+1], R[i*2]);
1518   }
1519
1520   // Clean up the memory we allocated.
1521   if (U != &SPACE[0]) {
1522     delete [] U;
1523     delete [] V;
1524     delete [] Q;
1525     delete [] R;
1526   }
1527 }
1528
1529 APInt APInt::udiv(const APInt &RHS) const {
1530   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1531
1532   // First, deal with the easy case
1533   if (isSingleWord()) {
1534     assert(RHS.U.VAL != 0 && "Divide by zero?");
1535     return APInt(BitWidth, U.VAL / RHS.U.VAL);
1536   }
1537
1538   // Get some facts about the LHS and RHS number of bits and words
1539   unsigned lhsWords = getNumWords(getActiveBits());
1540   unsigned rhsBits  = RHS.getActiveBits();
1541   unsigned rhsWords = getNumWords(rhsBits);
1542   assert(rhsWords && "Divided by zero???");
1543
1544   // Deal with some degenerate cases
1545   if (!lhsWords)
1546     // 0 / X ===> 0
1547     return APInt(BitWidth, 0);
1548   if (rhsBits == 1)
1549     // X / 1 ===> X
1550     return *this;
1551   if (lhsWords < rhsWords || this->ult(RHS))
1552     // X / Y ===> 0, iff X < Y
1553     return APInt(BitWidth, 0);
1554   if (*this == RHS)
1555     // X / X ===> 1
1556     return APInt(BitWidth, 1);
1557   if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1558     // All high words are zero, just use native divide
1559     return APInt(BitWidth, this->U.pVal[0] / RHS.U.pVal[0]);
1560
1561   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1562   APInt Quotient(BitWidth, 0); // to hold result.
1563   divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, nullptr);
1564   return Quotient;
1565 }
1566
1567 APInt APInt::udiv(uint64_t RHS) const {
1568   assert(RHS != 0 && "Divide by zero?");
1569
1570   // First, deal with the easy case
1571   if (isSingleWord())
1572     return APInt(BitWidth, U.VAL / RHS);
1573
1574   // Get some facts about the LHS words.
1575   unsigned lhsWords = getNumWords(getActiveBits());
1576
1577   // Deal with some degenerate cases
1578   if (!lhsWords)
1579     // 0 / X ===> 0
1580     return APInt(BitWidth, 0);
1581   if (RHS == 1)
1582     // X / 1 ===> X
1583     return *this;
1584   if (this->ult(RHS))
1585     // X / Y ===> 0, iff X < Y
1586     return APInt(BitWidth, 0);
1587   if (*this == RHS)
1588     // X / X ===> 1
1589     return APInt(BitWidth, 1);
1590   if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1591     // All high words are zero, just use native divide
1592     return APInt(BitWidth, this->U.pVal[0] / RHS);
1593
1594   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1595   APInt Quotient(BitWidth, 0); // to hold result.
1596   divide(U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, nullptr);
1597   return Quotient;
1598 }
1599
1600 APInt APInt::sdiv(const APInt &RHS) const {
1601   if (isNegative()) {
1602     if (RHS.isNegative())
1603       return (-(*this)).udiv(-RHS);
1604     return -((-(*this)).udiv(RHS));
1605   }
1606   if (RHS.isNegative())
1607     return -(this->udiv(-RHS));
1608   return this->udiv(RHS);
1609 }
1610
1611 APInt APInt::sdiv(int64_t RHS) const {
1612   if (isNegative()) {
1613     if (RHS < 0)
1614       return (-(*this)).udiv(-RHS);
1615     return -((-(*this)).udiv(RHS));
1616   }
1617   if (RHS < 0)
1618     return -(this->udiv(-RHS));
1619   return this->udiv(RHS);
1620 }
1621
1622 APInt APInt::urem(const APInt &RHS) const {
1623   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1624   if (isSingleWord()) {
1625     assert(RHS.U.VAL != 0 && "Remainder by zero?");
1626     return APInt(BitWidth, U.VAL % RHS.U.VAL);
1627   }
1628
1629   // Get some facts about the LHS
1630   unsigned lhsWords = getNumWords(getActiveBits());
1631
1632   // Get some facts about the RHS
1633   unsigned rhsBits = RHS.getActiveBits();
1634   unsigned rhsWords = getNumWords(rhsBits);
1635   assert(rhsWords && "Performing remainder operation by zero ???");
1636
1637   // Check the degenerate cases
1638   if (lhsWords == 0)
1639     // 0 % Y ===> 0
1640     return APInt(BitWidth, 0);
1641   if (rhsBits == 1)
1642     // X % 1 ===> 0
1643     return APInt(BitWidth, 0);
1644   if (lhsWords < rhsWords || this->ult(RHS))
1645     // X % Y ===> X, iff X < Y
1646     return *this;
1647   if (*this == RHS)
1648     // X % X == 0;
1649     return APInt(BitWidth, 0);
1650   if (lhsWords == 1)
1651     // All high words are zero, just use native remainder
1652     return APInt(BitWidth, U.pVal[0] % RHS.U.pVal[0]);
1653
1654   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1655   APInt Remainder(BitWidth, 0);
1656   divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, nullptr, Remainder.U.pVal);
1657   return Remainder;
1658 }
1659
1660 uint64_t APInt::urem(uint64_t RHS) const {
1661   assert(RHS != 0 && "Remainder by zero?");
1662
1663   if (isSingleWord())
1664     return U.VAL % RHS;
1665
1666   // Get some facts about the LHS
1667   unsigned lhsWords = getNumWords(getActiveBits());
1668
1669   // Check the degenerate cases
1670   if (lhsWords == 0)
1671     // 0 % Y ===> 0
1672     return 0;
1673   if (RHS == 1)
1674     // X % 1 ===> 0
1675     return 0;
1676   if (this->ult(RHS))
1677     // X % Y ===> X, iff X < Y
1678     return getZExtValue();
1679   if (*this == RHS)
1680     // X % X == 0;
1681     return 0;
1682   if (lhsWords == 1)
1683     // All high words are zero, just use native remainder
1684     return U.pVal[0] % RHS;
1685
1686   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1687   uint64_t Remainder;
1688   divide(U.pVal, lhsWords, &RHS, 1, nullptr, &Remainder);
1689   return Remainder;
1690 }
1691
1692 APInt APInt::srem(const APInt &RHS) const {
1693   if (isNegative()) {
1694     if (RHS.isNegative())
1695       return -((-(*this)).urem(-RHS));
1696     return -((-(*this)).urem(RHS));
1697   }
1698   if (RHS.isNegative())
1699     return this->urem(-RHS);
1700   return this->urem(RHS);
1701 }
1702
1703 int64_t APInt::srem(int64_t RHS) const {
1704   if (isNegative()) {
1705     if (RHS < 0)
1706       return -((-(*this)).urem(-RHS));
1707     return -((-(*this)).urem(RHS));
1708   }
1709   if (RHS < 0)
1710     return this->urem(-RHS);
1711   return this->urem(RHS);
1712 }
1713
1714 void APInt::udivrem(const APInt &LHS, const APInt &RHS,
1715                     APInt &Quotient, APInt &Remainder) {
1716   assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same");
1717   unsigned BitWidth = LHS.BitWidth;
1718
1719   // First, deal with the easy case
1720   if (LHS.isSingleWord()) {
1721     assert(RHS.U.VAL != 0 && "Divide by zero?");
1722     uint64_t QuotVal = LHS.U.VAL / RHS.U.VAL;
1723     uint64_t RemVal = LHS.U.VAL % RHS.U.VAL;
1724     Quotient = APInt(BitWidth, QuotVal);
1725     Remainder = APInt(BitWidth, RemVal);
1726     return;
1727   }
1728
1729   // Get some size facts about the dividend and divisor
1730   unsigned lhsWords = getNumWords(LHS.getActiveBits());
1731   unsigned rhsBits  = RHS.getActiveBits();
1732   unsigned rhsWords = getNumWords(rhsBits);
1733   assert(rhsWords && "Performing divrem operation by zero ???");
1734
1735   // Check the degenerate cases
1736   if (lhsWords == 0) {
1737     Quotient = 0;                // 0 / Y ===> 0
1738     Remainder = 0;               // 0 % Y ===> 0
1739     return;
1740   }
1741
1742   if (rhsBits == 1) {
1743     Quotient = LHS;             // X / 1 ===> X
1744     Remainder = 0;              // X % 1 ===> 0
1745   }
1746
1747   if (lhsWords < rhsWords || LHS.ult(RHS)) {
1748     Remainder = LHS;            // X % Y ===> X, iff X < Y
1749     Quotient = 0;               // X / Y ===> 0, iff X < Y
1750     return;
1751   }
1752
1753   if (LHS == RHS) {
1754     Quotient  = 1;              // X / X ===> 1
1755     Remainder = 0;              // X % X ===> 0;
1756     return;
1757   }
1758
1759   // Make sure there is enough space to hold the results.
1760   // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1761   // change the size. This is necessary if Quotient or Remainder is aliased
1762   // with LHS or RHS.
1763   Quotient.reallocate(BitWidth);
1764   Remainder.reallocate(BitWidth);
1765
1766   if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1767     // There is only one word to consider so use the native versions.
1768     uint64_t lhsValue = LHS.U.pVal[0];
1769     uint64_t rhsValue = RHS.U.pVal[0];
1770     Quotient = lhsValue / rhsValue;
1771     Remainder = lhsValue % rhsValue;
1772     return;
1773   }
1774
1775   // Okay, lets do it the long way
1776   divide(LHS.U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal,
1777          Remainder.U.pVal);
1778   // Clear the rest of the Quotient and Remainder.
1779   std::memset(Quotient.U.pVal + lhsWords, 0,
1780               (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1781   std::memset(Remainder.U.pVal + rhsWords, 0,
1782               (getNumWords(BitWidth) - rhsWords) * APINT_WORD_SIZE);
1783 }
1784
1785 void APInt::udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient,
1786                     uint64_t &Remainder) {
1787   assert(RHS != 0 && "Divide by zero?");
1788   unsigned BitWidth = LHS.BitWidth;
1789
1790   // First, deal with the easy case
1791   if (LHS.isSingleWord()) {
1792     uint64_t QuotVal = LHS.U.VAL / RHS;
1793     Remainder = LHS.U.VAL % RHS;
1794     Quotient = APInt(BitWidth, QuotVal);
1795     return;
1796   }
1797
1798   // Get some size facts about the dividend and divisor
1799   unsigned lhsWords = getNumWords(LHS.getActiveBits());
1800
1801   // Check the degenerate cases
1802   if (lhsWords == 0) {
1803     Quotient = 0;                // 0 / Y ===> 0
1804     Remainder = 0;               // 0 % Y ===> 0
1805     return;
1806   }
1807
1808   if (RHS == 1) {
1809     Quotient = LHS;             // X / 1 ===> X
1810     Remainder = 0;              // X % 1 ===> 0
1811   }
1812
1813   if (LHS.ult(RHS)) {
1814     Remainder = LHS.getZExtValue(); // X % Y ===> X, iff X < Y
1815     Quotient = 0;                   // X / Y ===> 0, iff X < Y
1816     return;
1817   }
1818
1819   if (LHS == RHS) {
1820     Quotient  = 1;              // X / X ===> 1
1821     Remainder = 0;              // X % X ===> 0;
1822     return;
1823   }
1824
1825   // Make sure there is enough space to hold the results.
1826   // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1827   // change the size. This is necessary if Quotient is aliased with LHS.
1828   Quotient.reallocate(BitWidth);
1829
1830   if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1831     // There is only one word to consider so use the native versions.
1832     uint64_t lhsValue = LHS.U.pVal[0];
1833     Quotient = lhsValue / RHS;
1834     Remainder = lhsValue % RHS;
1835     return;
1836   }
1837
1838   // Okay, lets do it the long way
1839   divide(LHS.U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, &Remainder);
1840   // Clear the rest of the Quotient.
1841   std::memset(Quotient.U.pVal + lhsWords, 0,
1842               (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1843 }
1844
1845 void APInt::sdivrem(const APInt &LHS, const APInt &RHS,
1846                     APInt &Quotient, APInt &Remainder) {
1847   if (LHS.isNegative()) {
1848     if (RHS.isNegative())
1849       APInt::udivrem(-LHS, -RHS, Quotient, Remainder);
1850     else {
1851       APInt::udivrem(-LHS, RHS, Quotient, Remainder);
1852       Quotient.negate();
1853     }
1854     Remainder.negate();
1855   } else if (RHS.isNegative()) {
1856     APInt::udivrem(LHS, -RHS, Quotient, Remainder);
1857     Quotient.negate();
1858   } else {
1859     APInt::udivrem(LHS, RHS, Quotient, Remainder);
1860   }
1861 }
1862
1863 void APInt::sdivrem(const APInt &LHS, int64_t RHS,
1864                     APInt &Quotient, int64_t &Remainder) {
1865   uint64_t R = Remainder;
1866   if (LHS.isNegative()) {
1867     if (RHS < 0)
1868       APInt::udivrem(-LHS, -RHS, Quotient, R);
1869     else {
1870       APInt::udivrem(-LHS, RHS, Quotient, R);
1871       Quotient.negate();
1872     }
1873     R = -R;
1874   } else if (RHS < 0) {
1875     APInt::udivrem(LHS, -RHS, Quotient, R);
1876     Quotient.negate();
1877   } else {
1878     APInt::udivrem(LHS, RHS, Quotient, R);
1879   }
1880   Remainder = R;
1881 }
1882
1883 APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const {
1884   APInt Res = *this+RHS;
1885   Overflow = isNonNegative() == RHS.isNonNegative() &&
1886              Res.isNonNegative() != isNonNegative();
1887   return Res;
1888 }
1889
1890 APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const {
1891   APInt Res = *this+RHS;
1892   Overflow = Res.ult(RHS);
1893   return Res;
1894 }
1895
1896 APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const {
1897   APInt Res = *this - RHS;
1898   Overflow = isNonNegative() != RHS.isNonNegative() &&
1899              Res.isNonNegative() != isNonNegative();
1900   return Res;
1901 }
1902
1903 APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const {
1904   APInt Res = *this-RHS;
1905   Overflow = Res.ugt(*this);
1906   return Res;
1907 }
1908
1909 APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {
1910   // MININT/-1  -->  overflow.
1911   Overflow = isMinSignedValue() && RHS.isAllOnesValue();
1912   return sdiv(RHS);
1913 }
1914
1915 APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
1916   APInt Res = *this * RHS;
1917
1918   if (*this != 0 && RHS != 0)
1919     Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS;
1920   else
1921     Overflow = false;
1922   return Res;
1923 }
1924
1925 APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
1926   APInt Res = *this * RHS;
1927
1928   if (*this != 0 && RHS != 0)
1929     Overflow = Res.udiv(RHS) != *this || Res.udiv(*this) != RHS;
1930   else
1931     Overflow = false;
1932   return Res;
1933 }
1934
1935 APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const {
1936   Overflow = ShAmt.uge(getBitWidth());
1937   if (Overflow)
1938     return APInt(BitWidth, 0);
1939
1940   if (isNonNegative()) // Don't allow sign change.
1941     Overflow = ShAmt.uge(countLeadingZeros());
1942   else
1943     Overflow = ShAmt.uge(countLeadingOnes());
1944
1945   return *this << ShAmt;
1946 }
1947
1948 APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const {
1949   Overflow = ShAmt.uge(getBitWidth());
1950   if (Overflow)
1951     return APInt(BitWidth, 0);
1952
1953   Overflow = ShAmt.ugt(countLeadingZeros());
1954
1955   return *this << ShAmt;
1956 }
1957
1958
1959
1960
1961 void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
1962   // Check our assumptions here
1963   assert(!str.empty() && "Invalid string length");
1964   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
1965           radix == 36) &&
1966          "Radix should be 2, 8, 10, 16, or 36!");
1967
1968   StringRef::iterator p = str.begin();
1969   size_t slen = str.size();
1970   bool isNeg = *p == '-';
1971   if (*p == '-' || *p == '+') {
1972     p++;
1973     slen--;
1974     assert(slen && "String is only a sign, needs a value.");
1975   }
1976   assert((slen <= numbits || radix != 2) && "Insufficient bit width");
1977   assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
1978   assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
1979   assert((((slen-1)*64)/22 <= numbits || radix != 10) &&
1980          "Insufficient bit width");
1981
1982   // Allocate memory if needed
1983   if (isSingleWord())
1984     U.VAL = 0;
1985   else
1986     U.pVal = getClearedMemory(getNumWords());
1987
1988   // Figure out if we can shift instead of multiply
1989   unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
1990
1991   // Enter digit traversal loop
1992   for (StringRef::iterator e = str.end(); p != e; ++p) {
1993     unsigned digit = getDigit(*p, radix);
1994     assert(digit < radix && "Invalid character in digit string");
1995
1996     // Shift or multiply the value by the radix
1997     if (slen > 1) {
1998       if (shift)
1999         *this <<= shift;
2000       else
2001         *this *= radix;
2002     }
2003
2004     // Add in the digit we just interpreted
2005     *this += digit;
2006   }
2007   // If its negative, put it in two's complement form
2008   if (isNeg)
2009     this->negate();
2010 }
2011
2012 void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
2013                      bool Signed, bool formatAsCLiteral) const {
2014   assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 ||
2015           Radix == 36) &&
2016          "Radix should be 2, 8, 10, 16, or 36!");
2017
2018   const char *Prefix = "";
2019   if (formatAsCLiteral) {
2020     switch (Radix) {
2021       case 2:
2022         // Binary literals are a non-standard extension added in gcc 4.3:
2023         // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
2024         Prefix = "0b";
2025         break;
2026       case 8:
2027         Prefix = "0";
2028         break;
2029       case 10:
2030         break; // No prefix
2031       case 16:
2032         Prefix = "0x";
2033         break;
2034       default:
2035         llvm_unreachable("Invalid radix!");
2036     }
2037   }
2038
2039   // First, check for a zero value and just short circuit the logic below.
2040   if (*this == 0) {
2041     while (*Prefix) {
2042       Str.push_back(*Prefix);
2043       ++Prefix;
2044     };
2045     Str.push_back('0');
2046     return;
2047   }
2048
2049   static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
2050
2051   if (isSingleWord()) {
2052     char Buffer[65];
2053     char *BufPtr = std::end(Buffer);
2054
2055     uint64_t N;
2056     if (!Signed) {
2057       N = getZExtValue();
2058     } else {
2059       int64_t I = getSExtValue();
2060       if (I >= 0) {
2061         N = I;
2062       } else {
2063         Str.push_back('-');
2064         N = -(uint64_t)I;
2065       }
2066     }
2067
2068     while (*Prefix) {
2069       Str.push_back(*Prefix);
2070       ++Prefix;
2071     };
2072
2073     while (N) {
2074       *--BufPtr = Digits[N % Radix];
2075       N /= Radix;
2076     }
2077     Str.append(BufPtr, std::end(Buffer));
2078     return;
2079   }
2080
2081   APInt Tmp(*this);
2082
2083   if (Signed && isNegative()) {
2084     // They want to print the signed version and it is a negative value
2085     // Flip the bits and add one to turn it into the equivalent positive
2086     // value and put a '-' in the result.
2087     Tmp.negate();
2088     Str.push_back('-');
2089   }
2090
2091   while (*Prefix) {
2092     Str.push_back(*Prefix);
2093     ++Prefix;
2094   };
2095
2096   // We insert the digits backward, then reverse them to get the right order.
2097   unsigned StartDig = Str.size();
2098
2099   // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2100   // because the number of bits per digit (1, 3 and 4 respectively) divides
2101   // equally.  We just shift until the value is zero.
2102   if (Radix == 2 || Radix == 8 || Radix == 16) {
2103     // Just shift tmp right for each digit width until it becomes zero
2104     unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
2105     unsigned MaskAmt = Radix - 1;
2106
2107     while (Tmp.getBoolValue()) {
2108       unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
2109       Str.push_back(Digits[Digit]);
2110       Tmp.lshrInPlace(ShiftAmt);
2111     }
2112   } else {
2113     while (Tmp.getBoolValue()) {
2114       uint64_t Digit;
2115       udivrem(Tmp, Radix, Tmp, Digit);
2116       assert(Digit < Radix && "divide failed");
2117       Str.push_back(Digits[Digit]);
2118     }
2119   }
2120
2121   // Reverse the digits before returning.
2122   std::reverse(Str.begin()+StartDig, Str.end());
2123 }
2124
2125 /// Returns the APInt as a std::string. Note that this is an inefficient method.
2126 /// It is better to pass in a SmallVector/SmallString to the methods above.
2127 std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
2128   SmallString<40> S;
2129   toString(S, Radix, Signed, /* formatAsCLiteral = */false);
2130   return S.str();
2131 }
2132
2133 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2134 LLVM_DUMP_METHOD void APInt::dump() const {
2135   SmallString<40> S, U;
2136   this->toStringUnsigned(U);
2137   this->toStringSigned(S);
2138   dbgs() << "APInt(" << BitWidth << "b, "
2139          << U << "u " << S << "s)\n";
2140 }
2141 #endif
2142
2143 void APInt::print(raw_ostream &OS, bool isSigned) const {
2144   SmallString<40> S;
2145   this->toString(S, 10, isSigned, /* formatAsCLiteral = */false);
2146   OS << S;
2147 }
2148
2149 // This implements a variety of operations on a representation of
2150 // arbitrary precision, two's-complement, bignum integer values.
2151
2152 // Assumed by lowHalf, highHalf, partMSB and partLSB.  A fairly safe
2153 // and unrestricting assumption.
2154 static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0,
2155               "Part width must be divisible by 2!");
2156
2157 /* Some handy functions local to this file.  */
2158
2159 /* Returns the integer part with the least significant BITS set.
2160    BITS cannot be zero.  */
2161 static inline APInt::WordType lowBitMask(unsigned bits) {
2162   assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD);
2163
2164   return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits);
2165 }
2166
2167 /* Returns the value of the lower half of PART.  */
2168 static inline APInt::WordType lowHalf(APInt::WordType part) {
2169   return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2);
2170 }
2171
2172 /* Returns the value of the upper half of PART.  */
2173 static inline APInt::WordType highHalf(APInt::WordType part) {
2174   return part >> (APInt::APINT_BITS_PER_WORD / 2);
2175 }
2176
2177 /* Returns the bit number of the most significant set bit of a part.
2178    If the input number has no bits set -1U is returned.  */
2179 static unsigned partMSB(APInt::WordType value) {
2180   return findLastSet(value, ZB_Max);
2181 }
2182
2183 /* Returns the bit number of the least significant set bit of a
2184    part.  If the input number has no bits set -1U is returned.  */
2185 static unsigned partLSB(APInt::WordType value) {
2186   return findFirstSet(value, ZB_Max);
2187 }
2188
2189 /* Sets the least significant part of a bignum to the input value, and
2190    zeroes out higher parts.  */
2191 void APInt::tcSet(WordType *dst, WordType part, unsigned parts) {
2192   assert(parts > 0);
2193
2194   dst[0] = part;
2195   for (unsigned i = 1; i < parts; i++)
2196     dst[i] = 0;
2197 }
2198
2199 /* Assign one bignum to another.  */
2200 void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) {
2201   for (unsigned i = 0; i < parts; i++)
2202     dst[i] = src[i];
2203 }
2204
2205 /* Returns true if a bignum is zero, false otherwise.  */
2206 bool APInt::tcIsZero(const WordType *src, unsigned parts) {
2207   for (unsigned i = 0; i < parts; i++)
2208     if (src[i])
2209       return false;
2210
2211   return true;
2212 }
2213
2214 /* Extract the given bit of a bignum; returns 0 or 1.  */
2215 int APInt::tcExtractBit(const WordType *parts, unsigned bit) {
2216   return (parts[whichWord(bit)] & maskBit(bit)) != 0;
2217 }
2218
2219 /* Set the given bit of a bignum. */
2220 void APInt::tcSetBit(WordType *parts, unsigned bit) {
2221   parts[whichWord(bit)] |= maskBit(bit);
2222 }
2223
2224 /* Clears the given bit of a bignum. */
2225 void APInt::tcClearBit(WordType *parts, unsigned bit) {
2226   parts[whichWord(bit)] &= ~maskBit(bit);
2227 }
2228
2229 /* Returns the bit number of the least significant set bit of a
2230    number.  If the input number has no bits set -1U is returned.  */
2231 unsigned APInt::tcLSB(const WordType *parts, unsigned n) {
2232   for (unsigned i = 0; i < n; i++) {
2233     if (parts[i] != 0) {
2234       unsigned lsb = partLSB(parts[i]);
2235
2236       return lsb + i * APINT_BITS_PER_WORD;
2237     }
2238   }
2239
2240   return -1U;
2241 }
2242
2243 /* Returns the bit number of the most significant set bit of a number.
2244    If the input number has no bits set -1U is returned.  */
2245 unsigned APInt::tcMSB(const WordType *parts, unsigned n) {
2246   do {
2247     --n;
2248
2249     if (parts[n] != 0) {
2250       unsigned msb = partMSB(parts[n]);
2251
2252       return msb + n * APINT_BITS_PER_WORD;
2253     }
2254   } while (n);
2255
2256   return -1U;
2257 }
2258
2259 /* Copy the bit vector of width srcBITS from SRC, starting at bit
2260    srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
2261    the least significant bit of DST.  All high bits above srcBITS in
2262    DST are zero-filled.  */
2263 void
2264 APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src,
2265                  unsigned srcBits, unsigned srcLSB) {
2266   unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
2267   assert(dstParts <= dstCount);
2268
2269   unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD;
2270   tcAssign (dst, src + firstSrcPart, dstParts);
2271
2272   unsigned shift = srcLSB % APINT_BITS_PER_WORD;
2273   tcShiftRight (dst, dstParts, shift);
2274
2275   /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC
2276      in DST.  If this is less that srcBits, append the rest, else
2277      clear the high bits.  */
2278   unsigned n = dstParts * APINT_BITS_PER_WORD - shift;
2279   if (n < srcBits) {
2280     WordType mask = lowBitMask (srcBits - n);
2281     dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
2282                           << n % APINT_BITS_PER_WORD);
2283   } else if (n > srcBits) {
2284     if (srcBits % APINT_BITS_PER_WORD)
2285       dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD);
2286   }
2287
2288   /* Clear high parts.  */
2289   while (dstParts < dstCount)
2290     dst[dstParts++] = 0;
2291 }
2292
2293 /* DST += RHS + C where C is zero or one.  Returns the carry flag.  */
2294 APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs,
2295                              WordType c, unsigned parts) {
2296   assert(c <= 1);
2297
2298   for (unsigned i = 0; i < parts; i++) {
2299     WordType l = dst[i];
2300     if (c) {
2301       dst[i] += rhs[i] + 1;
2302       c = (dst[i] <= l);
2303     } else {
2304       dst[i] += rhs[i];
2305       c = (dst[i] < l);
2306     }
2307   }
2308
2309   return c;
2310 }
2311
2312 /// This function adds a single "word" integer, src, to the multiple
2313 /// "word" integer array, dst[]. dst[] is modified to reflect the addition and
2314 /// 1 is returned if there is a carry out, otherwise 0 is returned.
2315 /// @returns the carry of the addition.
2316 APInt::WordType APInt::tcAddPart(WordType *dst, WordType src,
2317                                  unsigned parts) {
2318   for (unsigned i = 0; i < parts; ++i) {
2319     dst[i] += src;
2320     if (dst[i] >= src)
2321       return 0; // No need to carry so exit early.
2322     src = 1; // Carry one to next digit.
2323   }
2324
2325   return 1;
2326 }
2327
2328 /* DST -= RHS + C where C is zero or one.  Returns the carry flag.  */
2329 APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs,
2330                                   WordType c, unsigned parts) {
2331   assert(c <= 1);
2332
2333   for (unsigned i = 0; i < parts; i++) {
2334     WordType l = dst[i];
2335     if (c) {
2336       dst[i] -= rhs[i] + 1;
2337       c = (dst[i] >= l);
2338     } else {
2339       dst[i] -= rhs[i];
2340       c = (dst[i] > l);
2341     }
2342   }
2343
2344   return c;
2345 }
2346
2347 /// This function subtracts a single "word" (64-bit word), src, from
2348 /// the multi-word integer array, dst[], propagating the borrowed 1 value until
2349 /// no further borrowing is needed or it runs out of "words" in dst.  The result
2350 /// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not
2351 /// exhausted. In other words, if src > dst then this function returns 1,
2352 /// otherwise 0.
2353 /// @returns the borrow out of the subtraction
2354 APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src,
2355                                       unsigned parts) {
2356   for (unsigned i = 0; i < parts; ++i) {
2357     WordType Dst = dst[i];
2358     dst[i] -= src;
2359     if (src <= Dst)
2360       return 0; // No need to borrow so exit early.
2361     src = 1; // We have to "borrow 1" from next "word"
2362   }
2363
2364   return 1;
2365 }
2366
2367 /* Negate a bignum in-place.  */
2368 void APInt::tcNegate(WordType *dst, unsigned parts) {
2369   tcComplement(dst, parts);
2370   tcIncrement(dst, parts);
2371 }
2372
2373 /*  DST += SRC * MULTIPLIER + CARRY   if add is true
2374     DST  = SRC * MULTIPLIER + CARRY   if add is false
2375
2376     Requires 0 <= DSTPARTS <= SRCPARTS + 1.  If DST overlaps SRC
2377     they must start at the same point, i.e. DST == SRC.
2378
2379     If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2380     returned.  Otherwise DST is filled with the least significant
2381     DSTPARTS parts of the result, and if all of the omitted higher
2382     parts were zero return zero, otherwise overflow occurred and
2383     return one.  */
2384 int APInt::tcMultiplyPart(WordType *dst, const WordType *src,
2385                           WordType multiplier, WordType carry,
2386                           unsigned srcParts, unsigned dstParts,
2387                           bool add) {
2388   /* Otherwise our writes of DST kill our later reads of SRC.  */
2389   assert(dst <= src || dst >= src + srcParts);
2390   assert(dstParts <= srcParts + 1);
2391
2392   /* N loops; minimum of dstParts and srcParts.  */
2393   unsigned n = std::min(dstParts, srcParts);
2394
2395   for (unsigned i = 0; i < n; i++) {
2396     WordType low, mid, high, srcPart;
2397
2398       /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2399
2400          This cannot overflow, because
2401
2402          (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2403
2404          which is less than n^2.  */
2405
2406     srcPart = src[i];
2407
2408     if (multiplier == 0 || srcPart == 0) {
2409       low = carry;
2410       high = 0;
2411     } else {
2412       low = lowHalf(srcPart) * lowHalf(multiplier);
2413       high = highHalf(srcPart) * highHalf(multiplier);
2414
2415       mid = lowHalf(srcPart) * highHalf(multiplier);
2416       high += highHalf(mid);
2417       mid <<= APINT_BITS_PER_WORD / 2;
2418       if (low + mid < low)
2419         high++;
2420       low += mid;
2421
2422       mid = highHalf(srcPart) * lowHalf(multiplier);
2423       high += highHalf(mid);
2424       mid <<= APINT_BITS_PER_WORD / 2;
2425       if (low + mid < low)
2426         high++;
2427       low += mid;
2428
2429       /* Now add carry.  */
2430       if (low + carry < low)
2431         high++;
2432       low += carry;
2433     }
2434
2435     if (add) {
2436       /* And now DST[i], and store the new low part there.  */
2437       if (low + dst[i] < low)
2438         high++;
2439       dst[i] += low;
2440     } else
2441       dst[i] = low;
2442
2443     carry = high;
2444   }
2445
2446   if (srcParts < dstParts) {
2447     /* Full multiplication, there is no overflow.  */
2448     assert(srcParts + 1 == dstParts);
2449     dst[srcParts] = carry;
2450     return 0;
2451   }
2452
2453   /* We overflowed if there is carry.  */
2454   if (carry)
2455     return 1;
2456
2457   /* We would overflow if any significant unwritten parts would be
2458      non-zero.  This is true if any remaining src parts are non-zero
2459      and the multiplier is non-zero.  */
2460   if (multiplier)
2461     for (unsigned i = dstParts; i < srcParts; i++)
2462       if (src[i])
2463         return 1;
2464
2465   /* We fitted in the narrow destination.  */
2466   return 0;
2467 }
2468
2469 /* DST = LHS * RHS, where DST has the same width as the operands and
2470    is filled with the least significant parts of the result.  Returns
2471    one if overflow occurred, otherwise zero.  DST must be disjoint
2472    from both operands.  */
2473 int APInt::tcMultiply(WordType *dst, const WordType *lhs,
2474                       const WordType *rhs, unsigned parts) {
2475   assert(dst != lhs && dst != rhs);
2476
2477   int overflow = 0;
2478   tcSet(dst, 0, parts);
2479
2480   for (unsigned i = 0; i < parts; i++)
2481     overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
2482                                parts - i, true);
2483
2484   return overflow;
2485 }
2486
2487 /// DST = LHS * RHS, where DST has width the sum of the widths of the
2488 /// operands. No overflow occurs. DST must be disjoint from both operands.
2489 void APInt::tcFullMultiply(WordType *dst, const WordType *lhs,
2490                            const WordType *rhs, unsigned lhsParts,
2491                            unsigned rhsParts) {
2492   /* Put the narrower number on the LHS for less loops below.  */
2493   if (lhsParts > rhsParts)
2494     return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
2495
2496   assert(dst != lhs && dst != rhs);
2497
2498   tcSet(dst, 0, rhsParts);
2499
2500   for (unsigned i = 0; i < lhsParts; i++)
2501     tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true);
2502 }
2503
2504 /* If RHS is zero LHS and REMAINDER are left unchanged, return one.
2505    Otherwise set LHS to LHS / RHS with the fractional part discarded,
2506    set REMAINDER to the remainder, return zero.  i.e.
2507
2508    OLD_LHS = RHS * LHS + REMAINDER
2509
2510    SCRATCH is a bignum of the same size as the operands and result for
2511    use by the routine; its contents need not be initialized and are
2512    destroyed.  LHS, REMAINDER and SCRATCH must be distinct.
2513 */
2514 int APInt::tcDivide(WordType *lhs, const WordType *rhs,
2515                     WordType *remainder, WordType *srhs,
2516                     unsigned parts) {
2517   assert(lhs != remainder && lhs != srhs && remainder != srhs);
2518
2519   unsigned shiftCount = tcMSB(rhs, parts) + 1;
2520   if (shiftCount == 0)
2521     return true;
2522
2523   shiftCount = parts * APINT_BITS_PER_WORD - shiftCount;
2524   unsigned n = shiftCount / APINT_BITS_PER_WORD;
2525   WordType mask = (WordType) 1 << (shiftCount % APINT_BITS_PER_WORD);
2526
2527   tcAssign(srhs, rhs, parts);
2528   tcShiftLeft(srhs, parts, shiftCount);
2529   tcAssign(remainder, lhs, parts);
2530   tcSet(lhs, 0, parts);
2531
2532   /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
2533      the total.  */
2534   for (;;) {
2535     int compare = tcCompare(remainder, srhs, parts);
2536     if (compare >= 0) {
2537       tcSubtract(remainder, srhs, 0, parts);
2538       lhs[n] |= mask;
2539     }
2540
2541     if (shiftCount == 0)
2542       break;
2543     shiftCount--;
2544     tcShiftRight(srhs, parts, 1);
2545     if ((mask >>= 1) == 0) {
2546       mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1);
2547       n--;
2548     }
2549   }
2550
2551   return false;
2552 }
2553
2554 /// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are
2555 /// no restrictions on Count.
2556 void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) {
2557   // Don't bother performing a no-op shift.
2558   if (!Count)
2559     return;
2560
2561   // WordShift is the inter-part shift; BitShift is the intra-part shift.
2562   unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2563   unsigned BitShift = Count % APINT_BITS_PER_WORD;
2564
2565   // Fastpath for moving by whole words.
2566   if (BitShift == 0) {
2567     std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE);
2568   } else {
2569     while (Words-- > WordShift) {
2570       Dst[Words] = Dst[Words - WordShift] << BitShift;
2571       if (Words > WordShift)
2572         Dst[Words] |=
2573           Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift);
2574     }
2575   }
2576
2577   // Fill in the remainder with 0s.
2578   std::memset(Dst, 0, WordShift * APINT_WORD_SIZE);
2579 }
2580
2581 /// Shift a bignum right Count bits in-place. Shifted in bits are zero. There
2582 /// are no restrictions on Count.
2583 void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) {
2584   // Don't bother performing a no-op shift.
2585   if (!Count)
2586     return;
2587
2588   // WordShift is the inter-part shift; BitShift is the intra-part shift.
2589   unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2590   unsigned BitShift = Count % APINT_BITS_PER_WORD;
2591
2592   unsigned WordsToMove = Words - WordShift;
2593   // Fastpath for moving by whole words.
2594   if (BitShift == 0) {
2595     std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE);
2596   } else {
2597     for (unsigned i = 0; i != WordsToMove; ++i) {
2598       Dst[i] = Dst[i + WordShift] >> BitShift;
2599       if (i + 1 != WordsToMove)
2600         Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift);
2601     }
2602   }
2603
2604   // Fill in the remainder with 0s.
2605   std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE);
2606 }
2607
2608 /* Bitwise and of two bignums.  */
2609 void APInt::tcAnd(WordType *dst, const WordType *rhs, unsigned parts) {
2610   for (unsigned i = 0; i < parts; i++)
2611     dst[i] &= rhs[i];
2612 }
2613
2614 /* Bitwise inclusive or of two bignums.  */
2615 void APInt::tcOr(WordType *dst, const WordType *rhs, unsigned parts) {
2616   for (unsigned i = 0; i < parts; i++)
2617     dst[i] |= rhs[i];
2618 }
2619
2620 /* Bitwise exclusive or of two bignums.  */
2621 void APInt::tcXor(WordType *dst, const WordType *rhs, unsigned parts) {
2622   for (unsigned i = 0; i < parts; i++)
2623     dst[i] ^= rhs[i];
2624 }
2625
2626 /* Complement a bignum in-place.  */
2627 void APInt::tcComplement(WordType *dst, unsigned parts) {
2628   for (unsigned i = 0; i < parts; i++)
2629     dst[i] = ~dst[i];
2630 }
2631
2632 /* Comparison (unsigned) of two bignums.  */
2633 int APInt::tcCompare(const WordType *lhs, const WordType *rhs,
2634                      unsigned parts) {
2635   while (parts) {
2636     parts--;
2637     if (lhs[parts] != rhs[parts])
2638       return (lhs[parts] > rhs[parts]) ? 1 : -1;
2639   }
2640
2641   return 0;
2642 }
2643
2644 /* Set the least significant BITS bits of a bignum, clear the
2645    rest.  */
2646 void APInt::tcSetLeastSignificantBits(WordType *dst, unsigned parts,
2647                                       unsigned bits) {
2648   unsigned i = 0;
2649   while (bits > APINT_BITS_PER_WORD) {
2650     dst[i++] = ~(WordType) 0;
2651     bits -= APINT_BITS_PER_WORD;
2652   }
2653
2654   if (bits)
2655     dst[i++] = ~(WordType) 0 >> (APINT_BITS_PER_WORD - bits);
2656
2657   while (i < parts)
2658     dst[i++] = 0;
2659 }