contrib/llvm/lib/Support/APInt.cpp

   1 //===-- APInt.cpp - Implement APInt class ---------------------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements a class to represent arbitrary precision integer
  11 // constant values and provide a variety of arithmetic operations on them.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #include "llvm/ADT/APInt.h"
  16 #include "llvm/ADT/ArrayRef.h"
  17 #include "llvm/ADT/FoldingSet.h"
  18 #include "llvm/ADT/Hashing.h"
  19 #include "llvm/ADT/SmallString.h"
  20 #include "llvm/ADT/StringRef.h"
  21 #include "llvm/Support/Debug.h"
  22 #include "llvm/Support/ErrorHandling.h"
  23 #include "llvm/Support/MathExtras.h"
  24 #include "llvm/Support/raw_ostream.h"
  25 #include <climits>
  26 #include <cmath>
  27 #include <cstdlib>
  28 #include <cstring>
  29 using namespace llvm;
  30
  31 #define DEBUG_TYPE "apint"
  32
  33 /// A utility function for allocating memory, checking for allocation failures,
  34 /// and ensuring the contents are zeroed.
  35 inline static uint64_t* getClearedMemory(unsigned numWords) {
  36   uint64_t * result = new uint64_t[numWords];
  37   assert(result && "APInt memory allocation fails!");
  38   memset(result, 0, numWords * sizeof(uint64_t));
  39   return result;
  40 }
  41
  42 /// A utility function for allocating memory and checking for allocation
  43 /// failure.  The content is not zeroed.
  44 inline static uint64_t* getMemory(unsigned numWords) {
  45   uint64_t * result = new uint64_t[numWords];
  46   assert(result && "APInt memory allocation fails!");
  47   return result;
  48 }
  49
  50 /// A utility function that converts a character to a digit.
  51 inline static unsigned getDigit(char cdigit, uint8_t radix) {
  52   unsigned r;
  53
  54   if (radix == 16 || radix == 36) {
  55     r = cdigit - '0';
  56     if (r <= 9)
  57       return r;
  58
  59     r = cdigit - 'A';
  60     if (r <= radix - 11U)
  61       return r + 10;
  62
  63     r = cdigit - 'a';
  64     if (r <= radix - 11U)
  65       return r + 10;
  66
  67     radix = 10;
  68   }
  69
  70   r = cdigit - '0';
  71   if (r < radix)
  72     return r;
  73
  74   return -1U;
  75 }
  76
  77
  78 void APInt::initSlowCase(uint64_t val, bool isSigned) {
  79   VAL = 0;
  80   pVal = getClearedMemory(getNumWords());
  81   pVal[0] = val;
  82   if (isSigned && int64_t(val) < 0)
  83     for (unsigned i = 1; i < getNumWords(); ++i)
  84       pVal[i] = -1ULL;
  85   clearUnusedBits();
  86 }
  87
  88 void APInt::initSlowCase(const APInt& that) {
  89   VAL = 0;
  90   pVal = getMemory(getNumWords());
  91   memcpy(pVal, that.pVal, getNumWords() * APINT_WORD_SIZE);
  92 }
  93
  94 void APInt::initFromArray(ArrayRef<uint64_t> bigVal) {
  95   assert(BitWidth && "Bitwidth too small");
  96   assert(bigVal.data() && "Null pointer detected!");
  97   if (isSingleWord())
  98     VAL = bigVal[0];
  99   else {
 100     // Get memory, cleared to 0
 101     VAL = 0;
 102     pVal = getClearedMemory(getNumWords());
 103     // Calculate the number of words to copy
 104     unsigned words = std::min<unsigned>(bigVal.size(), getNumWords());
 105     // Copy the words from bigVal to pVal
 106     memcpy(pVal, bigVal.data(), words * APINT_WORD_SIZE);
 107   }
 108   // Make sure unused high bits are cleared
 109   clearUnusedBits();
 110 }
 111
 112 APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal)
 113   : BitWidth(numBits) {
 114   initFromArray(bigVal);
 115 }
 116
 117 APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
 118   : BitWidth(numBits) {
 119   initFromArray(makeArrayRef(bigVal, numWords));
 120 }
 121
 122 APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
 123   : VAL(0), BitWidth(numbits) {
 124   assert(BitWidth && "Bitwidth too small");
 125   fromString(numbits, Str, radix);
 126 }
 127
 128 void APInt::AssignSlowCase(const APInt& RHS) {
 129   // Don't do anything for X = X
 130   if (this == &RHS)
 131     return;
 132
 133   if (BitWidth == RHS.getBitWidth()) {
 134     // assume same bit-width single-word case is already handled
 135     assert(!isSingleWord());
 136     memcpy(pVal, RHS.pVal, getNumWords() * APINT_WORD_SIZE);
 137     return;
 138   }
 139
 140   if (isSingleWord()) {
 141     // assume case where both are single words is already handled
 142     assert(!RHS.isSingleWord());
 143     VAL = 0;
 144     pVal = getMemory(RHS.getNumWords());
 145     memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
 146   } else if (getNumWords() == RHS.getNumWords())
 147     memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
 148   else if (RHS.isSingleWord()) {
 149     delete [] pVal;
 150     VAL = RHS.VAL;
 151   } else {
 152     delete [] pVal;
 153     pVal = getMemory(RHS.getNumWords());
 154     memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
 155   }
 156   BitWidth = RHS.BitWidth;
 157   clearUnusedBits();
 158 }
 159
 160 /// This method 'profiles' an APInt for use with FoldingSet.
 161 void APInt::Profile(FoldingSetNodeID& ID) const {
 162   ID.AddInteger(BitWidth);
 163
 164   if (isSingleWord()) {
 165     ID.AddInteger(VAL);
 166     return;
 167   }
 168
 169   unsigned NumWords = getNumWords();
 170   for (unsigned i = 0; i < NumWords; ++i)
 171     ID.AddInteger(pVal[i]);
 172 }
 173
 174 /// @brief Prefix increment operator. Increments the APInt by one.
 175 APInt& APInt::operator++() {
 176   if (isSingleWord())
 177     ++VAL;
 178   else
 179     tcIncrement(pVal, getNumWords());
 180   return clearUnusedBits();
 181 }
 182
 183 /// @brief Prefix decrement operator. Decrements the APInt by one.
 184 APInt& APInt::operator--() {
 185   if (isSingleWord())
 186     --VAL;
 187   else
 188     tcDecrement(pVal, getNumWords());
 189   return clearUnusedBits();
 190 }
 191
 192 /// Adds the RHS APint to this APInt.
 193 /// @returns this, after addition of RHS.
 194 /// @brief Addition assignment operator.
 195 APInt& APInt::operator+=(const APInt& RHS) {
 196   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 197   if (isSingleWord())
 198     VAL += RHS.VAL;
 199   else
 200     tcAdd(pVal, RHS.pVal, 0, getNumWords());
 201   return clearUnusedBits();
 202 }
 203
 204 APInt& APInt::operator+=(uint64_t RHS) {
 205   if (isSingleWord())
 206     VAL += RHS;
 207   else
 208     tcAddPart(pVal, RHS, getNumWords());
 209   return clearUnusedBits();
 210 }
 211
 212 /// Subtracts the RHS APInt from this APInt
 213 /// @returns this, after subtraction
 214 /// @brief Subtraction assignment operator.
 215 APInt& APInt::operator-=(const APInt& RHS) {
 216   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 217   if (isSingleWord())
 218     VAL -= RHS.VAL;
 219   else
 220     tcSubtract(pVal, RHS.pVal, 0, getNumWords());
 221   return clearUnusedBits();
 222 }
 223
 224 APInt& APInt::operator-=(uint64_t RHS) {
 225   if (isSingleWord())
 226     VAL -= RHS;
 227   else
 228     tcSubtractPart(pVal, RHS, getNumWords());
 229   return clearUnusedBits();
 230 }
 231
 232 /// Multiplies an integer array, x, by a uint64_t integer and places the result
 233 /// into dest.
 234 /// @returns the carry out of the multiplication.
 235 /// @brief Multiply a multi-digit APInt by a single digit (64-bit) integer.
 236 static uint64_t mul_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) {
 237   // Split y into high 32-bit part (hy)  and low 32-bit part (ly)
 238   uint64_t ly = y & 0xffffffffULL, hy = y >> 32;
 239   uint64_t carry = 0;
 240
 241   // For each digit of x.
 242   for (unsigned i = 0; i < len; ++i) {
 243     // Split x into high and low words
 244     uint64_t lx = x[i] & 0xffffffffULL;
 245     uint64_t hx = x[i] >> 32;
 246     // hasCarry - A flag to indicate if there is a carry to the next digit.
 247     // hasCarry == 0, no carry
 248     // hasCarry == 1, has carry
 249     // hasCarry == 2, no carry and the calculation result == 0.
 250     uint8_t hasCarry = 0;
 251     dest[i] = carry + lx * ly;
 252     // Determine if the add above introduces carry.
 253     hasCarry = (dest[i] < carry) ? 1 : 0;
 254     carry = hx * ly + (dest[i] >> 32) + (hasCarry ? (1ULL << 32) : 0);
 255     // The upper limit of carry can be (2^32 - 1)(2^32 - 1) +
 256     // (2^32 - 1) + 2^32 = 2^64.
 257     hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0);
 258
 259     carry += (lx * hy) & 0xffffffffULL;
 260     dest[i] = (carry << 32) | (dest[i] & 0xffffffffULL);
 261     carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0) +
 262             (carry >> 32) + ((lx * hy) >> 32) + hx * hy;
 263   }
 264   return carry;
 265 }
 266
 267 /// Multiplies integer array x by integer array y and stores the result into
 268 /// the integer array dest. Note that dest's size must be >= xlen + ylen.
 269 /// @brief Generalized multiplication of integer arrays.
 270 static void mul(uint64_t dest[], uint64_t x[], unsigned xlen, uint64_t y[],
 271                 unsigned ylen) {
 272   dest[xlen] = mul_1(dest, x, xlen, y[0]);
 273   for (unsigned i = 1; i < ylen; ++i) {
 274     uint64_t ly = y[i] & 0xffffffffULL, hy = y[i] >> 32;
 275     uint64_t carry = 0, lx = 0, hx = 0;
 276     for (unsigned j = 0; j < xlen; ++j) {
 277       lx = x[j] & 0xffffffffULL;
 278       hx = x[j] >> 32;
 279       // hasCarry - A flag to indicate if has carry.
 280       // hasCarry == 0, no carry
 281       // hasCarry == 1, has carry
 282       // hasCarry == 2, no carry and the calculation result == 0.
 283       uint8_t hasCarry = 0;
 284       uint64_t resul = carry + lx * ly;
 285       hasCarry = (resul < carry) ? 1 : 0;
 286       carry = (hasCarry ? (1ULL << 32) : 0) + hx * ly + (resul >> 32);
 287       hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0);
 288
 289       carry += (lx * hy) & 0xffffffffULL;
 290       resul = (carry << 32) | (resul & 0xffffffffULL);
 291       dest[i+j] += resul;
 292       carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0)+
 293               (carry >> 32) + (dest[i+j] < resul ? 1 : 0) +
 294               ((lx * hy) >> 32) + hx * hy;
 295     }
 296     dest[i+xlen] = carry;
 297   }
 298 }
 299
 300 APInt& APInt::operator*=(const APInt& RHS) {
 301   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 302   if (isSingleWord()) {
 303     VAL *= RHS.VAL;
 304     clearUnusedBits();
 305     return *this;
 306   }
 307
 308   // Get some bit facts about LHS and check for zero
 309   unsigned lhsBits = getActiveBits();
 310   unsigned lhsWords = !lhsBits ? 0 : whichWord(lhsBits - 1) + 1;
 311   if (!lhsWords)
 312     // 0 * X ===> 0
 313     return *this;
 314
 315   // Get some bit facts about RHS and check for zero
 316   unsigned rhsBits = RHS.getActiveBits();
 317   unsigned rhsWords = !rhsBits ? 0 : whichWord(rhsBits - 1) + 1;
 318   if (!rhsWords) {
 319     // X * 0 ===> 0
 320     clearAllBits();
 321     return *this;
 322   }
 323
 324   // Allocate space for the result
 325   unsigned destWords = rhsWords + lhsWords;
 326   uint64_t *dest = getMemory(destWords);
 327
 328   // Perform the long multiply
 329   mul(dest, pVal, lhsWords, RHS.pVal, rhsWords);
 330
 331   // Copy result back into *this
 332   clearAllBits();
 333   unsigned wordsToCopy = destWords >= getNumWords() ? getNumWords() : destWords;
 334   memcpy(pVal, dest, wordsToCopy * APINT_WORD_SIZE);
 335   clearUnusedBits();
 336
 337   // delete dest array and return
 338   delete[] dest;
 339   return *this;
 340 }
 341
 342 void APInt::AndAssignSlowCase(const APInt& RHS) {
 343   tcAnd(pVal, RHS.pVal, getNumWords());
 344 }
 345
 346 void APInt::OrAssignSlowCase(const APInt& RHS) {
 347   tcOr(pVal, RHS.pVal, getNumWords());
 348 }
 349
 350 void APInt::XorAssignSlowCase(const APInt& RHS) {
 351   tcXor(pVal, RHS.pVal, getNumWords());
 352 }
 353
 354 APInt APInt::operator*(const APInt& RHS) const {
 355   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 356   if (isSingleWord())
 357     return APInt(BitWidth, VAL * RHS.VAL);
 358   APInt Result(*this);
 359   Result *= RHS;
 360   return Result;
 361 }
 362
 363 bool APInt::EqualSlowCase(const APInt& RHS) const {
 364   return std::equal(pVal, pVal + getNumWords(), RHS.pVal);
 365 }
 366
 367 bool APInt::ult(const APInt& RHS) const {
 368   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
 369   if (isSingleWord())
 370     return VAL < RHS.VAL;
 371
 372   // Get active bit length of both operands
 373   unsigned n1 = getActiveBits();
 374   unsigned n2 = RHS.getActiveBits();
 375
 376   // If magnitude of LHS is less than RHS, return true.
 377   if (n1 < n2)
 378     return true;
 379
 380   // If magnitude of RHS is greater than LHS, return false.
 381   if (n2 < n1)
 382     return false;
 383
 384   // If they both fit in a word, just compare the low order word
 385   if (n1 <= APINT_BITS_PER_WORD && n2 <= APINT_BITS_PER_WORD)
 386     return pVal[0] < RHS.pVal[0];
 387
 388   // Otherwise, compare all words
 389   unsigned topWord = whichWord(std::max(n1,n2)-1);
 390   for (int i = topWord; i >= 0; --i) {
 391     if (pVal[i] > RHS.pVal[i])
 392       return false;
 393     if (pVal[i] < RHS.pVal[i])
 394       return true;
 395   }
 396   return false;
 397 }
 398
 399 bool APInt::slt(const APInt& RHS) const {
 400   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
 401   if (isSingleWord()) {
 402     int64_t lhsSext = SignExtend64(VAL, BitWidth);
 403     int64_t rhsSext = SignExtend64(RHS.VAL, BitWidth);
 404     return lhsSext < rhsSext;
 405   }
 406
 407   bool lhsNeg = isNegative();
 408   bool rhsNeg = RHS.isNegative();
 409
 410   // If the sign bits don't match, then (LHS < RHS) if LHS is negative
 411   if (lhsNeg != rhsNeg)
 412     return lhsNeg;
 413
 414   // Otherwise we can just use an unsigned comparison, because even negative
 415   // numbers compare correctly this way if both have the same signed-ness.
 416   return ult(RHS);
 417 }
 418
 419 void APInt::setBit(unsigned bitPosition) {
 420   if (isSingleWord())
 421     VAL |= maskBit(bitPosition);
 422   else
 423     pVal[whichWord(bitPosition)] |= maskBit(bitPosition);
 424 }
 425
 426 void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
 427   unsigned loWord = whichWord(loBit);
 428   unsigned hiWord = whichWord(hiBit);
 429
 430   // Create an initial mask for the low word with zeros below loBit.
 431   uint64_t loMask = UINT64_MAX << whichBit(loBit);
 432
 433   // If hiBit is not aligned, we need a high mask.
 434   unsigned hiShiftAmt = whichBit(hiBit);
 435   if (hiShiftAmt != 0) {
 436     // Create a high mask with zeros above hiBit.
 437     uint64_t hiMask = UINT64_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt);
 438     // If loWord and hiWord are equal, then we combine the masks. Otherwise,
 439     // set the bits in hiWord.
 440     if (hiWord == loWord)
 441       loMask &= hiMask;
 442     else
 443       pVal[hiWord] |= hiMask;
 444   }
 445   // Apply the mask to the low word.
 446   pVal[loWord] |= loMask;
 447
 448   // Fill any words between loWord and hiWord with all ones.
 449   for (unsigned word = loWord + 1; word < hiWord; ++word)
 450     pVal[word] = UINT64_MAX;
 451 }
 452
 453 /// Set the given bit to 0 whose position is given as "bitPosition".
 454 /// @brief Set a given bit to 0.
 455 void APInt::clearBit(unsigned bitPosition) {
 456   if (isSingleWord())
 457     VAL &= ~maskBit(bitPosition);
 458   else
 459     pVal[whichWord(bitPosition)] &= ~maskBit(bitPosition);
 460 }
 461
 462 /// @brief Toggle every bit to its opposite value.
 463 void APInt::flipAllBitsSlowCase() {
 464   tcComplement(pVal, getNumWords());
 465   clearUnusedBits();
 466 }
 467
 468 /// Toggle a given bit to its opposite value whose position is given
 469 /// as "bitPosition".
 470 /// @brief Toggles a given bit to its opposite value.
 471 void APInt::flipBit(unsigned bitPosition) {
 472   assert(bitPosition < BitWidth && "Out of the bit-width range!");
 473   if ((*this)[bitPosition]) clearBit(bitPosition);
 474   else setBit(bitPosition);
 475 }
 476
 477 void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
 478   unsigned subBitWidth = subBits.getBitWidth();
 479   assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth &&
 480          "Illegal bit insertion");
 481
 482   // Insertion is a direct copy.
 483   if (subBitWidth == BitWidth) {
 484     *this = subBits;
 485     return;
 486   }
 487
 488   // Single word result can be done as a direct bitmask.
 489   if (isSingleWord()) {
 490     uint64_t mask = UINT64_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
 491     VAL &= ~(mask << bitPosition);
 492     VAL |= (subBits.VAL << bitPosition);
 493     return;
 494   }
 495
 496   unsigned loBit = whichBit(bitPosition);
 497   unsigned loWord = whichWord(bitPosition);
 498   unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1);
 499
 500   // Insertion within a single word can be done as a direct bitmask.
 501   if (loWord == hi1Word) {
 502     uint64_t mask = UINT64_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
 503     pVal[loWord] &= ~(mask << loBit);
 504     pVal[loWord] |= (subBits.VAL << loBit);
 505     return;
 506   }
 507
 508   // Insert on word boundaries.
 509   if (loBit == 0) {
 510     // Direct copy whole words.
 511     unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD;
 512     memcpy(pVal + loWord, subBits.getRawData(),
 513            numWholeSubWords * APINT_WORD_SIZE);
 514
 515     // Mask+insert remaining bits.
 516     unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD;
 517     if (remainingBits != 0) {
 518       uint64_t mask = UINT64_MAX >> (APINT_BITS_PER_WORD - remainingBits);
 519       pVal[hi1Word] &= ~mask;
 520       pVal[hi1Word] |= subBits.getWord(subBitWidth - 1);
 521     }
 522     return;
 523   }
 524
 525   // General case - set/clear individual bits in dst based on src.
 526   // TODO - there is scope for optimization here, but at the moment this code
 527   // path is barely used so prefer readability over performance.
 528   for (unsigned i = 0; i != subBitWidth; ++i) {
 529     if (subBits[i])
 530       setBit(bitPosition + i);
 531     else
 532       clearBit(bitPosition + i);
 533   }
 534 }
 535
 536 APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const {
 537   assert(numBits > 0 && "Can't extract zero bits");
 538   assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
 539          "Illegal bit extraction");
 540
 541   if (isSingleWord())
 542     return APInt(numBits, VAL >> bitPosition);
 543
 544   unsigned loBit = whichBit(bitPosition);
 545   unsigned loWord = whichWord(bitPosition);
 546   unsigned hiWord = whichWord(bitPosition + numBits - 1);
 547
 548   // Single word result extracting bits from a single word source.
 549   if (loWord == hiWord)
 550     return APInt(numBits, pVal[loWord] >> loBit);
 551
 552   // Extracting bits that start on a source word boundary can be done
 553   // as a fast memory copy.
 554   if (loBit == 0)
 555     return APInt(numBits, makeArrayRef(pVal + loWord, 1 + hiWord - loWord));
 556
 557   // General case - shift + copy source words directly into place.
 558   APInt Result(numBits, 0);
 559   unsigned NumSrcWords = getNumWords();
 560   unsigned NumDstWords = Result.getNumWords();
 561
 562   for (unsigned word = 0; word < NumDstWords; ++word) {
 563     uint64_t w0 = pVal[loWord + word];
 564     uint64_t w1 =
 565         (loWord + word + 1) < NumSrcWords ? pVal[loWord + word + 1] : 0;
 566     Result.pVal[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit));
 567   }
 568
 569   return Result.clearUnusedBits();
 570 }
 571
 572 unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
 573   assert(!str.empty() && "Invalid string length");
 574   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
 575           radix == 36) &&
 576          "Radix should be 2, 8, 10, 16, or 36!");
 577
 578   size_t slen = str.size();
 579
 580   // Each computation below needs to know if it's negative.
 581   StringRef::iterator p = str.begin();
 582   unsigned isNegative = *p == '-';
 583   if (*p == '-' || *p == '+') {
 584     p++;
 585     slen--;
 586     assert(slen && "String is only a sign, needs a value.");
 587   }
 588
 589   // For radixes of power-of-two values, the bits required is accurately and
 590   // easily computed
 591   if (radix == 2)
 592     return slen + isNegative;
 593   if (radix == 8)
 594     return slen * 3 + isNegative;
 595   if (radix == 16)
 596     return slen * 4 + isNegative;
 597
 598   // FIXME: base 36
 599
 600   // This is grossly inefficient but accurate. We could probably do something
 601   // with a computation of roughly slen*64/20 and then adjust by the value of
 602   // the first few digits. But, I'm not sure how accurate that could be.
 603
 604   // Compute a sufficient number of bits that is always large enough but might
 605   // be too large. This avoids the assertion in the constructor. This
 606   // calculation doesn't work appropriately for the numbers 0-9, so just use 4
 607   // bits in that case.
 608   unsigned sufficient
 609     = radix == 10? (slen == 1 ? 4 : slen * 64/18)
 610                  : (slen == 1 ? 7 : slen * 16/3);
 611
 612   // Convert to the actual binary value.
 613   APInt tmp(sufficient, StringRef(p, slen), radix);
 614
 615   // Compute how many bits are required. If the log is infinite, assume we need
 616   // just bit.
 617   unsigned log = tmp.logBase2();
 618   if (log == (unsigned)-1) {
 619     return isNegative + 1;
 620   } else {
 621     return isNegative + log + 1;
 622   }
 623 }
 624
 625 hash_code llvm::hash_value(const APInt &Arg) {
 626   if (Arg.isSingleWord())
 627     return hash_combine(Arg.VAL);
 628
 629   return hash_combine_range(Arg.pVal, Arg.pVal + Arg.getNumWords());
 630 }
 631
 632 bool APInt::isSplat(unsigned SplatSizeInBits) const {
 633   assert(getBitWidth() % SplatSizeInBits == 0 &&
 634          "SplatSizeInBits must divide width!");
 635   // We can check that all parts of an integer are equal by making use of a
 636   // little trick: rotate and check if it's still the same value.
 637   return *this == rotl(SplatSizeInBits);
 638 }
 639
 640 /// This function returns the high "numBits" bits of this APInt.
 641 APInt APInt::getHiBits(unsigned numBits) const {
 642   return this->lshr(BitWidth - numBits);
 643 }
 644
 645 /// This function returns the low "numBits" bits of this APInt.
 646 APInt APInt::getLoBits(unsigned numBits) const {
 647   APInt Result(getLowBitsSet(BitWidth, numBits));
 648   Result &= *this;
 649   return Result;
 650 }
 651
 652 unsigned APInt::countLeadingZerosSlowCase() const {
 653   unsigned Count = 0;
 654   for (int i = getNumWords()-1; i >= 0; --i) {
 655     uint64_t V = pVal[i];
 656     if (V == 0)
 657       Count += APINT_BITS_PER_WORD;
 658     else {
 659       Count += llvm::countLeadingZeros(V);
 660       break;
 661     }
 662   }
 663   // Adjust for unused bits in the most significant word (they are zero).
 664   unsigned Mod = BitWidth % APINT_BITS_PER_WORD;
 665   Count -= Mod > 0 ? APINT_BITS_PER_WORD - Mod : 0;
 666   return Count;
 667 }
 668
 669 unsigned APInt::countLeadingOnes() const {
 670   if (isSingleWord())
 671     return llvm::countLeadingOnes(VAL << (APINT_BITS_PER_WORD - BitWidth));
 672
 673   unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
 674   unsigned shift;
 675   if (!highWordBits) {
 676     highWordBits = APINT_BITS_PER_WORD;
 677     shift = 0;
 678   } else {
 679     shift = APINT_BITS_PER_WORD - highWordBits;
 680   }
 681   int i = getNumWords() - 1;
 682   unsigned Count = llvm::countLeadingOnes(pVal[i] << shift);
 683   if (Count == highWordBits) {
 684     for (i--; i >= 0; --i) {
 685       if (pVal[i] == -1ULL)
 686         Count += APINT_BITS_PER_WORD;
 687       else {
 688         Count += llvm::countLeadingOnes(pVal[i]);
 689         break;
 690       }
 691     }
 692   }
 693   return Count;
 694 }
 695
 696 unsigned APInt::countTrailingZeros() const {
 697   if (isSingleWord())
 698     return std::min(unsigned(llvm::countTrailingZeros(VAL)), BitWidth);
 699   unsigned Count = 0;
 700   unsigned i = 0;
 701   for (; i < getNumWords() && pVal[i] == 0; ++i)
 702     Count += APINT_BITS_PER_WORD;
 703   if (i < getNumWords())
 704     Count += llvm::countTrailingZeros(pVal[i]);
 705   return std::min(Count, BitWidth);
 706 }
 707
 708 unsigned APInt::countTrailingOnesSlowCase() const {
 709   unsigned Count = 0;
 710   unsigned i = 0;
 711   for (; i < getNumWords() && pVal[i] == -1ULL; ++i)
 712     Count += APINT_BITS_PER_WORD;
 713   if (i < getNumWords())
 714     Count += llvm::countTrailingOnes(pVal[i]);
 715   return std::min(Count, BitWidth);
 716 }
 717
 718 unsigned APInt::countPopulationSlowCase() const {
 719   unsigned Count = 0;
 720   for (unsigned i = 0; i < getNumWords(); ++i)
 721     Count += llvm::countPopulation(pVal[i]);
 722   return Count;
 723 }
 724
 725 bool APInt::intersectsSlowCase(const APInt &RHS) const {
 726   for (unsigned i = 0, e = getNumWords(); i != e; ++i)
 727     if ((pVal[i] & RHS.pVal[i]) != 0)
 728       return true;
 729
 730   return false;
 731 }
 732
 733 bool APInt::isSubsetOfSlowCase(const APInt &RHS) const {
 734   for (unsigned i = 0, e = getNumWords(); i != e; ++i)
 735     if ((pVal[i] & ~RHS.pVal[i]) != 0)
 736       return false;
 737
 738   return true;
 739 }
 740
 741 APInt APInt::byteSwap() const {
 742   assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
 743   if (BitWidth == 16)
 744     return APInt(BitWidth, ByteSwap_16(uint16_t(VAL)));
 745   if (BitWidth == 32)
 746     return APInt(BitWidth, ByteSwap_32(unsigned(VAL)));
 747   if (BitWidth == 48) {
 748     unsigned Tmp1 = unsigned(VAL >> 16);
 749     Tmp1 = ByteSwap_32(Tmp1);
 750     uint16_t Tmp2 = uint16_t(VAL);
 751     Tmp2 = ByteSwap_16(Tmp2);
 752     return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1);
 753   }
 754   if (BitWidth == 64)
 755     return APInt(BitWidth, ByteSwap_64(VAL));
 756
 757   APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0);
 758   for (unsigned I = 0, N = getNumWords(); I != N; ++I)
 759     Result.pVal[I] = ByteSwap_64(pVal[N - I - 1]);
 760   if (Result.BitWidth != BitWidth) {
 761     Result.lshrInPlace(Result.BitWidth - BitWidth);
 762     Result.BitWidth = BitWidth;
 763   }
 764   return Result;
 765 }
 766
 767 APInt APInt::reverseBits() const {
 768   switch (BitWidth) {
 769   case 64:
 770     return APInt(BitWidth, llvm::reverseBits<uint64_t>(VAL));
 771   case 32:
 772     return APInt(BitWidth, llvm::reverseBits<uint32_t>(VAL));
 773   case 16:
 774     return APInt(BitWidth, llvm::reverseBits<uint16_t>(VAL));
 775   case 8:
 776     return APInt(BitWidth, llvm::reverseBits<uint8_t>(VAL));
 777   default:
 778     break;
 779   }
 780
 781   APInt Val(*this);
 782   APInt Reversed(BitWidth, 0);
 783   unsigned S = BitWidth;
 784
 785   for (; Val != 0; Val.lshrInPlace(1)) {
 786     Reversed <<= 1;
 787     Reversed |= Val[0];
 788     --S;
 789   }
 790
 791   Reversed <<= S;
 792   return Reversed;
 793 }
 794
 795 APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) {
 796   // Fast-path a common case.
 797   if (A == B) return A;
 798
 799   // Corner cases: if either operand is zero, the other is the gcd.
 800   if (!A) return B;
 801   if (!B) return A;
 802
 803   // Count common powers of 2 and remove all other powers of 2.
 804   unsigned Pow2;
 805   {
 806     unsigned Pow2_A = A.countTrailingZeros();
 807     unsigned Pow2_B = B.countTrailingZeros();
 808     if (Pow2_A > Pow2_B) {
 809       A.lshrInPlace(Pow2_A - Pow2_B);
 810       Pow2 = Pow2_B;
 811     } else if (Pow2_B > Pow2_A) {
 812       B.lshrInPlace(Pow2_B - Pow2_A);
 813       Pow2 = Pow2_A;
 814     } else {
 815       Pow2 = Pow2_A;
 816     }
 817   }
 818
 819   // Both operands are odd multiples of 2^Pow_2:
 820   //
 821   //   gcd(a, b) = gcd(|a - b| / 2^i, min(a, b))
 822   //
 823   // This is a modified version of Stein's algorithm, taking advantage of
 824   // efficient countTrailingZeros().
 825   while (A != B) {
 826     if (A.ugt(B)) {
 827       A -= B;
 828       A.lshrInPlace(A.countTrailingZeros() - Pow2);
 829     } else {
 830       B -= A;
 831       B.lshrInPlace(B.countTrailingZeros() - Pow2);
 832     }
 833   }
 834
 835   return A;
 836 }
 837
 838 APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
 839   union {
 840     double D;
 841     uint64_t I;
 842   } T;
 843   T.D = Double;
 844
 845   // Get the sign bit from the highest order bit
 846   bool isNeg = T.I >> 63;
 847
 848   // Get the 11-bit exponent and adjust for the 1023 bit bias
 849   int64_t exp = ((T.I >> 52) & 0x7ff) - 1023;
 850
 851   // If the exponent is negative, the value is < 0 so just return 0.
 852   if (exp < 0)
 853     return APInt(width, 0u);
 854
 855   // Extract the mantissa by clearing the top 12 bits (sign + exponent).
 856   uint64_t mantissa = (T.I & (~0ULL >> 12)) | 1ULL << 52;
 857
 858   // If the exponent doesn't shift all bits out of the mantissa
 859   if (exp < 52)
 860     return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
 861                     APInt(width, mantissa >> (52 - exp));
 862
 863   // If the client didn't provide enough bits for us to shift the mantissa into
 864   // then the result is undefined, just return 0
 865   if (width <= exp - 52)
 866     return APInt(width, 0);
 867
 868   // Otherwise, we have to shift the mantissa bits up to the right location
 869   APInt Tmp(width, mantissa);
 870   Tmp = Tmp.shl((unsigned)exp - 52);
 871   return isNeg ? -Tmp : Tmp;
 872 }
 873
 874 /// This function converts this APInt to a double.
 875 /// The layout for double is as following (IEEE Standard 754):
 876 ///  --------------------------------------
 877 /// |  Sign    Exponent    Fraction    Bias |
 878 /// |-------------------------------------- |
 879 /// |  1[63]   11[62-52]   52[51-00]   1023 |
 880 ///  --------------------------------------
 881 double APInt::roundToDouble(bool isSigned) const {
 882
 883   // Handle the simple case where the value is contained in one uint64_t.
 884   // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
 885   if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
 886     if (isSigned) {
 887       int64_t sext = SignExtend64(getWord(0), BitWidth);
 888       return double(sext);
 889     } else
 890       return double(getWord(0));
 891   }
 892
 893   // Determine if the value is negative.
 894   bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
 895
 896   // Construct the absolute value if we're negative.
 897   APInt Tmp(isNeg ? -(*this) : (*this));
 898
 899   // Figure out how many bits we're using.
 900   unsigned n = Tmp.getActiveBits();
 901
 902   // The exponent (without bias normalization) is just the number of bits
 903   // we are using. Note that the sign bit is gone since we constructed the
 904   // absolute value.
 905   uint64_t exp = n;
 906
 907   // Return infinity for exponent overflow
 908   if (exp > 1023) {
 909     if (!isSigned || !isNeg)
 910       return std::numeric_limits<double>::infinity();
 911     else
 912       return -std::numeric_limits<double>::infinity();
 913   }
 914   exp += 1023; // Increment for 1023 bias
 915
 916   // Number of bits in mantissa is 52. To obtain the mantissa value, we must
 917   // extract the high 52 bits from the correct words in pVal.
 918   uint64_t mantissa;
 919   unsigned hiWord = whichWord(n-1);
 920   if (hiWord == 0) {
 921     mantissa = Tmp.pVal[0];
 922     if (n > 52)
 923       mantissa >>= n - 52; // shift down, we want the top 52 bits.
 924   } else {
 925     assert(hiWord > 0 && "huh?");
 926     uint64_t hibits = Tmp.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
 927     uint64_t lobits = Tmp.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
 928     mantissa = hibits | lobits;
 929   }
 930
 931   // The leading bit of mantissa is implicit, so get rid of it.
 932   uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
 933   union {
 934     double D;
 935     uint64_t I;
 936   } T;
 937   T.I = sign | (exp << 52) | mantissa;
 938   return T.D;
 939 }
 940
 941 // Truncate to new width.
 942 APInt APInt::trunc(unsigned width) const {
 943   assert(width < BitWidth && "Invalid APInt Truncate request");
 944   assert(width && "Can't truncate to 0 bits");
 945
 946   if (width <= APINT_BITS_PER_WORD)
 947     return APInt(width, getRawData()[0]);
 948
 949   APInt Result(getMemory(getNumWords(width)), width);
 950
 951   // Copy full words.
 952   unsigned i;
 953   for (i = 0; i != width / APINT_BITS_PER_WORD; i++)
 954     Result.pVal[i] = pVal[i];
 955
 956   // Truncate and copy any partial word.
 957   unsigned bits = (0 - width) % APINT_BITS_PER_WORD;
 958   if (bits != 0)
 959     Result.pVal[i] = pVal[i] << bits >> bits;
 960
 961   return Result;
 962 }
 963
 964 // Sign extend to a new width.
 965 APInt APInt::sext(unsigned width) const {
 966   assert(width > BitWidth && "Invalid APInt SignExtend request");
 967
 968   if (width <= APINT_BITS_PER_WORD) {
 969     uint64_t val = VAL << (APINT_BITS_PER_WORD - BitWidth);
 970     val = (int64_t)val >> (width - BitWidth);
 971     return APInt(width, val >> (APINT_BITS_PER_WORD - width));
 972   }
 973
 974   APInt Result(getMemory(getNumWords(width)), width);
 975
 976   // Copy full words.
 977   unsigned i;
 978   uint64_t word = 0;
 979   for (i = 0; i != BitWidth / APINT_BITS_PER_WORD; i++) {
 980     word = getRawData()[i];
 981     Result.pVal[i] = word;
 982   }
 983
 984   // Read and sign-extend any partial word.
 985   unsigned bits = (0 - BitWidth) % APINT_BITS_PER_WORD;
 986   if (bits != 0)
 987     word = (int64_t)getRawData()[i] << bits >> bits;
 988   else
 989     word = (int64_t)word >> (APINT_BITS_PER_WORD - 1);
 990
 991   // Write remaining full words.
 992   for (; i != width / APINT_BITS_PER_WORD; i++) {
 993     Result.pVal[i] = word;
 994     word = (int64_t)word >> (APINT_BITS_PER_WORD - 1);
 995   }
 996
 997   // Write any partial word.
 998   bits = (0 - width) % APINT_BITS_PER_WORD;
 999   if (bits != 0)
1000     Result.pVal[i] = word << bits >> bits;
1001
1002   return Result;
1003 }
1004
1005 //  Zero extend to a new width.
1006 APInt APInt::zext(unsigned width) const {
1007   assert(width > BitWidth && "Invalid APInt ZeroExtend request");
1008
1009   if (width <= APINT_BITS_PER_WORD)
1010     return APInt(width, VAL);
1011
1012   APInt Result(getMemory(getNumWords(width)), width);
1013
1014   // Copy words.
1015   unsigned i;
1016   for (i = 0; i != getNumWords(); i++)
1017     Result.pVal[i] = getRawData()[i];
1018
1019   // Zero remaining words.
1020   memset(&Result.pVal[i], 0, (Result.getNumWords() - i) * APINT_WORD_SIZE);
1021
1022   return Result;
1023 }
1024
1025 APInt APInt::zextOrTrunc(unsigned width) const {
1026   if (BitWidth < width)
1027     return zext(width);
1028   if (BitWidth > width)
1029     return trunc(width);
1030   return *this;
1031 }
1032
1033 APInt APInt::sextOrTrunc(unsigned width) const {
1034   if (BitWidth < width)
1035     return sext(width);
1036   if (BitWidth > width)
1037     return trunc(width);
1038   return *this;
1039 }
1040
1041 APInt APInt::zextOrSelf(unsigned width) const {
1042   if (BitWidth < width)
1043     return zext(width);
1044   return *this;
1045 }
1046
1047 APInt APInt::sextOrSelf(unsigned width) const {
1048   if (BitWidth < width)
1049     return sext(width);
1050   return *this;
1051 }
1052
1053 /// Arithmetic right-shift this APInt by shiftAmt.
1054 /// @brief Arithmetic right-shift function.
1055 APInt APInt::ashr(const APInt &shiftAmt) const {
1056   return ashr((unsigned)shiftAmt.getLimitedValue(BitWidth));
1057 }
1058
1059 /// Arithmetic right-shift this APInt by shiftAmt.
1060 /// @brief Arithmetic right-shift function.
1061 APInt APInt::ashr(unsigned shiftAmt) const {
1062   assert(shiftAmt <= BitWidth && "Invalid shift amount");
1063   // Handle a degenerate case
1064   if (shiftAmt == 0)
1065     return *this;
1066
1067   // Handle single word shifts with built-in ashr
1068   if (isSingleWord()) {
1069     if (shiftAmt == BitWidth)
1070       return APInt(BitWidth, 0); // undefined
1071     return APInt(BitWidth, SignExtend64(VAL, BitWidth) >> shiftAmt);
1072   }
1073
1074   // If all the bits were shifted out, the result is, technically, undefined.
1075   // We return -1 if it was negative, 0 otherwise. We check this early to avoid
1076   // issues in the algorithm below.
1077   if (shiftAmt == BitWidth) {
1078     if (isNegative())
1079       return APInt(BitWidth, -1ULL, true);
1080     else
1081       return APInt(BitWidth, 0);
1082   }
1083
1084   // Create some space for the result.
1085   uint64_t * val = new uint64_t[getNumWords()];
1086
1087   // Compute some values needed by the following shift algorithms
1088   unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD; // bits to shift per word
1089   unsigned offset = shiftAmt / APINT_BITS_PER_WORD; // word offset for shift
1090   unsigned breakWord = getNumWords() - 1 - offset; // last word affected
1091   unsigned bitsInWord = whichBit(BitWidth); // how many bits in last word?
1092   if (bitsInWord == 0)
1093     bitsInWord = APINT_BITS_PER_WORD;
1094
1095   // If we are shifting whole words, just move whole words
1096   if (wordShift == 0) {
1097     // Move the words containing significant bits
1098     for (unsigned i = 0; i <= breakWord; ++i)
1099       val[i] = pVal[i+offset]; // move whole word
1100
1101     // Adjust the top significant word for sign bit fill, if negative
1102     if (isNegative())
1103       if (bitsInWord < APINT_BITS_PER_WORD)
1104         val[breakWord] |= ~0ULL << bitsInWord; // set high bits
1105   } else {
1106     // Shift the low order words
1107     for (unsigned i = 0; i < breakWord; ++i) {
1108       // This combines the shifted corresponding word with the low bits from
1109       // the next word (shifted into this word's high bits).
1110       val[i] = (pVal[i+offset] >> wordShift) |
1111                (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift));
1112     }
1113
1114     // Shift the break word. In this case there are no bits from the next word
1115     // to include in this word.
1116     val[breakWord] = pVal[breakWord+offset] >> wordShift;
1117
1118     // Deal with sign extension in the break word, and possibly the word before
1119     // it.
1120     if (isNegative()) {
1121       if (wordShift > bitsInWord) {
1122         if (breakWord > 0)
1123           val[breakWord-1] |=
1124             ~0ULL << (APINT_BITS_PER_WORD - (wordShift - bitsInWord));
1125         val[breakWord] |= ~0ULL;
1126       } else
1127         val[breakWord] |= (~0ULL << (bitsInWord - wordShift));
1128     }
1129   }
1130
1131   // Remaining words are 0 or -1, just assign them.
1132   uint64_t fillValue = (isNegative() ? -1ULL : 0);
1133   for (unsigned i = breakWord+1; i < getNumWords(); ++i)
1134     val[i] = fillValue;
1135   APInt Result(val, BitWidth);
1136   Result.clearUnusedBits();
1137   return Result;
1138 }
1139
1140 /// Logical right-shift this APInt by shiftAmt.
1141 /// @brief Logical right-shift function.
1142 void APInt::lshrInPlace(const APInt &shiftAmt) {
1143   lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
1144 }
1145
1146 /// Logical right-shift this APInt by shiftAmt.
1147 /// @brief Logical right-shift function.
1148 void APInt::lshrSlowCase(unsigned ShiftAmt) {
1149   tcShiftRight(pVal, getNumWords(), ShiftAmt);
1150 }
1151
1152 /// Left-shift this APInt by shiftAmt.
1153 /// @brief Left-shift function.
1154 APInt APInt::shl(const APInt &shiftAmt) const {
1155   // It's undefined behavior in C to shift by BitWidth or greater.
1156   return shl((unsigned)shiftAmt.getLimitedValue(BitWidth));
1157 }
1158
1159 void APInt::shlSlowCase(unsigned ShiftAmt) {
1160   tcShiftLeft(pVal, getNumWords(), ShiftAmt);
1161   clearUnusedBits();
1162 }
1163
1164 // Calculate the rotate amount modulo the bit width.
1165 static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) {
1166   unsigned rotBitWidth = rotateAmt.getBitWidth();
1167   APInt rot = rotateAmt;
1168   if (rotBitWidth < BitWidth) {
1169     // Extend the rotate APInt, so that the urem doesn't divide by 0.
1170     // e.g. APInt(1, 32) would give APInt(1, 0).
1171     rot = rotateAmt.zext(BitWidth);
1172   }
1173   rot = rot.urem(APInt(rot.getBitWidth(), BitWidth));
1174   return rot.getLimitedValue(BitWidth);
1175 }
1176
1177 APInt APInt::rotl(const APInt &rotateAmt) const {
1178   return rotl(rotateModulo(BitWidth, rotateAmt));
1179 }
1180
1181 APInt APInt::rotl(unsigned rotateAmt) const {
1182   rotateAmt %= BitWidth;
1183   if (rotateAmt == 0)
1184     return *this;
1185   return shl(rotateAmt) | lshr(BitWidth - rotateAmt);
1186 }
1187
1188 APInt APInt::rotr(const APInt &rotateAmt) const {
1189   return rotr(rotateModulo(BitWidth, rotateAmt));
1190 }
1191
1192 APInt APInt::rotr(unsigned rotateAmt) const {
1193   rotateAmt %= BitWidth;
1194   if (rotateAmt == 0)
1195     return *this;
1196   return lshr(rotateAmt) | shl(BitWidth - rotateAmt);
1197 }
1198
1199 // Square Root - this method computes and returns the square root of "this".
1200 // Three mechanisms are used for computation. For small values (<= 5 bits),
1201 // a table lookup is done. This gets some performance for common cases. For
1202 // values using less than 52 bits, the value is converted to double and then
1203 // the libc sqrt function is called. The result is rounded and then converted
1204 // back to a uint64_t which is then used to construct the result. Finally,
1205 // the Babylonian method for computing square roots is used.
1206 APInt APInt::sqrt() const {
1207
1208   // Determine the magnitude of the value.
1209   unsigned magnitude = getActiveBits();
1210
1211   // Use a fast table for some small values. This also gets rid of some
1212   // rounding errors in libc sqrt for small values.
1213   if (magnitude <= 5) {
1214     static const uint8_t results[32] = {
1215       /*     0 */ 0,
1216       /*  1- 2 */ 1, 1,
1217       /*  3- 6 */ 2, 2, 2, 2,
1218       /*  7-12 */ 3, 3, 3, 3, 3, 3,
1219       /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1220       /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1221       /*    31 */ 6
1222     };
1223     return APInt(BitWidth, results[ (isSingleWord() ? VAL : pVal[0]) ]);
1224   }
1225
1226   // If the magnitude of the value fits in less than 52 bits (the precision of
1227   // an IEEE double precision floating point value), then we can use the
1228   // libc sqrt function which will probably use a hardware sqrt computation.
1229   // This should be faster than the algorithm below.
1230   if (magnitude < 52) {
1231     return APInt(BitWidth,
1232                  uint64_t(::round(::sqrt(double(isSingleWord()?VAL:pVal[0])))));
1233   }
1234
1235   // Okay, all the short cuts are exhausted. We must compute it. The following
1236   // is a classical Babylonian method for computing the square root. This code
1237   // was adapted to APInt from a wikipedia article on such computations.
1238   // See http://www.wikipedia.org/ and go to the page named
1239   // Calculate_an_integer_square_root.
1240   unsigned nbits = BitWidth, i = 4;
1241   APInt testy(BitWidth, 16);
1242   APInt x_old(BitWidth, 1);
1243   APInt x_new(BitWidth, 0);
1244   APInt two(BitWidth, 2);
1245
1246   // Select a good starting value using binary logarithms.
1247   for (;; i += 2, testy = testy.shl(2))
1248     if (i >= nbits || this->ule(testy)) {
1249       x_old = x_old.shl(i / 2);
1250       break;
1251     }
1252
1253   // Use the Babylonian method to arrive at the integer square root:
1254   for (;;) {
1255     x_new = (this->udiv(x_old) + x_old).udiv(two);
1256     if (x_old.ule(x_new))
1257       break;
1258     x_old = x_new;
1259   }
1260
1261   // Make sure we return the closest approximation
1262   // NOTE: The rounding calculation below is correct. It will produce an
1263   // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1264   // determined to be a rounding issue with pari/gp as it begins to use a
1265   // floating point representation after 192 bits. There are no discrepancies
1266   // between this algorithm and pari/gp for bit widths < 192 bits.
1267   APInt square(x_old * x_old);
1268   APInt nextSquare((x_old + 1) * (x_old +1));
1269   if (this->ult(square))
1270     return x_old;
1271   assert(this->ule(nextSquare) && "Error in APInt::sqrt computation");
1272   APInt midpoint((nextSquare - square).udiv(two));
1273   APInt offset(*this - square);
1274   if (offset.ult(midpoint))
1275     return x_old;
1276   return x_old + 1;
1277 }
1278
1279 /// Computes the multiplicative inverse of this APInt for a given modulo. The
1280 /// iterative extended Euclidean algorithm is used to solve for this value,
1281 /// however we simplify it to speed up calculating only the inverse, and take
1282 /// advantage of div+rem calculations. We also use some tricks to avoid copying
1283 /// (potentially large) APInts around.
1284 APInt APInt::multiplicativeInverse(const APInt& modulo) const {
1285   assert(ult(modulo) && "This APInt must be smaller than the modulo");
1286
1287   // Using the properties listed at the following web page (accessed 06/21/08):
1288   //   http://www.numbertheory.org/php/euclid.html
1289   // (especially the properties numbered 3, 4 and 9) it can be proved that
1290   // BitWidth bits suffice for all the computations in the algorithm implemented
1291   // below. More precisely, this number of bits suffice if the multiplicative
1292   // inverse exists, but may not suffice for the general extended Euclidean
1293   // algorithm.
1294
1295   APInt r[2] = { modulo, *this };
1296   APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
1297   APInt q(BitWidth, 0);
1298
1299   unsigned i;
1300   for (i = 0; r[i^1] != 0; i ^= 1) {
1301     // An overview of the math without the confusing bit-flipping:
1302     // q = r[i-2] / r[i-1]
1303     // r[i] = r[i-2] % r[i-1]
1304     // t[i] = t[i-2] - t[i-1] * q
1305     udivrem(r[i], r[i^1], q, r[i]);
1306     t[i] -= t[i^1] * q;
1307   }
1308
1309   // If this APInt and the modulo are not coprime, there is no multiplicative
1310   // inverse, so return 0. We check this by looking at the next-to-last
1311   // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1312   // algorithm.
1313   if (r[i] != 1)
1314     return APInt(BitWidth, 0);
1315
1316   // The next-to-last t is the multiplicative inverse.  However, we are
1317   // interested in a positive inverse. Calcuate a positive one from a negative
1318   // one if necessary. A simple addition of the modulo suffices because
1319   // abs(t[i]) is known to be less than *this/2 (see the link above).
1320   return t[i].isNegative() ? t[i] + modulo : t[i];
1321 }
1322
1323 /// Calculate the magic numbers required to implement a signed integer division
1324 /// by a constant as a sequence of multiplies, adds and shifts.  Requires that
1325 /// the divisor not be 0, 1, or -1.  Taken from "Hacker's Delight", Henry S.
1326 /// Warren, Jr., chapter 10.
1327 APInt::ms APInt::magic() const {
1328   const APInt& d = *this;
1329   unsigned p;
1330   APInt ad, anc, delta, q1, r1, q2, r2, t;
1331   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1332   struct ms mag;
1333
1334   ad = d.abs();
1335   t = signedMin + (d.lshr(d.getBitWidth() - 1));
1336   anc = t - 1 - t.urem(ad);   // absolute value of nc
1337   p = d.getBitWidth() - 1;    // initialize p
1338   q1 = signedMin.udiv(anc);   // initialize q1 = 2p/abs(nc)
1339   r1 = signedMin - q1*anc;    // initialize r1 = rem(2p,abs(nc))
1340   q2 = signedMin.udiv(ad);    // initialize q2 = 2p/abs(d)
1341   r2 = signedMin - q2*ad;     // initialize r2 = rem(2p,abs(d))
1342   do {
1343     p = p + 1;
1344     q1 = q1<<1;          // update q1 = 2p/abs(nc)
1345     r1 = r1<<1;          // update r1 = rem(2p/abs(nc))
1346     if (r1.uge(anc)) {  // must be unsigned comparison
1347       q1 = q1 + 1;
1348       r1 = r1 - anc;
1349     }
1350     q2 = q2<<1;          // update q2 = 2p/abs(d)
1351     r2 = r2<<1;          // update r2 = rem(2p/abs(d))
1352     if (r2.uge(ad)) {   // must be unsigned comparison
1353       q2 = q2 + 1;
1354       r2 = r2 - ad;
1355     }
1356     delta = ad - r2;
1357   } while (q1.ult(delta) || (q1 == delta && r1 == 0));
1358
1359   mag.m = q2 + 1;
1360   if (d.isNegative()) mag.m = -mag.m;   // resulting magic number
1361   mag.s = p - d.getBitWidth();          // resulting shift
1362   return mag;
1363 }
1364
1365 /// Calculate the magic numbers required to implement an unsigned integer
1366 /// division by a constant as a sequence of multiplies, adds and shifts.
1367 /// Requires that the divisor not be 0.  Taken from "Hacker's Delight", Henry
1368 /// S. Warren, Jr., chapter 10.
1369 /// LeadingZeros can be used to simplify the calculation if the upper bits
1370 /// of the divided value are known zero.
1371 APInt::mu APInt::magicu(unsigned LeadingZeros) const {
1372   const APInt& d = *this;
1373   unsigned p;
1374   APInt nc, delta, q1, r1, q2, r2;
1375   struct mu magu;
1376   magu.a = 0;               // initialize "add" indicator
1377   APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros);
1378   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1379   APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
1380
1381   nc = allOnes - (allOnes - d).urem(d);
1382   p = d.getBitWidth() - 1;  // initialize p
1383   q1 = signedMin.udiv(nc);  // initialize q1 = 2p/nc
1384   r1 = signedMin - q1*nc;   // initialize r1 = rem(2p,nc)
1385   q2 = signedMax.udiv(d);   // initialize q2 = (2p-1)/d
1386   r2 = signedMax - q2*d;    // initialize r2 = rem((2p-1),d)
1387   do {
1388     p = p + 1;
1389     if (r1.uge(nc - r1)) {
1390       q1 = q1 + q1 + 1;  // update q1
1391       r1 = r1 + r1 - nc; // update r1
1392     }
1393     else {
1394       q1 = q1+q1; // update q1
1395       r1 = r1+r1; // update r1
1396     }
1397     if ((r2 + 1).uge(d - r2)) {
1398       if (q2.uge(signedMax)) magu.a = 1;
1399       q2 = q2+q2 + 1;     // update q2
1400       r2 = r2+r2 + 1 - d; // update r2
1401     }
1402     else {
1403       if (q2.uge(signedMin)) magu.a = 1;
1404       q2 = q2+q2;     // update q2
1405       r2 = r2+r2 + 1; // update r2
1406     }
1407     delta = d - 1 - r2;
1408   } while (p < d.getBitWidth()*2 &&
1409            (q1.ult(delta) || (q1 == delta && r1 == 0)));
1410   magu.m = q2 + 1; // resulting magic number
1411   magu.s = p - d.getBitWidth();  // resulting shift
1412   return magu;
1413 }
1414
1415 /// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1416 /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1417 /// variables here have the same names as in the algorithm. Comments explain
1418 /// the algorithm and any deviation from it.
1419 static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r,
1420                      unsigned m, unsigned n) {
1421   assert(u && "Must provide dividend");
1422   assert(v && "Must provide divisor");
1423   assert(q && "Must provide quotient");
1424   assert(u != v && u != q && v != q && "Must use different memory");
1425   assert(n>1 && "n must be > 1");
1426
1427   // b denotes the base of the number system. In our case b is 2^32.
1428   const uint64_t b = uint64_t(1) << 32;
1429
1430   DEBUG(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
1431   DEBUG(dbgs() << "KnuthDiv: original:");
1432   DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1433   DEBUG(dbgs() << " by");
1434   DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]);
1435   DEBUG(dbgs() << '\n');
1436   // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1437   // u and v by d. Note that we have taken Knuth's advice here to use a power
1438   // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1439   // 2 allows us to shift instead of multiply and it is easy to determine the
1440   // shift amount from the leading zeros.  We are basically normalizing the u
1441   // and v so that its high bits are shifted to the top of v's range without
1442   // overflow. Note that this can require an extra word in u so that u must
1443   // be of length m+n+1.
1444   unsigned shift = countLeadingZeros(v[n-1]);
1445   unsigned v_carry = 0;
1446   unsigned u_carry = 0;
1447   if (shift) {
1448     for (unsigned i = 0; i < m+n; ++i) {
1449       unsigned u_tmp = u[i] >> (32 - shift);
1450       u[i] = (u[i] << shift) | u_carry;
1451       u_carry = u_tmp;
1452     }
1453     for (unsigned i = 0; i < n; ++i) {
1454       unsigned v_tmp = v[i] >> (32 - shift);
1455       v[i] = (v[i] << shift) | v_carry;
1456       v_carry = v_tmp;
1457     }
1458   }
1459   u[m+n] = u_carry;
1460
1461   DEBUG(dbgs() << "KnuthDiv:   normal:");
1462   DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1463   DEBUG(dbgs() << " by");
1464   DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]);
1465   DEBUG(dbgs() << '\n');
1466
1467   // D2. [Initialize j.]  Set j to m. This is the loop counter over the places.
1468   int j = m;
1469   do {
1470     DEBUG(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
1471     // D3. [Calculate q'.].
1472     //     Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1473     //     Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1474     // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1475     // qp by 1, inrease rp by v[n-1], and repeat this test if rp < b. The test
1476     // on v[n-2] determines at high speed most of the cases in which the trial
1477     // value qp is one too large, and it eliminates all cases where qp is two
1478     // too large.
1479     uint64_t dividend = ((uint64_t(u[j+n]) << 32) + u[j+n-1]);
1480     DEBUG(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
1481     uint64_t qp = dividend / v[n-1];
1482     uint64_t rp = dividend % v[n-1];
1483     if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
1484       qp--;
1485       rp += v[n-1];
1486       if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
1487         qp--;
1488     }
1489     DEBUG(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
1490
1491     // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1492     // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1493     // consists of a simple multiplication by a one-place number, combined with
1494     // a subtraction.
1495     // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1496     // this step is actually negative, (u[j+n]...u[j]) should be left as the
1497     // true value plus b**(n+1), namely as the b's complement of
1498     // the true value, and a "borrow" to the left should be remembered.
1499     int64_t borrow = 0;
1500     for (unsigned i = 0; i < n; ++i) {
1501       uint64_t p = uint64_t(qp) * uint64_t(v[i]);
1502       int64_t subres = int64_t(u[j+i]) - borrow - (unsigned)p;
1503       u[j+i] = (unsigned)subres;
1504       borrow = (p >> 32) - (subres >> 32);
1505       DEBUG(dbgs() << "KnuthDiv: u[j+i] = " << u[j+i]
1506                    << ", borrow = " << borrow << '\n');
1507     }
1508     bool isNeg = u[j+n] < borrow;
1509     u[j+n] -= (unsigned)borrow;
1510
1511     DEBUG(dbgs() << "KnuthDiv: after subtraction:");
1512     DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1513     DEBUG(dbgs() << '\n');
1514
1515     // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1516     // negative, go to step D6; otherwise go on to step D7.
1517     q[j] = (unsigned)qp;
1518     if (isNeg) {
1519       // D6. [Add back]. The probability that this step is necessary is very
1520       // small, on the order of only 2/b. Make sure that test data accounts for
1521       // this possibility. Decrease q[j] by 1
1522       q[j]--;
1523       // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1524       // A carry will occur to the left of u[j+n], and it should be ignored
1525       // since it cancels with the borrow that occurred in D4.
1526       bool carry = false;
1527       for (unsigned i = 0; i < n; i++) {
1528         unsigned limit = std::min(u[j+i],v[i]);
1529         u[j+i] += v[i] + carry;
1530         carry = u[j+i] < limit || (carry && u[j+i] == limit);
1531       }
1532       u[j+n] += carry;
1533     }
1534     DEBUG(dbgs() << "KnuthDiv: after correction:");
1535     DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1536     DEBUG(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
1537
1538   // D7. [Loop on j.]  Decrease j by one. Now if j >= 0, go back to D3.
1539   } while (--j >= 0);
1540
1541   DEBUG(dbgs() << "KnuthDiv: quotient:");
1542   DEBUG(for (int i = m; i >=0; i--) dbgs() <<" " << q[i]);
1543   DEBUG(dbgs() << '\n');
1544
1545   // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1546   // remainder may be obtained by dividing u[...] by d. If r is non-null we
1547   // compute the remainder (urem uses this).
1548   if (r) {
1549     // The value d is expressed by the "shift" value above since we avoided
1550     // multiplication by d by using a shift left. So, all we have to do is
1551     // shift right here.
1552     if (shift) {
1553       unsigned carry = 0;
1554       DEBUG(dbgs() << "KnuthDiv: remainder:");
1555       for (int i = n-1; i >= 0; i--) {
1556         r[i] = (u[i] >> shift) | carry;
1557         carry = u[i] << (32 - shift);
1558         DEBUG(dbgs() << " " << r[i]);
1559       }
1560     } else {
1561       for (int i = n-1; i >= 0; i--) {
1562         r[i] = u[i];
1563         DEBUG(dbgs() << " " << r[i]);
1564       }
1565     }
1566     DEBUG(dbgs() << '\n');
1567   }
1568   DEBUG(dbgs() << '\n');
1569 }
1570
1571 void APInt::divide(const APInt &LHS, unsigned lhsWords, const APInt &RHS,
1572                    unsigned rhsWords, APInt *Quotient, APInt *Remainder) {
1573   assert(lhsWords >= rhsWords && "Fractional result");
1574
1575   // First, compose the values into an array of 32-bit words instead of
1576   // 64-bit words. This is a necessity of both the "short division" algorithm
1577   // and the Knuth "classical algorithm" which requires there to be native
1578   // operations for +, -, and * on an m bit value with an m*2 bit result. We
1579   // can't use 64-bit operands here because we don't have native results of
1580   // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1581   // work on large-endian machines.
1582   uint64_t mask = ~0ull >> (sizeof(unsigned)*CHAR_BIT);
1583   unsigned n = rhsWords * 2;
1584   unsigned m = (lhsWords * 2) - n;
1585
1586   // Allocate space for the temporary values we need either on the stack, if
1587   // it will fit, or on the heap if it won't.
1588   unsigned SPACE[128];
1589   unsigned *U = nullptr;
1590   unsigned *V = nullptr;
1591   unsigned *Q = nullptr;
1592   unsigned *R = nullptr;
1593   if ((Remainder?4:3)*n+2*m+1 <= 128) {
1594     U = &SPACE[0];
1595     V = &SPACE[m+n+1];
1596     Q = &SPACE[(m+n+1) + n];
1597     if (Remainder)
1598       R = &SPACE[(m+n+1) + n + (m+n)];
1599   } else {
1600     U = new unsigned[m + n + 1];
1601     V = new unsigned[n];
1602     Q = new unsigned[m+n];
1603     if (Remainder)
1604       R = new unsigned[n];
1605   }
1606
1607   // Initialize the dividend
1608   memset(U, 0, (m+n+1)*sizeof(unsigned));
1609   for (unsigned i = 0; i < lhsWords; ++i) {
1610     uint64_t tmp = (LHS.getNumWords() == 1 ? LHS.VAL : LHS.pVal[i]);
1611     U[i * 2] = (unsigned)(tmp & mask);
1612     U[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT));
1613   }
1614   U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
1615
1616   // Initialize the divisor
1617   memset(V, 0, (n)*sizeof(unsigned));
1618   for (unsigned i = 0; i < rhsWords; ++i) {
1619     uint64_t tmp = (RHS.getNumWords() == 1 ? RHS.VAL : RHS.pVal[i]);
1620     V[i * 2] = (unsigned)(tmp & mask);
1621     V[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT));
1622   }
1623
1624   // initialize the quotient and remainder
1625   memset(Q, 0, (m+n) * sizeof(unsigned));
1626   if (Remainder)
1627     memset(R, 0, n * sizeof(unsigned));
1628
1629   // Now, adjust m and n for the Knuth division. n is the number of words in
1630   // the divisor. m is the number of words by which the dividend exceeds the
1631   // divisor (i.e. m+n is the length of the dividend). These sizes must not
1632   // contain any zero words or the Knuth algorithm fails.
1633   for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
1634     n--;
1635     m++;
1636   }
1637   for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
1638     m--;
1639
1640   // If we're left with only a single word for the divisor, Knuth doesn't work
1641   // so we implement the short division algorithm here. This is much simpler
1642   // and faster because we are certain that we can divide a 64-bit quantity
1643   // by a 32-bit quantity at hardware speed and short division is simply a
1644   // series of such operations. This is just like doing short division but we
1645   // are using base 2^32 instead of base 10.
1646   assert(n != 0 && "Divide by zero?");
1647   if (n == 1) {
1648     unsigned divisor = V[0];
1649     unsigned remainder = 0;
1650     for (int i = m+n-1; i >= 0; i--) {
1651       uint64_t partial_dividend = uint64_t(remainder) << 32 | U[i];
1652       if (partial_dividend == 0) {
1653         Q[i] = 0;
1654         remainder = 0;
1655       } else if (partial_dividend < divisor) {
1656         Q[i] = 0;
1657         remainder = (unsigned)partial_dividend;
1658       } else if (partial_dividend == divisor) {
1659         Q[i] = 1;
1660         remainder = 0;
1661       } else {
1662         Q[i] = (unsigned)(partial_dividend / divisor);
1663         remainder = (unsigned)(partial_dividend - (Q[i] * divisor));
1664       }
1665     }
1666     if (R)
1667       R[0] = remainder;
1668   } else {
1669     // Now we're ready to invoke the Knuth classical divide algorithm. In this
1670     // case n > 1.
1671     KnuthDiv(U, V, Q, R, m, n);
1672   }
1673
1674   // If the caller wants the quotient
1675   if (Quotient) {
1676     // Set up the Quotient value's memory.
1677     if (Quotient->BitWidth != LHS.BitWidth) {
1678       if (Quotient->isSingleWord())
1679         Quotient->VAL = 0;
1680       else
1681         delete [] Quotient->pVal;
1682       Quotient->BitWidth = LHS.BitWidth;
1683       if (!Quotient->isSingleWord())
1684         Quotient->pVal = getClearedMemory(Quotient->getNumWords());
1685     } else
1686       Quotient->clearAllBits();
1687
1688     // The quotient is in Q. Reconstitute the quotient into Quotient's low
1689     // order words.
1690     // This case is currently dead as all users of divide() handle trivial cases
1691     // earlier.
1692     if (lhsWords == 1) {
1693       uint64_t tmp =
1694         uint64_t(Q[0]) | (uint64_t(Q[1]) << (APINT_BITS_PER_WORD / 2));
1695       if (Quotient->isSingleWord())
1696         Quotient->VAL = tmp;
1697       else
1698         Quotient->pVal[0] = tmp;
1699     } else {
1700       assert(!Quotient->isSingleWord() && "Quotient APInt not large enough");
1701       for (unsigned i = 0; i < lhsWords; ++i)
1702         Quotient->pVal[i] =
1703           uint64_t(Q[i*2]) | (uint64_t(Q[i*2+1]) << (APINT_BITS_PER_WORD / 2));
1704     }
1705   }
1706
1707   // If the caller wants the remainder
1708   if (Remainder) {
1709     // Set up the Remainder value's memory.
1710     if (Remainder->BitWidth != RHS.BitWidth) {
1711       if (Remainder->isSingleWord())
1712         Remainder->VAL = 0;
1713       else
1714         delete [] Remainder->pVal;
1715       Remainder->BitWidth = RHS.BitWidth;
1716       if (!Remainder->isSingleWord())
1717         Remainder->pVal = getClearedMemory(Remainder->getNumWords());
1718     } else
1719       Remainder->clearAllBits();
1720
1721     // The remainder is in R. Reconstitute the remainder into Remainder's low
1722     // order words.
1723     if (rhsWords == 1) {
1724       uint64_t tmp =
1725         uint64_t(R[0]) | (uint64_t(R[1]) << (APINT_BITS_PER_WORD / 2));
1726       if (Remainder->isSingleWord())
1727         Remainder->VAL = tmp;
1728       else
1729         Remainder->pVal[0] = tmp;
1730     } else {
1731       assert(!Remainder->isSingleWord() && "Remainder APInt not large enough");
1732       for (unsigned i = 0; i < rhsWords; ++i)
1733         Remainder->pVal[i] =
1734           uint64_t(R[i*2]) | (uint64_t(R[i*2+1]) << (APINT_BITS_PER_WORD / 2));
1735     }
1736   }
1737
1738   // Clean up the memory we allocated.
1739   if (U != &SPACE[0]) {
1740     delete [] U;
1741     delete [] V;
1742     delete [] Q;
1743     delete [] R;
1744   }
1745 }
1746
1747 APInt APInt::udiv(const APInt& RHS) const {
1748   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1749
1750   // First, deal with the easy case
1751   if (isSingleWord()) {
1752     assert(RHS.VAL != 0 && "Divide by zero?");
1753     return APInt(BitWidth, VAL / RHS.VAL);
1754   }
1755
1756   // Get some facts about the LHS and RHS number of bits and words
1757   unsigned rhsBits = RHS.getActiveBits();
1758   unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1759   assert(rhsWords && "Divided by zero???");
1760   unsigned lhsBits = this->getActiveBits();
1761   unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1);
1762
1763   // Deal with some degenerate cases
1764   if (!lhsWords)
1765     // 0 / X ===> 0
1766     return APInt(BitWidth, 0);
1767   else if (lhsWords < rhsWords || this->ult(RHS)) {
1768     // X / Y ===> 0, iff X < Y
1769     return APInt(BitWidth, 0);
1770   } else if (*this == RHS) {
1771     // X / X ===> 1
1772     return APInt(BitWidth, 1);
1773   } else if (lhsWords == 1 && rhsWords == 1) {
1774     // All high words are zero, just use native divide
1775     return APInt(BitWidth, this->pVal[0] / RHS.pVal[0]);
1776   }
1777
1778   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1779   APInt Quotient(1,0); // to hold result.
1780   divide(*this, lhsWords, RHS, rhsWords, &Quotient, nullptr);
1781   return Quotient;
1782 }
1783
1784 APInt APInt::sdiv(const APInt &RHS) const {
1785   if (isNegative()) {
1786     if (RHS.isNegative())
1787       return (-(*this)).udiv(-RHS);
1788     return -((-(*this)).udiv(RHS));
1789   }
1790   if (RHS.isNegative())
1791     return -(this->udiv(-RHS));
1792   return this->udiv(RHS);
1793 }
1794
1795 APInt APInt::urem(const APInt& RHS) const {
1796   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1797   if (isSingleWord()) {
1798     assert(RHS.VAL != 0 && "Remainder by zero?");
1799     return APInt(BitWidth, VAL % RHS.VAL);
1800   }
1801
1802   // Get some facts about the LHS
1803   unsigned lhsBits = getActiveBits();
1804   unsigned lhsWords = !lhsBits ? 0 : (whichWord(lhsBits - 1) + 1);
1805
1806   // Get some facts about the RHS
1807   unsigned rhsBits = RHS.getActiveBits();
1808   unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1809   assert(rhsWords && "Performing remainder operation by zero ???");
1810
1811   // Check the degenerate cases
1812   if (lhsWords == 0) {
1813     // 0 % Y ===> 0
1814     return APInt(BitWidth, 0);
1815   } else if (lhsWords < rhsWords || this->ult(RHS)) {
1816     // X % Y ===> X, iff X < Y
1817     return *this;
1818   } else if (*this == RHS) {
1819     // X % X == 0;
1820     return APInt(BitWidth, 0);
1821   } else if (lhsWords == 1) {
1822     // All high words are zero, just use native remainder
1823     return APInt(BitWidth, pVal[0] % RHS.pVal[0]);
1824   }
1825
1826   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1827   APInt Remainder(1,0);
1828   divide(*this, lhsWords, RHS, rhsWords, nullptr, &Remainder);
1829   return Remainder;
1830 }
1831
1832 APInt APInt::srem(const APInt &RHS) const {
1833   if (isNegative()) {
1834     if (RHS.isNegative())
1835       return -((-(*this)).urem(-RHS));
1836     return -((-(*this)).urem(RHS));
1837   }
1838   if (RHS.isNegative())
1839     return this->urem(-RHS);
1840   return this->urem(RHS);
1841 }
1842
1843 void APInt::udivrem(const APInt &LHS, const APInt &RHS,
1844                     APInt &Quotient, APInt &Remainder) {
1845   assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same");
1846
1847   // First, deal with the easy case
1848   if (LHS.isSingleWord()) {
1849     assert(RHS.VAL != 0 && "Divide by zero?");
1850     uint64_t QuotVal = LHS.VAL / RHS.VAL;
1851     uint64_t RemVal = LHS.VAL % RHS.VAL;
1852     Quotient = APInt(LHS.BitWidth, QuotVal);
1853     Remainder = APInt(LHS.BitWidth, RemVal);
1854     return;
1855   }
1856
1857   // Get some size facts about the dividend and divisor
1858   unsigned lhsBits  = LHS.getActiveBits();
1859   unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1);
1860   unsigned rhsBits  = RHS.getActiveBits();
1861   unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1862
1863   // Check the degenerate cases
1864   if (lhsWords == 0) {
1865     Quotient = 0;                // 0 / Y ===> 0
1866     Remainder = 0;               // 0 % Y ===> 0
1867     return;
1868   }
1869
1870   if (lhsWords < rhsWords || LHS.ult(RHS)) {
1871     Remainder = LHS;            // X % Y ===> X, iff X < Y
1872     Quotient = 0;               // X / Y ===> 0, iff X < Y
1873     return;
1874   }
1875
1876   if (LHS == RHS) {
1877     Quotient  = 1;              // X / X ===> 1
1878     Remainder = 0;              // X % X ===> 0;
1879     return;
1880   }
1881
1882   if (lhsWords == 1 && rhsWords == 1) {
1883     // There is only one word to consider so use the native versions.
1884     uint64_t lhsValue = LHS.isSingleWord() ? LHS.VAL : LHS.pVal[0];
1885     uint64_t rhsValue = RHS.isSingleWord() ? RHS.VAL : RHS.pVal[0];
1886     Quotient = APInt(LHS.getBitWidth(), lhsValue / rhsValue);
1887     Remainder = APInt(LHS.getBitWidth(), lhsValue % rhsValue);
1888     return;
1889   }
1890
1891   // Okay, lets do it the long way
1892   divide(LHS, lhsWords, RHS, rhsWords, &Quotient, &Remainder);
1893 }
1894
1895 void APInt::sdivrem(const APInt &LHS, const APInt &RHS,
1896                     APInt &Quotient, APInt &Remainder) {
1897   if (LHS.isNegative()) {
1898     if (RHS.isNegative())
1899       APInt::udivrem(-LHS, -RHS, Quotient, Remainder);
1900     else {
1901       APInt::udivrem(-LHS, RHS, Quotient, Remainder);
1902       Quotient = -Quotient;
1903     }
1904     Remainder = -Remainder;
1905   } else if (RHS.isNegative()) {
1906     APInt::udivrem(LHS, -RHS, Quotient, Remainder);
1907     Quotient = -Quotient;
1908   } else {
1909     APInt::udivrem(LHS, RHS, Quotient, Remainder);
1910   }
1911 }
1912
1913 APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const {
1914   APInt Res = *this+RHS;
1915   Overflow = isNonNegative() == RHS.isNonNegative() &&
1916              Res.isNonNegative() != isNonNegative();
1917   return Res;
1918 }
1919
1920 APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const {
1921   APInt Res = *this+RHS;
1922   Overflow = Res.ult(RHS);
1923   return Res;
1924 }
1925
1926 APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const {
1927   APInt Res = *this - RHS;
1928   Overflow = isNonNegative() != RHS.isNonNegative() &&
1929              Res.isNonNegative() != isNonNegative();
1930   return Res;
1931 }
1932
1933 APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const {
1934   APInt Res = *this-RHS;
1935   Overflow = Res.ugt(*this);
1936   return Res;
1937 }
1938
1939 APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {
1940   // MININT/-1  -->  overflow.
1941   Overflow = isMinSignedValue() && RHS.isAllOnesValue();
1942   return sdiv(RHS);
1943 }
1944
1945 APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
1946   APInt Res = *this * RHS;
1947
1948   if (*this != 0 && RHS != 0)
1949     Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS;
1950   else
1951     Overflow = false;
1952   return Res;
1953 }
1954
1955 APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
1956   APInt Res = *this * RHS;
1957
1958   if (*this != 0 && RHS != 0)
1959     Overflow = Res.udiv(RHS) != *this || Res.udiv(*this) != RHS;
1960   else
1961     Overflow = false;
1962   return Res;
1963 }
1964
1965 APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const {
1966   Overflow = ShAmt.uge(getBitWidth());
1967   if (Overflow)
1968     return APInt(BitWidth, 0);
1969
1970   if (isNonNegative()) // Don't allow sign change.
1971     Overflow = ShAmt.uge(countLeadingZeros());
1972   else
1973     Overflow = ShAmt.uge(countLeadingOnes());
1974
1975   return *this << ShAmt;
1976 }
1977
1978 APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const {
1979   Overflow = ShAmt.uge(getBitWidth());
1980   if (Overflow)
1981     return APInt(BitWidth, 0);
1982
1983   Overflow = ShAmt.ugt(countLeadingZeros());
1984
1985   return *this << ShAmt;
1986 }
1987
1988
1989
1990
1991 void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
1992   // Check our assumptions here
1993   assert(!str.empty() && "Invalid string length");
1994   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
1995           radix == 36) &&
1996          "Radix should be 2, 8, 10, 16, or 36!");
1997
1998   StringRef::iterator p = str.begin();
1999   size_t slen = str.size();
2000   bool isNeg = *p == '-';
2001   if (*p == '-' || *p == '+') {
2002     p++;
2003     slen--;
2004     assert(slen && "String is only a sign, needs a value.");
2005   }
2006   assert((slen <= numbits || radix != 2) && "Insufficient bit width");
2007   assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
2008   assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
2009   assert((((slen-1)*64)/22 <= numbits || radix != 10) &&
2010          "Insufficient bit width");
2011
2012   // Allocate memory
2013   if (!isSingleWord())
2014     pVal = getClearedMemory(getNumWords());
2015
2016   // Figure out if we can shift instead of multiply
2017   unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
2018
2019   // Set up an APInt for the radix multiplier outside the loop so we don't
2020   // constantly construct/destruct it.
2021   APInt apradix(getBitWidth(), radix);
2022
2023   // Enter digit traversal loop
2024   for (StringRef::iterator e = str.end(); p != e; ++p) {
2025     unsigned digit = getDigit(*p, radix);
2026     assert(digit < radix && "Invalid character in digit string");
2027
2028     // Shift or multiply the value by the radix
2029     if (slen > 1) {
2030       if (shift)
2031         *this <<= shift;
2032       else
2033         *this *= apradix;
2034     }
2035
2036     // Add in the digit we just interpreted
2037     *this += digit;
2038   }
2039   // If its negative, put it in two's complement form
2040   if (isNeg) {
2041     --(*this);
2042     this->flipAllBits();
2043   }
2044 }
2045
2046 void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
2047                      bool Signed, bool formatAsCLiteral) const {
2048   assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 ||
2049           Radix == 36) &&
2050          "Radix should be 2, 8, 10, 16, or 36!");
2051
2052   const char *Prefix = "";
2053   if (formatAsCLiteral) {
2054     switch (Radix) {
2055       case 2:
2056         // Binary literals are a non-standard extension added in gcc 4.3:
2057         // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
2058         Prefix = "0b";
2059         break;
2060       case 8:
2061         Prefix = "0";
2062         break;
2063       case 10:
2064         break; // No prefix
2065       case 16:
2066         Prefix = "0x";
2067         break;
2068       default:
2069         llvm_unreachable("Invalid radix!");
2070     }
2071   }
2072
2073   // First, check for a zero value and just short circuit the logic below.
2074   if (*this == 0) {
2075     while (*Prefix) {
2076       Str.push_back(*Prefix);
2077       ++Prefix;
2078     };
2079     Str.push_back('0');
2080     return;
2081   }
2082
2083   static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
2084
2085   if (isSingleWord()) {
2086     char Buffer[65];
2087     char *BufPtr = Buffer+65;
2088
2089     uint64_t N;
2090     if (!Signed) {
2091       N = getZExtValue();
2092     } else {
2093       int64_t I = getSExtValue();
2094       if (I >= 0) {
2095         N = I;
2096       } else {
2097         Str.push_back('-');
2098         N = -(uint64_t)I;
2099       }
2100     }
2101
2102     while (*Prefix) {
2103       Str.push_back(*Prefix);
2104       ++Prefix;
2105     };
2106
2107     while (N) {
2108       *--BufPtr = Digits[N % Radix];
2109       N /= Radix;
2110     }
2111     Str.append(BufPtr, Buffer+65);
2112     return;
2113   }
2114
2115   APInt Tmp(*this);
2116
2117   if (Signed && isNegative()) {
2118     // They want to print the signed version and it is a negative value
2119     // Flip the bits and add one to turn it into the equivalent positive
2120     // value and put a '-' in the result.
2121     Tmp.flipAllBits();
2122     ++Tmp;
2123     Str.push_back('-');
2124   }
2125
2126   while (*Prefix) {
2127     Str.push_back(*Prefix);
2128     ++Prefix;
2129   };
2130
2131   // We insert the digits backward, then reverse them to get the right order.
2132   unsigned StartDig = Str.size();
2133
2134   // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2135   // because the number of bits per digit (1, 3 and 4 respectively) divides
2136   // equally.  We just shift until the value is zero.
2137   if (Radix == 2 || Radix == 8 || Radix == 16) {
2138     // Just shift tmp right for each digit width until it becomes zero
2139     unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
2140     unsigned MaskAmt = Radix - 1;
2141
2142     while (Tmp != 0) {
2143       unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
2144       Str.push_back(Digits[Digit]);
2145       Tmp.lshrInPlace(ShiftAmt);
2146     }
2147   } else {
2148     APInt divisor(Radix == 10? 4 : 8, Radix);
2149     while (Tmp != 0) {
2150       APInt APdigit(1, 0);
2151       APInt tmp2(Tmp.getBitWidth(), 0);
2152       divide(Tmp, Tmp.getNumWords(), divisor, divisor.getNumWords(), &tmp2,
2153              &APdigit);
2154       unsigned Digit = (unsigned)APdigit.getZExtValue();
2155       assert(Digit < Radix && "divide failed");
2156       Str.push_back(Digits[Digit]);
2157       Tmp = tmp2;
2158     }
2159   }
2160
2161   // Reverse the digits before returning.
2162   std::reverse(Str.begin()+StartDig, Str.end());
2163 }
2164
2165 /// Returns the APInt as a std::string. Note that this is an inefficient method.
2166 /// It is better to pass in a SmallVector/SmallString to the methods above.
2167 std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
2168   SmallString<40> S;
2169   toString(S, Radix, Signed, /* formatAsCLiteral = */false);
2170   return S.str();
2171 }
2172
2173 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2174 LLVM_DUMP_METHOD void APInt::dump() const {
2175   SmallString<40> S, U;
2176   this->toStringUnsigned(U);
2177   this->toStringSigned(S);
2178   dbgs() << "APInt(" << BitWidth << "b, "
2179          << U << "u " << S << "s)\n";
2180 }
2181 #endif
2182
2183 void APInt::print(raw_ostream &OS, bool isSigned) const {
2184   SmallString<40> S;
2185   this->toString(S, 10, isSigned, /* formatAsCLiteral = */false);
2186   OS << S;
2187 }
2188
2189 // This implements a variety of operations on a representation of
2190 // arbitrary precision, two's-complement, bignum integer values.
2191
2192 // Assumed by lowHalf, highHalf, partMSB and partLSB.  A fairly safe
2193 // and unrestricting assumption.
2194 static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0,
2195               "Part width must be divisible by 2!");
2196
2197 /* Some handy functions local to this file.  */
2198
2199 /* Returns the integer part with the least significant BITS set.
2200    BITS cannot be zero.  */
2201 static inline APInt::WordType lowBitMask(unsigned bits) {
2202   assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD);
2203
2204   return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits);
2205 }
2206
2207 /* Returns the value of the lower half of PART.  */
2208 static inline APInt::WordType lowHalf(APInt::WordType part) {
2209   return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2);
2210 }
2211
2212 /* Returns the value of the upper half of PART.  */
2213 static inline APInt::WordType highHalf(APInt::WordType part) {
2214   return part >> (APInt::APINT_BITS_PER_WORD / 2);
2215 }
2216
2217 /* Returns the bit number of the most significant set bit of a part.
2218    If the input number has no bits set -1U is returned.  */
2219 static unsigned partMSB(APInt::WordType value) {
2220   return findLastSet(value, ZB_Max);
2221 }
2222
2223 /* Returns the bit number of the least significant set bit of a
2224    part.  If the input number has no bits set -1U is returned.  */
2225 static unsigned partLSB(APInt::WordType value) {
2226   return findFirstSet(value, ZB_Max);
2227 }
2228
2229 /* Sets the least significant part of a bignum to the input value, and
2230    zeroes out higher parts.  */
2231 void APInt::tcSet(WordType *dst, WordType part, unsigned parts) {
2232   assert(parts > 0);
2233
2234   dst[0] = part;
2235   for (unsigned i = 1; i < parts; i++)
2236     dst[i] = 0;
2237 }
2238
2239 /* Assign one bignum to another.  */
2240 void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) {
2241   for (unsigned i = 0; i < parts; i++)
2242     dst[i] = src[i];
2243 }
2244
2245 /* Returns true if a bignum is zero, false otherwise.  */
2246 bool APInt::tcIsZero(const WordType *src, unsigned parts) {
2247   for (unsigned i = 0; i < parts; i++)
2248     if (src[i])
2249       return false;
2250
2251   return true;
2252 }
2253
2254 /* Extract the given bit of a bignum; returns 0 or 1.  */
2255 int APInt::tcExtractBit(const WordType *parts, unsigned bit) {
2256   return (parts[whichWord(bit)] & maskBit(bit)) != 0;
2257 }
2258
2259 /* Set the given bit of a bignum. */
2260 void APInt::tcSetBit(WordType *parts, unsigned bit) {
2261   parts[whichWord(bit)] |= maskBit(bit);
2262 }
2263
2264 /* Clears the given bit of a bignum. */
2265 void APInt::tcClearBit(WordType *parts, unsigned bit) {
2266   parts[whichWord(bit)] &= ~maskBit(bit);
2267 }
2268
2269 /* Returns the bit number of the least significant set bit of a
2270    number.  If the input number has no bits set -1U is returned.  */
2271 unsigned APInt::tcLSB(const WordType *parts, unsigned n) {
2272   for (unsigned i = 0; i < n; i++) {
2273     if (parts[i] != 0) {
2274       unsigned lsb = partLSB(parts[i]);
2275
2276       return lsb + i * APINT_BITS_PER_WORD;
2277     }
2278   }
2279
2280   return -1U;
2281 }
2282
2283 /* Returns the bit number of the most significant set bit of a number.
2284    If the input number has no bits set -1U is returned.  */
2285 unsigned APInt::tcMSB(const WordType *parts, unsigned n) {
2286   do {
2287     --n;
2288
2289     if (parts[n] != 0) {
2290       unsigned msb = partMSB(parts[n]);
2291
2292       return msb + n * APINT_BITS_PER_WORD;
2293     }
2294   } while (n);
2295
2296   return -1U;
2297 }
2298
2299 /* Copy the bit vector of width srcBITS from SRC, starting at bit
2300    srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
2301    the least significant bit of DST.  All high bits above srcBITS in
2302    DST are zero-filled.  */
2303 void
2304 APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src,
2305                  unsigned srcBits, unsigned srcLSB) {
2306   unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
2307   assert(dstParts <= dstCount);
2308
2309   unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD;
2310   tcAssign (dst, src + firstSrcPart, dstParts);
2311
2312   unsigned shift = srcLSB % APINT_BITS_PER_WORD;
2313   tcShiftRight (dst, dstParts, shift);
2314
2315   /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC
2316      in DST.  If this is less that srcBits, append the rest, else
2317      clear the high bits.  */
2318   unsigned n = dstParts * APINT_BITS_PER_WORD - shift;
2319   if (n < srcBits) {
2320     WordType mask = lowBitMask (srcBits - n);
2321     dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
2322                           << n % APINT_BITS_PER_WORD);
2323   } else if (n > srcBits) {
2324     if (srcBits % APINT_BITS_PER_WORD)
2325       dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD);
2326   }
2327
2328   /* Clear high parts.  */
2329   while (dstParts < dstCount)
2330     dst[dstParts++] = 0;
2331 }
2332
2333 /* DST += RHS + C where C is zero or one.  Returns the carry flag.  */
2334 APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs,
2335                              WordType c, unsigned parts) {
2336   assert(c <= 1);
2337
2338   for (unsigned i = 0; i < parts; i++) {
2339     WordType l = dst[i];
2340     if (c) {
2341       dst[i] += rhs[i] + 1;
2342       c = (dst[i] <= l);
2343     } else {
2344       dst[i] += rhs[i];
2345       c = (dst[i] < l);
2346     }
2347   }
2348
2349   return c;
2350 }
2351
2352 /// This function adds a single "word" integer, src, to the multiple
2353 /// "word" integer array, dst[]. dst[] is modified to reflect the addition and
2354 /// 1 is returned if there is a carry out, otherwise 0 is returned.
2355 /// @returns the carry of the addition.
2356 APInt::WordType APInt::tcAddPart(WordType *dst, WordType src,
2357                                  unsigned parts) {
2358   for (unsigned i = 0; i < parts; ++i) {
2359     dst[i] += src;
2360     if (dst[i] >= src)
2361       return 0; // No need to carry so exit early.
2362     src = 1; // Carry one to next digit.
2363   }
2364
2365   return 1;
2366 }
2367
2368 /* DST -= RHS + C where C is zero or one.  Returns the carry flag.  */
2369 APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs,
2370                                   WordType c, unsigned parts) {
2371   assert(c <= 1);
2372
2373   for (unsigned i = 0; i < parts; i++) {
2374     WordType l = dst[i];
2375     if (c) {
2376       dst[i] -= rhs[i] + 1;
2377       c = (dst[i] >= l);
2378     } else {
2379       dst[i] -= rhs[i];
2380       c = (dst[i] > l);
2381     }
2382   }
2383
2384   return c;
2385 }
2386
2387 /// This function subtracts a single "word" (64-bit word), src, from
2388 /// the multi-word integer array, dst[], propagating the borrowed 1 value until
2389 /// no further borrowing is needed or it runs out of "words" in dst.  The result
2390 /// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not
2391 /// exhausted. In other words, if src > dst then this function returns 1,
2392 /// otherwise 0.
2393 /// @returns the borrow out of the subtraction
2394 APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src,
2395                                       unsigned parts) {
2396   for (unsigned i = 0; i < parts; ++i) {
2397     WordType Dst = dst[i];
2398     dst[i] -= src;
2399     if (src <= Dst)
2400       return 0; // No need to borrow so exit early.
2401     src = 1; // We have to "borrow 1" from next "word"
2402   }
2403
2404   return 1;
2405 }
2406
2407 /* Negate a bignum in-place.  */
2408 void APInt::tcNegate(WordType *dst, unsigned parts) {
2409   tcComplement(dst, parts);
2410   tcIncrement(dst, parts);
2411 }
2412
2413 /*  DST += SRC * MULTIPLIER + CARRY   if add is true
2414     DST  = SRC * MULTIPLIER + CARRY   if add is false
2415
2416     Requires 0 <= DSTPARTS <= SRCPARTS + 1.  If DST overlaps SRC
2417     they must start at the same point, i.e. DST == SRC.
2418
2419     If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2420     returned.  Otherwise DST is filled with the least significant
2421     DSTPARTS parts of the result, and if all of the omitted higher
2422     parts were zero return zero, otherwise overflow occurred and
2423     return one.  */
2424 int APInt::tcMultiplyPart(WordType *dst, const WordType *src,
2425                           WordType multiplier, WordType carry,
2426                           unsigned srcParts, unsigned dstParts,
2427                           bool add) {
2428   /* Otherwise our writes of DST kill our later reads of SRC.  */
2429   assert(dst <= src || dst >= src + srcParts);
2430   assert(dstParts <= srcParts + 1);
2431
2432   /* N loops; minimum of dstParts and srcParts.  */
2433   unsigned n = dstParts < srcParts ? dstParts: srcParts;
2434
2435   unsigned i;
2436   for (i = 0; i < n; i++) {
2437     WordType low, mid, high, srcPart;
2438
2439       /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2440
2441          This cannot overflow, because
2442
2443          (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2444
2445          which is less than n^2.  */
2446
2447     srcPart = src[i];
2448
2449     if (multiplier == 0 || srcPart == 0) {
2450       low = carry;
2451       high = 0;
2452     } else {
2453       low = lowHalf(srcPart) * lowHalf(multiplier);
2454       high = highHalf(srcPart) * highHalf(multiplier);
2455
2456       mid = lowHalf(srcPart) * highHalf(multiplier);
2457       high += highHalf(mid);
2458       mid <<= APINT_BITS_PER_WORD / 2;
2459       if (low + mid < low)
2460         high++;
2461       low += mid;
2462
2463       mid = highHalf(srcPart) * lowHalf(multiplier);
2464       high += highHalf(mid);
2465       mid <<= APINT_BITS_PER_WORD / 2;
2466       if (low + mid < low)
2467         high++;
2468       low += mid;
2469
2470       /* Now add carry.  */
2471       if (low + carry < low)
2472         high++;
2473       low += carry;
2474     }
2475
2476     if (add) {
2477       /* And now DST[i], and store the new low part there.  */
2478       if (low + dst[i] < low)
2479         high++;
2480       dst[i] += low;
2481     } else
2482       dst[i] = low;
2483
2484     carry = high;
2485   }
2486
2487   if (i < dstParts) {
2488     /* Full multiplication, there is no overflow.  */
2489     assert(i + 1 == dstParts);
2490     dst[i] = carry;
2491     return 0;
2492   } else {
2493     /* We overflowed if there is carry.  */
2494     if (carry)
2495       return 1;
2496
2497     /* We would overflow if any significant unwritten parts would be
2498        non-zero.  This is true if any remaining src parts are non-zero
2499        and the multiplier is non-zero.  */
2500     if (multiplier)
2501       for (; i < srcParts; i++)
2502         if (src[i])
2503           return 1;
2504
2505     /* We fitted in the narrow destination.  */
2506     return 0;
2507   }
2508 }
2509
2510 /* DST = LHS * RHS, where DST has the same width as the operands and
2511    is filled with the least significant parts of the result.  Returns
2512    one if overflow occurred, otherwise zero.  DST must be disjoint
2513    from both operands.  */
2514 int APInt::tcMultiply(WordType *dst, const WordType *lhs,
2515                       const WordType *rhs, unsigned parts) {
2516   assert(dst != lhs && dst != rhs);
2517
2518   int overflow = 0;
2519   tcSet(dst, 0, parts);
2520
2521   for (unsigned i = 0; i < parts; i++)
2522     overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
2523                                parts - i, true);
2524
2525   return overflow;
2526 }
2527
2528 /* DST = LHS * RHS, where DST has width the sum of the widths of the
2529    operands.  No overflow occurs.  DST must be disjoint from both
2530    operands.  Returns the number of parts required to hold the
2531    result.  */
2532 unsigned APInt::tcFullMultiply(WordType *dst, const WordType *lhs,
2533                                const WordType *rhs, unsigned lhsParts,
2534                                unsigned rhsParts) {
2535   /* Put the narrower number on the LHS for less loops below.  */
2536   if (lhsParts > rhsParts) {
2537     return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
2538   } else {
2539     assert(dst != lhs && dst != rhs);
2540
2541     tcSet(dst, 0, rhsParts);
2542
2543     for (unsigned i = 0; i < lhsParts; i++)
2544       tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true);
2545
2546     unsigned n = lhsParts + rhsParts;
2547
2548     return n - (dst[n - 1] == 0);
2549   }
2550 }
2551
2552 /* If RHS is zero LHS and REMAINDER are left unchanged, return one.
2553    Otherwise set LHS to LHS / RHS with the fractional part discarded,
2554    set REMAINDER to the remainder, return zero.  i.e.
2555
2556    OLD_LHS = RHS * LHS + REMAINDER
2557
2558    SCRATCH is a bignum of the same size as the operands and result for
2559    use by the routine; its contents need not be initialized and are
2560    destroyed.  LHS, REMAINDER and SCRATCH must be distinct.
2561 */
2562 int APInt::tcDivide(WordType *lhs, const WordType *rhs,
2563                     WordType *remainder, WordType *srhs,
2564                     unsigned parts) {
2565   assert(lhs != remainder && lhs != srhs && remainder != srhs);
2566
2567   unsigned shiftCount = tcMSB(rhs, parts) + 1;
2568   if (shiftCount == 0)
2569     return true;
2570
2571   shiftCount = parts * APINT_BITS_PER_WORD - shiftCount;
2572   unsigned n = shiftCount / APINT_BITS_PER_WORD;
2573   WordType mask = (WordType) 1 << (shiftCount % APINT_BITS_PER_WORD);
2574
2575   tcAssign(srhs, rhs, parts);
2576   tcShiftLeft(srhs, parts, shiftCount);
2577   tcAssign(remainder, lhs, parts);
2578   tcSet(lhs, 0, parts);
2579
2580   /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
2581      the total.  */
2582   for (;;) {
2583       int compare;
2584
2585       compare = tcCompare(remainder, srhs, parts);
2586       if (compare >= 0) {
2587         tcSubtract(remainder, srhs, 0, parts);
2588         lhs[n] |= mask;
2589       }
2590
2591       if (shiftCount == 0)
2592         break;
2593       shiftCount--;
2594       tcShiftRight(srhs, parts, 1);
2595       if ((mask >>= 1) == 0) {
2596         mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1);
2597         n--;
2598       }
2599   }
2600
2601   return false;
2602 }
2603
2604 /// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are
2605 /// no restrictions on Count.
2606 void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) {
2607   // Don't bother performing a no-op shift.
2608   if (!Count)
2609     return;
2610
2611   /* WordShift is the inter-part shift; BitShift is is intra-part shift.  */
2612   unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2613   unsigned BitShift = Count % APINT_BITS_PER_WORD;
2614
2615   // Fastpath for moving by whole words.
2616   if (BitShift == 0) {
2617     std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE);
2618   } else {
2619     while (Words-- > WordShift) {
2620       Dst[Words] = Dst[Words - WordShift] << BitShift;
2621       if (Words > WordShift)
2622         Dst[Words] |=
2623           Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift);
2624     }
2625   }
2626
2627   // Fill in the remainder with 0s.
2628   std::memset(Dst, 0, WordShift * APINT_WORD_SIZE);
2629 }
2630
2631 /// Shift a bignum right Count bits in-place. Shifted in bits are zero. There
2632 /// are no restrictions on Count.
2633 void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) {
2634   // Don't bother performing a no-op shift.
2635   if (!Count)
2636     return;
2637
2638   // WordShift is the inter-part shift; BitShift is is intra-part shift.
2639   unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2640   unsigned BitShift = Count % APINT_BITS_PER_WORD;
2641
2642   unsigned WordsToMove = Words - WordShift;
2643   // Fastpath for moving by whole words.
2644   if (BitShift == 0) {
2645     std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE);
2646   } else {
2647     for (unsigned i = 0; i != WordsToMove; ++i) {
2648       Dst[i] = Dst[i + WordShift] >> BitShift;
2649       if (i + 1 != WordsToMove)
2650         Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift);
2651     }
2652   }
2653
2654   // Fill in the remainder with 0s.
2655   std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE);
2656 }
2657
2658 /* Bitwise and of two bignums.  */
2659 void APInt::tcAnd(WordType *dst, const WordType *rhs, unsigned parts) {
2660   for (unsigned i = 0; i < parts; i++)
2661     dst[i] &= rhs[i];
2662 }
2663
2664 /* Bitwise inclusive or of two bignums.  */
2665 void APInt::tcOr(WordType *dst, const WordType *rhs, unsigned parts) {
2666   for (unsigned i = 0; i < parts; i++)
2667     dst[i] |= rhs[i];
2668 }
2669
2670 /* Bitwise exclusive or of two bignums.  */
2671 void APInt::tcXor(WordType *dst, const WordType *rhs, unsigned parts) {
2672   for (unsigned i = 0; i < parts; i++)
2673     dst[i] ^= rhs[i];
2674 }
2675
2676 /* Complement a bignum in-place.  */
2677 void APInt::tcComplement(WordType *dst, unsigned parts) {
2678   for (unsigned i = 0; i < parts; i++)
2679     dst[i] = ~dst[i];
2680 }
2681
2682 /* Comparison (unsigned) of two bignums.  */
2683 int APInt::tcCompare(const WordType *lhs, const WordType *rhs,
2684                      unsigned parts) {
2685   while (parts) {
2686     parts--;
2687     if (lhs[parts] == rhs[parts])
2688       continue;
2689
2690     return (lhs[parts] > rhs[parts]) ? 1 : -1;
2691   }
2692
2693   return 0;
2694 }
2695
2696 /* Set the least significant BITS bits of a bignum, clear the
2697    rest.  */
2698 void APInt::tcSetLeastSignificantBits(WordType *dst, unsigned parts,
2699                                       unsigned bits) {
2700   unsigned i = 0;
2701   while (bits > APINT_BITS_PER_WORD) {
2702     dst[i++] = ~(WordType) 0;
2703     bits -= APINT_BITS_PER_WORD;
2704   }
2705
2706   if (bits)
2707     dst[i++] = ~(WordType) 0 >> (APINT_BITS_PER_WORD - bits);
2708
2709   while (i < parts)
2710     dst[i++] = 0;
2711 }