lib/Support/APInt.cpp

   1 //===-- APInt.cpp - Implement APInt class ---------------------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements a class to represent arbitrary precision integer
  11 // constant values and provide a variety of arithmetic operations on them.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #include "llvm/ADT/APInt.h"
  16 #include "llvm/ADT/ArrayRef.h"
  17 #include "llvm/ADT/FoldingSet.h"
  18 #include "llvm/ADT/Hashing.h"
  19 #include "llvm/ADT/SmallString.h"
  20 #include "llvm/ADT/StringRef.h"
  21 #include "llvm/Support/Debug.h"
  22 #include "llvm/Support/ErrorHandling.h"
  23 #include "llvm/Support/MathExtras.h"
  24 #include "llvm/Support/raw_ostream.h"
  25 #include <climits>
  26 #include <cmath>
  27 #include <cstdlib>
  28 #include <cstring>
  29 using namespace llvm;
  30
  31 #define DEBUG_TYPE "apint"
  32
  33 /// A utility function for allocating memory, checking for allocation failures,
  34 /// and ensuring the contents are zeroed.
  35 inline static uint64_t* getClearedMemory(unsigned numWords) {
  36   uint64_t * result = new uint64_t[numWords];
  37   assert(result && "APInt memory allocation fails!");
  38   memset(result, 0, numWords * sizeof(uint64_t));
  39   return result;
  40 }
  41
  42 /// A utility function for allocating memory and checking for allocation
  43 /// failure.  The content is not zeroed.
  44 inline static uint64_t* getMemory(unsigned numWords) {
  45   uint64_t * result = new uint64_t[numWords];
  46   assert(result && "APInt memory allocation fails!");
  47   return result;
  48 }
  49
  50 /// A utility function that converts a character to a digit.
  51 inline static unsigned getDigit(char cdigit, uint8_t radix) {
  52   unsigned r;
  53
  54   if (radix == 16 || radix == 36) {
  55     r = cdigit - '0';
  56     if (r <= 9)
  57       return r;
  58
  59     r = cdigit - 'A';
  60     if (r <= radix - 11U)
  61       return r + 10;
  62
  63     r = cdigit - 'a';
  64     if (r <= radix - 11U)
  65       return r + 10;
  66
  67     radix = 10;
  68   }
  69
  70   r = cdigit - '0';
  71   if (r < radix)
  72     return r;
  73
  74   return -1U;
  75 }
  76
  77
  78 void APInt::initSlowCase(uint64_t val, bool isSigned) {
  79   U.pVal = getClearedMemory(getNumWords());
  80   U.pVal[0] = val;
  81   if (isSigned && int64_t(val) < 0)
  82     for (unsigned i = 1; i < getNumWords(); ++i)
  83       U.pVal[i] = WORD_MAX;
  84   clearUnusedBits();
  85 }
  86
  87 void APInt::initSlowCase(const APInt& that) {
  88   U.pVal = getMemory(getNumWords());
  89   memcpy(U.pVal, that.U.pVal, getNumWords() * APINT_WORD_SIZE);
  90 }
  91
  92 void APInt::initFromArray(ArrayRef<uint64_t> bigVal) {
  93   assert(BitWidth && "Bitwidth too small");
  94   assert(bigVal.data() && "Null pointer detected!");
  95   if (isSingleWord())
  96     U.VAL = bigVal[0];
  97   else {
  98     // Get memory, cleared to 0
  99     U.pVal = getClearedMemory(getNumWords());
 100     // Calculate the number of words to copy
 101     unsigned words = std::min<unsigned>(bigVal.size(), getNumWords());
 102     // Copy the words from bigVal to pVal
 103     memcpy(U.pVal, bigVal.data(), words * APINT_WORD_SIZE);
 104   }
 105   // Make sure unused high bits are cleared
 106   clearUnusedBits();
 107 }
 108
 109 APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal)
 110   : BitWidth(numBits) {
 111   initFromArray(bigVal);
 112 }
 113
 114 APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
 115   : BitWidth(numBits) {
 116   initFromArray(makeArrayRef(bigVal, numWords));
 117 }
 118
 119 APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
 120   : BitWidth(numbits) {
 121   assert(BitWidth && "Bitwidth too small");
 122   fromString(numbits, Str, radix);
 123 }
 124
 125 void APInt::AssignSlowCase(const APInt& RHS) {
 126   // Don't do anything for X = X
 127   if (this == &RHS)
 128     return;
 129
 130   if (BitWidth == RHS.getBitWidth()) {
 131     // assume same bit-width single-word case is already handled
 132     assert(!isSingleWord());
 133     memcpy(U.pVal, RHS.U.pVal, getNumWords() * APINT_WORD_SIZE);
 134     return;
 135   }
 136
 137   if (isSingleWord()) {
 138     // assume case where both are single words is already handled
 139     assert(!RHS.isSingleWord());
 140     U.pVal = getMemory(RHS.getNumWords());
 141     memcpy(U.pVal, RHS.U.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
 142   } else if (getNumWords() == RHS.getNumWords())
 143     memcpy(U.pVal, RHS.U.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
 144   else if (RHS.isSingleWord()) {
 145     delete [] U.pVal;
 146     U.VAL = RHS.U.VAL;
 147   } else {
 148     delete [] U.pVal;
 149     U.pVal = getMemory(RHS.getNumWords());
 150     memcpy(U.pVal, RHS.U.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
 151   }
 152   BitWidth = RHS.BitWidth;
 153   clearUnusedBits();
 154 }
 155
 156 /// This method 'profiles' an APInt for use with FoldingSet.
 157 void APInt::Profile(FoldingSetNodeID& ID) const {
 158   ID.AddInteger(BitWidth);
 159
 160   if (isSingleWord()) {
 161     ID.AddInteger(U.VAL);
 162     return;
 163   }
 164
 165   unsigned NumWords = getNumWords();
 166   for (unsigned i = 0; i < NumWords; ++i)
 167     ID.AddInteger(U.pVal[i]);
 168 }
 169
 170 /// @brief Prefix increment operator. Increments the APInt by one.
 171 APInt& APInt::operator++() {
 172   if (isSingleWord())
 173     ++U.VAL;
 174   else
 175     tcIncrement(U.pVal, getNumWords());
 176   return clearUnusedBits();
 177 }
 178
 179 /// @brief Prefix decrement operator. Decrements the APInt by one.
 180 APInt& APInt::operator--() {
 181   if (isSingleWord())
 182     --U.VAL;
 183   else
 184     tcDecrement(U.pVal, getNumWords());
 185   return clearUnusedBits();
 186 }
 187
 188 /// Adds the RHS APint to this APInt.
 189 /// @returns this, after addition of RHS.
 190 /// @brief Addition assignment operator.
 191 APInt& APInt::operator+=(const APInt& RHS) {
 192   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 193   if (isSingleWord())
 194     U.VAL += RHS.U.VAL;
 195   else
 196     tcAdd(U.pVal, RHS.U.pVal, 0, getNumWords());
 197   return clearUnusedBits();
 198 }
 199
 200 APInt& APInt::operator+=(uint64_t RHS) {
 201   if (isSingleWord())
 202     U.VAL += RHS;
 203   else
 204     tcAddPart(U.pVal, RHS, getNumWords());
 205   return clearUnusedBits();
 206 }
 207
 208 /// Subtracts the RHS APInt from this APInt
 209 /// @returns this, after subtraction
 210 /// @brief Subtraction assignment operator.
 211 APInt& APInt::operator-=(const APInt& RHS) {
 212   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 213   if (isSingleWord())
 214     U.VAL -= RHS.U.VAL;
 215   else
 216     tcSubtract(U.pVal, RHS.U.pVal, 0, getNumWords());
 217   return clearUnusedBits();
 218 }
 219
 220 APInt& APInt::operator-=(uint64_t RHS) {
 221   if (isSingleWord())
 222     U.VAL -= RHS;
 223   else
 224     tcSubtractPart(U.pVal, RHS, getNumWords());
 225   return clearUnusedBits();
 226 }
 227
 228 /// Multiplies an integer array, x, by a uint64_t integer and places the result
 229 /// into dest.
 230 /// @returns the carry out of the multiplication.
 231 /// @brief Multiply a multi-digit APInt by a single digit (64-bit) integer.
 232 static uint64_t mul_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) {
 233   // Split y into high 32-bit part (hy)  and low 32-bit part (ly)
 234   uint64_t ly = y & 0xffffffffULL, hy = y >> 32;
 235   uint64_t carry = 0;
 236
 237   // For each digit of x.
 238   for (unsigned i = 0; i < len; ++i) {
 239     // Split x into high and low words
 240     uint64_t lx = x[i] & 0xffffffffULL;
 241     uint64_t hx = x[i] >> 32;
 242     // hasCarry - A flag to indicate if there is a carry to the next digit.
 243     // hasCarry == 0, no carry
 244     // hasCarry == 1, has carry
 245     // hasCarry == 2, no carry and the calculation result == 0.
 246     uint8_t hasCarry = 0;
 247     dest[i] = carry + lx * ly;
 248     // Determine if the add above introduces carry.
 249     hasCarry = (dest[i] < carry) ? 1 : 0;
 250     carry = hx * ly + (dest[i] >> 32) + (hasCarry ? (1ULL << 32) : 0);
 251     // The upper limit of carry can be (2^32 - 1)(2^32 - 1) +
 252     // (2^32 - 1) + 2^32 = 2^64.
 253     hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0);
 254
 255     carry += (lx * hy) & 0xffffffffULL;
 256     dest[i] = (carry << 32) | (dest[i] & 0xffffffffULL);
 257     carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0) +
 258             (carry >> 32) + ((lx * hy) >> 32) + hx * hy;
 259   }
 260   return carry;
 261 }
 262
 263 /// Multiplies integer array x by integer array y and stores the result into
 264 /// the integer array dest. Note that dest's size must be >= xlen + ylen.
 265 /// @brief Generalized multiplication of integer arrays.
 266 static void mul(uint64_t dest[], uint64_t x[], unsigned xlen, uint64_t y[],
 267                 unsigned ylen) {
 268   dest[xlen] = mul_1(dest, x, xlen, y[0]);
 269   for (unsigned i = 1; i < ylen; ++i) {
 270     uint64_t ly = y[i] & 0xffffffffULL, hy = y[i] >> 32;
 271     uint64_t carry = 0, lx = 0, hx = 0;
 272     for (unsigned j = 0; j < xlen; ++j) {
 273       lx = x[j] & 0xffffffffULL;
 274       hx = x[j] >> 32;
 275       // hasCarry - A flag to indicate if has carry.
 276       // hasCarry == 0, no carry
 277       // hasCarry == 1, has carry
 278       // hasCarry == 2, no carry and the calculation result == 0.
 279       uint8_t hasCarry = 0;
 280       uint64_t resul = carry + lx * ly;
 281       hasCarry = (resul < carry) ? 1 : 0;
 282       carry = (hasCarry ? (1ULL << 32) : 0) + hx * ly + (resul >> 32);
 283       hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0);
 284
 285       carry += (lx * hy) & 0xffffffffULL;
 286       resul = (carry << 32) | (resul & 0xffffffffULL);
 287       dest[i+j] += resul;
 288       carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0)+
 289               (carry >> 32) + (dest[i+j] < resul ? 1 : 0) +
 290               ((lx * hy) >> 32) + hx * hy;
 291     }
 292     dest[i+xlen] = carry;
 293   }
 294 }
 295
 296 APInt& APInt::operator*=(const APInt& RHS) {
 297   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 298   if (isSingleWord()) {
 299     U.VAL *= RHS.U.VAL;
 300     clearUnusedBits();
 301     return *this;
 302   }
 303
 304   // Get some bit facts about LHS and check for zero
 305   unsigned lhsBits = getActiveBits();
 306   unsigned lhsWords = !lhsBits ? 0 : whichWord(lhsBits - 1) + 1;
 307   if (!lhsWords)
 308     // 0 * X ===> 0
 309     return *this;
 310
 311   // Get some bit facts about RHS and check for zero
 312   unsigned rhsBits = RHS.getActiveBits();
 313   unsigned rhsWords = !rhsBits ? 0 : whichWord(rhsBits - 1) + 1;
 314   if (!rhsWords) {
 315     // X * 0 ===> 0
 316     clearAllBits();
 317     return *this;
 318   }
 319
 320   // Allocate space for the result
 321   unsigned destWords = rhsWords + lhsWords;
 322   uint64_t *dest = getMemory(destWords);
 323
 324   // Perform the long multiply
 325   mul(dest, U.pVal, lhsWords, RHS.U.pVal, rhsWords);
 326
 327   // Copy result back into *this
 328   clearAllBits();
 329   unsigned wordsToCopy = destWords >= getNumWords() ? getNumWords() : destWords;
 330   memcpy(U.pVal, dest, wordsToCopy * APINT_WORD_SIZE);
 331   clearUnusedBits();
 332
 333   // delete dest array and return
 334   delete[] dest;
 335   return *this;
 336 }
 337
 338 void APInt::AndAssignSlowCase(const APInt& RHS) {
 339   tcAnd(U.pVal, RHS.U.pVal, getNumWords());
 340 }
 341
 342 void APInt::OrAssignSlowCase(const APInt& RHS) {
 343   tcOr(U.pVal, RHS.U.pVal, getNumWords());
 344 }
 345
 346 void APInt::XorAssignSlowCase(const APInt& RHS) {
 347   tcXor(U.pVal, RHS.U.pVal, getNumWords());
 348 }
 349
 350 APInt APInt::operator*(const APInt& RHS) const {
 351   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 352   if (isSingleWord())
 353     return APInt(BitWidth, U.VAL * RHS.U.VAL);
 354   APInt Result(*this);
 355   Result *= RHS;
 356   return Result;
 357 }
 358
 359 bool APInt::EqualSlowCase(const APInt& RHS) const {
 360   return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal);
 361 }
 362
 363 int APInt::compare(const APInt& RHS) const {
 364   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
 365   if (isSingleWord())
 366     return U.VAL < RHS.U.VAL ? -1 : U.VAL > RHS.U.VAL;
 367
 368   return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
 369 }
 370
 371 int APInt::compareSigned(const APInt& RHS) const {
 372   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
 373   if (isSingleWord()) {
 374     int64_t lhsSext = SignExtend64(U.VAL, BitWidth);
 375     int64_t rhsSext = SignExtend64(RHS.U.VAL, BitWidth);
 376     return lhsSext < rhsSext ? -1 : lhsSext > rhsSext;
 377   }
 378
 379   bool lhsNeg = isNegative();
 380   bool rhsNeg = RHS.isNegative();
 381
 382   // If the sign bits don't match, then (LHS < RHS) if LHS is negative
 383   if (lhsNeg != rhsNeg)
 384     return lhsNeg ? -1 : 1;
 385
 386   // Otherwise we can just use an unsigned comparison, because even negative
 387   // numbers compare correctly this way if both have the same signed-ness.
 388   return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
 389 }
 390
 391 void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
 392   unsigned loWord = whichWord(loBit);
 393   unsigned hiWord = whichWord(hiBit);
 394
 395   // Create an initial mask for the low word with zeros below loBit.
 396   uint64_t loMask = WORD_MAX << whichBit(loBit);
 397
 398   // If hiBit is not aligned, we need a high mask.
 399   unsigned hiShiftAmt = whichBit(hiBit);
 400   if (hiShiftAmt != 0) {
 401     // Create a high mask with zeros above hiBit.
 402     uint64_t hiMask = WORD_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt);
 403     // If loWord and hiWord are equal, then we combine the masks. Otherwise,
 404     // set the bits in hiWord.
 405     if (hiWord == loWord)
 406       loMask &= hiMask;
 407     else
 408       U.pVal[hiWord] |= hiMask;
 409   }
 410   // Apply the mask to the low word.
 411   U.pVal[loWord] |= loMask;
 412
 413   // Fill any words between loWord and hiWord with all ones.
 414   for (unsigned word = loWord + 1; word < hiWord; ++word)
 415     U.pVal[word] = WORD_MAX;
 416 }
 417
 418 /// @brief Toggle every bit to its opposite value.
 419 void APInt::flipAllBitsSlowCase() {
 420   tcComplement(U.pVal, getNumWords());
 421   clearUnusedBits();
 422 }
 423
 424 /// Toggle a given bit to its opposite value whose position is given
 425 /// as "bitPosition".
 426 /// @brief Toggles a given bit to its opposite value.
 427 void APInt::flipBit(unsigned bitPosition) {
 428   assert(bitPosition < BitWidth && "Out of the bit-width range!");
 429   if ((*this)[bitPosition]) clearBit(bitPosition);
 430   else setBit(bitPosition);
 431 }
 432
 433 void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
 434   unsigned subBitWidth = subBits.getBitWidth();
 435   assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth &&
 436          "Illegal bit insertion");
 437
 438   // Insertion is a direct copy.
 439   if (subBitWidth == BitWidth) {
 440     *this = subBits;
 441     return;
 442   }
 443
 444   // Single word result can be done as a direct bitmask.
 445   if (isSingleWord()) {
 446     uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
 447     U.VAL &= ~(mask << bitPosition);
 448     U.VAL |= (subBits.U.VAL << bitPosition);
 449     return;
 450   }
 451
 452   unsigned loBit = whichBit(bitPosition);
 453   unsigned loWord = whichWord(bitPosition);
 454   unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1);
 455
 456   // Insertion within a single word can be done as a direct bitmask.
 457   if (loWord == hi1Word) {
 458     uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
 459     U.pVal[loWord] &= ~(mask << loBit);
 460     U.pVal[loWord] |= (subBits.U.VAL << loBit);
 461     return;
 462   }
 463
 464   // Insert on word boundaries.
 465   if (loBit == 0) {
 466     // Direct copy whole words.
 467     unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD;
 468     memcpy(U.pVal + loWord, subBits.getRawData(),
 469            numWholeSubWords * APINT_WORD_SIZE);
 470
 471     // Mask+insert remaining bits.
 472     unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD;
 473     if (remainingBits != 0) {
 474       uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - remainingBits);
 475       U.pVal[hi1Word] &= ~mask;
 476       U.pVal[hi1Word] |= subBits.getWord(subBitWidth - 1);
 477     }
 478     return;
 479   }
 480
 481   // General case - set/clear individual bits in dst based on src.
 482   // TODO - there is scope for optimization here, but at the moment this code
 483   // path is barely used so prefer readability over performance.
 484   for (unsigned i = 0; i != subBitWidth; ++i) {
 485     if (subBits[i])
 486       setBit(bitPosition + i);
 487     else
 488       clearBit(bitPosition + i);
 489   }
 490 }
 491
 492 APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const {
 493   assert(numBits > 0 && "Can't extract zero bits");
 494   assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
 495          "Illegal bit extraction");
 496
 497   if (isSingleWord())
 498     return APInt(numBits, U.VAL >> bitPosition);
 499
 500   unsigned loBit = whichBit(bitPosition);
 501   unsigned loWord = whichWord(bitPosition);
 502   unsigned hiWord = whichWord(bitPosition + numBits - 1);
 503
 504   // Single word result extracting bits from a single word source.
 505   if (loWord == hiWord)
 506     return APInt(numBits, U.pVal[loWord] >> loBit);
 507
 508   // Extracting bits that start on a source word boundary can be done
 509   // as a fast memory copy.
 510   if (loBit == 0)
 511     return APInt(numBits, makeArrayRef(U.pVal + loWord, 1 + hiWord - loWord));
 512
 513   // General case - shift + copy source words directly into place.
 514   APInt Result(numBits, 0);
 515   unsigned NumSrcWords = getNumWords();
 516   unsigned NumDstWords = Result.getNumWords();
 517
 518   for (unsigned word = 0; word < NumDstWords; ++word) {
 519     uint64_t w0 = U.pVal[loWord + word];
 520     uint64_t w1 =
 521         (loWord + word + 1) < NumSrcWords ? U.pVal[loWord + word + 1] : 0;
 522     Result.U.pVal[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit));
 523   }
 524
 525   return Result.clearUnusedBits();
 526 }
 527
 528 unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
 529   assert(!str.empty() && "Invalid string length");
 530   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
 531           radix == 36) &&
 532          "Radix should be 2, 8, 10, 16, or 36!");
 533
 534   size_t slen = str.size();
 535
 536   // Each computation below needs to know if it's negative.
 537   StringRef::iterator p = str.begin();
 538   unsigned isNegative = *p == '-';
 539   if (*p == '-' || *p == '+') {
 540     p++;
 541     slen--;
 542     assert(slen && "String is only a sign, needs a value.");
 543   }
 544
 545   // For radixes of power-of-two values, the bits required is accurately and
 546   // easily computed
 547   if (radix == 2)
 548     return slen + isNegative;
 549   if (radix == 8)
 550     return slen * 3 + isNegative;
 551   if (radix == 16)
 552     return slen * 4 + isNegative;
 553
 554   // FIXME: base 36
 555
 556   // This is grossly inefficient but accurate. We could probably do something
 557   // with a computation of roughly slen*64/20 and then adjust by the value of
 558   // the first few digits. But, I'm not sure how accurate that could be.
 559
 560   // Compute a sufficient number of bits that is always large enough but might
 561   // be too large. This avoids the assertion in the constructor. This
 562   // calculation doesn't work appropriately for the numbers 0-9, so just use 4
 563   // bits in that case.
 564   unsigned sufficient
 565     = radix == 10? (slen == 1 ? 4 : slen * 64/18)
 566                  : (slen == 1 ? 7 : slen * 16/3);
 567
 568   // Convert to the actual binary value.
 569   APInt tmp(sufficient, StringRef(p, slen), radix);
 570
 571   // Compute how many bits are required. If the log is infinite, assume we need
 572   // just bit.
 573   unsigned log = tmp.logBase2();
 574   if (log == (unsigned)-1) {
 575     return isNegative + 1;
 576   } else {
 577     return isNegative + log + 1;
 578   }
 579 }
 580
 581 hash_code llvm::hash_value(const APInt &Arg) {
 582   if (Arg.isSingleWord())
 583     return hash_combine(Arg.U.VAL);
 584
 585   return hash_combine_range(Arg.U.pVal, Arg.U.pVal + Arg.getNumWords());
 586 }
 587
 588 bool APInt::isSplat(unsigned SplatSizeInBits) const {
 589   assert(getBitWidth() % SplatSizeInBits == 0 &&
 590          "SplatSizeInBits must divide width!");
 591   // We can check that all parts of an integer are equal by making use of a
 592   // little trick: rotate and check if it's still the same value.
 593   return *this == rotl(SplatSizeInBits);
 594 }
 595
 596 /// This function returns the high "numBits" bits of this APInt.
 597 APInt APInt::getHiBits(unsigned numBits) const {
 598   return this->lshr(BitWidth - numBits);
 599 }
 600
 601 /// This function returns the low "numBits" bits of this APInt.
 602 APInt APInt::getLoBits(unsigned numBits) const {
 603   APInt Result(getLowBitsSet(BitWidth, numBits));
 604   Result &= *this;
 605   return Result;
 606 }
 607
 608 /// Return a value containing V broadcasted over NewLen bits.
 609 APInt APInt::getSplat(unsigned NewLen, const APInt &V) {
 610   assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!");
 611
 612   APInt Val = V.zextOrSelf(NewLen);
 613   for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1)
 614     Val |= Val << I;
 615
 616   return Val;
 617 }
 618
 619 unsigned APInt::countLeadingZerosSlowCase() const {
 620   unsigned Count = 0;
 621   for (int i = getNumWords()-1; i >= 0; --i) {
 622     uint64_t V = U.pVal[i];
 623     if (V == 0)
 624       Count += APINT_BITS_PER_WORD;
 625     else {
 626       Count += llvm::countLeadingZeros(V);
 627       break;
 628     }
 629   }
 630   // Adjust for unused bits in the most significant word (they are zero).
 631   unsigned Mod = BitWidth % APINT_BITS_PER_WORD;
 632   Count -= Mod > 0 ? APINT_BITS_PER_WORD - Mod : 0;
 633   return Count;
 634 }
 635
 636 unsigned APInt::countLeadingOnes() const {
 637   if (isSingleWord())
 638     return llvm::countLeadingOnes(U.VAL << (APINT_BITS_PER_WORD - BitWidth));
 639
 640   unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
 641   unsigned shift;
 642   if (!highWordBits) {
 643     highWordBits = APINT_BITS_PER_WORD;
 644     shift = 0;
 645   } else {
 646     shift = APINT_BITS_PER_WORD - highWordBits;
 647   }
 648   int i = getNumWords() - 1;
 649   unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift);
 650   if (Count == highWordBits) {
 651     for (i--; i >= 0; --i) {
 652       if (U.pVal[i] == WORD_MAX)
 653         Count += APINT_BITS_PER_WORD;
 654       else {
 655         Count += llvm::countLeadingOnes(U.pVal[i]);
 656         break;
 657       }
 658     }
 659   }
 660   return Count;
 661 }
 662
 663 unsigned APInt::countTrailingZeros() const {
 664   if (isSingleWord())
 665     return std::min(unsigned(llvm::countTrailingZeros(U.VAL)), BitWidth);
 666   unsigned Count = 0;
 667   unsigned i = 0;
 668   for (; i < getNumWords() && U.pVal[i] == 0; ++i)
 669     Count += APINT_BITS_PER_WORD;
 670   if (i < getNumWords())
 671     Count += llvm::countTrailingZeros(U.pVal[i]);
 672   return std::min(Count, BitWidth);
 673 }
 674
 675 unsigned APInt::countTrailingOnesSlowCase() const {
 676   unsigned Count = 0;
 677   unsigned i = 0;
 678   for (; i < getNumWords() && U.pVal[i] == WORD_MAX; ++i)
 679     Count += APINT_BITS_PER_WORD;
 680   if (i < getNumWords())
 681     Count += llvm::countTrailingOnes(U.pVal[i]);
 682   assert(Count <= BitWidth);
 683   return Count;
 684 }
 685
 686 unsigned APInt::countPopulationSlowCase() const {
 687   unsigned Count = 0;
 688   for (unsigned i = 0; i < getNumWords(); ++i)
 689     Count += llvm::countPopulation(U.pVal[i]);
 690   return Count;
 691 }
 692
 693 bool APInt::intersectsSlowCase(const APInt &RHS) const {
 694   for (unsigned i = 0, e = getNumWords(); i != e; ++i)
 695     if ((U.pVal[i] & RHS.U.pVal[i]) != 0)
 696       return true;
 697
 698   return false;
 699 }
 700
 701 bool APInt::isSubsetOfSlowCase(const APInt &RHS) const {
 702   for (unsigned i = 0, e = getNumWords(); i != e; ++i)
 703     if ((U.pVal[i] & ~RHS.U.pVal[i]) != 0)
 704       return false;
 705
 706   return true;
 707 }
 708
 709 APInt APInt::byteSwap() const {
 710   assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
 711   if (BitWidth == 16)
 712     return APInt(BitWidth, ByteSwap_16(uint16_t(U.VAL)));
 713   if (BitWidth == 32)
 714     return APInt(BitWidth, ByteSwap_32(unsigned(U.VAL)));
 715   if (BitWidth == 48) {
 716     unsigned Tmp1 = unsigned(U.VAL >> 16);
 717     Tmp1 = ByteSwap_32(Tmp1);
 718     uint16_t Tmp2 = uint16_t(U.VAL);
 719     Tmp2 = ByteSwap_16(Tmp2);
 720     return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1);
 721   }
 722   if (BitWidth == 64)
 723     return APInt(BitWidth, ByteSwap_64(U.VAL));
 724
 725   APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0);
 726   for (unsigned I = 0, N = getNumWords(); I != N; ++I)
 727     Result.U.pVal[I] = ByteSwap_64(U.pVal[N - I - 1]);
 728   if (Result.BitWidth != BitWidth) {
 729     Result.lshrInPlace(Result.BitWidth - BitWidth);
 730     Result.BitWidth = BitWidth;
 731   }
 732   return Result;
 733 }
 734
 735 APInt APInt::reverseBits() const {
 736   switch (BitWidth) {
 737   case 64:
 738     return APInt(BitWidth, llvm::reverseBits<uint64_t>(U.VAL));
 739   case 32:
 740     return APInt(BitWidth, llvm::reverseBits<uint32_t>(U.VAL));
 741   case 16:
 742     return APInt(BitWidth, llvm::reverseBits<uint16_t>(U.VAL));
 743   case 8:
 744     return APInt(BitWidth, llvm::reverseBits<uint8_t>(U.VAL));
 745   default:
 746     break;
 747   }
 748
 749   APInt Val(*this);
 750   APInt Reversed(BitWidth, 0);
 751   unsigned S = BitWidth;
 752
 753   for (; Val != 0; Val.lshrInPlace(1)) {
 754     Reversed <<= 1;
 755     Reversed |= Val[0];
 756     --S;
 757   }
 758
 759   Reversed <<= S;
 760   return Reversed;
 761 }
 762
 763 APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) {
 764   // Fast-path a common case.
 765   if (A == B) return A;
 766
 767   // Corner cases: if either operand is zero, the other is the gcd.
 768   if (!A) return B;
 769   if (!B) return A;
 770
 771   // Count common powers of 2 and remove all other powers of 2.
 772   unsigned Pow2;
 773   {
 774     unsigned Pow2_A = A.countTrailingZeros();
 775     unsigned Pow2_B = B.countTrailingZeros();
 776     if (Pow2_A > Pow2_B) {
 777       A.lshrInPlace(Pow2_A - Pow2_B);
 778       Pow2 = Pow2_B;
 779     } else if (Pow2_B > Pow2_A) {
 780       B.lshrInPlace(Pow2_B - Pow2_A);
 781       Pow2 = Pow2_A;
 782     } else {
 783       Pow2 = Pow2_A;
 784     }
 785   }
 786
 787   // Both operands are odd multiples of 2^Pow_2:
 788   //
 789   //   gcd(a, b) = gcd(|a - b| / 2^i, min(a, b))
 790   //
 791   // This is a modified version of Stein's algorithm, taking advantage of
 792   // efficient countTrailingZeros().
 793   while (A != B) {
 794     if (A.ugt(B)) {
 795       A -= B;
 796       A.lshrInPlace(A.countTrailingZeros() - Pow2);
 797     } else {
 798       B -= A;
 799       B.lshrInPlace(B.countTrailingZeros() - Pow2);
 800     }
 801   }
 802
 803   return A;
 804 }
 805
 806 APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
 807   union {
 808     double D;
 809     uint64_t I;
 810   } T;
 811   T.D = Double;
 812
 813   // Get the sign bit from the highest order bit
 814   bool isNeg = T.I >> 63;
 815
 816   // Get the 11-bit exponent and adjust for the 1023 bit bias
 817   int64_t exp = ((T.I >> 52) & 0x7ff) - 1023;
 818
 819   // If the exponent is negative, the value is < 0 so just return 0.
 820   if (exp < 0)
 821     return APInt(width, 0u);
 822
 823   // Extract the mantissa by clearing the top 12 bits (sign + exponent).
 824   uint64_t mantissa = (T.I & (~0ULL >> 12)) | 1ULL << 52;
 825
 826   // If the exponent doesn't shift all bits out of the mantissa
 827   if (exp < 52)
 828     return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
 829                     APInt(width, mantissa >> (52 - exp));
 830
 831   // If the client didn't provide enough bits for us to shift the mantissa into
 832   // then the result is undefined, just return 0
 833   if (width <= exp - 52)
 834     return APInt(width, 0);
 835
 836   // Otherwise, we have to shift the mantissa bits up to the right location
 837   APInt Tmp(width, mantissa);
 838   Tmp <<= (unsigned)exp - 52;
 839   return isNeg ? -Tmp : Tmp;
 840 }
 841
 842 /// This function converts this APInt to a double.
 843 /// The layout for double is as following (IEEE Standard 754):
 844 ///  --------------------------------------
 845 /// |  Sign    Exponent    Fraction    Bias |
 846 /// |-------------------------------------- |
 847 /// |  1[63]   11[62-52]   52[51-00]   1023 |
 848 ///  --------------------------------------
 849 double APInt::roundToDouble(bool isSigned) const {
 850
 851   // Handle the simple case where the value is contained in one uint64_t.
 852   // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
 853   if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
 854     if (isSigned) {
 855       int64_t sext = SignExtend64(getWord(0), BitWidth);
 856       return double(sext);
 857     } else
 858       return double(getWord(0));
 859   }
 860
 861   // Determine if the value is negative.
 862   bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
 863
 864   // Construct the absolute value if we're negative.
 865   APInt Tmp(isNeg ? -(*this) : (*this));
 866
 867   // Figure out how many bits we're using.
 868   unsigned n = Tmp.getActiveBits();
 869
 870   // The exponent (without bias normalization) is just the number of bits
 871   // we are using. Note that the sign bit is gone since we constructed the
 872   // absolute value.
 873   uint64_t exp = n;
 874
 875   // Return infinity for exponent overflow
 876   if (exp > 1023) {
 877     if (!isSigned || !isNeg)
 878       return std::numeric_limits<double>::infinity();
 879     else
 880       return -std::numeric_limits<double>::infinity();
 881   }
 882   exp += 1023; // Increment for 1023 bias
 883
 884   // Number of bits in mantissa is 52. To obtain the mantissa value, we must
 885   // extract the high 52 bits from the correct words in pVal.
 886   uint64_t mantissa;
 887   unsigned hiWord = whichWord(n-1);
 888   if (hiWord == 0) {
 889     mantissa = Tmp.U.pVal[0];
 890     if (n > 52)
 891       mantissa >>= n - 52; // shift down, we want the top 52 bits.
 892   } else {
 893     assert(hiWord > 0 && "huh?");
 894     uint64_t hibits = Tmp.U.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
 895     uint64_t lobits = Tmp.U.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
 896     mantissa = hibits | lobits;
 897   }
 898
 899   // The leading bit of mantissa is implicit, so get rid of it.
 900   uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
 901   union {
 902     double D;
 903     uint64_t I;
 904   } T;
 905   T.I = sign | (exp << 52) | mantissa;
 906   return T.D;
 907 }
 908
 909 // Truncate to new width.
 910 APInt APInt::trunc(unsigned width) const {
 911   assert(width < BitWidth && "Invalid APInt Truncate request");
 912   assert(width && "Can't truncate to 0 bits");
 913
 914   if (width <= APINT_BITS_PER_WORD)
 915     return APInt(width, getRawData()[0]);
 916
 917   APInt Result(getMemory(getNumWords(width)), width);
 918
 919   // Copy full words.
 920   unsigned i;
 921   for (i = 0; i != width / APINT_BITS_PER_WORD; i++)
 922     Result.U.pVal[i] = U.pVal[i];
 923
 924   // Truncate and copy any partial word.
 925   unsigned bits = (0 - width) % APINT_BITS_PER_WORD;
 926   if (bits != 0)
 927     Result.U.pVal[i] = U.pVal[i] << bits >> bits;
 928
 929   return Result;
 930 }
 931
 932 // Sign extend to a new width.
 933 APInt APInt::sext(unsigned Width) const {
 934   assert(Width > BitWidth && "Invalid APInt SignExtend request");
 935
 936   if (Width <= APINT_BITS_PER_WORD)
 937     return APInt(Width, SignExtend64(U.VAL, BitWidth));
 938
 939   APInt Result(getMemory(getNumWords(Width)), Width);
 940
 941   // Copy words.
 942   std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
 943
 944   // Sign extend the last word since there may be unused bits in the input.
 945   Result.U.pVal[getNumWords() - 1] =
 946       SignExtend64(Result.U.pVal[getNumWords() - 1],
 947                    ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
 948
 949   // Fill with sign bits.
 950   std::memset(Result.U.pVal + getNumWords(), isNegative() ? -1 : 0,
 951               (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
 952   Result.clearUnusedBits();
 953   return Result;
 954 }
 955
 956 //  Zero extend to a new width.
 957 APInt APInt::zext(unsigned width) const {
 958   assert(width > BitWidth && "Invalid APInt ZeroExtend request");
 959
 960   if (width <= APINT_BITS_PER_WORD)
 961     return APInt(width, U.VAL);
 962
 963   APInt Result(getMemory(getNumWords(width)), width);
 964
 965   // Copy words.
 966   std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
 967
 968   // Zero remaining words.
 969   std::memset(Result.U.pVal + getNumWords(), 0,
 970               (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
 971
 972   return Result;
 973 }
 974
 975 APInt APInt::zextOrTrunc(unsigned width) const {
 976   if (BitWidth < width)
 977     return zext(width);
 978   if (BitWidth > width)
 979     return trunc(width);
 980   return *this;
 981 }
 982
 983 APInt APInt::sextOrTrunc(unsigned width) const {
 984   if (BitWidth < width)
 985     return sext(width);
 986   if (BitWidth > width)
 987     return trunc(width);
 988   return *this;
 989 }
 990
 991 APInt APInt::zextOrSelf(unsigned width) const {
 992   if (BitWidth < width)
 993     return zext(width);
 994   return *this;
 995 }
 996
 997 APInt APInt::sextOrSelf(unsigned width) const {
 998   if (BitWidth < width)
 999     return sext(width);
1000   return *this;
1001 }
1002
1003 /// Arithmetic right-shift this APInt by shiftAmt.
1004 /// @brief Arithmetic right-shift function.
1005 void APInt::ashrInPlace(const APInt &shiftAmt) {
1006   ashrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
1007 }
1008
1009 /// Arithmetic right-shift this APInt by shiftAmt.
1010 /// @brief Arithmetic right-shift function.
1011 void APInt::ashrSlowCase(unsigned ShiftAmt) {
1012   // Don't bother performing a no-op shift.
1013   if (!ShiftAmt)
1014     return;
1015
1016   // Save the original sign bit for later.
1017   bool Negative = isNegative();
1018
1019   // WordShift is the inter-part shift; BitShift is is intra-part shift.
1020   unsigned WordShift = ShiftAmt / APINT_BITS_PER_WORD;
1021   unsigned BitShift = ShiftAmt % APINT_BITS_PER_WORD;
1022
1023   unsigned WordsToMove = getNumWords() - WordShift;
1024   if (WordsToMove != 0) {
1025     // Sign extend the last word to fill in the unused bits.
1026     U.pVal[getNumWords() - 1] = SignExtend64(
1027         U.pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
1028
1029     // Fastpath for moving by whole words.
1030     if (BitShift == 0) {
1031       std::memmove(U.pVal, U.pVal + WordShift, WordsToMove * APINT_WORD_SIZE);
1032     } else {
1033       // Move the words containing significant bits.
1034       for (unsigned i = 0; i != WordsToMove - 1; ++i)
1035         U.pVal[i] = (U.pVal[i + WordShift] >> BitShift) |
1036                     (U.pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift));
1037
1038       // Handle the last word which has no high bits to copy.
1039       U.pVal[WordsToMove - 1] = U.pVal[WordShift + WordsToMove - 1] >> BitShift;
1040       // Sign extend one more time.
1041       U.pVal[WordsToMove - 1] =
1042           SignExtend64(U.pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift);
1043     }
1044   }
1045
1046   // Fill in the remainder based on the original sign.
1047   std::memset(U.pVal + WordsToMove, Negative ? -1 : 0,
1048               WordShift * APINT_WORD_SIZE);
1049   clearUnusedBits();
1050 }
1051
1052 /// Logical right-shift this APInt by shiftAmt.
1053 /// @brief Logical right-shift function.
1054 void APInt::lshrInPlace(const APInt &shiftAmt) {
1055   lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
1056 }
1057
1058 /// Logical right-shift this APInt by shiftAmt.
1059 /// @brief Logical right-shift function.
1060 void APInt::lshrSlowCase(unsigned ShiftAmt) {
1061   tcShiftRight(U.pVal, getNumWords(), ShiftAmt);
1062 }
1063
1064 /// Left-shift this APInt by shiftAmt.
1065 /// @brief Left-shift function.
1066 APInt &APInt::operator<<=(const APInt &shiftAmt) {
1067   // It's undefined behavior in C to shift by BitWidth or greater.
1068   *this <<= (unsigned)shiftAmt.getLimitedValue(BitWidth);
1069   return *this;
1070 }
1071
1072 void APInt::shlSlowCase(unsigned ShiftAmt) {
1073   tcShiftLeft(U.pVal, getNumWords(), ShiftAmt);
1074   clearUnusedBits();
1075 }
1076
1077 // Calculate the rotate amount modulo the bit width.
1078 static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) {
1079   unsigned rotBitWidth = rotateAmt.getBitWidth();
1080   APInt rot = rotateAmt;
1081   if (rotBitWidth < BitWidth) {
1082     // Extend the rotate APInt, so that the urem doesn't divide by 0.
1083     // e.g. APInt(1, 32) would give APInt(1, 0).
1084     rot = rotateAmt.zext(BitWidth);
1085   }
1086   rot = rot.urem(APInt(rot.getBitWidth(), BitWidth));
1087   return rot.getLimitedValue(BitWidth);
1088 }
1089
1090 APInt APInt::rotl(const APInt &rotateAmt) const {
1091   return rotl(rotateModulo(BitWidth, rotateAmt));
1092 }
1093
1094 APInt APInt::rotl(unsigned rotateAmt) const {
1095   rotateAmt %= BitWidth;
1096   if (rotateAmt == 0)
1097     return *this;
1098   return shl(rotateAmt) | lshr(BitWidth - rotateAmt);
1099 }
1100
1101 APInt APInt::rotr(const APInt &rotateAmt) const {
1102   return rotr(rotateModulo(BitWidth, rotateAmt));
1103 }
1104
1105 APInt APInt::rotr(unsigned rotateAmt) const {
1106   rotateAmt %= BitWidth;
1107   if (rotateAmt == 0)
1108     return *this;
1109   return lshr(rotateAmt) | shl(BitWidth - rotateAmt);
1110 }
1111
1112 // Square Root - this method computes and returns the square root of "this".
1113 // Three mechanisms are used for computation. For small values (<= 5 bits),
1114 // a table lookup is done. This gets some performance for common cases. For
1115 // values using less than 52 bits, the value is converted to double and then
1116 // the libc sqrt function is called. The result is rounded and then converted
1117 // back to a uint64_t which is then used to construct the result. Finally,
1118 // the Babylonian method for computing square roots is used.
1119 APInt APInt::sqrt() const {
1120
1121   // Determine the magnitude of the value.
1122   unsigned magnitude = getActiveBits();
1123
1124   // Use a fast table for some small values. This also gets rid of some
1125   // rounding errors in libc sqrt for small values.
1126   if (magnitude <= 5) {
1127     static const uint8_t results[32] = {
1128       /*     0 */ 0,
1129       /*  1- 2 */ 1, 1,
1130       /*  3- 6 */ 2, 2, 2, 2,
1131       /*  7-12 */ 3, 3, 3, 3, 3, 3,
1132       /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1133       /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1134       /*    31 */ 6
1135     };
1136     return APInt(BitWidth, results[ (isSingleWord() ? U.VAL : U.pVal[0]) ]);
1137   }
1138
1139   // If the magnitude of the value fits in less than 52 bits (the precision of
1140   // an IEEE double precision floating point value), then we can use the
1141   // libc sqrt function which will probably use a hardware sqrt computation.
1142   // This should be faster than the algorithm below.
1143   if (magnitude < 52) {
1144     return APInt(BitWidth,
1145                  uint64_t(::round(::sqrt(double(isSingleWord() ? U.VAL
1146                                                                : U.pVal[0])))));
1147   }
1148
1149   // Okay, all the short cuts are exhausted. We must compute it. The following
1150   // is a classical Babylonian method for computing the square root. This code
1151   // was adapted to APInt from a wikipedia article on such computations.
1152   // See http://www.wikipedia.org/ and go to the page named
1153   // Calculate_an_integer_square_root.
1154   unsigned nbits = BitWidth, i = 4;
1155   APInt testy(BitWidth, 16);
1156   APInt x_old(BitWidth, 1);
1157   APInt x_new(BitWidth, 0);
1158   APInt two(BitWidth, 2);
1159
1160   // Select a good starting value using binary logarithms.
1161   for (;; i += 2, testy = testy.shl(2))
1162     if (i >= nbits || this->ule(testy)) {
1163       x_old = x_old.shl(i / 2);
1164       break;
1165     }
1166
1167   // Use the Babylonian method to arrive at the integer square root:
1168   for (;;) {
1169     x_new = (this->udiv(x_old) + x_old).udiv(two);
1170     if (x_old.ule(x_new))
1171       break;
1172     x_old = x_new;
1173   }
1174
1175   // Make sure we return the closest approximation
1176   // NOTE: The rounding calculation below is correct. It will produce an
1177   // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1178   // determined to be a rounding issue with pari/gp as it begins to use a
1179   // floating point representation after 192 bits. There are no discrepancies
1180   // between this algorithm and pari/gp for bit widths < 192 bits.
1181   APInt square(x_old * x_old);
1182   APInt nextSquare((x_old + 1) * (x_old +1));
1183   if (this->ult(square))
1184     return x_old;
1185   assert(this->ule(nextSquare) && "Error in APInt::sqrt computation");
1186   APInt midpoint((nextSquare - square).udiv(two));
1187   APInt offset(*this - square);
1188   if (offset.ult(midpoint))
1189     return x_old;
1190   return x_old + 1;
1191 }
1192
1193 /// Computes the multiplicative inverse of this APInt for a given modulo. The
1194 /// iterative extended Euclidean algorithm is used to solve for this value,
1195 /// however we simplify it to speed up calculating only the inverse, and take
1196 /// advantage of div+rem calculations. We also use some tricks to avoid copying
1197 /// (potentially large) APInts around.
1198 APInt APInt::multiplicativeInverse(const APInt& modulo) const {
1199   assert(ult(modulo) && "This APInt must be smaller than the modulo");
1200
1201   // Using the properties listed at the following web page (accessed 06/21/08):
1202   //   http://www.numbertheory.org/php/euclid.html
1203   // (especially the properties numbered 3, 4 and 9) it can be proved that
1204   // BitWidth bits suffice for all the computations in the algorithm implemented
1205   // below. More precisely, this number of bits suffice if the multiplicative
1206   // inverse exists, but may not suffice for the general extended Euclidean
1207   // algorithm.
1208
1209   APInt r[2] = { modulo, *this };
1210   APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
1211   APInt q(BitWidth, 0);
1212
1213   unsigned i;
1214   for (i = 0; r[i^1] != 0; i ^= 1) {
1215     // An overview of the math without the confusing bit-flipping:
1216     // q = r[i-2] / r[i-1]
1217     // r[i] = r[i-2] % r[i-1]
1218     // t[i] = t[i-2] - t[i-1] * q
1219     udivrem(r[i], r[i^1], q, r[i]);
1220     t[i] -= t[i^1] * q;
1221   }
1222
1223   // If this APInt and the modulo are not coprime, there is no multiplicative
1224   // inverse, so return 0. We check this by looking at the next-to-last
1225   // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1226   // algorithm.
1227   if (r[i] != 1)
1228     return APInt(BitWidth, 0);
1229
1230   // The next-to-last t is the multiplicative inverse.  However, we are
1231   // interested in a positive inverse. Calcuate a positive one from a negative
1232   // one if necessary. A simple addition of the modulo suffices because
1233   // abs(t[i]) is known to be less than *this/2 (see the link above).
1234   return t[i].isNegative() ? t[i] + modulo : t[i];
1235 }
1236
1237 /// Calculate the magic numbers required to implement a signed integer division
1238 /// by a constant as a sequence of multiplies, adds and shifts.  Requires that
1239 /// the divisor not be 0, 1, or -1.  Taken from "Hacker's Delight", Henry S.
1240 /// Warren, Jr., chapter 10.
1241 APInt::ms APInt::magic() const {
1242   const APInt& d = *this;
1243   unsigned p;
1244   APInt ad, anc, delta, q1, r1, q2, r2, t;
1245   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1246   struct ms mag;
1247
1248   ad = d.abs();
1249   t = signedMin + (d.lshr(d.getBitWidth() - 1));
1250   anc = t - 1 - t.urem(ad);   // absolute value of nc
1251   p = d.getBitWidth() - 1;    // initialize p
1252   q1 = signedMin.udiv(anc);   // initialize q1 = 2p/abs(nc)
1253   r1 = signedMin - q1*anc;    // initialize r1 = rem(2p,abs(nc))
1254   q2 = signedMin.udiv(ad);    // initialize q2 = 2p/abs(d)
1255   r2 = signedMin - q2*ad;     // initialize r2 = rem(2p,abs(d))
1256   do {
1257     p = p + 1;
1258     q1 = q1<<1;          // update q1 = 2p/abs(nc)
1259     r1 = r1<<1;          // update r1 = rem(2p/abs(nc))
1260     if (r1.uge(anc)) {  // must be unsigned comparison
1261       q1 = q1 + 1;
1262       r1 = r1 - anc;
1263     }
1264     q2 = q2<<1;          // update q2 = 2p/abs(d)
1265     r2 = r2<<1;          // update r2 = rem(2p/abs(d))
1266     if (r2.uge(ad)) {   // must be unsigned comparison
1267       q2 = q2 + 1;
1268       r2 = r2 - ad;
1269     }
1270     delta = ad - r2;
1271   } while (q1.ult(delta) || (q1 == delta && r1 == 0));
1272
1273   mag.m = q2 + 1;
1274   if (d.isNegative()) mag.m = -mag.m;   // resulting magic number
1275   mag.s = p - d.getBitWidth();          // resulting shift
1276   return mag;
1277 }
1278
1279 /// Calculate the magic numbers required to implement an unsigned integer
1280 /// division by a constant as a sequence of multiplies, adds and shifts.
1281 /// Requires that the divisor not be 0.  Taken from "Hacker's Delight", Henry
1282 /// S. Warren, Jr., chapter 10.
1283 /// LeadingZeros can be used to simplify the calculation if the upper bits
1284 /// of the divided value are known zero.
1285 APInt::mu APInt::magicu(unsigned LeadingZeros) const {
1286   const APInt& d = *this;
1287   unsigned p;
1288   APInt nc, delta, q1, r1, q2, r2;
1289   struct mu magu;
1290   magu.a = 0;               // initialize "add" indicator
1291   APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros);
1292   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1293   APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
1294
1295   nc = allOnes - (allOnes - d).urem(d);
1296   p = d.getBitWidth() - 1;  // initialize p
1297   q1 = signedMin.udiv(nc);  // initialize q1 = 2p/nc
1298   r1 = signedMin - q1*nc;   // initialize r1 = rem(2p,nc)
1299   q2 = signedMax.udiv(d);   // initialize q2 = (2p-1)/d
1300   r2 = signedMax - q2*d;    // initialize r2 = rem((2p-1),d)
1301   do {
1302     p = p + 1;
1303     if (r1.uge(nc - r1)) {
1304       q1 = q1 + q1 + 1;  // update q1
1305       r1 = r1 + r1 - nc; // update r1
1306     }
1307     else {
1308       q1 = q1+q1; // update q1
1309       r1 = r1+r1; // update r1
1310     }
1311     if ((r2 + 1).uge(d - r2)) {
1312       if (q2.uge(signedMax)) magu.a = 1;
1313       q2 = q2+q2 + 1;     // update q2
1314       r2 = r2+r2 + 1 - d; // update r2
1315     }
1316     else {
1317       if (q2.uge(signedMin)) magu.a = 1;
1318       q2 = q2+q2;     // update q2
1319       r2 = r2+r2 + 1; // update r2
1320     }
1321     delta = d - 1 - r2;
1322   } while (p < d.getBitWidth()*2 &&
1323            (q1.ult(delta) || (q1 == delta && r1 == 0)));
1324   magu.m = q2 + 1; // resulting magic number
1325   magu.s = p - d.getBitWidth();  // resulting shift
1326   return magu;
1327 }
1328
1329 /// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1330 /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1331 /// variables here have the same names as in the algorithm. Comments explain
1332 /// the algorithm and any deviation from it.
1333 static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r,
1334                      unsigned m, unsigned n) {
1335   assert(u && "Must provide dividend");
1336   assert(v && "Must provide divisor");
1337   assert(q && "Must provide quotient");
1338   assert(u != v && u != q && v != q && "Must use different memory");
1339   assert(n>1 && "n must be > 1");
1340
1341   // b denotes the base of the number system. In our case b is 2^32.
1342   const uint64_t b = uint64_t(1) << 32;
1343
1344   DEBUG(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
1345   DEBUG(dbgs() << "KnuthDiv: original:");
1346   DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1347   DEBUG(dbgs() << " by");
1348   DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]);
1349   DEBUG(dbgs() << '\n');
1350   // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1351   // u and v by d. Note that we have taken Knuth's advice here to use a power
1352   // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1353   // 2 allows us to shift instead of multiply and it is easy to determine the
1354   // shift amount from the leading zeros.  We are basically normalizing the u
1355   // and v so that its high bits are shifted to the top of v's range without
1356   // overflow. Note that this can require an extra word in u so that u must
1357   // be of length m+n+1.
1358   unsigned shift = countLeadingZeros(v[n-1]);
1359   unsigned v_carry = 0;
1360   unsigned u_carry = 0;
1361   if (shift) {
1362     for (unsigned i = 0; i < m+n; ++i) {
1363       unsigned u_tmp = u[i] >> (32 - shift);
1364       u[i] = (u[i] << shift) | u_carry;
1365       u_carry = u_tmp;
1366     }
1367     for (unsigned i = 0; i < n; ++i) {
1368       unsigned v_tmp = v[i] >> (32 - shift);
1369       v[i] = (v[i] << shift) | v_carry;
1370       v_carry = v_tmp;
1371     }
1372   }
1373   u[m+n] = u_carry;
1374
1375   DEBUG(dbgs() << "KnuthDiv:   normal:");
1376   DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1377   DEBUG(dbgs() << " by");
1378   DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]);
1379   DEBUG(dbgs() << '\n');
1380
1381   // D2. [Initialize j.]  Set j to m. This is the loop counter over the places.
1382   int j = m;
1383   do {
1384     DEBUG(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
1385     // D3. [Calculate q'.].
1386     //     Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1387     //     Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1388     // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1389     // qp by 1, inrease rp by v[n-1], and repeat this test if rp < b. The test
1390     // on v[n-2] determines at high speed most of the cases in which the trial
1391     // value qp is one too large, and it eliminates all cases where qp is two
1392     // too large.
1393     uint64_t dividend = ((uint64_t(u[j+n]) << 32) + u[j+n-1]);
1394     DEBUG(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
1395     uint64_t qp = dividend / v[n-1];
1396     uint64_t rp = dividend % v[n-1];
1397     if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
1398       qp--;
1399       rp += v[n-1];
1400       if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
1401         qp--;
1402     }
1403     DEBUG(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
1404
1405     // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1406     // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1407     // consists of a simple multiplication by a one-place number, combined with
1408     // a subtraction.
1409     // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1410     // this step is actually negative, (u[j+n]...u[j]) should be left as the
1411     // true value plus b**(n+1), namely as the b's complement of
1412     // the true value, and a "borrow" to the left should be remembered.
1413     int64_t borrow = 0;
1414     for (unsigned i = 0; i < n; ++i) {
1415       uint64_t p = uint64_t(qp) * uint64_t(v[i]);
1416       int64_t subres = int64_t(u[j+i]) - borrow - (unsigned)p;
1417       u[j+i] = (unsigned)subres;
1418       borrow = (p >> 32) - (subres >> 32);
1419       DEBUG(dbgs() << "KnuthDiv: u[j+i] = " << u[j+i]
1420                    << ", borrow = " << borrow << '\n');
1421     }
1422     bool isNeg = u[j+n] < borrow;
1423     u[j+n] -= (unsigned)borrow;
1424
1425     DEBUG(dbgs() << "KnuthDiv: after subtraction:");
1426     DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1427     DEBUG(dbgs() << '\n');
1428
1429     // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1430     // negative, go to step D6; otherwise go on to step D7.
1431     q[j] = (unsigned)qp;
1432     if (isNeg) {
1433       // D6. [Add back]. The probability that this step is necessary is very
1434       // small, on the order of only 2/b. Make sure that test data accounts for
1435       // this possibility. Decrease q[j] by 1
1436       q[j]--;
1437       // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1438       // A carry will occur to the left of u[j+n], and it should be ignored
1439       // since it cancels with the borrow that occurred in D4.
1440       bool carry = false;
1441       for (unsigned i = 0; i < n; i++) {
1442         unsigned limit = std::min(u[j+i],v[i]);
1443         u[j+i] += v[i] + carry;
1444         carry = u[j+i] < limit || (carry && u[j+i] == limit);
1445       }
1446       u[j+n] += carry;
1447     }
1448     DEBUG(dbgs() << "KnuthDiv: after correction:");
1449     DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1450     DEBUG(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
1451
1452   // D7. [Loop on j.]  Decrease j by one. Now if j >= 0, go back to D3.
1453   } while (--j >= 0);
1454
1455   DEBUG(dbgs() << "KnuthDiv: quotient:");
1456   DEBUG(for (int i = m; i >=0; i--) dbgs() <<" " << q[i]);
1457   DEBUG(dbgs() << '\n');
1458
1459   // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1460   // remainder may be obtained by dividing u[...] by d. If r is non-null we
1461   // compute the remainder (urem uses this).
1462   if (r) {
1463     // The value d is expressed by the "shift" value above since we avoided
1464     // multiplication by d by using a shift left. So, all we have to do is
1465     // shift right here.
1466     if (shift) {
1467       unsigned carry = 0;
1468       DEBUG(dbgs() << "KnuthDiv: remainder:");
1469       for (int i = n-1; i >= 0; i--) {
1470         r[i] = (u[i] >> shift) | carry;
1471         carry = u[i] << (32 - shift);
1472         DEBUG(dbgs() << " " << r[i]);
1473       }
1474     } else {
1475       for (int i = n-1; i >= 0; i--) {
1476         r[i] = u[i];
1477         DEBUG(dbgs() << " " << r[i]);
1478       }
1479     }
1480     DEBUG(dbgs() << '\n');
1481   }
1482   DEBUG(dbgs() << '\n');
1483 }
1484
1485 void APInt::divide(const APInt &LHS, unsigned lhsWords, const APInt &RHS,
1486                    unsigned rhsWords, APInt *Quotient, APInt *Remainder) {
1487   assert(lhsWords >= rhsWords && "Fractional result");
1488
1489   // First, compose the values into an array of 32-bit words instead of
1490   // 64-bit words. This is a necessity of both the "short division" algorithm
1491   // and the Knuth "classical algorithm" which requires there to be native
1492   // operations for +, -, and * on an m bit value with an m*2 bit result. We
1493   // can't use 64-bit operands here because we don't have native results of
1494   // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1495   // work on large-endian machines.
1496   uint64_t mask = ~0ull >> (sizeof(unsigned)*CHAR_BIT);
1497   unsigned n = rhsWords * 2;
1498   unsigned m = (lhsWords * 2) - n;
1499
1500   // Allocate space for the temporary values we need either on the stack, if
1501   // it will fit, or on the heap if it won't.
1502   unsigned SPACE[128];
1503   unsigned *U = nullptr;
1504   unsigned *V = nullptr;
1505   unsigned *Q = nullptr;
1506   unsigned *R = nullptr;
1507   if ((Remainder?4:3)*n+2*m+1 <= 128) {
1508     U = &SPACE[0];
1509     V = &SPACE[m+n+1];
1510     Q = &SPACE[(m+n+1) + n];
1511     if (Remainder)
1512       R = &SPACE[(m+n+1) + n + (m+n)];
1513   } else {
1514     U = new unsigned[m + n + 1];
1515     V = new unsigned[n];
1516     Q = new unsigned[m+n];
1517     if (Remainder)
1518       R = new unsigned[n];
1519   }
1520
1521   // Initialize the dividend
1522   memset(U, 0, (m+n+1)*sizeof(unsigned));
1523   for (unsigned i = 0; i < lhsWords; ++i) {
1524     uint64_t tmp = (LHS.getNumWords() == 1 ? LHS.U.VAL : LHS.U.pVal[i]);
1525     U[i * 2] = (unsigned)(tmp & mask);
1526     U[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT));
1527   }
1528   U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
1529
1530   // Initialize the divisor
1531   memset(V, 0, (n)*sizeof(unsigned));
1532   for (unsigned i = 0; i < rhsWords; ++i) {
1533     uint64_t tmp = (RHS.getNumWords() == 1 ? RHS.U.VAL : RHS.U.pVal[i]);
1534     V[i * 2] = (unsigned)(tmp & mask);
1535     V[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT));
1536   }
1537
1538   // initialize the quotient and remainder
1539   memset(Q, 0, (m+n) * sizeof(unsigned));
1540   if (Remainder)
1541     memset(R, 0, n * sizeof(unsigned));
1542
1543   // Now, adjust m and n for the Knuth division. n is the number of words in
1544   // the divisor. m is the number of words by which the dividend exceeds the
1545   // divisor (i.e. m+n is the length of the dividend). These sizes must not
1546   // contain any zero words or the Knuth algorithm fails.
1547   for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
1548     n--;
1549     m++;
1550   }
1551   for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
1552     m--;
1553
1554   // If we're left with only a single word for the divisor, Knuth doesn't work
1555   // so we implement the short division algorithm here. This is much simpler
1556   // and faster because we are certain that we can divide a 64-bit quantity
1557   // by a 32-bit quantity at hardware speed and short division is simply a
1558   // series of such operations. This is just like doing short division but we
1559   // are using base 2^32 instead of base 10.
1560   assert(n != 0 && "Divide by zero?");
1561   if (n == 1) {
1562     unsigned divisor = V[0];
1563     unsigned remainder = 0;
1564     for (int i = m+n-1; i >= 0; i--) {
1565       uint64_t partial_dividend = uint64_t(remainder) << 32 | U[i];
1566       if (partial_dividend == 0) {
1567         Q[i] = 0;
1568         remainder = 0;
1569       } else if (partial_dividend < divisor) {
1570         Q[i] = 0;
1571         remainder = (unsigned)partial_dividend;
1572       } else if (partial_dividend == divisor) {
1573         Q[i] = 1;
1574         remainder = 0;
1575       } else {
1576         Q[i] = (unsigned)(partial_dividend / divisor);
1577         remainder = (unsigned)(partial_dividend - (Q[i] * divisor));
1578       }
1579     }
1580     if (R)
1581       R[0] = remainder;
1582   } else {
1583     // Now we're ready to invoke the Knuth classical divide algorithm. In this
1584     // case n > 1.
1585     KnuthDiv(U, V, Q, R, m, n);
1586   }
1587
1588   // If the caller wants the quotient
1589   if (Quotient) {
1590     // Set up the Quotient value's memory.
1591     if (Quotient->BitWidth != LHS.BitWidth) {
1592       if (Quotient->isSingleWord())
1593         Quotient->U.VAL = 0;
1594       else
1595         delete [] Quotient->U.pVal;
1596       Quotient->BitWidth = LHS.BitWidth;
1597       if (!Quotient->isSingleWord())
1598         Quotient->U.pVal = getClearedMemory(Quotient->getNumWords());
1599     } else
1600       Quotient->clearAllBits();
1601
1602     // The quotient is in Q. Reconstitute the quotient into Quotient's low
1603     // order words.
1604     // This case is currently dead as all users of divide() handle trivial cases
1605     // earlier.
1606     if (lhsWords == 1) {
1607       uint64_t tmp =
1608         uint64_t(Q[0]) | (uint64_t(Q[1]) << (APINT_BITS_PER_WORD / 2));
1609       if (Quotient->isSingleWord())
1610         Quotient->U.VAL = tmp;
1611       else
1612         Quotient->U.pVal[0] = tmp;
1613     } else {
1614       assert(!Quotient->isSingleWord() && "Quotient APInt not large enough");
1615       for (unsigned i = 0; i < lhsWords; ++i)
1616         Quotient->U.pVal[i] =
1617           uint64_t(Q[i*2]) | (uint64_t(Q[i*2+1]) << (APINT_BITS_PER_WORD / 2));
1618     }
1619   }
1620
1621   // If the caller wants the remainder
1622   if (Remainder) {
1623     // Set up the Remainder value's memory.
1624     if (Remainder->BitWidth != RHS.BitWidth) {
1625       if (Remainder->isSingleWord())
1626         Remainder->U.VAL = 0;
1627       else
1628         delete [] Remainder->U.pVal;
1629       Remainder->BitWidth = RHS.BitWidth;
1630       if (!Remainder->isSingleWord())
1631         Remainder->U.pVal = getClearedMemory(Remainder->getNumWords());
1632     } else
1633       Remainder->clearAllBits();
1634
1635     // The remainder is in R. Reconstitute the remainder into Remainder's low
1636     // order words.
1637     if (rhsWords == 1) {
1638       uint64_t tmp =
1639         uint64_t(R[0]) | (uint64_t(R[1]) << (APINT_BITS_PER_WORD / 2));
1640       if (Remainder->isSingleWord())
1641         Remainder->U.VAL = tmp;
1642       else
1643         Remainder->U.pVal[0] = tmp;
1644     } else {
1645       assert(!Remainder->isSingleWord() && "Remainder APInt not large enough");
1646       for (unsigned i = 0; i < rhsWords; ++i)
1647         Remainder->U.pVal[i] =
1648           uint64_t(R[i*2]) | (uint64_t(R[i*2+1]) << (APINT_BITS_PER_WORD / 2));
1649     }
1650   }
1651
1652   // Clean up the memory we allocated.
1653   if (U != &SPACE[0]) {
1654     delete [] U;
1655     delete [] V;
1656     delete [] Q;
1657     delete [] R;
1658   }
1659 }
1660
1661 APInt APInt::udiv(const APInt& RHS) const {
1662   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1663
1664   // First, deal with the easy case
1665   if (isSingleWord()) {
1666     assert(RHS.U.VAL != 0 && "Divide by zero?");
1667     return APInt(BitWidth, U.VAL / RHS.U.VAL);
1668   }
1669
1670   // Get some facts about the LHS and RHS number of bits and words
1671   unsigned rhsBits = RHS.getActiveBits();
1672   unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1673   assert(rhsWords && "Divided by zero???");
1674   unsigned lhsBits = this->getActiveBits();
1675   unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1);
1676
1677   // Deal with some degenerate cases
1678   if (!lhsWords)
1679     // 0 / X ===> 0
1680     return APInt(BitWidth, 0);
1681   else if (lhsWords < rhsWords || this->ult(RHS)) {
1682     // X / Y ===> 0, iff X < Y
1683     return APInt(BitWidth, 0);
1684   } else if (*this == RHS) {
1685     // X / X ===> 1
1686     return APInt(BitWidth, 1);
1687   } else if (lhsWords == 1 && rhsWords == 1) {
1688     // All high words are zero, just use native divide
1689     return APInt(BitWidth, this->U.pVal[0] / RHS.U.pVal[0]);
1690   }
1691
1692   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1693   APInt Quotient(1,0); // to hold result.
1694   divide(*this, lhsWords, RHS, rhsWords, &Quotient, nullptr);
1695   return Quotient;
1696 }
1697
1698 APInt APInt::sdiv(const APInt &RHS) const {
1699   if (isNegative()) {
1700     if (RHS.isNegative())
1701       return (-(*this)).udiv(-RHS);
1702     return -((-(*this)).udiv(RHS));
1703   }
1704   if (RHS.isNegative())
1705     return -(this->udiv(-RHS));
1706   return this->udiv(RHS);
1707 }
1708
1709 APInt APInt::urem(const APInt& RHS) const {
1710   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1711   if (isSingleWord()) {
1712     assert(RHS.U.VAL != 0 && "Remainder by zero?");
1713     return APInt(BitWidth, U.VAL % RHS.U.VAL);
1714   }
1715
1716   // Get some facts about the LHS
1717   unsigned lhsBits = getActiveBits();
1718   unsigned lhsWords = !lhsBits ? 0 : (whichWord(lhsBits - 1) + 1);
1719
1720   // Get some facts about the RHS
1721   unsigned rhsBits = RHS.getActiveBits();
1722   unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1723   assert(rhsWords && "Performing remainder operation by zero ???");
1724
1725   // Check the degenerate cases
1726   if (lhsWords == 0) {
1727     // 0 % Y ===> 0
1728     return APInt(BitWidth, 0);
1729   } else if (lhsWords < rhsWords || this->ult(RHS)) {
1730     // X % Y ===> X, iff X < Y
1731     return *this;
1732   } else if (*this == RHS) {
1733     // X % X == 0;
1734     return APInt(BitWidth, 0);
1735   } else if (lhsWords == 1) {
1736     // All high words are zero, just use native remainder
1737     return APInt(BitWidth, U.pVal[0] % RHS.U.pVal[0]);
1738   }
1739
1740   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1741   APInt Remainder(1,0);
1742   divide(*this, lhsWords, RHS, rhsWords, nullptr, &Remainder);
1743   return Remainder;
1744 }
1745
1746 APInt APInt::srem(const APInt &RHS) const {
1747   if (isNegative()) {
1748     if (RHS.isNegative())
1749       return -((-(*this)).urem(-RHS));
1750     return -((-(*this)).urem(RHS));
1751   }
1752   if (RHS.isNegative())
1753     return this->urem(-RHS);
1754   return this->urem(RHS);
1755 }
1756
1757 void APInt::udivrem(const APInt &LHS, const APInt &RHS,
1758                     APInt &Quotient, APInt &Remainder) {
1759   assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same");
1760
1761   // First, deal with the easy case
1762   if (LHS.isSingleWord()) {
1763     assert(RHS.U.VAL != 0 && "Divide by zero?");
1764     uint64_t QuotVal = LHS.U.VAL / RHS.U.VAL;
1765     uint64_t RemVal = LHS.U.VAL % RHS.U.VAL;
1766     Quotient = APInt(LHS.BitWidth, QuotVal);
1767     Remainder = APInt(LHS.BitWidth, RemVal);
1768     return;
1769   }
1770
1771   // Get some size facts about the dividend and divisor
1772   unsigned lhsBits  = LHS.getActiveBits();
1773   unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1);
1774   unsigned rhsBits  = RHS.getActiveBits();
1775   unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1776
1777   // Check the degenerate cases
1778   if (lhsWords == 0) {
1779     Quotient = 0;                // 0 / Y ===> 0
1780     Remainder = 0;               // 0 % Y ===> 0
1781     return;
1782   }
1783
1784   if (lhsWords < rhsWords || LHS.ult(RHS)) {
1785     Remainder = LHS;            // X % Y ===> X, iff X < Y
1786     Quotient = 0;               // X / Y ===> 0, iff X < Y
1787     return;
1788   }
1789
1790   if (LHS == RHS) {
1791     Quotient  = 1;              // X / X ===> 1
1792     Remainder = 0;              // X % X ===> 0;
1793     return;
1794   }
1795
1796   if (lhsWords == 1 && rhsWords == 1) {
1797     // There is only one word to consider so use the native versions.
1798     uint64_t lhsValue = LHS.isSingleWord() ? LHS.U.VAL : LHS.U.pVal[0];
1799     uint64_t rhsValue = RHS.isSingleWord() ? RHS.U.VAL : RHS.U.pVal[0];
1800     Quotient = APInt(LHS.getBitWidth(), lhsValue / rhsValue);
1801     Remainder = APInt(LHS.getBitWidth(), lhsValue % rhsValue);
1802     return;
1803   }
1804
1805   // Okay, lets do it the long way
1806   divide(LHS, lhsWords, RHS, rhsWords, &Quotient, &Remainder);
1807 }
1808
1809 void APInt::sdivrem(const APInt &LHS, const APInt &RHS,
1810                     APInt &Quotient, APInt &Remainder) {
1811   if (LHS.isNegative()) {
1812     if (RHS.isNegative())
1813       APInt::udivrem(-LHS, -RHS, Quotient, Remainder);
1814     else {
1815       APInt::udivrem(-LHS, RHS, Quotient, Remainder);
1816       Quotient = -Quotient;
1817     }
1818     Remainder = -Remainder;
1819   } else if (RHS.isNegative()) {
1820     APInt::udivrem(LHS, -RHS, Quotient, Remainder);
1821     Quotient = -Quotient;
1822   } else {
1823     APInt::udivrem(LHS, RHS, Quotient, Remainder);
1824   }
1825 }
1826
1827 APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const {
1828   APInt Res = *this+RHS;
1829   Overflow = isNonNegative() == RHS.isNonNegative() &&
1830              Res.isNonNegative() != isNonNegative();
1831   return Res;
1832 }
1833
1834 APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const {
1835   APInt Res = *this+RHS;
1836   Overflow = Res.ult(RHS);
1837   return Res;
1838 }
1839
1840 APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const {
1841   APInt Res = *this - RHS;
1842   Overflow = isNonNegative() != RHS.isNonNegative() &&
1843              Res.isNonNegative() != isNonNegative();
1844   return Res;
1845 }
1846
1847 APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const {
1848   APInt Res = *this-RHS;
1849   Overflow = Res.ugt(*this);
1850   return Res;
1851 }
1852
1853 APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {
1854   // MININT/-1  -->  overflow.
1855   Overflow = isMinSignedValue() && RHS.isAllOnesValue();
1856   return sdiv(RHS);
1857 }
1858
1859 APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
1860   APInt Res = *this * RHS;
1861
1862   if (*this != 0 && RHS != 0)
1863     Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS;
1864   else
1865     Overflow = false;
1866   return Res;
1867 }
1868
1869 APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
1870   APInt Res = *this * RHS;
1871
1872   if (*this != 0 && RHS != 0)
1873     Overflow = Res.udiv(RHS) != *this || Res.udiv(*this) != RHS;
1874   else
1875     Overflow = false;
1876   return Res;
1877 }
1878
1879 APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const {
1880   Overflow = ShAmt.uge(getBitWidth());
1881   if (Overflow)
1882     return APInt(BitWidth, 0);
1883
1884   if (isNonNegative()) // Don't allow sign change.
1885     Overflow = ShAmt.uge(countLeadingZeros());
1886   else
1887     Overflow = ShAmt.uge(countLeadingOnes());
1888
1889   return *this << ShAmt;
1890 }
1891
1892 APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const {
1893   Overflow = ShAmt.uge(getBitWidth());
1894   if (Overflow)
1895     return APInt(BitWidth, 0);
1896
1897   Overflow = ShAmt.ugt(countLeadingZeros());
1898
1899   return *this << ShAmt;
1900 }
1901
1902
1903
1904
1905 void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
1906   // Check our assumptions here
1907   assert(!str.empty() && "Invalid string length");
1908   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
1909           radix == 36) &&
1910          "Radix should be 2, 8, 10, 16, or 36!");
1911
1912   StringRef::iterator p = str.begin();
1913   size_t slen = str.size();
1914   bool isNeg = *p == '-';
1915   if (*p == '-' || *p == '+') {
1916     p++;
1917     slen--;
1918     assert(slen && "String is only a sign, needs a value.");
1919   }
1920   assert((slen <= numbits || radix != 2) && "Insufficient bit width");
1921   assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
1922   assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
1923   assert((((slen-1)*64)/22 <= numbits || radix != 10) &&
1924          "Insufficient bit width");
1925
1926   // Allocate memory if needed
1927   if (isSingleWord())
1928     U.VAL = 0;
1929   else
1930     U.pVal = getClearedMemory(getNumWords());
1931
1932   // Figure out if we can shift instead of multiply
1933   unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
1934
1935   // Set up an APInt for the radix multiplier outside the loop so we don't
1936   // constantly construct/destruct it.
1937   APInt apradix(getBitWidth(), radix);
1938
1939   // Enter digit traversal loop
1940   for (StringRef::iterator e = str.end(); p != e; ++p) {
1941     unsigned digit = getDigit(*p, radix);
1942     assert(digit < radix && "Invalid character in digit string");
1943
1944     // Shift or multiply the value by the radix
1945     if (slen > 1) {
1946       if (shift)
1947         *this <<= shift;
1948       else
1949         *this *= apradix;
1950     }
1951
1952     // Add in the digit we just interpreted
1953     *this += digit;
1954   }
1955   // If its negative, put it in two's complement form
1956   if (isNeg) {
1957     --(*this);
1958     this->flipAllBits();
1959   }
1960 }
1961
1962 void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
1963                      bool Signed, bool formatAsCLiteral) const {
1964   assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 ||
1965           Radix == 36) &&
1966          "Radix should be 2, 8, 10, 16, or 36!");
1967
1968   const char *Prefix = "";
1969   if (formatAsCLiteral) {
1970     switch (Radix) {
1971       case 2:
1972         // Binary literals are a non-standard extension added in gcc 4.3:
1973         // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
1974         Prefix = "0b";
1975         break;
1976       case 8:
1977         Prefix = "0";
1978         break;
1979       case 10:
1980         break; // No prefix
1981       case 16:
1982         Prefix = "0x";
1983         break;
1984       default:
1985         llvm_unreachable("Invalid radix!");
1986     }
1987   }
1988
1989   // First, check for a zero value and just short circuit the logic below.
1990   if (*this == 0) {
1991     while (*Prefix) {
1992       Str.push_back(*Prefix);
1993       ++Prefix;
1994     };
1995     Str.push_back('0');
1996     return;
1997   }
1998
1999   static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
2000
2001   if (isSingleWord()) {
2002     char Buffer[65];
2003     char *BufPtr = Buffer+65;
2004
2005     uint64_t N;
2006     if (!Signed) {
2007       N = getZExtValue();
2008     } else {
2009       int64_t I = getSExtValue();
2010       if (I >= 0) {
2011         N = I;
2012       } else {
2013         Str.push_back('-');
2014         N = -(uint64_t)I;
2015       }
2016     }
2017
2018     while (*Prefix) {
2019       Str.push_back(*Prefix);
2020       ++Prefix;
2021     };
2022
2023     while (N) {
2024       *--BufPtr = Digits[N % Radix];
2025       N /= Radix;
2026     }
2027     Str.append(BufPtr, Buffer+65);
2028     return;
2029   }
2030
2031   APInt Tmp(*this);
2032
2033   if (Signed && isNegative()) {
2034     // They want to print the signed version and it is a negative value
2035     // Flip the bits and add one to turn it into the equivalent positive
2036     // value and put a '-' in the result.
2037     Tmp.flipAllBits();
2038     ++Tmp;
2039     Str.push_back('-');
2040   }
2041
2042   while (*Prefix) {
2043     Str.push_back(*Prefix);
2044     ++Prefix;
2045   };
2046
2047   // We insert the digits backward, then reverse them to get the right order.
2048   unsigned StartDig = Str.size();
2049
2050   // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2051   // because the number of bits per digit (1, 3 and 4 respectively) divides
2052   // equally.  We just shift until the value is zero.
2053   if (Radix == 2 || Radix == 8 || Radix == 16) {
2054     // Just shift tmp right for each digit width until it becomes zero
2055     unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
2056     unsigned MaskAmt = Radix - 1;
2057
2058     while (Tmp != 0) {
2059       unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
2060       Str.push_back(Digits[Digit]);
2061       Tmp.lshrInPlace(ShiftAmt);
2062     }
2063   } else {
2064     APInt divisor(Radix == 10? 4 : 8, Radix);
2065     while (Tmp != 0) {
2066       APInt APdigit(1, 0);
2067       APInt tmp2(Tmp.getBitWidth(), 0);
2068       divide(Tmp, Tmp.getNumWords(), divisor, divisor.getNumWords(), &tmp2,
2069              &APdigit);
2070       unsigned Digit = (unsigned)APdigit.getZExtValue();
2071       assert(Digit < Radix && "divide failed");
2072       Str.push_back(Digits[Digit]);
2073       Tmp = tmp2;
2074     }
2075   }
2076
2077   // Reverse the digits before returning.
2078   std::reverse(Str.begin()+StartDig, Str.end());
2079 }
2080
2081 /// Returns the APInt as a std::string. Note that this is an inefficient method.
2082 /// It is better to pass in a SmallVector/SmallString to the methods above.
2083 std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
2084   SmallString<40> S;
2085   toString(S, Radix, Signed, /* formatAsCLiteral = */false);
2086   return S.str();
2087 }
2088
2089 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2090 LLVM_DUMP_METHOD void APInt::dump() const {
2091   SmallString<40> S, U;
2092   this->toStringUnsigned(U);
2093   this->toStringSigned(S);
2094   dbgs() << "APInt(" << BitWidth << "b, "
2095          << U << "u " << S << "s)\n";
2096 }
2097 #endif
2098
2099 void APInt::print(raw_ostream &OS, bool isSigned) const {
2100   SmallString<40> S;
2101   this->toString(S, 10, isSigned, /* formatAsCLiteral = */false);
2102   OS << S;
2103 }
2104
2105 // This implements a variety of operations on a representation of
2106 // arbitrary precision, two's-complement, bignum integer values.
2107
2108 // Assumed by lowHalf, highHalf, partMSB and partLSB.  A fairly safe
2109 // and unrestricting assumption.
2110 static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0,
2111               "Part width must be divisible by 2!");
2112
2113 /* Some handy functions local to this file.  */
2114
2115 /* Returns the integer part with the least significant BITS set.
2116    BITS cannot be zero.  */
2117 static inline APInt::WordType lowBitMask(unsigned bits) {
2118   assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD);
2119
2120   return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits);
2121 }
2122
2123 /* Returns the value of the lower half of PART.  */
2124 static inline APInt::WordType lowHalf(APInt::WordType part) {
2125   return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2);
2126 }
2127
2128 /* Returns the value of the upper half of PART.  */
2129 static inline APInt::WordType highHalf(APInt::WordType part) {
2130   return part >> (APInt::APINT_BITS_PER_WORD / 2);
2131 }
2132
2133 /* Returns the bit number of the most significant set bit of a part.
2134    If the input number has no bits set -1U is returned.  */
2135 static unsigned partMSB(APInt::WordType value) {
2136   return findLastSet(value, ZB_Max);
2137 }
2138
2139 /* Returns the bit number of the least significant set bit of a
2140    part.  If the input number has no bits set -1U is returned.  */
2141 static unsigned partLSB(APInt::WordType value) {
2142   return findFirstSet(value, ZB_Max);
2143 }
2144
2145 /* Sets the least significant part of a bignum to the input value, and
2146    zeroes out higher parts.  */
2147 void APInt::tcSet(WordType *dst, WordType part, unsigned parts) {
2148   assert(parts > 0);
2149
2150   dst[0] = part;
2151   for (unsigned i = 1; i < parts; i++)
2152     dst[i] = 0;
2153 }
2154
2155 /* Assign one bignum to another.  */
2156 void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) {
2157   for (unsigned i = 0; i < parts; i++)
2158     dst[i] = src[i];
2159 }
2160
2161 /* Returns true if a bignum is zero, false otherwise.  */
2162 bool APInt::tcIsZero(const WordType *src, unsigned parts) {
2163   for (unsigned i = 0; i < parts; i++)
2164     if (src[i])
2165       return false;
2166
2167   return true;
2168 }
2169
2170 /* Extract the given bit of a bignum; returns 0 or 1.  */
2171 int APInt::tcExtractBit(const WordType *parts, unsigned bit) {
2172   return (parts[whichWord(bit)] & maskBit(bit)) != 0;
2173 }
2174
2175 /* Set the given bit of a bignum. */
2176 void APInt::tcSetBit(WordType *parts, unsigned bit) {
2177   parts[whichWord(bit)] |= maskBit(bit);
2178 }
2179
2180 /* Clears the given bit of a bignum. */
2181 void APInt::tcClearBit(WordType *parts, unsigned bit) {
2182   parts[whichWord(bit)] &= ~maskBit(bit);
2183 }
2184
2185 /* Returns the bit number of the least significant set bit of a
2186    number.  If the input number has no bits set -1U is returned.  */
2187 unsigned APInt::tcLSB(const WordType *parts, unsigned n) {
2188   for (unsigned i = 0; i < n; i++) {
2189     if (parts[i] != 0) {
2190       unsigned lsb = partLSB(parts[i]);
2191
2192       return lsb + i * APINT_BITS_PER_WORD;
2193     }
2194   }
2195
2196   return -1U;
2197 }
2198
2199 /* Returns the bit number of the most significant set bit of a number.
2200    If the input number has no bits set -1U is returned.  */
2201 unsigned APInt::tcMSB(const WordType *parts, unsigned n) {
2202   do {
2203     --n;
2204
2205     if (parts[n] != 0) {
2206       unsigned msb = partMSB(parts[n]);
2207
2208       return msb + n * APINT_BITS_PER_WORD;
2209     }
2210   } while (n);
2211
2212   return -1U;
2213 }
2214
2215 /* Copy the bit vector of width srcBITS from SRC, starting at bit
2216    srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
2217    the least significant bit of DST.  All high bits above srcBITS in
2218    DST are zero-filled.  */
2219 void
2220 APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src,
2221                  unsigned srcBits, unsigned srcLSB) {
2222   unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
2223   assert(dstParts <= dstCount);
2224
2225   unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD;
2226   tcAssign (dst, src + firstSrcPart, dstParts);
2227
2228   unsigned shift = srcLSB % APINT_BITS_PER_WORD;
2229   tcShiftRight (dst, dstParts, shift);
2230
2231   /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC
2232      in DST.  If this is less that srcBits, append the rest, else
2233      clear the high bits.  */
2234   unsigned n = dstParts * APINT_BITS_PER_WORD - shift;
2235   if (n < srcBits) {
2236     WordType mask = lowBitMask (srcBits - n);
2237     dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
2238                           << n % APINT_BITS_PER_WORD);
2239   } else if (n > srcBits) {
2240     if (srcBits % APINT_BITS_PER_WORD)
2241       dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD);
2242   }
2243
2244   /* Clear high parts.  */
2245   while (dstParts < dstCount)
2246     dst[dstParts++] = 0;
2247 }
2248
2249 /* DST += RHS + C where C is zero or one.  Returns the carry flag.  */
2250 APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs,
2251                              WordType c, unsigned parts) {
2252   assert(c <= 1);
2253
2254   for (unsigned i = 0; i < parts; i++) {
2255     WordType l = dst[i];
2256     if (c) {
2257       dst[i] += rhs[i] + 1;
2258       c = (dst[i] <= l);
2259     } else {
2260       dst[i] += rhs[i];
2261       c = (dst[i] < l);
2262     }
2263   }
2264
2265   return c;
2266 }
2267
2268 /// This function adds a single "word" integer, src, to the multiple
2269 /// "word" integer array, dst[]. dst[] is modified to reflect the addition and
2270 /// 1 is returned if there is a carry out, otherwise 0 is returned.
2271 /// @returns the carry of the addition.
2272 APInt::WordType APInt::tcAddPart(WordType *dst, WordType src,
2273                                  unsigned parts) {
2274   for (unsigned i = 0; i < parts; ++i) {
2275     dst[i] += src;
2276     if (dst[i] >= src)
2277       return 0; // No need to carry so exit early.
2278     src = 1; // Carry one to next digit.
2279   }
2280
2281   return 1;
2282 }
2283
2284 /* DST -= RHS + C where C is zero or one.  Returns the carry flag.  */
2285 APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs,
2286                                   WordType c, unsigned parts) {
2287   assert(c <= 1);
2288
2289   for (unsigned i = 0; i < parts; i++) {
2290     WordType l = dst[i];
2291     if (c) {
2292       dst[i] -= rhs[i] + 1;
2293       c = (dst[i] >= l);
2294     } else {
2295       dst[i] -= rhs[i];
2296       c = (dst[i] > l);
2297     }
2298   }
2299
2300   return c;
2301 }
2302
2303 /// This function subtracts a single "word" (64-bit word), src, from
2304 /// the multi-word integer array, dst[], propagating the borrowed 1 value until
2305 /// no further borrowing is needed or it runs out of "words" in dst.  The result
2306 /// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not
2307 /// exhausted. In other words, if src > dst then this function returns 1,
2308 /// otherwise 0.
2309 /// @returns the borrow out of the subtraction
2310 APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src,
2311                                       unsigned parts) {
2312   for (unsigned i = 0; i < parts; ++i) {
2313     WordType Dst = dst[i];
2314     dst[i] -= src;
2315     if (src <= Dst)
2316       return 0; // No need to borrow so exit early.
2317     src = 1; // We have to "borrow 1" from next "word"
2318   }
2319
2320   return 1;
2321 }
2322
2323 /* Negate a bignum in-place.  */
2324 void APInt::tcNegate(WordType *dst, unsigned parts) {
2325   tcComplement(dst, parts);
2326   tcIncrement(dst, parts);
2327 }
2328
2329 /*  DST += SRC * MULTIPLIER + CARRY   if add is true
2330     DST  = SRC * MULTIPLIER + CARRY   if add is false
2331
2332     Requires 0 <= DSTPARTS <= SRCPARTS + 1.  If DST overlaps SRC
2333     they must start at the same point, i.e. DST == SRC.
2334
2335     If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2336     returned.  Otherwise DST is filled with the least significant
2337     DSTPARTS parts of the result, and if all of the omitted higher
2338     parts were zero return zero, otherwise overflow occurred and
2339     return one.  */
2340 int APInt::tcMultiplyPart(WordType *dst, const WordType *src,
2341                           WordType multiplier, WordType carry,
2342                           unsigned srcParts, unsigned dstParts,
2343                           bool add) {
2344   /* Otherwise our writes of DST kill our later reads of SRC.  */
2345   assert(dst <= src || dst >= src + srcParts);
2346   assert(dstParts <= srcParts + 1);
2347
2348   /* N loops; minimum of dstParts and srcParts.  */
2349   unsigned n = dstParts < srcParts ? dstParts: srcParts;
2350
2351   unsigned i;
2352   for (i = 0; i < n; i++) {
2353     WordType low, mid, high, srcPart;
2354
2355       /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2356
2357          This cannot overflow, because
2358
2359          (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2360
2361          which is less than n^2.  */
2362
2363     srcPart = src[i];
2364
2365     if (multiplier == 0 || srcPart == 0) {
2366       low = carry;
2367       high = 0;
2368     } else {
2369       low = lowHalf(srcPart) * lowHalf(multiplier);
2370       high = highHalf(srcPart) * highHalf(multiplier);
2371
2372       mid = lowHalf(srcPart) * highHalf(multiplier);
2373       high += highHalf(mid);
2374       mid <<= APINT_BITS_PER_WORD / 2;
2375       if (low + mid < low)
2376         high++;
2377       low += mid;
2378
2379       mid = highHalf(srcPart) * lowHalf(multiplier);
2380       high += highHalf(mid);
2381       mid <<= APINT_BITS_PER_WORD / 2;
2382       if (low + mid < low)
2383         high++;
2384       low += mid;
2385
2386       /* Now add carry.  */
2387       if (low + carry < low)
2388         high++;
2389       low += carry;
2390     }
2391
2392     if (add) {
2393       /* And now DST[i], and store the new low part there.  */
2394       if (low + dst[i] < low)
2395         high++;
2396       dst[i] += low;
2397     } else
2398       dst[i] = low;
2399
2400     carry = high;
2401   }
2402
2403   if (i < dstParts) {
2404     /* Full multiplication, there is no overflow.  */
2405     assert(i + 1 == dstParts);
2406     dst[i] = carry;
2407     return 0;
2408   } else {
2409     /* We overflowed if there is carry.  */
2410     if (carry)
2411       return 1;
2412
2413     /* We would overflow if any significant unwritten parts would be
2414        non-zero.  This is true if any remaining src parts are non-zero
2415        and the multiplier is non-zero.  */
2416     if (multiplier)
2417       for (; i < srcParts; i++)
2418         if (src[i])
2419           return 1;
2420
2421     /* We fitted in the narrow destination.  */
2422     return 0;
2423   }
2424 }
2425
2426 /* DST = LHS * RHS, where DST has the same width as the operands and
2427    is filled with the least significant parts of the result.  Returns
2428    one if overflow occurred, otherwise zero.  DST must be disjoint
2429    from both operands.  */
2430 int APInt::tcMultiply(WordType *dst, const WordType *lhs,
2431                       const WordType *rhs, unsigned parts) {
2432   assert(dst != lhs && dst != rhs);
2433
2434   int overflow = 0;
2435   tcSet(dst, 0, parts);
2436
2437   for (unsigned i = 0; i < parts; i++)
2438     overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
2439                                parts - i, true);
2440
2441   return overflow;
2442 }
2443
2444 /* DST = LHS * RHS, where DST has width the sum of the widths of the
2445    operands.  No overflow occurs.  DST must be disjoint from both
2446    operands.  Returns the number of parts required to hold the
2447    result.  */
2448 unsigned APInt::tcFullMultiply(WordType *dst, const WordType *lhs,
2449                                const WordType *rhs, unsigned lhsParts,
2450                                unsigned rhsParts) {
2451   /* Put the narrower number on the LHS for less loops below.  */
2452   if (lhsParts > rhsParts) {
2453     return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
2454   } else {
2455     assert(dst != lhs && dst != rhs);
2456
2457     tcSet(dst, 0, rhsParts);
2458
2459     for (unsigned i = 0; i < lhsParts; i++)
2460       tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true);
2461
2462     unsigned n = lhsParts + rhsParts;
2463
2464     return n - (dst[n - 1] == 0);
2465   }
2466 }
2467
2468 /* If RHS is zero LHS and REMAINDER are left unchanged, return one.
2469    Otherwise set LHS to LHS / RHS with the fractional part discarded,
2470    set REMAINDER to the remainder, return zero.  i.e.
2471
2472    OLD_LHS = RHS * LHS + REMAINDER
2473
2474    SCRATCH is a bignum of the same size as the operands and result for
2475    use by the routine; its contents need not be initialized and are
2476    destroyed.  LHS, REMAINDER and SCRATCH must be distinct.
2477 */
2478 int APInt::tcDivide(WordType *lhs, const WordType *rhs,
2479                     WordType *remainder, WordType *srhs,
2480                     unsigned parts) {
2481   assert(lhs != remainder && lhs != srhs && remainder != srhs);
2482
2483   unsigned shiftCount = tcMSB(rhs, parts) + 1;
2484   if (shiftCount == 0)
2485     return true;
2486
2487   shiftCount = parts * APINT_BITS_PER_WORD - shiftCount;
2488   unsigned n = shiftCount / APINT_BITS_PER_WORD;
2489   WordType mask = (WordType) 1 << (shiftCount % APINT_BITS_PER_WORD);
2490
2491   tcAssign(srhs, rhs, parts);
2492   tcShiftLeft(srhs, parts, shiftCount);
2493   tcAssign(remainder, lhs, parts);
2494   tcSet(lhs, 0, parts);
2495
2496   /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
2497      the total.  */
2498   for (;;) {
2499       int compare;
2500
2501       compare = tcCompare(remainder, srhs, parts);
2502       if (compare >= 0) {
2503         tcSubtract(remainder, srhs, 0, parts);
2504         lhs[n] |= mask;
2505       }
2506
2507       if (shiftCount == 0)
2508         break;
2509       shiftCount--;
2510       tcShiftRight(srhs, parts, 1);
2511       if ((mask >>= 1) == 0) {
2512         mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1);
2513         n--;
2514       }
2515   }
2516
2517   return false;
2518 }
2519
2520 /// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are
2521 /// no restrictions on Count.
2522 void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) {
2523   // Don't bother performing a no-op shift.
2524   if (!Count)
2525     return;
2526
2527   // WordShift is the inter-part shift; BitShift is the intra-part shift.
2528   unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2529   unsigned BitShift = Count % APINT_BITS_PER_WORD;
2530
2531   // Fastpath for moving by whole words.
2532   if (BitShift == 0) {
2533     std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE);
2534   } else {
2535     while (Words-- > WordShift) {
2536       Dst[Words] = Dst[Words - WordShift] << BitShift;
2537       if (Words > WordShift)
2538         Dst[Words] |=
2539           Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift);
2540     }
2541   }
2542
2543   // Fill in the remainder with 0s.
2544   std::memset(Dst, 0, WordShift * APINT_WORD_SIZE);
2545 }
2546
2547 /// Shift a bignum right Count bits in-place. Shifted in bits are zero. There
2548 /// are no restrictions on Count.
2549 void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) {
2550   // Don't bother performing a no-op shift.
2551   if (!Count)
2552     return;
2553
2554   // WordShift is the inter-part shift; BitShift is the intra-part shift.
2555   unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2556   unsigned BitShift = Count % APINT_BITS_PER_WORD;
2557
2558   unsigned WordsToMove = Words - WordShift;
2559   // Fastpath for moving by whole words.
2560   if (BitShift == 0) {
2561     std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE);
2562   } else {
2563     for (unsigned i = 0; i != WordsToMove; ++i) {
2564       Dst[i] = Dst[i + WordShift] >> BitShift;
2565       if (i + 1 != WordsToMove)
2566         Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift);
2567     }
2568   }
2569
2570   // Fill in the remainder with 0s.
2571   std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE);
2572 }
2573
2574 /* Bitwise and of two bignums.  */
2575 void APInt::tcAnd(WordType *dst, const WordType *rhs, unsigned parts) {
2576   for (unsigned i = 0; i < parts; i++)
2577     dst[i] &= rhs[i];
2578 }
2579
2580 /* Bitwise inclusive or of two bignums.  */
2581 void APInt::tcOr(WordType *dst, const WordType *rhs, unsigned parts) {
2582   for (unsigned i = 0; i < parts; i++)
2583     dst[i] |= rhs[i];
2584 }
2585
2586 /* Bitwise exclusive or of two bignums.  */
2587 void APInt::tcXor(WordType *dst, const WordType *rhs, unsigned parts) {
2588   for (unsigned i = 0; i < parts; i++)
2589     dst[i] ^= rhs[i];
2590 }
2591
2592 /* Complement a bignum in-place.  */
2593 void APInt::tcComplement(WordType *dst, unsigned parts) {
2594   for (unsigned i = 0; i < parts; i++)
2595     dst[i] = ~dst[i];
2596 }
2597
2598 /* Comparison (unsigned) of two bignums.  */
2599 int APInt::tcCompare(const WordType *lhs, const WordType *rhs,
2600                      unsigned parts) {
2601   while (parts) {
2602     parts--;
2603     if (lhs[parts] != rhs[parts])
2604       return (lhs[parts] > rhs[parts]) ? 1 : -1;
2605   }
2606
2607   return 0;
2608 }
2609
2610 /* Set the least significant BITS bits of a bignum, clear the
2611    rest.  */
2612 void APInt::tcSetLeastSignificantBits(WordType *dst, unsigned parts,
2613                                       unsigned bits) {
2614   unsigned i = 0;
2615   while (bits > APINT_BITS_PER_WORD) {
2616     dst[i++] = ~(WordType) 0;
2617     bits -= APINT_BITS_PER_WORD;
2618   }
2619
2620   if (bits)
2621     dst[i++] = ~(WordType) 0 >> (APINT_BITS_PER_WORD - bits);
2622
2623   while (i < parts)
2624     dst[i++] = 0;
2625 }