contrib/llvm-project/llvm/lib/Support/APInt.cpp

   1 //===-- APInt.cpp - Implement APInt class ---------------------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file implements a class to represent arbitrary precision integer
  10 // constant values and provide a variety of arithmetic operations on them.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "llvm/ADT/APInt.h"
  15 #include "llvm/ADT/ArrayRef.h"
  16 #include "llvm/ADT/FoldingSet.h"
  17 #include "llvm/ADT/Hashing.h"
  18 #include "llvm/ADT/Optional.h"
  19 #include "llvm/ADT/SmallString.h"
  20 #include "llvm/ADT/StringRef.h"
  21 #include "llvm/ADT/bit.h"
  22 #include "llvm/Config/llvm-config.h"
  23 #include "llvm/Support/Debug.h"
  24 #include "llvm/Support/ErrorHandling.h"
  25 #include "llvm/Support/MathExtras.h"
  26 #include "llvm/Support/raw_ostream.h"
  27 #include <climits>
  28 #include <cmath>
  29 #include <cstdlib>
  30 #include <cstring>
  31 using namespace llvm;
  32
  33 #define DEBUG_TYPE "apint"
  34
  35 /// A utility function for allocating memory, checking for allocation failures,
  36 /// and ensuring the contents are zeroed.
  37 inline static uint64_t* getClearedMemory(unsigned numWords) {
  38   uint64_t *result = new uint64_t[numWords];
  39   memset(result, 0, numWords * sizeof(uint64_t));
  40   return result;
  41 }
  42
  43 /// A utility function for allocating memory and checking for allocation
  44 /// failure.  The content is not zeroed.
  45 inline static uint64_t* getMemory(unsigned numWords) {
  46   return new uint64_t[numWords];
  47 }
  48
  49 /// A utility function that converts a character to a digit.
  50 inline static unsigned getDigit(char cdigit, uint8_t radix) {
  51   unsigned r;
  52
  53   if (radix == 16 || radix == 36) {
  54     r = cdigit - '0';
  55     if (r <= 9)
  56       return r;
  57
  58     r = cdigit - 'A';
  59     if (r <= radix - 11U)
  60       return r + 10;
  61
  62     r = cdigit - 'a';
  63     if (r <= radix - 11U)
  64       return r + 10;
  65
  66     radix = 10;
  67   }
  68
  69   r = cdigit - '0';
  70   if (r < radix)
  71     return r;
  72
  73   return -1U;
  74 }
  75
  76
  77 void APInt::initSlowCase(uint64_t val, bool isSigned) {
  78   U.pVal = getClearedMemory(getNumWords());
  79   U.pVal[0] = val;
  80   if (isSigned && int64_t(val) < 0)
  81     for (unsigned i = 1; i < getNumWords(); ++i)
  82       U.pVal[i] = WORDTYPE_MAX;
  83   clearUnusedBits();
  84 }
  85
  86 void APInt::initSlowCase(const APInt& that) {
  87   U.pVal = getMemory(getNumWords());
  88   memcpy(U.pVal, that.U.pVal, getNumWords() * APINT_WORD_SIZE);
  89 }
  90
  91 void APInt::initFromArray(ArrayRef<uint64_t> bigVal) {
  92   assert(BitWidth && "Bitwidth too small");
  93   assert(bigVal.data() && "Null pointer detected!");
  94   if (isSingleWord())
  95     U.VAL = bigVal[0];
  96   else {
  97     // Get memory, cleared to 0
  98     U.pVal = getClearedMemory(getNumWords());
  99     // Calculate the number of words to copy
 100     unsigned words = std::min<unsigned>(bigVal.size(), getNumWords());
 101     // Copy the words from bigVal to pVal
 102     memcpy(U.pVal, bigVal.data(), words * APINT_WORD_SIZE);
 103   }
 104   // Make sure unused high bits are cleared
 105   clearUnusedBits();
 106 }
 107
 108 APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal)
 109   : BitWidth(numBits) {
 110   initFromArray(bigVal);
 111 }
 112
 113 APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
 114   : BitWidth(numBits) {
 115   initFromArray(makeArrayRef(bigVal, numWords));
 116 }
 117
 118 APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
 119   : BitWidth(numbits) {
 120   assert(BitWidth && "Bitwidth too small");
 121   fromString(numbits, Str, radix);
 122 }
 123
 124 void APInt::reallocate(unsigned NewBitWidth) {
 125   // If the number of words is the same we can just change the width and stop.
 126   if (getNumWords() == getNumWords(NewBitWidth)) {
 127     BitWidth = NewBitWidth;
 128     return;
 129   }
 130
 131   // If we have an allocation, delete it.
 132   if (!isSingleWord())
 133     delete [] U.pVal;
 134
 135   // Update BitWidth.
 136   BitWidth = NewBitWidth;
 137
 138   // If we are supposed to have an allocation, create it.
 139   if (!isSingleWord())
 140     U.pVal = getMemory(getNumWords());
 141 }
 142
 143 void APInt::AssignSlowCase(const APInt& RHS) {
 144   // Don't do anything for X = X
 145   if (this == &RHS)
 146     return;
 147
 148   // Adjust the bit width and handle allocations as necessary.
 149   reallocate(RHS.getBitWidth());
 150
 151   // Copy the data.
 152   if (isSingleWord())
 153     U.VAL = RHS.U.VAL;
 154   else
 155     memcpy(U.pVal, RHS.U.pVal, getNumWords() * APINT_WORD_SIZE);
 156 }
 157
 158 /// This method 'profiles' an APInt for use with FoldingSet.
 159 void APInt::Profile(FoldingSetNodeID& ID) const {
 160   ID.AddInteger(BitWidth);
 161
 162   if (isSingleWord()) {
 163     ID.AddInteger(U.VAL);
 164     return;
 165   }
 166
 167   unsigned NumWords = getNumWords();
 168   for (unsigned i = 0; i < NumWords; ++i)
 169     ID.AddInteger(U.pVal[i]);
 170 }
 171
 172 /// Prefix increment operator. Increments the APInt by one.
 173 APInt& APInt::operator++() {
 174   if (isSingleWord())
 175     ++U.VAL;
 176   else
 177     tcIncrement(U.pVal, getNumWords());
 178   return clearUnusedBits();
 179 }
 180
 181 /// Prefix decrement operator. Decrements the APInt by one.
 182 APInt& APInt::operator--() {
 183   if (isSingleWord())
 184     --U.VAL;
 185   else
 186     tcDecrement(U.pVal, getNumWords());
 187   return clearUnusedBits();
 188 }
 189
 190 /// Adds the RHS APInt to this APInt.
 191 /// @returns this, after addition of RHS.
 192 /// Addition assignment operator.
 193 APInt& APInt::operator+=(const APInt& RHS) {
 194   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 195   if (isSingleWord())
 196     U.VAL += RHS.U.VAL;
 197   else
 198     tcAdd(U.pVal, RHS.U.pVal, 0, getNumWords());
 199   return clearUnusedBits();
 200 }
 201
 202 APInt& APInt::operator+=(uint64_t RHS) {
 203   if (isSingleWord())
 204     U.VAL += RHS;
 205   else
 206     tcAddPart(U.pVal, RHS, getNumWords());
 207   return clearUnusedBits();
 208 }
 209
 210 /// Subtracts the RHS APInt from this APInt
 211 /// @returns this, after subtraction
 212 /// Subtraction assignment operator.
 213 APInt& APInt::operator-=(const APInt& RHS) {
 214   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 215   if (isSingleWord())
 216     U.VAL -= RHS.U.VAL;
 217   else
 218     tcSubtract(U.pVal, RHS.U.pVal, 0, getNumWords());
 219   return clearUnusedBits();
 220 }
 221
 222 APInt& APInt::operator-=(uint64_t RHS) {
 223   if (isSingleWord())
 224     U.VAL -= RHS;
 225   else
 226     tcSubtractPart(U.pVal, RHS, getNumWords());
 227   return clearUnusedBits();
 228 }
 229
 230 APInt APInt::operator*(const APInt& RHS) const {
 231   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 232   if (isSingleWord())
 233     return APInt(BitWidth, U.VAL * RHS.U.VAL);
 234
 235   APInt Result(getMemory(getNumWords()), getBitWidth());
 236
 237   tcMultiply(Result.U.pVal, U.pVal, RHS.U.pVal, getNumWords());
 238
 239   Result.clearUnusedBits();
 240   return Result;
 241 }
 242
 243 void APInt::AndAssignSlowCase(const APInt& RHS) {
 244   tcAnd(U.pVal, RHS.U.pVal, getNumWords());
 245 }
 246
 247 void APInt::OrAssignSlowCase(const APInt& RHS) {
 248   tcOr(U.pVal, RHS.U.pVal, getNumWords());
 249 }
 250
 251 void APInt::XorAssignSlowCase(const APInt& RHS) {
 252   tcXor(U.pVal, RHS.U.pVal, getNumWords());
 253 }
 254
 255 APInt& APInt::operator*=(const APInt& RHS) {
 256   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 257   *this = *this * RHS;
 258   return *this;
 259 }
 260
 261 APInt& APInt::operator*=(uint64_t RHS) {
 262   if (isSingleWord()) {
 263     U.VAL *= RHS;
 264   } else {
 265     unsigned NumWords = getNumWords();
 266     tcMultiplyPart(U.pVal, U.pVal, RHS, 0, NumWords, NumWords, false);
 267   }
 268   return clearUnusedBits();
 269 }
 270
 271 bool APInt::EqualSlowCase(const APInt& RHS) const {
 272   return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal);
 273 }
 274
 275 int APInt::compare(const APInt& RHS) const {
 276   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
 277   if (isSingleWord())
 278     return U.VAL < RHS.U.VAL ? -1 : U.VAL > RHS.U.VAL;
 279
 280   return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
 281 }
 282
 283 int APInt::compareSigned(const APInt& RHS) const {
 284   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
 285   if (isSingleWord()) {
 286     int64_t lhsSext = SignExtend64(U.VAL, BitWidth);
 287     int64_t rhsSext = SignExtend64(RHS.U.VAL, BitWidth);
 288     return lhsSext < rhsSext ? -1 : lhsSext > rhsSext;
 289   }
 290
 291   bool lhsNeg = isNegative();
 292   bool rhsNeg = RHS.isNegative();
 293
 294   // If the sign bits don't match, then (LHS < RHS) if LHS is negative
 295   if (lhsNeg != rhsNeg)
 296     return lhsNeg ? -1 : 1;
 297
 298   // Otherwise we can just use an unsigned comparison, because even negative
 299   // numbers compare correctly this way if both have the same signed-ness.
 300   return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
 301 }
 302
 303 void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
 304   unsigned loWord = whichWord(loBit);
 305   unsigned hiWord = whichWord(hiBit);
 306
 307   // Create an initial mask for the low word with zeros below loBit.
 308   uint64_t loMask = WORDTYPE_MAX << whichBit(loBit);
 309
 310   // If hiBit is not aligned, we need a high mask.
 311   unsigned hiShiftAmt = whichBit(hiBit);
 312   if (hiShiftAmt != 0) {
 313     // Create a high mask with zeros above hiBit.
 314     uint64_t hiMask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt);
 315     // If loWord and hiWord are equal, then we combine the masks. Otherwise,
 316     // set the bits in hiWord.
 317     if (hiWord == loWord)
 318       loMask &= hiMask;
 319     else
 320       U.pVal[hiWord] |= hiMask;
 321   }
 322   // Apply the mask to the low word.
 323   U.pVal[loWord] |= loMask;
 324
 325   // Fill any words between loWord and hiWord with all ones.
 326   for (unsigned word = loWord + 1; word < hiWord; ++word)
 327     U.pVal[word] = WORDTYPE_MAX;
 328 }
 329
 330 /// Toggle every bit to its opposite value.
 331 void APInt::flipAllBitsSlowCase() {
 332   tcComplement(U.pVal, getNumWords());
 333   clearUnusedBits();
 334 }
 335
 336 /// Toggle a given bit to its opposite value whose position is given
 337 /// as "bitPosition".
 338 /// Toggles a given bit to its opposite value.
 339 void APInt::flipBit(unsigned bitPosition) {
 340   assert(bitPosition < BitWidth && "Out of the bit-width range!");
 341   if ((*this)[bitPosition]) clearBit(bitPosition);
 342   else setBit(bitPosition);
 343 }
 344
 345 void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
 346   unsigned subBitWidth = subBits.getBitWidth();
 347   assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth &&
 348          "Illegal bit insertion");
 349
 350   // Insertion is a direct copy.
 351   if (subBitWidth == BitWidth) {
 352     *this = subBits;
 353     return;
 354   }
 355
 356   // Single word result can be done as a direct bitmask.
 357   if (isSingleWord()) {
 358     uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
 359     U.VAL &= ~(mask << bitPosition);
 360     U.VAL |= (subBits.U.VAL << bitPosition);
 361     return;
 362   }
 363
 364   unsigned loBit = whichBit(bitPosition);
 365   unsigned loWord = whichWord(bitPosition);
 366   unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1);
 367
 368   // Insertion within a single word can be done as a direct bitmask.
 369   if (loWord == hi1Word) {
 370     uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
 371     U.pVal[loWord] &= ~(mask << loBit);
 372     U.pVal[loWord] |= (subBits.U.VAL << loBit);
 373     return;
 374   }
 375
 376   // Insert on word boundaries.
 377   if (loBit == 0) {
 378     // Direct copy whole words.
 379     unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD;
 380     memcpy(U.pVal + loWord, subBits.getRawData(),
 381            numWholeSubWords * APINT_WORD_SIZE);
 382
 383     // Mask+insert remaining bits.
 384     unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD;
 385     if (remainingBits != 0) {
 386       uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - remainingBits);
 387       U.pVal[hi1Word] &= ~mask;
 388       U.pVal[hi1Word] |= subBits.getWord(subBitWidth - 1);
 389     }
 390     return;
 391   }
 392
 393   // General case - set/clear individual bits in dst based on src.
 394   // TODO - there is scope for optimization here, but at the moment this code
 395   // path is barely used so prefer readability over performance.
 396   for (unsigned i = 0; i != subBitWidth; ++i) {
 397     if (subBits[i])
 398       setBit(bitPosition + i);
 399     else
 400       clearBit(bitPosition + i);
 401   }
 402 }
 403
 404 void APInt::insertBits(uint64_t subBits, unsigned bitPosition, unsigned numBits) {
 405   uint64_t maskBits = maskTrailingOnes<uint64_t>(numBits);
 406   subBits &= maskBits;
 407   if (isSingleWord()) {
 408     U.VAL &= ~(maskBits << bitPosition);
 409     U.VAL |= subBits << bitPosition;
 410     return;
 411   }
 412
 413   unsigned loBit = whichBit(bitPosition);
 414   unsigned loWord = whichWord(bitPosition);
 415   unsigned hiWord = whichWord(bitPosition + numBits - 1);
 416   if (loWord == hiWord) {
 417     U.pVal[loWord] &= ~(maskBits << loBit);
 418     U.pVal[loWord] |= subBits << loBit;
 419     return;
 420   }
 421
 422   static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected");
 423   unsigned wordBits = 8 * sizeof(WordType);
 424   U.pVal[loWord] &= ~(maskBits << loBit);
 425   U.pVal[loWord] |= subBits << loBit;
 426
 427   U.pVal[hiWord] &= ~(maskBits >> (wordBits - loBit));
 428   U.pVal[hiWord] |= subBits >> (wordBits - loBit);
 429 }
 430
 431 APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const {
 432   assert(numBits > 0 && "Can't extract zero bits");
 433   assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
 434          "Illegal bit extraction");
 435
 436   if (isSingleWord())
 437     return APInt(numBits, U.VAL >> bitPosition);
 438
 439   unsigned loBit = whichBit(bitPosition);
 440   unsigned loWord = whichWord(bitPosition);
 441   unsigned hiWord = whichWord(bitPosition + numBits - 1);
 442
 443   // Single word result extracting bits from a single word source.
 444   if (loWord == hiWord)
 445     return APInt(numBits, U.pVal[loWord] >> loBit);
 446
 447   // Extracting bits that start on a source word boundary can be done
 448   // as a fast memory copy.
 449   if (loBit == 0)
 450     return APInt(numBits, makeArrayRef(U.pVal + loWord, 1 + hiWord - loWord));
 451
 452   // General case - shift + copy source words directly into place.
 453   APInt Result(numBits, 0);
 454   unsigned NumSrcWords = getNumWords();
 455   unsigned NumDstWords = Result.getNumWords();
 456
 457   uint64_t *DestPtr = Result.isSingleWord() ? &Result.U.VAL : Result.U.pVal;
 458   for (unsigned word = 0; word < NumDstWords; ++word) {
 459     uint64_t w0 = U.pVal[loWord + word];
 460     uint64_t w1 =
 461         (loWord + word + 1) < NumSrcWords ? U.pVal[loWord + word + 1] : 0;
 462     DestPtr[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit));
 463   }
 464
 465   return Result.clearUnusedBits();
 466 }
 467
 468 uint64_t APInt::extractBitsAsZExtValue(unsigned numBits,
 469                                        unsigned bitPosition) const {
 470   assert(numBits > 0 && "Can't extract zero bits");
 471   assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
 472          "Illegal bit extraction");
 473   assert(numBits <= 64 && "Illegal bit extraction");
 474
 475   uint64_t maskBits = maskTrailingOnes<uint64_t>(numBits);
 476   if (isSingleWord())
 477     return (U.VAL >> bitPosition) & maskBits;
 478
 479   unsigned loBit = whichBit(bitPosition);
 480   unsigned loWord = whichWord(bitPosition);
 481   unsigned hiWord = whichWord(bitPosition + numBits - 1);
 482   if (loWord == hiWord)
 483     return (U.pVal[loWord] >> loBit) & maskBits;
 484
 485   static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected");
 486   unsigned wordBits = 8 * sizeof(WordType);
 487   uint64_t retBits = U.pVal[loWord] >> loBit;
 488   retBits |= U.pVal[hiWord] << (wordBits - loBit);
 489   retBits &= maskBits;
 490   return retBits;
 491 }
 492
 493 unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
 494   assert(!str.empty() && "Invalid string length");
 495   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
 496           radix == 36) &&
 497          "Radix should be 2, 8, 10, 16, or 36!");
 498
 499   size_t slen = str.size();
 500
 501   // Each computation below needs to know if it's negative.
 502   StringRef::iterator p = str.begin();
 503   unsigned isNegative = *p == '-';
 504   if (*p == '-' || *p == '+') {
 505     p++;
 506     slen--;
 507     assert(slen && "String is only a sign, needs a value.");
 508   }
 509
 510   // For radixes of power-of-two values, the bits required is accurately and
 511   // easily computed
 512   if (radix == 2)
 513     return slen + isNegative;
 514   if (radix == 8)
 515     return slen * 3 + isNegative;
 516   if (radix == 16)
 517     return slen * 4 + isNegative;
 518
 519   // FIXME: base 36
 520
 521   // This is grossly inefficient but accurate. We could probably do something
 522   // with a computation of roughly slen*64/20 and then adjust by the value of
 523   // the first few digits. But, I'm not sure how accurate that could be.
 524
 525   // Compute a sufficient number of bits that is always large enough but might
 526   // be too large. This avoids the assertion in the constructor. This
 527   // calculation doesn't work appropriately for the numbers 0-9, so just use 4
 528   // bits in that case.
 529   unsigned sufficient
 530     = radix == 10? (slen == 1 ? 4 : slen * 64/18)
 531                  : (slen == 1 ? 7 : slen * 16/3);
 532
 533   // Convert to the actual binary value.
 534   APInt tmp(sufficient, StringRef(p, slen), radix);
 535
 536   // Compute how many bits are required. If the log is infinite, assume we need
 537   // just bit. If the log is exact and value is negative, then the value is
 538   // MinSignedValue with (log + 1) bits.
 539   unsigned log = tmp.logBase2();
 540   if (log == (unsigned)-1) {
 541     return isNegative + 1;
 542   } else if (isNegative && tmp.isPowerOf2()) {
 543     return isNegative + log;
 544   } else {
 545     return isNegative + log + 1;
 546   }
 547 }
 548
 549 hash_code llvm::hash_value(const APInt &Arg) {
 550   if (Arg.isSingleWord())
 551     return hash_combine(Arg.U.VAL);
 552
 553   return hash_combine_range(Arg.U.pVal, Arg.U.pVal + Arg.getNumWords());
 554 }
 555
 556 bool APInt::isSplat(unsigned SplatSizeInBits) const {
 557   assert(getBitWidth() % SplatSizeInBits == 0 &&
 558          "SplatSizeInBits must divide width!");
 559   // We can check that all parts of an integer are equal by making use of a
 560   // little trick: rotate and check if it's still the same value.
 561   return *this == rotl(SplatSizeInBits);
 562 }
 563
 564 /// This function returns the high "numBits" bits of this APInt.
 565 APInt APInt::getHiBits(unsigned numBits) const {
 566   return this->lshr(BitWidth - numBits);
 567 }
 568
 569 /// This function returns the low "numBits" bits of this APInt.
 570 APInt APInt::getLoBits(unsigned numBits) const {
 571   APInt Result(getLowBitsSet(BitWidth, numBits));
 572   Result &= *this;
 573   return Result;
 574 }
 575
 576 /// Return a value containing V broadcasted over NewLen bits.
 577 APInt APInt::getSplat(unsigned NewLen, const APInt &V) {
 578   assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!");
 579
 580   APInt Val = V.zextOrSelf(NewLen);
 581   for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1)
 582     Val |= Val << I;
 583
 584   return Val;
 585 }
 586
 587 unsigned APInt::countLeadingZerosSlowCase() const {
 588   unsigned Count = 0;
 589   for (int i = getNumWords()-1; i >= 0; --i) {
 590     uint64_t V = U.pVal[i];
 591     if (V == 0)
 592       Count += APINT_BITS_PER_WORD;
 593     else {
 594       Count += llvm::countLeadingZeros(V);
 595       break;
 596     }
 597   }
 598   // Adjust for unused bits in the most significant word (they are zero).
 599   unsigned Mod = BitWidth % APINT_BITS_PER_WORD;
 600   Count -= Mod > 0 ? APINT_BITS_PER_WORD - Mod : 0;
 601   return Count;
 602 }
 603
 604 unsigned APInt::countLeadingOnesSlowCase() const {
 605   unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
 606   unsigned shift;
 607   if (!highWordBits) {
 608     highWordBits = APINT_BITS_PER_WORD;
 609     shift = 0;
 610   } else {
 611     shift = APINT_BITS_PER_WORD - highWordBits;
 612   }
 613   int i = getNumWords() - 1;
 614   unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift);
 615   if (Count == highWordBits) {
 616     for (i--; i >= 0; --i) {
 617       if (U.pVal[i] == WORDTYPE_MAX)
 618         Count += APINT_BITS_PER_WORD;
 619       else {
 620         Count += llvm::countLeadingOnes(U.pVal[i]);
 621         break;
 622       }
 623     }
 624   }
 625   return Count;
 626 }
 627
 628 unsigned APInt::countTrailingZerosSlowCase() const {
 629   unsigned Count = 0;
 630   unsigned i = 0;
 631   for (; i < getNumWords() && U.pVal[i] == 0; ++i)
 632     Count += APINT_BITS_PER_WORD;
 633   if (i < getNumWords())
 634     Count += llvm::countTrailingZeros(U.pVal[i]);
 635   return std::min(Count, BitWidth);
 636 }
 637
 638 unsigned APInt::countTrailingOnesSlowCase() const {
 639   unsigned Count = 0;
 640   unsigned i = 0;
 641   for (; i < getNumWords() && U.pVal[i] == WORDTYPE_MAX; ++i)
 642     Count += APINT_BITS_PER_WORD;
 643   if (i < getNumWords())
 644     Count += llvm::countTrailingOnes(U.pVal[i]);
 645   assert(Count <= BitWidth);
 646   return Count;
 647 }
 648
 649 unsigned APInt::countPopulationSlowCase() const {
 650   unsigned Count = 0;
 651   for (unsigned i = 0; i < getNumWords(); ++i)
 652     Count += llvm::countPopulation(U.pVal[i]);
 653   return Count;
 654 }
 655
 656 bool APInt::intersectsSlowCase(const APInt &RHS) const {
 657   for (unsigned i = 0, e = getNumWords(); i != e; ++i)
 658     if ((U.pVal[i] & RHS.U.pVal[i]) != 0)
 659       return true;
 660
 661   return false;
 662 }
 663
 664 bool APInt::isSubsetOfSlowCase(const APInt &RHS) const {
 665   for (unsigned i = 0, e = getNumWords(); i != e; ++i)
 666     if ((U.pVal[i] & ~RHS.U.pVal[i]) != 0)
 667       return false;
 668
 669   return true;
 670 }
 671
 672 APInt APInt::byteSwap() const {
 673   assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
 674   if (BitWidth == 16)
 675     return APInt(BitWidth, ByteSwap_16(uint16_t(U.VAL)));
 676   if (BitWidth == 32)
 677     return APInt(BitWidth, ByteSwap_32(unsigned(U.VAL)));
 678   if (BitWidth == 48) {
 679     unsigned Tmp1 = unsigned(U.VAL >> 16);
 680     Tmp1 = ByteSwap_32(Tmp1);
 681     uint16_t Tmp2 = uint16_t(U.VAL);
 682     Tmp2 = ByteSwap_16(Tmp2);
 683     return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1);
 684   }
 685   if (BitWidth == 64)
 686     return APInt(BitWidth, ByteSwap_64(U.VAL));
 687
 688   APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0);
 689   for (unsigned I = 0, N = getNumWords(); I != N; ++I)
 690     Result.U.pVal[I] = ByteSwap_64(U.pVal[N - I - 1]);
 691   if (Result.BitWidth != BitWidth) {
 692     Result.lshrInPlace(Result.BitWidth - BitWidth);
 693     Result.BitWidth = BitWidth;
 694   }
 695   return Result;
 696 }
 697
 698 APInt APInt::reverseBits() const {
 699   switch (BitWidth) {
 700   case 64:
 701     return APInt(BitWidth, llvm::reverseBits<uint64_t>(U.VAL));
 702   case 32:
 703     return APInt(BitWidth, llvm::reverseBits<uint32_t>(U.VAL));
 704   case 16:
 705     return APInt(BitWidth, llvm::reverseBits<uint16_t>(U.VAL));
 706   case 8:
 707     return APInt(BitWidth, llvm::reverseBits<uint8_t>(U.VAL));
 708   default:
 709     break;
 710   }
 711
 712   APInt Val(*this);
 713   APInt Reversed(BitWidth, 0);
 714   unsigned S = BitWidth;
 715
 716   for (; Val != 0; Val.lshrInPlace(1)) {
 717     Reversed <<= 1;
 718     Reversed |= Val[0];
 719     --S;
 720   }
 721
 722   Reversed <<= S;
 723   return Reversed;
 724 }
 725
 726 APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) {
 727   // Fast-path a common case.
 728   if (A == B) return A;
 729
 730   // Corner cases: if either operand is zero, the other is the gcd.
 731   if (!A) return B;
 732   if (!B) return A;
 733
 734   // Count common powers of 2 and remove all other powers of 2.
 735   unsigned Pow2;
 736   {
 737     unsigned Pow2_A = A.countTrailingZeros();
 738     unsigned Pow2_B = B.countTrailingZeros();
 739     if (Pow2_A > Pow2_B) {
 740       A.lshrInPlace(Pow2_A - Pow2_B);
 741       Pow2 = Pow2_B;
 742     } else if (Pow2_B > Pow2_A) {
 743       B.lshrInPlace(Pow2_B - Pow2_A);
 744       Pow2 = Pow2_A;
 745     } else {
 746       Pow2 = Pow2_A;
 747     }
 748   }
 749
 750   // Both operands are odd multiples of 2^Pow_2:
 751   //
 752   //   gcd(a, b) = gcd(|a - b| / 2^i, min(a, b))
 753   //
 754   // This is a modified version of Stein's algorithm, taking advantage of
 755   // efficient countTrailingZeros().
 756   while (A != B) {
 757     if (A.ugt(B)) {
 758       A -= B;
 759       A.lshrInPlace(A.countTrailingZeros() - Pow2);
 760     } else {
 761       B -= A;
 762       B.lshrInPlace(B.countTrailingZeros() - Pow2);
 763     }
 764   }
 765
 766   return A;
 767 }
 768
 769 APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
 770   uint64_t I = bit_cast<uint64_t>(Double);
 771
 772   // Get the sign bit from the highest order bit
 773   bool isNeg = I >> 63;
 774
 775   // Get the 11-bit exponent and adjust for the 1023 bit bias
 776   int64_t exp = ((I >> 52) & 0x7ff) - 1023;
 777
 778   // If the exponent is negative, the value is < 0 so just return 0.
 779   if (exp < 0)
 780     return APInt(width, 0u);
 781
 782   // Extract the mantissa by clearing the top 12 bits (sign + exponent).
 783   uint64_t mantissa = (I & (~0ULL >> 12)) | 1ULL << 52;
 784
 785   // If the exponent doesn't shift all bits out of the mantissa
 786   if (exp < 52)
 787     return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
 788                     APInt(width, mantissa >> (52 - exp));
 789
 790   // If the client didn't provide enough bits for us to shift the mantissa into
 791   // then the result is undefined, just return 0
 792   if (width <= exp - 52)
 793     return APInt(width, 0);
 794
 795   // Otherwise, we have to shift the mantissa bits up to the right location
 796   APInt Tmp(width, mantissa);
 797   Tmp <<= (unsigned)exp - 52;
 798   return isNeg ? -Tmp : Tmp;
 799 }
 800
 801 /// This function converts this APInt to a double.
 802 /// The layout for double is as following (IEEE Standard 754):
 803 ///  --------------------------------------
 804 /// |  Sign    Exponent    Fraction    Bias |
 805 /// |-------------------------------------- |
 806 /// |  1[63]   11[62-52]   52[51-00]   1023 |
 807 ///  --------------------------------------
 808 double APInt::roundToDouble(bool isSigned) const {
 809
 810   // Handle the simple case where the value is contained in one uint64_t.
 811   // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
 812   if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
 813     if (isSigned) {
 814       int64_t sext = SignExtend64(getWord(0), BitWidth);
 815       return double(sext);
 816     } else
 817       return double(getWord(0));
 818   }
 819
 820   // Determine if the value is negative.
 821   bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
 822
 823   // Construct the absolute value if we're negative.
 824   APInt Tmp(isNeg ? -(*this) : (*this));
 825
 826   // Figure out how many bits we're using.
 827   unsigned n = Tmp.getActiveBits();
 828
 829   // The exponent (without bias normalization) is just the number of bits
 830   // we are using. Note that the sign bit is gone since we constructed the
 831   // absolute value.
 832   uint64_t exp = n;
 833
 834   // Return infinity for exponent overflow
 835   if (exp > 1023) {
 836     if (!isSigned || !isNeg)
 837       return std::numeric_limits<double>::infinity();
 838     else
 839       return -std::numeric_limits<double>::infinity();
 840   }
 841   exp += 1023; // Increment for 1023 bias
 842
 843   // Number of bits in mantissa is 52. To obtain the mantissa value, we must
 844   // extract the high 52 bits from the correct words in pVal.
 845   uint64_t mantissa;
 846   unsigned hiWord = whichWord(n-1);
 847   if (hiWord == 0) {
 848     mantissa = Tmp.U.pVal[0];
 849     if (n > 52)
 850       mantissa >>= n - 52; // shift down, we want the top 52 bits.
 851   } else {
 852     assert(hiWord > 0 && "huh?");
 853     uint64_t hibits = Tmp.U.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
 854     uint64_t lobits = Tmp.U.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
 855     mantissa = hibits | lobits;
 856   }
 857
 858   // The leading bit of mantissa is implicit, so get rid of it.
 859   uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
 860   uint64_t I = sign | (exp << 52) | mantissa;
 861   return bit_cast<double>(I);
 862 }
 863
 864 // Truncate to new width.
 865 APInt APInt::trunc(unsigned width) const {
 866   assert(width < BitWidth && "Invalid APInt Truncate request");
 867   assert(width && "Can't truncate to 0 bits");
 868
 869   if (width <= APINT_BITS_PER_WORD)
 870     return APInt(width, getRawData()[0]);
 871
 872   APInt Result(getMemory(getNumWords(width)), width);
 873
 874   // Copy full words.
 875   unsigned i;
 876   for (i = 0; i != width / APINT_BITS_PER_WORD; i++)
 877     Result.U.pVal[i] = U.pVal[i];
 878
 879   // Truncate and copy any partial word.
 880   unsigned bits = (0 - width) % APINT_BITS_PER_WORD;
 881   if (bits != 0)
 882     Result.U.pVal[i] = U.pVal[i] << bits >> bits;
 883
 884   return Result;
 885 }
 886
 887 // Truncate to new width with unsigned saturation.
 888 APInt APInt::truncUSat(unsigned width) const {
 889   assert(width < BitWidth && "Invalid APInt Truncate request");
 890   assert(width && "Can't truncate to 0 bits");
 891
 892   // Can we just losslessly truncate it?
 893   if (isIntN(width))
 894     return trunc(width);
 895   // If not, then just return the new limit.
 896   return APInt::getMaxValue(width);
 897 }
 898
 899 // Truncate to new width with signed saturation.
 900 APInt APInt::truncSSat(unsigned width) const {
 901   assert(width < BitWidth && "Invalid APInt Truncate request");
 902   assert(width && "Can't truncate to 0 bits");
 903
 904   // Can we just losslessly truncate it?
 905   if (isSignedIntN(width))
 906     return trunc(width);
 907   // If not, then just return the new limits.
 908   return isNegative() ? APInt::getSignedMinValue(width)
 909                       : APInt::getSignedMaxValue(width);
 910 }
 911
 912 // Sign extend to a new width.
 913 APInt APInt::sext(unsigned Width) const {
 914   assert(Width > BitWidth && "Invalid APInt SignExtend request");
 915
 916   if (Width <= APINT_BITS_PER_WORD)
 917     return APInt(Width, SignExtend64(U.VAL, BitWidth));
 918
 919   APInt Result(getMemory(getNumWords(Width)), Width);
 920
 921   // Copy words.
 922   std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
 923
 924   // Sign extend the last word since there may be unused bits in the input.
 925   Result.U.pVal[getNumWords() - 1] =
 926       SignExtend64(Result.U.pVal[getNumWords() - 1],
 927                    ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
 928
 929   // Fill with sign bits.
 930   std::memset(Result.U.pVal + getNumWords(), isNegative() ? -1 : 0,
 931               (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
 932   Result.clearUnusedBits();
 933   return Result;
 934 }
 935
 936 //  Zero extend to a new width.
 937 APInt APInt::zext(unsigned width) const {
 938   assert(width > BitWidth && "Invalid APInt ZeroExtend request");
 939
 940   if (width <= APINT_BITS_PER_WORD)
 941     return APInt(width, U.VAL);
 942
 943   APInt Result(getMemory(getNumWords(width)), width);
 944
 945   // Copy words.
 946   std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
 947
 948   // Zero remaining words.
 949   std::memset(Result.U.pVal + getNumWords(), 0,
 950               (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
 951
 952   return Result;
 953 }
 954
 955 APInt APInt::zextOrTrunc(unsigned width) const {
 956   if (BitWidth < width)
 957     return zext(width);
 958   if (BitWidth > width)
 959     return trunc(width);
 960   return *this;
 961 }
 962
 963 APInt APInt::sextOrTrunc(unsigned width) const {
 964   if (BitWidth < width)
 965     return sext(width);
 966   if (BitWidth > width)
 967     return trunc(width);
 968   return *this;
 969 }
 970
 971 APInt APInt::zextOrSelf(unsigned width) const {
 972   if (BitWidth < width)
 973     return zext(width);
 974   return *this;
 975 }
 976
 977 APInt APInt::sextOrSelf(unsigned width) const {
 978   if (BitWidth < width)
 979     return sext(width);
 980   return *this;
 981 }
 982
 983 /// Arithmetic right-shift this APInt by shiftAmt.
 984 /// Arithmetic right-shift function.
 985 void APInt::ashrInPlace(const APInt &shiftAmt) {
 986   ashrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
 987 }
 988
 989 /// Arithmetic right-shift this APInt by shiftAmt.
 990 /// Arithmetic right-shift function.
 991 void APInt::ashrSlowCase(unsigned ShiftAmt) {
 992   // Don't bother performing a no-op shift.
 993   if (!ShiftAmt)
 994     return;
 995
 996   // Save the original sign bit for later.
 997   bool Negative = isNegative();
 998
 999   // WordShift is the inter-part shift; BitShift is intra-part shift.
1000   unsigned WordShift = ShiftAmt / APINT_BITS_PER_WORD;
1001   unsigned BitShift = ShiftAmt % APINT_BITS_PER_WORD;
1002
1003   unsigned WordsToMove = getNumWords() - WordShift;
1004   if (WordsToMove != 0) {
1005     // Sign extend the last word to fill in the unused bits.
1006     U.pVal[getNumWords() - 1] = SignExtend64(
1007         U.pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
1008
1009     // Fastpath for moving by whole words.
1010     if (BitShift == 0) {
1011       std::memmove(U.pVal, U.pVal + WordShift, WordsToMove * APINT_WORD_SIZE);
1012     } else {
1013       // Move the words containing significant bits.
1014       for (unsigned i = 0; i != WordsToMove - 1; ++i)
1015         U.pVal[i] = (U.pVal[i + WordShift] >> BitShift) |
1016                     (U.pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift));
1017
1018       // Handle the last word which has no high bits to copy.
1019       U.pVal[WordsToMove - 1] = U.pVal[WordShift + WordsToMove - 1] >> BitShift;
1020       // Sign extend one more time.
1021       U.pVal[WordsToMove - 1] =
1022           SignExtend64(U.pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift);
1023     }
1024   }
1025
1026   // Fill in the remainder based on the original sign.
1027   std::memset(U.pVal + WordsToMove, Negative ? -1 : 0,
1028               WordShift * APINT_WORD_SIZE);
1029   clearUnusedBits();
1030 }
1031
1032 /// Logical right-shift this APInt by shiftAmt.
1033 /// Logical right-shift function.
1034 void APInt::lshrInPlace(const APInt &shiftAmt) {
1035   lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
1036 }
1037
1038 /// Logical right-shift this APInt by shiftAmt.
1039 /// Logical right-shift function.
1040 void APInt::lshrSlowCase(unsigned ShiftAmt) {
1041   tcShiftRight(U.pVal, getNumWords(), ShiftAmt);
1042 }
1043
1044 /// Left-shift this APInt by shiftAmt.
1045 /// Left-shift function.
1046 APInt &APInt::operator<<=(const APInt &shiftAmt) {
1047   // It's undefined behavior in C to shift by BitWidth or greater.
1048   *this <<= (unsigned)shiftAmt.getLimitedValue(BitWidth);
1049   return *this;
1050 }
1051
1052 void APInt::shlSlowCase(unsigned ShiftAmt) {
1053   tcShiftLeft(U.pVal, getNumWords(), ShiftAmt);
1054   clearUnusedBits();
1055 }
1056
1057 // Calculate the rotate amount modulo the bit width.
1058 static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) {
1059   unsigned rotBitWidth = rotateAmt.getBitWidth();
1060   APInt rot = rotateAmt;
1061   if (rotBitWidth < BitWidth) {
1062     // Extend the rotate APInt, so that the urem doesn't divide by 0.
1063     // e.g. APInt(1, 32) would give APInt(1, 0).
1064     rot = rotateAmt.zext(BitWidth);
1065   }
1066   rot = rot.urem(APInt(rot.getBitWidth(), BitWidth));
1067   return rot.getLimitedValue(BitWidth);
1068 }
1069
1070 APInt APInt::rotl(const APInt &rotateAmt) const {
1071   return rotl(rotateModulo(BitWidth, rotateAmt));
1072 }
1073
1074 APInt APInt::rotl(unsigned rotateAmt) const {
1075   rotateAmt %= BitWidth;
1076   if (rotateAmt == 0)
1077     return *this;
1078   return shl(rotateAmt) | lshr(BitWidth - rotateAmt);
1079 }
1080
1081 APInt APInt::rotr(const APInt &rotateAmt) const {
1082   return rotr(rotateModulo(BitWidth, rotateAmt));
1083 }
1084
1085 APInt APInt::rotr(unsigned rotateAmt) const {
1086   rotateAmt %= BitWidth;
1087   if (rotateAmt == 0)
1088     return *this;
1089   return lshr(rotateAmt) | shl(BitWidth - rotateAmt);
1090 }
1091
1092 // Square Root - this method computes and returns the square root of "this".
1093 // Three mechanisms are used for computation. For small values (<= 5 bits),
1094 // a table lookup is done. This gets some performance for common cases. For
1095 // values using less than 52 bits, the value is converted to double and then
1096 // the libc sqrt function is called. The result is rounded and then converted
1097 // back to a uint64_t which is then used to construct the result. Finally,
1098 // the Babylonian method for computing square roots is used.
1099 APInt APInt::sqrt() const {
1100
1101   // Determine the magnitude of the value.
1102   unsigned magnitude = getActiveBits();
1103
1104   // Use a fast table for some small values. This also gets rid of some
1105   // rounding errors in libc sqrt for small values.
1106   if (magnitude <= 5) {
1107     static const uint8_t results[32] = {
1108       /*     0 */ 0,
1109       /*  1- 2 */ 1, 1,
1110       /*  3- 6 */ 2, 2, 2, 2,
1111       /*  7-12 */ 3, 3, 3, 3, 3, 3,
1112       /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1113       /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1114       /*    31 */ 6
1115     };
1116     return APInt(BitWidth, results[ (isSingleWord() ? U.VAL : U.pVal[0]) ]);
1117   }
1118
1119   // If the magnitude of the value fits in less than 52 bits (the precision of
1120   // an IEEE double precision floating point value), then we can use the
1121   // libc sqrt function which will probably use a hardware sqrt computation.
1122   // This should be faster than the algorithm below.
1123   if (magnitude < 52) {
1124     return APInt(BitWidth,
1125                  uint64_t(::round(::sqrt(double(isSingleWord() ? U.VAL
1126                                                                : U.pVal[0])))));
1127   }
1128
1129   // Okay, all the short cuts are exhausted. We must compute it. The following
1130   // is a classical Babylonian method for computing the square root. This code
1131   // was adapted to APInt from a wikipedia article on such computations.
1132   // See http://www.wikipedia.org/ and go to the page named
1133   // Calculate_an_integer_square_root.
1134   unsigned nbits = BitWidth, i = 4;
1135   APInt testy(BitWidth, 16);
1136   APInt x_old(BitWidth, 1);
1137   APInt x_new(BitWidth, 0);
1138   APInt two(BitWidth, 2);
1139
1140   // Select a good starting value using binary logarithms.
1141   for (;; i += 2, testy = testy.shl(2))
1142     if (i >= nbits || this->ule(testy)) {
1143       x_old = x_old.shl(i / 2);
1144       break;
1145     }
1146
1147   // Use the Babylonian method to arrive at the integer square root:
1148   for (;;) {
1149     x_new = (this->udiv(x_old) + x_old).udiv(two);
1150     if (x_old.ule(x_new))
1151       break;
1152     x_old = x_new;
1153   }
1154
1155   // Make sure we return the closest approximation
1156   // NOTE: The rounding calculation below is correct. It will produce an
1157   // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1158   // determined to be a rounding issue with pari/gp as it begins to use a
1159   // floating point representation after 192 bits. There are no discrepancies
1160   // between this algorithm and pari/gp for bit widths < 192 bits.
1161   APInt square(x_old * x_old);
1162   APInt nextSquare((x_old + 1) * (x_old +1));
1163   if (this->ult(square))
1164     return x_old;
1165   assert(this->ule(nextSquare) && "Error in APInt::sqrt computation");
1166   APInt midpoint((nextSquare - square).udiv(two));
1167   APInt offset(*this - square);
1168   if (offset.ult(midpoint))
1169     return x_old;
1170   return x_old + 1;
1171 }
1172
1173 /// Computes the multiplicative inverse of this APInt for a given modulo. The
1174 /// iterative extended Euclidean algorithm is used to solve for this value,
1175 /// however we simplify it to speed up calculating only the inverse, and take
1176 /// advantage of div+rem calculations. We also use some tricks to avoid copying
1177 /// (potentially large) APInts around.
1178 /// WARNING: a value of '0' may be returned,
1179 ///          signifying that no multiplicative inverse exists!
1180 APInt APInt::multiplicativeInverse(const APInt& modulo) const {
1181   assert(ult(modulo) && "This APInt must be smaller than the modulo");
1182
1183   // Using the properties listed at the following web page (accessed 06/21/08):
1184   //   http://www.numbertheory.org/php/euclid.html
1185   // (especially the properties numbered 3, 4 and 9) it can be proved that
1186   // BitWidth bits suffice for all the computations in the algorithm implemented
1187   // below. More precisely, this number of bits suffice if the multiplicative
1188   // inverse exists, but may not suffice for the general extended Euclidean
1189   // algorithm.
1190
1191   APInt r[2] = { modulo, *this };
1192   APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
1193   APInt q(BitWidth, 0);
1194
1195   unsigned i;
1196   for (i = 0; r[i^1] != 0; i ^= 1) {
1197     // An overview of the math without the confusing bit-flipping:
1198     // q = r[i-2] / r[i-1]
1199     // r[i] = r[i-2] % r[i-1]
1200     // t[i] = t[i-2] - t[i-1] * q
1201     udivrem(r[i], r[i^1], q, r[i]);
1202     t[i] -= t[i^1] * q;
1203   }
1204
1205   // If this APInt and the modulo are not coprime, there is no multiplicative
1206   // inverse, so return 0. We check this by looking at the next-to-last
1207   // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1208   // algorithm.
1209   if (r[i] != 1)
1210     return APInt(BitWidth, 0);
1211
1212   // The next-to-last t is the multiplicative inverse.  However, we are
1213   // interested in a positive inverse. Calculate a positive one from a negative
1214   // one if necessary. A simple addition of the modulo suffices because
1215   // abs(t[i]) is known to be less than *this/2 (see the link above).
1216   if (t[i].isNegative())
1217     t[i] += modulo;
1218
1219   return std::move(t[i]);
1220 }
1221
1222 /// Calculate the magic numbers required to implement a signed integer division
1223 /// by a constant as a sequence of multiplies, adds and shifts.  Requires that
1224 /// the divisor not be 0, 1, or -1.  Taken from "Hacker's Delight", Henry S.
1225 /// Warren, Jr., chapter 10.
1226 APInt::ms APInt::magic() const {
1227   const APInt& d = *this;
1228   unsigned p;
1229   APInt ad, anc, delta, q1, r1, q2, r2, t;
1230   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1231   struct ms mag;
1232
1233   ad = d.abs();
1234   t = signedMin + (d.lshr(d.getBitWidth() - 1));
1235   anc = t - 1 - t.urem(ad);   // absolute value of nc
1236   p = d.getBitWidth() - 1;    // initialize p
1237   q1 = signedMin.udiv(anc);   // initialize q1 = 2p/abs(nc)
1238   r1 = signedMin - q1*anc;    // initialize r1 = rem(2p,abs(nc))
1239   q2 = signedMin.udiv(ad);    // initialize q2 = 2p/abs(d)
1240   r2 = signedMin - q2*ad;     // initialize r2 = rem(2p,abs(d))
1241   do {
1242     p = p + 1;
1243     q1 = q1<<1;          // update q1 = 2p/abs(nc)
1244     r1 = r1<<1;          // update r1 = rem(2p/abs(nc))
1245     if (r1.uge(anc)) {  // must be unsigned comparison
1246       q1 = q1 + 1;
1247       r1 = r1 - anc;
1248     }
1249     q2 = q2<<1;          // update q2 = 2p/abs(d)
1250     r2 = r2<<1;          // update r2 = rem(2p/abs(d))
1251     if (r2.uge(ad)) {   // must be unsigned comparison
1252       q2 = q2 + 1;
1253       r2 = r2 - ad;
1254     }
1255     delta = ad - r2;
1256   } while (q1.ult(delta) || (q1 == delta && r1 == 0));
1257
1258   mag.m = q2 + 1;
1259   if (d.isNegative()) mag.m = -mag.m;   // resulting magic number
1260   mag.s = p - d.getBitWidth();          // resulting shift
1261   return mag;
1262 }
1263
1264 /// Calculate the magic numbers required to implement an unsigned integer
1265 /// division by a constant as a sequence of multiplies, adds and shifts.
1266 /// Requires that the divisor not be 0.  Taken from "Hacker's Delight", Henry
1267 /// S. Warren, Jr., chapter 10.
1268 /// LeadingZeros can be used to simplify the calculation if the upper bits
1269 /// of the divided value are known zero.
1270 APInt::mu APInt::magicu(unsigned LeadingZeros) const {
1271   const APInt& d = *this;
1272   unsigned p;
1273   APInt nc, delta, q1, r1, q2, r2;
1274   struct mu magu;
1275   magu.a = 0;               // initialize "add" indicator
1276   APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros);
1277   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1278   APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
1279
1280   nc = allOnes - (allOnes - d).urem(d);
1281   p = d.getBitWidth() - 1;  // initialize p
1282   q1 = signedMin.udiv(nc);  // initialize q1 = 2p/nc
1283   r1 = signedMin - q1*nc;   // initialize r1 = rem(2p,nc)
1284   q2 = signedMax.udiv(d);   // initialize q2 = (2p-1)/d
1285   r2 = signedMax - q2*d;    // initialize r2 = rem((2p-1),d)
1286   do {
1287     p = p + 1;
1288     if (r1.uge(nc - r1)) {
1289       q1 = q1 + q1 + 1;  // update q1
1290       r1 = r1 + r1 - nc; // update r1
1291     }
1292     else {
1293       q1 = q1+q1; // update q1
1294       r1 = r1+r1; // update r1
1295     }
1296     if ((r2 + 1).uge(d - r2)) {
1297       if (q2.uge(signedMax)) magu.a = 1;
1298       q2 = q2+q2 + 1;     // update q2
1299       r2 = r2+r2 + 1 - d; // update r2
1300     }
1301     else {
1302       if (q2.uge(signedMin)) magu.a = 1;
1303       q2 = q2+q2;     // update q2
1304       r2 = r2+r2 + 1; // update r2
1305     }
1306     delta = d - 1 - r2;
1307   } while (p < d.getBitWidth()*2 &&
1308            (q1.ult(delta) || (q1 == delta && r1 == 0)));
1309   magu.m = q2 + 1; // resulting magic number
1310   magu.s = p - d.getBitWidth();  // resulting shift
1311   return magu;
1312 }
1313
1314 /// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1315 /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1316 /// variables here have the same names as in the algorithm. Comments explain
1317 /// the algorithm and any deviation from it.
1318 static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
1319                      unsigned m, unsigned n) {
1320   assert(u && "Must provide dividend");
1321   assert(v && "Must provide divisor");
1322   assert(q && "Must provide quotient");
1323   assert(u != v && u != q && v != q && "Must use different memory");
1324   assert(n>1 && "n must be > 1");
1325
1326   // b denotes the base of the number system. In our case b is 2^32.
1327   const uint64_t b = uint64_t(1) << 32;
1328
1329 // The DEBUG macros here tend to be spam in the debug output if you're not
1330 // debugging this code. Disable them unless KNUTH_DEBUG is defined.
1331 #ifdef KNUTH_DEBUG
1332 #define DEBUG_KNUTH(X) LLVM_DEBUG(X)
1333 #else
1334 #define DEBUG_KNUTH(X) do {} while(false)
1335 #endif
1336
1337   DEBUG_KNUTH(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
1338   DEBUG_KNUTH(dbgs() << "KnuthDiv: original:");
1339   DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1340   DEBUG_KNUTH(dbgs() << " by");
1341   DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1342   DEBUG_KNUTH(dbgs() << '\n');
1343   // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1344   // u and v by d. Note that we have taken Knuth's advice here to use a power
1345   // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1346   // 2 allows us to shift instead of multiply and it is easy to determine the
1347   // shift amount from the leading zeros.  We are basically normalizing the u
1348   // and v so that its high bits are shifted to the top of v's range without
1349   // overflow. Note that this can require an extra word in u so that u must
1350   // be of length m+n+1.
1351   unsigned shift = countLeadingZeros(v[n-1]);
1352   uint32_t v_carry = 0;
1353   uint32_t u_carry = 0;
1354   if (shift) {
1355     for (unsigned i = 0; i < m+n; ++i) {
1356       uint32_t u_tmp = u[i] >> (32 - shift);
1357       u[i] = (u[i] << shift) | u_carry;
1358       u_carry = u_tmp;
1359     }
1360     for (unsigned i = 0; i < n; ++i) {
1361       uint32_t v_tmp = v[i] >> (32 - shift);
1362       v[i] = (v[i] << shift) | v_carry;
1363       v_carry = v_tmp;
1364     }
1365   }
1366   u[m+n] = u_carry;
1367
1368   DEBUG_KNUTH(dbgs() << "KnuthDiv:   normal:");
1369   DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1370   DEBUG_KNUTH(dbgs() << " by");
1371   DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1372   DEBUG_KNUTH(dbgs() << '\n');
1373
1374   // D2. [Initialize j.]  Set j to m. This is the loop counter over the places.
1375   int j = m;
1376   do {
1377     DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
1378     // D3. [Calculate q'.].
1379     //     Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1380     //     Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1381     // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1382     // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test
1383     // on v[n-2] determines at high speed most of the cases in which the trial
1384     // value qp is one too large, and it eliminates all cases where qp is two
1385     // too large.
1386     uint64_t dividend = Make_64(u[j+n], u[j+n-1]);
1387     DEBUG_KNUTH(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
1388     uint64_t qp = dividend / v[n-1];
1389     uint64_t rp = dividend % v[n-1];
1390     if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
1391       qp--;
1392       rp += v[n-1];
1393       if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
1394         qp--;
1395     }
1396     DEBUG_KNUTH(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
1397
1398     // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1399     // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1400     // consists of a simple multiplication by a one-place number, combined with
1401     // a subtraction.
1402     // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1403     // this step is actually negative, (u[j+n]...u[j]) should be left as the
1404     // true value plus b**(n+1), namely as the b's complement of
1405     // the true value, and a "borrow" to the left should be remembered.
1406     int64_t borrow = 0;
1407     for (unsigned i = 0; i < n; ++i) {
1408       uint64_t p = uint64_t(qp) * uint64_t(v[i]);
1409       int64_t subres = int64_t(u[j+i]) - borrow - Lo_32(p);
1410       u[j+i] = Lo_32(subres);
1411       borrow = Hi_32(p) - Hi_32(subres);
1412       DEBUG_KNUTH(dbgs() << "KnuthDiv: u[j+i] = " << u[j + i]
1413                         << ", borrow = " << borrow << '\n');
1414     }
1415     bool isNeg = u[j+n] < borrow;
1416     u[j+n] -= Lo_32(borrow);
1417
1418     DEBUG_KNUTH(dbgs() << "KnuthDiv: after subtraction:");
1419     DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1420     DEBUG_KNUTH(dbgs() << '\n');
1421
1422     // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1423     // negative, go to step D6; otherwise go on to step D7.
1424     q[j] = Lo_32(qp);
1425     if (isNeg) {
1426       // D6. [Add back]. The probability that this step is necessary is very
1427       // small, on the order of only 2/b. Make sure that test data accounts for
1428       // this possibility. Decrease q[j] by 1
1429       q[j]--;
1430       // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1431       // A carry will occur to the left of u[j+n], and it should be ignored
1432       // since it cancels with the borrow that occurred in D4.
1433       bool carry = false;
1434       for (unsigned i = 0; i < n; i++) {
1435         uint32_t limit = std::min(u[j+i],v[i]);
1436         u[j+i] += v[i] + carry;
1437         carry = u[j+i] < limit || (carry && u[j+i] == limit);
1438       }
1439       u[j+n] += carry;
1440     }
1441     DEBUG_KNUTH(dbgs() << "KnuthDiv: after correction:");
1442     DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1443     DEBUG_KNUTH(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
1444
1445     // D7. [Loop on j.]  Decrease j by one. Now if j >= 0, go back to D3.
1446   } while (--j >= 0);
1447
1448   DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient:");
1449   DEBUG_KNUTH(for (int i = m; i >= 0; i--) dbgs() << " " << q[i]);
1450   DEBUG_KNUTH(dbgs() << '\n');
1451
1452   // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1453   // remainder may be obtained by dividing u[...] by d. If r is non-null we
1454   // compute the remainder (urem uses this).
1455   if (r) {
1456     // The value d is expressed by the "shift" value above since we avoided
1457     // multiplication by d by using a shift left. So, all we have to do is
1458     // shift right here.
1459     if (shift) {
1460       uint32_t carry = 0;
1461       DEBUG_KNUTH(dbgs() << "KnuthDiv: remainder:");
1462       for (int i = n-1; i >= 0; i--) {
1463         r[i] = (u[i] >> shift) | carry;
1464         carry = u[i] << (32 - shift);
1465         DEBUG_KNUTH(dbgs() << " " << r[i]);
1466       }
1467     } else {
1468       for (int i = n-1; i >= 0; i--) {
1469         r[i] = u[i];
1470         DEBUG_KNUTH(dbgs() << " " << r[i]);
1471       }
1472     }
1473     DEBUG_KNUTH(dbgs() << '\n');
1474   }
1475   DEBUG_KNUTH(dbgs() << '\n');
1476 }
1477
1478 void APInt::divide(const WordType *LHS, unsigned lhsWords, const WordType *RHS,
1479                    unsigned rhsWords, WordType *Quotient, WordType *Remainder) {
1480   assert(lhsWords >= rhsWords && "Fractional result");
1481
1482   // First, compose the values into an array of 32-bit words instead of
1483   // 64-bit words. This is a necessity of both the "short division" algorithm
1484   // and the Knuth "classical algorithm" which requires there to be native
1485   // operations for +, -, and * on an m bit value with an m*2 bit result. We
1486   // can't use 64-bit operands here because we don't have native results of
1487   // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1488   // work on large-endian machines.
1489   unsigned n = rhsWords * 2;
1490   unsigned m = (lhsWords * 2) - n;
1491
1492   // Allocate space for the temporary values we need either on the stack, if
1493   // it will fit, or on the heap if it won't.
1494   uint32_t SPACE[128];
1495   uint32_t *U = nullptr;
1496   uint32_t *V = nullptr;
1497   uint32_t *Q = nullptr;
1498   uint32_t *R = nullptr;
1499   if ((Remainder?4:3)*n+2*m+1 <= 128) {
1500     U = &SPACE[0];
1501     V = &SPACE[m+n+1];
1502     Q = &SPACE[(m+n+1) + n];
1503     if (Remainder)
1504       R = &SPACE[(m+n+1) + n + (m+n)];
1505   } else {
1506     U = new uint32_t[m + n + 1];
1507     V = new uint32_t[n];
1508     Q = new uint32_t[m+n];
1509     if (Remainder)
1510       R = new uint32_t[n];
1511   }
1512
1513   // Initialize the dividend
1514   memset(U, 0, (m+n+1)*sizeof(uint32_t));
1515   for (unsigned i = 0; i < lhsWords; ++i) {
1516     uint64_t tmp = LHS[i];
1517     U[i * 2] = Lo_32(tmp);
1518     U[i * 2 + 1] = Hi_32(tmp);
1519   }
1520   U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
1521
1522   // Initialize the divisor
1523   memset(V, 0, (n)*sizeof(uint32_t));
1524   for (unsigned i = 0; i < rhsWords; ++i) {
1525     uint64_t tmp = RHS[i];
1526     V[i * 2] = Lo_32(tmp);
1527     V[i * 2 + 1] = Hi_32(tmp);
1528   }
1529
1530   // initialize the quotient and remainder
1531   memset(Q, 0, (m+n) * sizeof(uint32_t));
1532   if (Remainder)
1533     memset(R, 0, n * sizeof(uint32_t));
1534
1535   // Now, adjust m and n for the Knuth division. n is the number of words in
1536   // the divisor. m is the number of words by which the dividend exceeds the
1537   // divisor (i.e. m+n is the length of the dividend). These sizes must not
1538   // contain any zero words or the Knuth algorithm fails.
1539   for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
1540     n--;
1541     m++;
1542   }
1543   for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
1544     m--;
1545
1546   // If we're left with only a single word for the divisor, Knuth doesn't work
1547   // so we implement the short division algorithm here. This is much simpler
1548   // and faster because we are certain that we can divide a 64-bit quantity
1549   // by a 32-bit quantity at hardware speed and short division is simply a
1550   // series of such operations. This is just like doing short division but we
1551   // are using base 2^32 instead of base 10.
1552   assert(n != 0 && "Divide by zero?");
1553   if (n == 1) {
1554     uint32_t divisor = V[0];
1555     uint32_t remainder = 0;
1556     for (int i = m; i >= 0; i--) {
1557       uint64_t partial_dividend = Make_64(remainder, U[i]);
1558       if (partial_dividend == 0) {
1559         Q[i] = 0;
1560         remainder = 0;
1561       } else if (partial_dividend < divisor) {
1562         Q[i] = 0;
1563         remainder = Lo_32(partial_dividend);
1564       } else if (partial_dividend == divisor) {
1565         Q[i] = 1;
1566         remainder = 0;
1567       } else {
1568         Q[i] = Lo_32(partial_dividend / divisor);
1569         remainder = Lo_32(partial_dividend - (Q[i] * divisor));
1570       }
1571     }
1572     if (R)
1573       R[0] = remainder;
1574   } else {
1575     // Now we're ready to invoke the Knuth classical divide algorithm. In this
1576     // case n > 1.
1577     KnuthDiv(U, V, Q, R, m, n);
1578   }
1579
1580   // If the caller wants the quotient
1581   if (Quotient) {
1582     for (unsigned i = 0; i < lhsWords; ++i)
1583       Quotient[i] = Make_64(Q[i*2+1], Q[i*2]);
1584   }
1585
1586   // If the caller wants the remainder
1587   if (Remainder) {
1588     for (unsigned i = 0; i < rhsWords; ++i)
1589       Remainder[i] = Make_64(R[i*2+1], R[i*2]);
1590   }
1591
1592   // Clean up the memory we allocated.
1593   if (U != &SPACE[0]) {
1594     delete [] U;
1595     delete [] V;
1596     delete [] Q;
1597     delete [] R;
1598   }
1599 }
1600
1601 APInt APInt::udiv(const APInt &RHS) const {
1602   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1603
1604   // First, deal with the easy case
1605   if (isSingleWord()) {
1606     assert(RHS.U.VAL != 0 && "Divide by zero?");
1607     return APInt(BitWidth, U.VAL / RHS.U.VAL);
1608   }
1609
1610   // Get some facts about the LHS and RHS number of bits and words
1611   unsigned lhsWords = getNumWords(getActiveBits());
1612   unsigned rhsBits  = RHS.getActiveBits();
1613   unsigned rhsWords = getNumWords(rhsBits);
1614   assert(rhsWords && "Divided by zero???");
1615
1616   // Deal with some degenerate cases
1617   if (!lhsWords)
1618     // 0 / X ===> 0
1619     return APInt(BitWidth, 0);
1620   if (rhsBits == 1)
1621     // X / 1 ===> X
1622     return *this;
1623   if (lhsWords < rhsWords || this->ult(RHS))
1624     // X / Y ===> 0, iff X < Y
1625     return APInt(BitWidth, 0);
1626   if (*this == RHS)
1627     // X / X ===> 1
1628     return APInt(BitWidth, 1);
1629   if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1630     // All high words are zero, just use native divide
1631     return APInt(BitWidth, this->U.pVal[0] / RHS.U.pVal[0]);
1632
1633   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1634   APInt Quotient(BitWidth, 0); // to hold result.
1635   divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, nullptr);
1636   return Quotient;
1637 }
1638
1639 APInt APInt::udiv(uint64_t RHS) const {
1640   assert(RHS != 0 && "Divide by zero?");
1641
1642   // First, deal with the easy case
1643   if (isSingleWord())
1644     return APInt(BitWidth, U.VAL / RHS);
1645
1646   // Get some facts about the LHS words.
1647   unsigned lhsWords = getNumWords(getActiveBits());
1648
1649   // Deal with some degenerate cases
1650   if (!lhsWords)
1651     // 0 / X ===> 0
1652     return APInt(BitWidth, 0);
1653   if (RHS == 1)
1654     // X / 1 ===> X
1655     return *this;
1656   if (this->ult(RHS))
1657     // X / Y ===> 0, iff X < Y
1658     return APInt(BitWidth, 0);
1659   if (*this == RHS)
1660     // X / X ===> 1
1661     return APInt(BitWidth, 1);
1662   if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1663     // All high words are zero, just use native divide
1664     return APInt(BitWidth, this->U.pVal[0] / RHS);
1665
1666   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1667   APInt Quotient(BitWidth, 0); // to hold result.
1668   divide(U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, nullptr);
1669   return Quotient;
1670 }
1671
1672 APInt APInt::sdiv(const APInt &RHS) const {
1673   if (isNegative()) {
1674     if (RHS.isNegative())
1675       return (-(*this)).udiv(-RHS);
1676     return -((-(*this)).udiv(RHS));
1677   }
1678   if (RHS.isNegative())
1679     return -(this->udiv(-RHS));
1680   return this->udiv(RHS);
1681 }
1682
1683 APInt APInt::sdiv(int64_t RHS) const {
1684   if (isNegative()) {
1685     if (RHS < 0)
1686       return (-(*this)).udiv(-RHS);
1687     return -((-(*this)).udiv(RHS));
1688   }
1689   if (RHS < 0)
1690     return -(this->udiv(-RHS));
1691   return this->udiv(RHS);
1692 }
1693
1694 APInt APInt::urem(const APInt &RHS) const {
1695   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1696   if (isSingleWord()) {
1697     assert(RHS.U.VAL != 0 && "Remainder by zero?");
1698     return APInt(BitWidth, U.VAL % RHS.U.VAL);
1699   }
1700
1701   // Get some facts about the LHS
1702   unsigned lhsWords = getNumWords(getActiveBits());
1703
1704   // Get some facts about the RHS
1705   unsigned rhsBits = RHS.getActiveBits();
1706   unsigned rhsWords = getNumWords(rhsBits);
1707   assert(rhsWords && "Performing remainder operation by zero ???");
1708
1709   // Check the degenerate cases
1710   if (lhsWords == 0)
1711     // 0 % Y ===> 0
1712     return APInt(BitWidth, 0);
1713   if (rhsBits == 1)
1714     // X % 1 ===> 0
1715     return APInt(BitWidth, 0);
1716   if (lhsWords < rhsWords || this->ult(RHS))
1717     // X % Y ===> X, iff X < Y
1718     return *this;
1719   if (*this == RHS)
1720     // X % X == 0;
1721     return APInt(BitWidth, 0);
1722   if (lhsWords == 1)
1723     // All high words are zero, just use native remainder
1724     return APInt(BitWidth, U.pVal[0] % RHS.U.pVal[0]);
1725
1726   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1727   APInt Remainder(BitWidth, 0);
1728   divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, nullptr, Remainder.U.pVal);
1729   return Remainder;
1730 }
1731
1732 uint64_t APInt::urem(uint64_t RHS) const {
1733   assert(RHS != 0 && "Remainder by zero?");
1734
1735   if (isSingleWord())
1736     return U.VAL % RHS;
1737
1738   // Get some facts about the LHS
1739   unsigned lhsWords = getNumWords(getActiveBits());
1740
1741   // Check the degenerate cases
1742   if (lhsWords == 0)
1743     // 0 % Y ===> 0
1744     return 0;
1745   if (RHS == 1)
1746     // X % 1 ===> 0
1747     return 0;
1748   if (this->ult(RHS))
1749     // X % Y ===> X, iff X < Y
1750     return getZExtValue();
1751   if (*this == RHS)
1752     // X % X == 0;
1753     return 0;
1754   if (lhsWords == 1)
1755     // All high words are zero, just use native remainder
1756     return U.pVal[0] % RHS;
1757
1758   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1759   uint64_t Remainder;
1760   divide(U.pVal, lhsWords, &RHS, 1, nullptr, &Remainder);
1761   return Remainder;
1762 }
1763
1764 APInt APInt::srem(const APInt &RHS) const {
1765   if (isNegative()) {
1766     if (RHS.isNegative())
1767       return -((-(*this)).urem(-RHS));
1768     return -((-(*this)).urem(RHS));
1769   }
1770   if (RHS.isNegative())
1771     return this->urem(-RHS);
1772   return this->urem(RHS);
1773 }
1774
1775 int64_t APInt::srem(int64_t RHS) const {
1776   if (isNegative()) {
1777     if (RHS < 0)
1778       return -((-(*this)).urem(-RHS));
1779     return -((-(*this)).urem(RHS));
1780   }
1781   if (RHS < 0)
1782     return this->urem(-RHS);
1783   return this->urem(RHS);
1784 }
1785
1786 void APInt::udivrem(const APInt &LHS, const APInt &RHS,
1787                     APInt &Quotient, APInt &Remainder) {
1788   assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same");
1789   unsigned BitWidth = LHS.BitWidth;
1790
1791   // First, deal with the easy case
1792   if (LHS.isSingleWord()) {
1793     assert(RHS.U.VAL != 0 && "Divide by zero?");
1794     uint64_t QuotVal = LHS.U.VAL / RHS.U.VAL;
1795     uint64_t RemVal = LHS.U.VAL % RHS.U.VAL;
1796     Quotient = APInt(BitWidth, QuotVal);
1797     Remainder = APInt(BitWidth, RemVal);
1798     return;
1799   }
1800
1801   // Get some size facts about the dividend and divisor
1802   unsigned lhsWords = getNumWords(LHS.getActiveBits());
1803   unsigned rhsBits  = RHS.getActiveBits();
1804   unsigned rhsWords = getNumWords(rhsBits);
1805   assert(rhsWords && "Performing divrem operation by zero ???");
1806
1807   // Check the degenerate cases
1808   if (lhsWords == 0) {
1809     Quotient = APInt(BitWidth, 0);    // 0 / Y ===> 0
1810     Remainder = APInt(BitWidth, 0);   // 0 % Y ===> 0
1811     return;
1812   }
1813
1814   if (rhsBits == 1) {
1815     Quotient = LHS;                   // X / 1 ===> X
1816     Remainder = APInt(BitWidth, 0);   // X % 1 ===> 0
1817   }
1818
1819   if (lhsWords < rhsWords || LHS.ult(RHS)) {
1820     Remainder = LHS;                  // X % Y ===> X, iff X < Y
1821     Quotient = APInt(BitWidth, 0);    // X / Y ===> 0, iff X < Y
1822     return;
1823   }
1824
1825   if (LHS == RHS) {
1826     Quotient  = APInt(BitWidth, 1);   // X / X ===> 1
1827     Remainder = APInt(BitWidth, 0);   // X % X ===> 0;
1828     return;
1829   }
1830
1831   // Make sure there is enough space to hold the results.
1832   // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1833   // change the size. This is necessary if Quotient or Remainder is aliased
1834   // with LHS or RHS.
1835   Quotient.reallocate(BitWidth);
1836   Remainder.reallocate(BitWidth);
1837
1838   if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1839     // There is only one word to consider so use the native versions.
1840     uint64_t lhsValue = LHS.U.pVal[0];
1841     uint64_t rhsValue = RHS.U.pVal[0];
1842     Quotient = lhsValue / rhsValue;
1843     Remainder = lhsValue % rhsValue;
1844     return;
1845   }
1846
1847   // Okay, lets do it the long way
1848   divide(LHS.U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal,
1849          Remainder.U.pVal);
1850   // Clear the rest of the Quotient and Remainder.
1851   std::memset(Quotient.U.pVal + lhsWords, 0,
1852               (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1853   std::memset(Remainder.U.pVal + rhsWords, 0,
1854               (getNumWords(BitWidth) - rhsWords) * APINT_WORD_SIZE);
1855 }
1856
1857 void APInt::udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient,
1858                     uint64_t &Remainder) {
1859   assert(RHS != 0 && "Divide by zero?");
1860   unsigned BitWidth = LHS.BitWidth;
1861
1862   // First, deal with the easy case
1863   if (LHS.isSingleWord()) {
1864     uint64_t QuotVal = LHS.U.VAL / RHS;
1865     Remainder = LHS.U.VAL % RHS;
1866     Quotient = APInt(BitWidth, QuotVal);
1867     return;
1868   }
1869
1870   // Get some size facts about the dividend and divisor
1871   unsigned lhsWords = getNumWords(LHS.getActiveBits());
1872
1873   // Check the degenerate cases
1874   if (lhsWords == 0) {
1875     Quotient = APInt(BitWidth, 0);    // 0 / Y ===> 0
1876     Remainder = 0;                    // 0 % Y ===> 0
1877     return;
1878   }
1879
1880   if (RHS == 1) {
1881     Quotient = LHS;                   // X / 1 ===> X
1882     Remainder = 0;                    // X % 1 ===> 0
1883     return;
1884   }
1885
1886   if (LHS.ult(RHS)) {
1887     Remainder = LHS.getZExtValue();   // X % Y ===> X, iff X < Y
1888     Quotient = APInt(BitWidth, 0);    // X / Y ===> 0, iff X < Y
1889     return;
1890   }
1891
1892   if (LHS == RHS) {
1893     Quotient  = APInt(BitWidth, 1);   // X / X ===> 1
1894     Remainder = 0;                    // X % X ===> 0;
1895     return;
1896   }
1897
1898   // Make sure there is enough space to hold the results.
1899   // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1900   // change the size. This is necessary if Quotient is aliased with LHS.
1901   Quotient.reallocate(BitWidth);
1902
1903   if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1904     // There is only one word to consider so use the native versions.
1905     uint64_t lhsValue = LHS.U.pVal[0];
1906     Quotient = lhsValue / RHS;
1907     Remainder = lhsValue % RHS;
1908     return;
1909   }
1910
1911   // Okay, lets do it the long way
1912   divide(LHS.U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, &Remainder);
1913   // Clear the rest of the Quotient.
1914   std::memset(Quotient.U.pVal + lhsWords, 0,
1915               (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1916 }
1917
1918 void APInt::sdivrem(const APInt &LHS, const APInt &RHS,
1919                     APInt &Quotient, APInt &Remainder) {
1920   if (LHS.isNegative()) {
1921     if (RHS.isNegative())
1922       APInt::udivrem(-LHS, -RHS, Quotient, Remainder);
1923     else {
1924       APInt::udivrem(-LHS, RHS, Quotient, Remainder);
1925       Quotient.negate();
1926     }
1927     Remainder.negate();
1928   } else if (RHS.isNegative()) {
1929     APInt::udivrem(LHS, -RHS, Quotient, Remainder);
1930     Quotient.negate();
1931   } else {
1932     APInt::udivrem(LHS, RHS, Quotient, Remainder);
1933   }
1934 }
1935
1936 void APInt::sdivrem(const APInt &LHS, int64_t RHS,
1937                     APInt &Quotient, int64_t &Remainder) {
1938   uint64_t R = Remainder;
1939   if (LHS.isNegative()) {
1940     if (RHS < 0)
1941       APInt::udivrem(-LHS, -RHS, Quotient, R);
1942     else {
1943       APInt::udivrem(-LHS, RHS, Quotient, R);
1944       Quotient.negate();
1945     }
1946     R = -R;
1947   } else if (RHS < 0) {
1948     APInt::udivrem(LHS, -RHS, Quotient, R);
1949     Quotient.negate();
1950   } else {
1951     APInt::udivrem(LHS, RHS, Quotient, R);
1952   }
1953   Remainder = R;
1954 }
1955
1956 APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const {
1957   APInt Res = *this+RHS;
1958   Overflow = isNonNegative() == RHS.isNonNegative() &&
1959              Res.isNonNegative() != isNonNegative();
1960   return Res;
1961 }
1962
1963 APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const {
1964   APInt Res = *this+RHS;
1965   Overflow = Res.ult(RHS);
1966   return Res;
1967 }
1968
1969 APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const {
1970   APInt Res = *this - RHS;
1971   Overflow = isNonNegative() != RHS.isNonNegative() &&
1972              Res.isNonNegative() != isNonNegative();
1973   return Res;
1974 }
1975
1976 APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const {
1977   APInt Res = *this-RHS;
1978   Overflow = Res.ugt(*this);
1979   return Res;
1980 }
1981
1982 APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {
1983   // MININT/-1  -->  overflow.
1984   Overflow = isMinSignedValue() && RHS.isAllOnesValue();
1985   return sdiv(RHS);
1986 }
1987
1988 APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
1989   APInt Res = *this * RHS;
1990
1991   if (*this != 0 && RHS != 0)
1992     Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS;
1993   else
1994     Overflow = false;
1995   return Res;
1996 }
1997
1998 APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
1999   if (countLeadingZeros() + RHS.countLeadingZeros() + 2 <= BitWidth) {
2000     Overflow = true;
2001     return *this * RHS;
2002   }
2003
2004   APInt Res = lshr(1) * RHS;
2005   Overflow = Res.isNegative();
2006   Res <<= 1;
2007   if ((*this)[0]) {
2008     Res += RHS;
2009     if (Res.ult(RHS))
2010       Overflow = true;
2011   }
2012   return Res;
2013 }
2014
2015 APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const {
2016   Overflow = ShAmt.uge(getBitWidth());
2017   if (Overflow)
2018     return APInt(BitWidth, 0);
2019
2020   if (isNonNegative()) // Don't allow sign change.
2021     Overflow = ShAmt.uge(countLeadingZeros());
2022   else
2023     Overflow = ShAmt.uge(countLeadingOnes());
2024
2025   return *this << ShAmt;
2026 }
2027
2028 APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const {
2029   Overflow = ShAmt.uge(getBitWidth());
2030   if (Overflow)
2031     return APInt(BitWidth, 0);
2032
2033   Overflow = ShAmt.ugt(countLeadingZeros());
2034
2035   return *this << ShAmt;
2036 }
2037
2038 APInt APInt::sadd_sat(const APInt &RHS) const {
2039   bool Overflow;
2040   APInt Res = sadd_ov(RHS, Overflow);
2041   if (!Overflow)
2042     return Res;
2043
2044   return isNegative() ? APInt::getSignedMinValue(BitWidth)
2045                       : APInt::getSignedMaxValue(BitWidth);
2046 }
2047
2048 APInt APInt::uadd_sat(const APInt &RHS) const {
2049   bool Overflow;
2050   APInt Res = uadd_ov(RHS, Overflow);
2051   if (!Overflow)
2052     return Res;
2053
2054   return APInt::getMaxValue(BitWidth);
2055 }
2056
2057 APInt APInt::ssub_sat(const APInt &RHS) const {
2058   bool Overflow;
2059   APInt Res = ssub_ov(RHS, Overflow);
2060   if (!Overflow)
2061     return Res;
2062
2063   return isNegative() ? APInt::getSignedMinValue(BitWidth)
2064                       : APInt::getSignedMaxValue(BitWidth);
2065 }
2066
2067 APInt APInt::usub_sat(const APInt &RHS) const {
2068   bool Overflow;
2069   APInt Res = usub_ov(RHS, Overflow);
2070   if (!Overflow)
2071     return Res;
2072
2073   return APInt(BitWidth, 0);
2074 }
2075
2076 APInt APInt::smul_sat(const APInt &RHS) const {
2077   bool Overflow;
2078   APInt Res = smul_ov(RHS, Overflow);
2079   if (!Overflow)
2080     return Res;
2081
2082   // The result is negative if one and only one of inputs is negative.
2083   bool ResIsNegative = isNegative() ^ RHS.isNegative();
2084
2085   return ResIsNegative ? APInt::getSignedMinValue(BitWidth)
2086                        : APInt::getSignedMaxValue(BitWidth);
2087 }
2088
2089 APInt APInt::umul_sat(const APInt &RHS) const {
2090   bool Overflow;
2091   APInt Res = umul_ov(RHS, Overflow);
2092   if (!Overflow)
2093     return Res;
2094
2095   return APInt::getMaxValue(BitWidth);
2096 }
2097
2098 APInt APInt::sshl_sat(const APInt &RHS) const {
2099   bool Overflow;
2100   APInt Res = sshl_ov(RHS, Overflow);
2101   if (!Overflow)
2102     return Res;
2103
2104   return isNegative() ? APInt::getSignedMinValue(BitWidth)
2105                       : APInt::getSignedMaxValue(BitWidth);
2106 }
2107
2108 APInt APInt::ushl_sat(const APInt &RHS) const {
2109   bool Overflow;
2110   APInt Res = ushl_ov(RHS, Overflow);
2111   if (!Overflow)
2112     return Res;
2113
2114   return APInt::getMaxValue(BitWidth);
2115 }
2116
2117 void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
2118   // Check our assumptions here
2119   assert(!str.empty() && "Invalid string length");
2120   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
2121           radix == 36) &&
2122          "Radix should be 2, 8, 10, 16, or 36!");
2123
2124   StringRef::iterator p = str.begin();
2125   size_t slen = str.size();
2126   bool isNeg = *p == '-';
2127   if (*p == '-' || *p == '+') {
2128     p++;
2129     slen--;
2130     assert(slen && "String is only a sign, needs a value.");
2131   }
2132   assert((slen <= numbits || radix != 2) && "Insufficient bit width");
2133   assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
2134   assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
2135   assert((((slen-1)*64)/22 <= numbits || radix != 10) &&
2136          "Insufficient bit width");
2137
2138   // Allocate memory if needed
2139   if (isSingleWord())
2140     U.VAL = 0;
2141   else
2142     U.pVal = getClearedMemory(getNumWords());
2143
2144   // Figure out if we can shift instead of multiply
2145   unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
2146
2147   // Enter digit traversal loop
2148   for (StringRef::iterator e = str.end(); p != e; ++p) {
2149     unsigned digit = getDigit(*p, radix);
2150     assert(digit < radix && "Invalid character in digit string");
2151
2152     // Shift or multiply the value by the radix
2153     if (slen > 1) {
2154       if (shift)
2155         *this <<= shift;
2156       else
2157         *this *= radix;
2158     }
2159
2160     // Add in the digit we just interpreted
2161     *this += digit;
2162   }
2163   // If its negative, put it in two's complement form
2164   if (isNeg)
2165     this->negate();
2166 }
2167
2168 void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
2169                      bool Signed, bool formatAsCLiteral) const {
2170   assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 ||
2171           Radix == 36) &&
2172          "Radix should be 2, 8, 10, 16, or 36!");
2173
2174   const char *Prefix = "";
2175   if (formatAsCLiteral) {
2176     switch (Radix) {
2177       case 2:
2178         // Binary literals are a non-standard extension added in gcc 4.3:
2179         // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
2180         Prefix = "0b";
2181         break;
2182       case 8:
2183         Prefix = "0";
2184         break;
2185       case 10:
2186         break; // No prefix
2187       case 16:
2188         Prefix = "0x";
2189         break;
2190       default:
2191         llvm_unreachable("Invalid radix!");
2192     }
2193   }
2194
2195   // First, check for a zero value and just short circuit the logic below.
2196   if (*this == 0) {
2197     while (*Prefix) {
2198       Str.push_back(*Prefix);
2199       ++Prefix;
2200     };
2201     Str.push_back('0');
2202     return;
2203   }
2204
2205   static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
2206
2207   if (isSingleWord()) {
2208     char Buffer[65];
2209     char *BufPtr = std::end(Buffer);
2210
2211     uint64_t N;
2212     if (!Signed) {
2213       N = getZExtValue();
2214     } else {
2215       int64_t I = getSExtValue();
2216       if (I >= 0) {
2217         N = I;
2218       } else {
2219         Str.push_back('-');
2220         N = -(uint64_t)I;
2221       }
2222     }
2223
2224     while (*Prefix) {
2225       Str.push_back(*Prefix);
2226       ++Prefix;
2227     };
2228
2229     while (N) {
2230       *--BufPtr = Digits[N % Radix];
2231       N /= Radix;
2232     }
2233     Str.append(BufPtr, std::end(Buffer));
2234     return;
2235   }
2236
2237   APInt Tmp(*this);
2238
2239   if (Signed && isNegative()) {
2240     // They want to print the signed version and it is a negative value
2241     // Flip the bits and add one to turn it into the equivalent positive
2242     // value and put a '-' in the result.
2243     Tmp.negate();
2244     Str.push_back('-');
2245   }
2246
2247   while (*Prefix) {
2248     Str.push_back(*Prefix);
2249     ++Prefix;
2250   };
2251
2252   // We insert the digits backward, then reverse them to get the right order.
2253   unsigned StartDig = Str.size();
2254
2255   // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2256   // because the number of bits per digit (1, 3 and 4 respectively) divides
2257   // equally.  We just shift until the value is zero.
2258   if (Radix == 2 || Radix == 8 || Radix == 16) {
2259     // Just shift tmp right for each digit width until it becomes zero
2260     unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
2261     unsigned MaskAmt = Radix - 1;
2262
2263     while (Tmp.getBoolValue()) {
2264       unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
2265       Str.push_back(Digits[Digit]);
2266       Tmp.lshrInPlace(ShiftAmt);
2267     }
2268   } else {
2269     while (Tmp.getBoolValue()) {
2270       uint64_t Digit;
2271       udivrem(Tmp, Radix, Tmp, Digit);
2272       assert(Digit < Radix && "divide failed");
2273       Str.push_back(Digits[Digit]);
2274     }
2275   }
2276
2277   // Reverse the digits before returning.
2278   std::reverse(Str.begin()+StartDig, Str.end());
2279 }
2280
2281 /// Returns the APInt as a std::string. Note that this is an inefficient method.
2282 /// It is better to pass in a SmallVector/SmallString to the methods above.
2283 std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
2284   SmallString<40> S;
2285   toString(S, Radix, Signed, /* formatAsCLiteral = */false);
2286   return S.str();
2287 }
2288
2289 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2290 LLVM_DUMP_METHOD void APInt::dump() const {
2291   SmallString<40> S, U;
2292   this->toStringUnsigned(U);
2293   this->toStringSigned(S);
2294   dbgs() << "APInt(" << BitWidth << "b, "
2295          << U << "u " << S << "s)\n";
2296 }
2297 #endif
2298
2299 void APInt::print(raw_ostream &OS, bool isSigned) const {
2300   SmallString<40> S;
2301   this->toString(S, 10, isSigned, /* formatAsCLiteral = */false);
2302   OS << S;
2303 }
2304
2305 // This implements a variety of operations on a representation of
2306 // arbitrary precision, two's-complement, bignum integer values.
2307
2308 // Assumed by lowHalf, highHalf, partMSB and partLSB.  A fairly safe
2309 // and unrestricting assumption.
2310 static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0,
2311               "Part width must be divisible by 2!");
2312
2313 /* Some handy functions local to this file.  */
2314
2315 /* Returns the integer part with the least significant BITS set.
2316    BITS cannot be zero.  */
2317 static inline APInt::WordType lowBitMask(unsigned bits) {
2318   assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD);
2319
2320   return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits);
2321 }
2322
2323 /* Returns the value of the lower half of PART.  */
2324 static inline APInt::WordType lowHalf(APInt::WordType part) {
2325   return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2);
2326 }
2327
2328 /* Returns the value of the upper half of PART.  */
2329 static inline APInt::WordType highHalf(APInt::WordType part) {
2330   return part >> (APInt::APINT_BITS_PER_WORD / 2);
2331 }
2332
2333 /* Returns the bit number of the most significant set bit of a part.
2334    If the input number has no bits set -1U is returned.  */
2335 static unsigned partMSB(APInt::WordType value) {
2336   return findLastSet(value, ZB_Max);
2337 }
2338
2339 /* Returns the bit number of the least significant set bit of a
2340    part.  If the input number has no bits set -1U is returned.  */
2341 static unsigned partLSB(APInt::WordType value) {
2342   return findFirstSet(value, ZB_Max);
2343 }
2344
2345 /* Sets the least significant part of a bignum to the input value, and
2346    zeroes out higher parts.  */
2347 void APInt::tcSet(WordType *dst, WordType part, unsigned parts) {
2348   assert(parts > 0);
2349
2350   dst[0] = part;
2351   for (unsigned i = 1; i < parts; i++)
2352     dst[i] = 0;
2353 }
2354
2355 /* Assign one bignum to another.  */
2356 void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) {
2357   for (unsigned i = 0; i < parts; i++)
2358     dst[i] = src[i];
2359 }
2360
2361 /* Returns true if a bignum is zero, false otherwise.  */
2362 bool APInt::tcIsZero(const WordType *src, unsigned parts) {
2363   for (unsigned i = 0; i < parts; i++)
2364     if (src[i])
2365       return false;
2366
2367   return true;
2368 }
2369
2370 /* Extract the given bit of a bignum; returns 0 or 1.  */
2371 int APInt::tcExtractBit(const WordType *parts, unsigned bit) {
2372   return (parts[whichWord(bit)] & maskBit(bit)) != 0;
2373 }
2374
2375 /* Set the given bit of a bignum. */
2376 void APInt::tcSetBit(WordType *parts, unsigned bit) {
2377   parts[whichWord(bit)] |= maskBit(bit);
2378 }
2379
2380 /* Clears the given bit of a bignum. */
2381 void APInt::tcClearBit(WordType *parts, unsigned bit) {
2382   parts[whichWord(bit)] &= ~maskBit(bit);
2383 }
2384
2385 /* Returns the bit number of the least significant set bit of a
2386    number.  If the input number has no bits set -1U is returned.  */
2387 unsigned APInt::tcLSB(const WordType *parts, unsigned n) {
2388   for (unsigned i = 0; i < n; i++) {
2389     if (parts[i] != 0) {
2390       unsigned lsb = partLSB(parts[i]);
2391
2392       return lsb + i * APINT_BITS_PER_WORD;
2393     }
2394   }
2395
2396   return -1U;
2397 }
2398
2399 /* Returns the bit number of the most significant set bit of a number.
2400    If the input number has no bits set -1U is returned.  */
2401 unsigned APInt::tcMSB(const WordType *parts, unsigned n) {
2402   do {
2403     --n;
2404
2405     if (parts[n] != 0) {
2406       unsigned msb = partMSB(parts[n]);
2407
2408       return msb + n * APINT_BITS_PER_WORD;
2409     }
2410   } while (n);
2411
2412   return -1U;
2413 }
2414
2415 /* Copy the bit vector of width srcBITS from SRC, starting at bit
2416    srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
2417    the least significant bit of DST.  All high bits above srcBITS in
2418    DST are zero-filled.  */
2419 void
2420 APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src,
2421                  unsigned srcBits, unsigned srcLSB) {
2422   unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
2423   assert(dstParts <= dstCount);
2424
2425   unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD;
2426   tcAssign (dst, src + firstSrcPart, dstParts);
2427
2428   unsigned shift = srcLSB % APINT_BITS_PER_WORD;
2429   tcShiftRight (dst, dstParts, shift);
2430
2431   /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC
2432      in DST.  If this is less that srcBits, append the rest, else
2433      clear the high bits.  */
2434   unsigned n = dstParts * APINT_BITS_PER_WORD - shift;
2435   if (n < srcBits) {
2436     WordType mask = lowBitMask (srcBits - n);
2437     dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
2438                           << n % APINT_BITS_PER_WORD);
2439   } else if (n > srcBits) {
2440     if (srcBits % APINT_BITS_PER_WORD)
2441       dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD);
2442   }
2443
2444   /* Clear high parts.  */
2445   while (dstParts < dstCount)
2446     dst[dstParts++] = 0;
2447 }
2448
2449 /* DST += RHS + C where C is zero or one.  Returns the carry flag.  */
2450 APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs,
2451                              WordType c, unsigned parts) {
2452   assert(c <= 1);
2453
2454   for (unsigned i = 0; i < parts; i++) {
2455     WordType l = dst[i];
2456     if (c) {
2457       dst[i] += rhs[i] + 1;
2458       c = (dst[i] <= l);
2459     } else {
2460       dst[i] += rhs[i];
2461       c = (dst[i] < l);
2462     }
2463   }
2464
2465   return c;
2466 }
2467
2468 /// This function adds a single "word" integer, src, to the multiple
2469 /// "word" integer array, dst[]. dst[] is modified to reflect the addition and
2470 /// 1 is returned if there is a carry out, otherwise 0 is returned.
2471 /// @returns the carry of the addition.
2472 APInt::WordType APInt::tcAddPart(WordType *dst, WordType src,
2473                                  unsigned parts) {
2474   for (unsigned i = 0; i < parts; ++i) {
2475     dst[i] += src;
2476     if (dst[i] >= src)
2477       return 0; // No need to carry so exit early.
2478     src = 1; // Carry one to next digit.
2479   }
2480
2481   return 1;
2482 }
2483
2484 /* DST -= RHS + C where C is zero or one.  Returns the carry flag.  */
2485 APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs,
2486                                   WordType c, unsigned parts) {
2487   assert(c <= 1);
2488
2489   for (unsigned i = 0; i < parts; i++) {
2490     WordType l = dst[i];
2491     if (c) {
2492       dst[i] -= rhs[i] + 1;
2493       c = (dst[i] >= l);
2494     } else {
2495       dst[i] -= rhs[i];
2496       c = (dst[i] > l);
2497     }
2498   }
2499
2500   return c;
2501 }
2502
2503 /// This function subtracts a single "word" (64-bit word), src, from
2504 /// the multi-word integer array, dst[], propagating the borrowed 1 value until
2505 /// no further borrowing is needed or it runs out of "words" in dst.  The result
2506 /// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not
2507 /// exhausted. In other words, if src > dst then this function returns 1,
2508 /// otherwise 0.
2509 /// @returns the borrow out of the subtraction
2510 APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src,
2511                                       unsigned parts) {
2512   for (unsigned i = 0; i < parts; ++i) {
2513     WordType Dst = dst[i];
2514     dst[i] -= src;
2515     if (src <= Dst)
2516       return 0; // No need to borrow so exit early.
2517     src = 1; // We have to "borrow 1" from next "word"
2518   }
2519
2520   return 1;
2521 }
2522
2523 /* Negate a bignum in-place.  */
2524 void APInt::tcNegate(WordType *dst, unsigned parts) {
2525   tcComplement(dst, parts);
2526   tcIncrement(dst, parts);
2527 }
2528
2529 /*  DST += SRC * MULTIPLIER + CARRY   if add is true
2530     DST  = SRC * MULTIPLIER + CARRY   if add is false
2531
2532     Requires 0 <= DSTPARTS <= SRCPARTS + 1.  If DST overlaps SRC
2533     they must start at the same point, i.e. DST == SRC.
2534
2535     If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2536     returned.  Otherwise DST is filled with the least significant
2537     DSTPARTS parts of the result, and if all of the omitted higher
2538     parts were zero return zero, otherwise overflow occurred and
2539     return one.  */
2540 int APInt::tcMultiplyPart(WordType *dst, const WordType *src,
2541                           WordType multiplier, WordType carry,
2542                           unsigned srcParts, unsigned dstParts,
2543                           bool add) {
2544   /* Otherwise our writes of DST kill our later reads of SRC.  */
2545   assert(dst <= src || dst >= src + srcParts);
2546   assert(dstParts <= srcParts + 1);
2547
2548   /* N loops; minimum of dstParts and srcParts.  */
2549   unsigned n = std::min(dstParts, srcParts);
2550
2551   for (unsigned i = 0; i < n; i++) {
2552     WordType low, mid, high, srcPart;
2553
2554       /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2555
2556          This cannot overflow, because
2557
2558          (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2559
2560          which is less than n^2.  */
2561
2562     srcPart = src[i];
2563
2564     if (multiplier == 0 || srcPart == 0) {
2565       low = carry;
2566       high = 0;
2567     } else {
2568       low = lowHalf(srcPart) * lowHalf(multiplier);
2569       high = highHalf(srcPart) * highHalf(multiplier);
2570
2571       mid = lowHalf(srcPart) * highHalf(multiplier);
2572       high += highHalf(mid);
2573       mid <<= APINT_BITS_PER_WORD / 2;
2574       if (low + mid < low)
2575         high++;
2576       low += mid;
2577
2578       mid = highHalf(srcPart) * lowHalf(multiplier);
2579       high += highHalf(mid);
2580       mid <<= APINT_BITS_PER_WORD / 2;
2581       if (low + mid < low)
2582         high++;
2583       low += mid;
2584
2585       /* Now add carry.  */
2586       if (low + carry < low)
2587         high++;
2588       low += carry;
2589     }
2590
2591     if (add) {
2592       /* And now DST[i], and store the new low part there.  */
2593       if (low + dst[i] < low)
2594         high++;
2595       dst[i] += low;
2596     } else
2597       dst[i] = low;
2598
2599     carry = high;
2600   }
2601
2602   if (srcParts < dstParts) {
2603     /* Full multiplication, there is no overflow.  */
2604     assert(srcParts + 1 == dstParts);
2605     dst[srcParts] = carry;
2606     return 0;
2607   }
2608
2609   /* We overflowed if there is carry.  */
2610   if (carry)
2611     return 1;
2612
2613   /* We would overflow if any significant unwritten parts would be
2614      non-zero.  This is true if any remaining src parts are non-zero
2615      and the multiplier is non-zero.  */
2616   if (multiplier)
2617     for (unsigned i = dstParts; i < srcParts; i++)
2618       if (src[i])
2619         return 1;
2620
2621   /* We fitted in the narrow destination.  */
2622   return 0;
2623 }
2624
2625 /* DST = LHS * RHS, where DST has the same width as the operands and
2626    is filled with the least significant parts of the result.  Returns
2627    one if overflow occurred, otherwise zero.  DST must be disjoint
2628    from both operands.  */
2629 int APInt::tcMultiply(WordType *dst, const WordType *lhs,
2630                       const WordType *rhs, unsigned parts) {
2631   assert(dst != lhs && dst != rhs);
2632
2633   int overflow = 0;
2634   tcSet(dst, 0, parts);
2635
2636   for (unsigned i = 0; i < parts; i++)
2637     overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
2638                                parts - i, true);
2639
2640   return overflow;
2641 }
2642
2643 /// DST = LHS * RHS, where DST has width the sum of the widths of the
2644 /// operands. No overflow occurs. DST must be disjoint from both operands.
2645 void APInt::tcFullMultiply(WordType *dst, const WordType *lhs,
2646                            const WordType *rhs, unsigned lhsParts,
2647                            unsigned rhsParts) {
2648   /* Put the narrower number on the LHS for less loops below.  */
2649   if (lhsParts > rhsParts)
2650     return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
2651
2652   assert(dst != lhs && dst != rhs);
2653
2654   tcSet(dst, 0, rhsParts);
2655
2656   for (unsigned i = 0; i < lhsParts; i++)
2657     tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true);
2658 }
2659
2660 /* If RHS is zero LHS and REMAINDER are left unchanged, return one.
2661    Otherwise set LHS to LHS / RHS with the fractional part discarded,
2662    set REMAINDER to the remainder, return zero.  i.e.
2663
2664    OLD_LHS = RHS * LHS + REMAINDER
2665
2666    SCRATCH is a bignum of the same size as the operands and result for
2667    use by the routine; its contents need not be initialized and are
2668    destroyed.  LHS, REMAINDER and SCRATCH must be distinct.
2669 */
2670 int APInt::tcDivide(WordType *lhs, const WordType *rhs,
2671                     WordType *remainder, WordType *srhs,
2672                     unsigned parts) {
2673   assert(lhs != remainder && lhs != srhs && remainder != srhs);
2674
2675   unsigned shiftCount = tcMSB(rhs, parts) + 1;
2676   if (shiftCount == 0)
2677     return true;
2678
2679   shiftCount = parts * APINT_BITS_PER_WORD - shiftCount;
2680   unsigned n = shiftCount / APINT_BITS_PER_WORD;
2681   WordType mask = (WordType) 1 << (shiftCount % APINT_BITS_PER_WORD);
2682
2683   tcAssign(srhs, rhs, parts);
2684   tcShiftLeft(srhs, parts, shiftCount);
2685   tcAssign(remainder, lhs, parts);
2686   tcSet(lhs, 0, parts);
2687
2688   /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
2689      the total.  */
2690   for (;;) {
2691     int compare = tcCompare(remainder, srhs, parts);
2692     if (compare >= 0) {
2693       tcSubtract(remainder, srhs, 0, parts);
2694       lhs[n] |= mask;
2695     }
2696
2697     if (shiftCount == 0)
2698       break;
2699     shiftCount--;
2700     tcShiftRight(srhs, parts, 1);
2701     if ((mask >>= 1) == 0) {
2702       mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1);
2703       n--;
2704     }
2705   }
2706
2707   return false;
2708 }
2709
2710 /// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are
2711 /// no restrictions on Count.
2712 void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) {
2713   // Don't bother performing a no-op shift.
2714   if (!Count)
2715     return;
2716
2717   // WordShift is the inter-part shift; BitShift is the intra-part shift.
2718   unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2719   unsigned BitShift = Count % APINT_BITS_PER_WORD;
2720
2721   // Fastpath for moving by whole words.
2722   if (BitShift == 0) {
2723     std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE);
2724   } else {
2725     while (Words-- > WordShift) {
2726       Dst[Words] = Dst[Words - WordShift] << BitShift;
2727       if (Words > WordShift)
2728         Dst[Words] |=
2729           Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift);
2730     }
2731   }
2732
2733   // Fill in the remainder with 0s.
2734   std::memset(Dst, 0, WordShift * APINT_WORD_SIZE);
2735 }
2736
2737 /// Shift a bignum right Count bits in-place. Shifted in bits are zero. There
2738 /// are no restrictions on Count.
2739 void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) {
2740   // Don't bother performing a no-op shift.
2741   if (!Count)
2742     return;
2743
2744   // WordShift is the inter-part shift; BitShift is the intra-part shift.
2745   unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2746   unsigned BitShift = Count % APINT_BITS_PER_WORD;
2747
2748   unsigned WordsToMove = Words - WordShift;
2749   // Fastpath for moving by whole words.
2750   if (BitShift == 0) {
2751     std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE);
2752   } else {
2753     for (unsigned i = 0; i != WordsToMove; ++i) {
2754       Dst[i] = Dst[i + WordShift] >> BitShift;
2755       if (i + 1 != WordsToMove)
2756         Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift);
2757     }
2758   }
2759
2760   // Fill in the remainder with 0s.
2761   std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE);
2762 }
2763
2764 /* Bitwise and of two bignums.  */
2765 void APInt::tcAnd(WordType *dst, const WordType *rhs, unsigned parts) {
2766   for (unsigned i = 0; i < parts; i++)
2767     dst[i] &= rhs[i];
2768 }
2769
2770 /* Bitwise inclusive or of two bignums.  */
2771 void APInt::tcOr(WordType *dst, const WordType *rhs, unsigned parts) {
2772   for (unsigned i = 0; i < parts; i++)
2773     dst[i] |= rhs[i];
2774 }
2775
2776 /* Bitwise exclusive or of two bignums.  */
2777 void APInt::tcXor(WordType *dst, const WordType *rhs, unsigned parts) {
2778   for (unsigned i = 0; i < parts; i++)
2779     dst[i] ^= rhs[i];
2780 }
2781
2782 /* Complement a bignum in-place.  */
2783 void APInt::tcComplement(WordType *dst, unsigned parts) {
2784   for (unsigned i = 0; i < parts; i++)
2785     dst[i] = ~dst[i];
2786 }
2787
2788 /* Comparison (unsigned) of two bignums.  */
2789 int APInt::tcCompare(const WordType *lhs, const WordType *rhs,
2790                      unsigned parts) {
2791   while (parts) {
2792     parts--;
2793     if (lhs[parts] != rhs[parts])
2794       return (lhs[parts] > rhs[parts]) ? 1 : -1;
2795   }
2796
2797   return 0;
2798 }
2799
2800 /* Set the least significant BITS bits of a bignum, clear the
2801    rest.  */
2802 void APInt::tcSetLeastSignificantBits(WordType *dst, unsigned parts,
2803                                       unsigned bits) {
2804   unsigned i = 0;
2805   while (bits > APINT_BITS_PER_WORD) {
2806     dst[i++] = ~(WordType) 0;
2807     bits -= APINT_BITS_PER_WORD;
2808   }
2809
2810   if (bits)
2811     dst[i++] = ~(WordType) 0 >> (APINT_BITS_PER_WORD - bits);
2812
2813   while (i < parts)
2814     dst[i++] = 0;
2815 }
2816
2817 APInt llvm::APIntOps::RoundingUDiv(const APInt &A, const APInt &B,
2818                                    APInt::Rounding RM) {
2819   // Currently udivrem always rounds down.
2820   switch (RM) {
2821   case APInt::Rounding::DOWN:
2822   case APInt::Rounding::TOWARD_ZERO:
2823     return A.udiv(B);
2824   case APInt::Rounding::UP: {
2825     APInt Quo, Rem;
2826     APInt::udivrem(A, B, Quo, Rem);
2827     if (Rem == 0)
2828       return Quo;
2829     return Quo + 1;
2830   }
2831   }
2832   llvm_unreachable("Unknown APInt::Rounding enum");
2833 }
2834
2835 APInt llvm::APIntOps::RoundingSDiv(const APInt &A, const APInt &B,
2836                                    APInt::Rounding RM) {
2837   switch (RM) {
2838   case APInt::Rounding::DOWN:
2839   case APInt::Rounding::UP: {
2840     APInt Quo, Rem;
2841     APInt::sdivrem(A, B, Quo, Rem);
2842     if (Rem == 0)
2843       return Quo;
2844     // This algorithm deals with arbitrary rounding mode used by sdivrem.
2845     // We want to check whether the non-integer part of the mathematical value
2846     // is negative or not. If the non-integer part is negative, we need to round
2847     // down from Quo; otherwise, if it's positive or 0, we return Quo, as it's
2848     // already rounded down.
2849     if (RM == APInt::Rounding::DOWN) {
2850       if (Rem.isNegative() != B.isNegative())
2851         return Quo - 1;
2852       return Quo;
2853     }
2854     if (Rem.isNegative() != B.isNegative())
2855       return Quo;
2856     return Quo + 1;
2857   }
2858   // Currently sdiv rounds towards zero.
2859   case APInt::Rounding::TOWARD_ZERO:
2860     return A.sdiv(B);
2861   }
2862   llvm_unreachable("Unknown APInt::Rounding enum");
2863 }
2864
2865 Optional<APInt>
2866 llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
2867                                            unsigned RangeWidth) {
2868   unsigned CoeffWidth = A.getBitWidth();
2869   assert(CoeffWidth == B.getBitWidth() && CoeffWidth == C.getBitWidth());
2870   assert(RangeWidth <= CoeffWidth &&
2871          "Value range width should be less than coefficient width");
2872   assert(RangeWidth > 1 && "Value range bit width should be > 1");
2873
2874   LLVM_DEBUG(dbgs() << __func__ << ": solving " << A << "x^2 + " << B
2875                     << "x + " << C << ", rw:" << RangeWidth << '\n');
2876
2877   // Identify 0 as a (non)solution immediately.
2878   if (C.sextOrTrunc(RangeWidth).isNullValue() ) {
2879     LLVM_DEBUG(dbgs() << __func__ << ": zero solution\n");
2880     return APInt(CoeffWidth, 0);
2881   }
2882
2883   // The result of APInt arithmetic has the same bit width as the operands,
2884   // so it can actually lose high bits. A product of two n-bit integers needs
2885   // 2n-1 bits to represent the full value.
2886   // The operation done below (on quadratic coefficients) that can produce
2887   // the largest value is the evaluation of the equation during bisection,
2888   // which needs 3 times the bitwidth of the coefficient, so the total number
2889   // of required bits is 3n.
2890   //
2891   // The purpose of this extension is to simulate the set Z of all integers,
2892   // where n+1 > n for all n in Z. In Z it makes sense to talk about positive
2893   // and negative numbers (not so much in a modulo arithmetic). The method
2894   // used to solve the equation is based on the standard formula for real
2895   // numbers, and uses the concepts of "positive" and "negative" with their
2896   // usual meanings.
2897   CoeffWidth *= 3;
2898   A = A.sext(CoeffWidth);
2899   B = B.sext(CoeffWidth);
2900   C = C.sext(CoeffWidth);
2901
2902   // Make A > 0 for simplicity. Negate cannot overflow at this point because
2903   // the bit width has increased.
2904   if (A.isNegative()) {
2905     A.negate();
2906     B.negate();
2907     C.negate();
2908   }
2909
2910   // Solving an equation q(x) = 0 with coefficients in modular arithmetic
2911   // is really solving a set of equations q(x) = kR for k = 0, 1, 2, ...,
2912   // and R = 2^BitWidth.
2913   // Since we're trying not only to find exact solutions, but also values
2914   // that "wrap around", such a set will always have a solution, i.e. an x
2915   // that satisfies at least one of the equations, or such that |q(x)|
2916   // exceeds kR, while |q(x-1)| for the same k does not.
2917   //
2918   // We need to find a value k, such that Ax^2 + Bx + C = kR will have a
2919   // positive solution n (in the above sense), and also such that the n
2920   // will be the least among all solutions corresponding to k = 0, 1, ...
2921   // (more precisely, the least element in the set
2922   //   { n(k) | k is such that a solution n(k) exists }).
2923   //
2924   // Consider the parabola (over real numbers) that corresponds to the
2925   // quadratic equation. Since A > 0, the arms of the parabola will point
2926   // up. Picking different values of k will shift it up and down by R.
2927   //
2928   // We want to shift the parabola in such a way as to reduce the problem
2929   // of solving q(x) = kR to solving shifted_q(x) = 0.
2930   // (The interesting solutions are the ceilings of the real number
2931   // solutions.)
2932   APInt R = APInt::getOneBitSet(CoeffWidth, RangeWidth);
2933   APInt TwoA = 2 * A;
2934   APInt SqrB = B * B;
2935   bool PickLow;
2936
2937   auto RoundUp = [] (const APInt &V, const APInt &A) -> APInt {
2938     assert(A.isStrictlyPositive());
2939     APInt T = V.abs().urem(A);
2940     if (T.isNullValue())
2941       return V;
2942     return V.isNegative() ? V+T : V+(A-T);
2943   };
2944
2945   // The vertex of the parabola is at -B/2A, but since A > 0, it's negative
2946   // iff B is positive.
2947   if (B.isNonNegative()) {
2948     // If B >= 0, the vertex it at a negative location (or at 0), so in
2949     // order to have a non-negative solution we need to pick k that makes
2950     // C-kR negative. To satisfy all the requirements for the solution
2951     // that we are looking for, it needs to be closest to 0 of all k.
2952     C = C.srem(R);
2953     if (C.isStrictlyPositive())
2954       C -= R;
2955     // Pick the greater solution.
2956     PickLow = false;
2957   } else {
2958     // If B < 0, the vertex is at a positive location. For any solution
2959     // to exist, the discriminant must be non-negative. This means that
2960     // C-kR <= B^2/4A is a necessary condition for k, i.e. there is a
2961     // lower bound on values of k: kR >= C - B^2/4A.
2962     APInt LowkR = C - SqrB.udiv(2*TwoA); // udiv because all values > 0.
2963     // Round LowkR up (towards +inf) to the nearest kR.
2964     LowkR = RoundUp(LowkR, R);
2965
2966     // If there exists k meeting the condition above, and such that
2967     // C-kR > 0, there will be two positive real number solutions of
2968     // q(x) = kR. Out of all such values of k, pick the one that makes
2969     // C-kR closest to 0, (i.e. pick maximum k such that C-kR > 0).
2970     // In other words, find maximum k such that LowkR <= kR < C.
2971     if (C.sgt(LowkR)) {
2972       // If LowkR < C, then such a k is guaranteed to exist because
2973       // LowkR itself is a multiple of R.
2974       C -= -RoundUp(-C, R);      // C = C - RoundDown(C, R)
2975       // Pick the smaller solution.
2976       PickLow = true;
2977     } else {
2978       // If C-kR < 0 for all potential k's, it means that one solution
2979       // will be negative, while the other will be positive. The positive
2980       // solution will shift towards 0 if the parabola is moved up.
2981       // Pick the kR closest to the lower bound (i.e. make C-kR closest
2982       // to 0, or in other words, out of all parabolas that have solutions,
2983       // pick the one that is the farthest "up").
2984       // Since LowkR is itself a multiple of R, simply take C-LowkR.
2985       C -= LowkR;
2986       // Pick the greater solution.
2987       PickLow = false;
2988     }
2989   }
2990
2991   LLVM_DEBUG(dbgs() << __func__ << ": updated coefficients " << A << "x^2 + "
2992                     << B << "x + " << C << ", rw:" << RangeWidth << '\n');
2993
2994   APInt D = SqrB - 4*A*C;
2995   assert(D.isNonNegative() && "Negative discriminant");
2996   APInt SQ = D.sqrt();
2997
2998   APInt Q = SQ * SQ;
2999   bool InexactSQ = Q != D;
3000   // The calculated SQ may actually be greater than the exact (non-integer)
3001   // value. If that's the case, decrement SQ to get a value that is lower.
3002   if (Q.sgt(D))
3003     SQ -= 1;
3004
3005   APInt X;
3006   APInt Rem;
3007
3008   // SQ is rounded down (i.e SQ * SQ <= D), so the roots may be inexact.
3009   // When using the quadratic formula directly, the calculated low root
3010   // may be greater than the exact one, since we would be subtracting SQ.
3011   // To make sure that the calculated root is not greater than the exact
3012   // one, subtract SQ+1 when calculating the low root (for inexact value
3013   // of SQ).
3014   if (PickLow)
3015     APInt::sdivrem(-B - (SQ+InexactSQ), TwoA, X, Rem);
3016   else
3017     APInt::sdivrem(-B + SQ, TwoA, X, Rem);
3018
3019   // The updated coefficients should be such that the (exact) solution is
3020   // positive. Since APInt division rounds towards 0, the calculated one
3021   // can be 0, but cannot be negative.
3022   assert(X.isNonNegative() && "Solution should be non-negative");
3023
3024   if (!InexactSQ && Rem.isNullValue()) {
3025     LLVM_DEBUG(dbgs() << __func__ << ": solution (root): " << X << '\n');
3026     return X;
3027   }
3028
3029   assert((SQ*SQ).sle(D) && "SQ = |_sqrt(D)_|, so SQ*SQ <= D");
3030   // The exact value of the square root of D should be between SQ and SQ+1.
3031   // This implies that the solution should be between that corresponding to
3032   // SQ (i.e. X) and that corresponding to SQ+1.
3033   //
3034   // The calculated X cannot be greater than the exact (real) solution.
3035   // Actually it must be strictly less than the exact solution, while
3036   // X+1 will be greater than or equal to it.
3037
3038   APInt VX = (A*X + B)*X + C;
3039   APInt VY = VX + TwoA*X + A + B;
3040   bool SignChange = VX.isNegative() != VY.isNegative() ||
3041                     VX.isNullValue() != VY.isNullValue();
3042   // If the sign did not change between X and X+1, X is not a valid solution.
3043   // This could happen when the actual (exact) roots don't have an integer
3044   // between them, so they would both be contained between X and X+1.
3045   if (!SignChange) {
3046     LLVM_DEBUG(dbgs() << __func__ << ": no valid solution\n");
3047     return None;
3048   }
3049
3050   X += 1;
3051   LLVM_DEBUG(dbgs() << __func__ << ": solution (wrap): " << X << '\n');
3052   return X;
3053 }
3054
3055 Optional<unsigned>
3056 llvm::APIntOps::GetMostSignificantDifferentBit(const APInt &A, const APInt &B) {
3057   assert(A.getBitWidth() == B.getBitWidth() && "Must have the same bitwidth");
3058   if (A == B)
3059     return llvm::None;
3060   return A.getBitWidth() - ((A ^ B).countLeadingZeros() + 1);
3061 }
3062
3063 /// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst
3064 /// with the integer held in IntVal.
3065 void llvm::StoreIntToMemory(const APInt &IntVal, uint8_t *Dst,
3066                             unsigned StoreBytes) {
3067   assert((IntVal.getBitWidth()+7)/8 >= StoreBytes && "Integer too small!");
3068   const uint8_t *Src = (const uint8_t *)IntVal.getRawData();
3069
3070   if (sys::IsLittleEndianHost) {
3071     // Little-endian host - the source is ordered from LSB to MSB.  Order the
3072     // destination from LSB to MSB: Do a straight copy.
3073     memcpy(Dst, Src, StoreBytes);
3074   } else {
3075     // Big-endian host - the source is an array of 64 bit words ordered from
3076     // LSW to MSW.  Each word is ordered from MSB to LSB.  Order the destination
3077     // from MSB to LSB: Reverse the word order, but not the bytes in a word.
3078     while (StoreBytes > sizeof(uint64_t)) {
3079       StoreBytes -= sizeof(uint64_t);
3080       // May not be aligned so use memcpy.
3081       memcpy(Dst + StoreBytes, Src, sizeof(uint64_t));
3082       Src += sizeof(uint64_t);
3083     }
3084
3085     memcpy(Dst, Src + sizeof(uint64_t) - StoreBytes, StoreBytes);
3086   }
3087 }
3088
3089 /// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting
3090 /// from Src into IntVal, which is assumed to be wide enough and to hold zero.
3091 void llvm::LoadIntFromMemory(APInt &IntVal, uint8_t *Src, unsigned LoadBytes) {
3092   assert((IntVal.getBitWidth()+7)/8 >= LoadBytes && "Integer too small!");
3093   uint8_t *Dst = reinterpret_cast<uint8_t *>(
3094                    const_cast<uint64_t *>(IntVal.getRawData()));
3095
3096   if (sys::IsLittleEndianHost)
3097     // Little-endian host - the destination must be ordered from LSB to MSB.
3098     // The source is ordered from LSB to MSB: Do a straight copy.
3099     memcpy(Dst, Src, LoadBytes);
3100   else {
3101     // Big-endian - the destination is an array of 64 bit words ordered from
3102     // LSW to MSW.  Each word must be ordered from MSB to LSB.  The source is
3103     // ordered from MSB to LSB: Reverse the word order, but not the bytes in
3104     // a word.
3105     while (LoadBytes > sizeof(uint64_t)) {
3106       LoadBytes -= sizeof(uint64_t);
3107       // May not be aligned so use memcpy.
3108       memcpy(Dst, Src + LoadBytes, sizeof(uint64_t));
3109       Dst += sizeof(uint64_t);
3110     }
3111
3112     memcpy(Dst + sizeof(uint64_t) - LoadBytes, Src, LoadBytes);
3113   }
3114 }