contrib/llvm-project/llvm/lib/Support/APInt.cpp

   1 //===-- APInt.cpp - Implement APInt class ---------------------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file implements a class to represent arbitrary precision integer
  10 // constant values and provide a variety of arithmetic operations on them.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "llvm/ADT/APInt.h"
  15 #include "llvm/ADT/ArrayRef.h"
  16 #include "llvm/ADT/FoldingSet.h"
  17 #include "llvm/ADT/Hashing.h"
  18 #include "llvm/ADT/Optional.h"
  19 #include "llvm/ADT/SmallString.h"
  20 #include "llvm/ADT/StringRef.h"
  21 #include "llvm/ADT/bit.h"
  22 #include "llvm/Config/llvm-config.h"
  23 #include "llvm/Support/Debug.h"
  24 #include "llvm/Support/ErrorHandling.h"
  25 #include "llvm/Support/MathExtras.h"
  26 #include "llvm/Support/raw_ostream.h"
  27 #include <climits>
  28 #include <cmath>
  29 #include <cstdlib>
  30 #include <cstring>
  31 using namespace llvm;
  32
  33 #define DEBUG_TYPE "apint"
  34
  35 /// A utility function for allocating memory, checking for allocation failures,
  36 /// and ensuring the contents are zeroed.
  37 inline static uint64_t* getClearedMemory(unsigned numWords) {
  38   uint64_t *result = new uint64_t[numWords];
  39   memset(result, 0, numWords * sizeof(uint64_t));
  40   return result;
  41 }
  42
  43 /// A utility function for allocating memory and checking for allocation
  44 /// failure.  The content is not zeroed.
  45 inline static uint64_t* getMemory(unsigned numWords) {
  46   return new uint64_t[numWords];
  47 }
  48
  49 /// A utility function that converts a character to a digit.
  50 inline static unsigned getDigit(char cdigit, uint8_t radix) {
  51   unsigned r;
  52
  53   if (radix == 16 || radix == 36) {
  54     r = cdigit - '0';
  55     if (r <= 9)
  56       return r;
  57
  58     r = cdigit - 'A';
  59     if (r <= radix - 11U)
  60       return r + 10;
  61
  62     r = cdigit - 'a';
  63     if (r <= radix - 11U)
  64       return r + 10;
  65
  66     radix = 10;
  67   }
  68
  69   r = cdigit - '0';
  70   if (r < radix)
  71     return r;
  72
  73   return -1U;
  74 }
  75
  76
  77 void APInt::initSlowCase(uint64_t val, bool isSigned) {
  78   U.pVal = getClearedMemory(getNumWords());
  79   U.pVal[0] = val;
  80   if (isSigned && int64_t(val) < 0)
  81     for (unsigned i = 1; i < getNumWords(); ++i)
  82       U.pVal[i] = WORDTYPE_MAX;
  83   clearUnusedBits();
  84 }
  85
  86 void APInt::initSlowCase(const APInt& that) {
  87   U.pVal = getMemory(getNumWords());
  88   memcpy(U.pVal, that.U.pVal, getNumWords() * APINT_WORD_SIZE);
  89 }
  90
  91 void APInt::initFromArray(ArrayRef<uint64_t> bigVal) {
  92   assert(BitWidth && "Bitwidth too small");
  93   assert(bigVal.data() && "Null pointer detected!");
  94   if (isSingleWord())
  95     U.VAL = bigVal[0];
  96   else {
  97     // Get memory, cleared to 0
  98     U.pVal = getClearedMemory(getNumWords());
  99     // Calculate the number of words to copy
 100     unsigned words = std::min<unsigned>(bigVal.size(), getNumWords());
 101     // Copy the words from bigVal to pVal
 102     memcpy(U.pVal, bigVal.data(), words * APINT_WORD_SIZE);
 103   }
 104   // Make sure unused high bits are cleared
 105   clearUnusedBits();
 106 }
 107
 108 APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal)
 109   : BitWidth(numBits) {
 110   initFromArray(bigVal);
 111 }
 112
 113 APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
 114   : BitWidth(numBits) {
 115   initFromArray(makeArrayRef(bigVal, numWords));
 116 }
 117
 118 APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
 119   : BitWidth(numbits) {
 120   assert(BitWidth && "Bitwidth too small");
 121   fromString(numbits, Str, radix);
 122 }
 123
 124 void APInt::reallocate(unsigned NewBitWidth) {
 125   // If the number of words is the same we can just change the width and stop.
 126   if (getNumWords() == getNumWords(NewBitWidth)) {
 127     BitWidth = NewBitWidth;
 128     return;
 129   }
 130
 131   // If we have an allocation, delete it.
 132   if (!isSingleWord())
 133     delete [] U.pVal;
 134
 135   // Update BitWidth.
 136   BitWidth = NewBitWidth;
 137
 138   // If we are supposed to have an allocation, create it.
 139   if (!isSingleWord())
 140     U.pVal = getMemory(getNumWords());
 141 }
 142
 143 void APInt::AssignSlowCase(const APInt& RHS) {
 144   // Don't do anything for X = X
 145   if (this == &RHS)
 146     return;
 147
 148   // Adjust the bit width and handle allocations as necessary.
 149   reallocate(RHS.getBitWidth());
 150
 151   // Copy the data.
 152   if (isSingleWord())
 153     U.VAL = RHS.U.VAL;
 154   else
 155     memcpy(U.pVal, RHS.U.pVal, getNumWords() * APINT_WORD_SIZE);
 156 }
 157
 158 /// This method 'profiles' an APInt for use with FoldingSet.
 159 void APInt::Profile(FoldingSetNodeID& ID) const {
 160   ID.AddInteger(BitWidth);
 161
 162   if (isSingleWord()) {
 163     ID.AddInteger(U.VAL);
 164     return;
 165   }
 166
 167   unsigned NumWords = getNumWords();
 168   for (unsigned i = 0; i < NumWords; ++i)
 169     ID.AddInteger(U.pVal[i]);
 170 }
 171
 172 /// Prefix increment operator. Increments the APInt by one.
 173 APInt& APInt::operator++() {
 174   if (isSingleWord())
 175     ++U.VAL;
 176   else
 177     tcIncrement(U.pVal, getNumWords());
 178   return clearUnusedBits();
 179 }
 180
 181 /// Prefix decrement operator. Decrements the APInt by one.
 182 APInt& APInt::operator--() {
 183   if (isSingleWord())
 184     --U.VAL;
 185   else
 186     tcDecrement(U.pVal, getNumWords());
 187   return clearUnusedBits();
 188 }
 189
 190 /// Adds the RHS APint to this APInt.
 191 /// @returns this, after addition of RHS.
 192 /// Addition assignment operator.
 193 APInt& APInt::operator+=(const APInt& RHS) {
 194   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 195   if (isSingleWord())
 196     U.VAL += RHS.U.VAL;
 197   else
 198     tcAdd(U.pVal, RHS.U.pVal, 0, getNumWords());
 199   return clearUnusedBits();
 200 }
 201
 202 APInt& APInt::operator+=(uint64_t RHS) {
 203   if (isSingleWord())
 204     U.VAL += RHS;
 205   else
 206     tcAddPart(U.pVal, RHS, getNumWords());
 207   return clearUnusedBits();
 208 }
 209
 210 /// Subtracts the RHS APInt from this APInt
 211 /// @returns this, after subtraction
 212 /// Subtraction assignment operator.
 213 APInt& APInt::operator-=(const APInt& RHS) {
 214   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 215   if (isSingleWord())
 216     U.VAL -= RHS.U.VAL;
 217   else
 218     tcSubtract(U.pVal, RHS.U.pVal, 0, getNumWords());
 219   return clearUnusedBits();
 220 }
 221
 222 APInt& APInt::operator-=(uint64_t RHS) {
 223   if (isSingleWord())
 224     U.VAL -= RHS;
 225   else
 226     tcSubtractPart(U.pVal, RHS, getNumWords());
 227   return clearUnusedBits();
 228 }
 229
 230 APInt APInt::operator*(const APInt& RHS) const {
 231   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 232   if (isSingleWord())
 233     return APInt(BitWidth, U.VAL * RHS.U.VAL);
 234
 235   APInt Result(getMemory(getNumWords()), getBitWidth());
 236
 237   tcMultiply(Result.U.pVal, U.pVal, RHS.U.pVal, getNumWords());
 238
 239   Result.clearUnusedBits();
 240   return Result;
 241 }
 242
 243 void APInt::AndAssignSlowCase(const APInt& RHS) {
 244   tcAnd(U.pVal, RHS.U.pVal, getNumWords());
 245 }
 246
 247 void APInt::OrAssignSlowCase(const APInt& RHS) {
 248   tcOr(U.pVal, RHS.U.pVal, getNumWords());
 249 }
 250
 251 void APInt::XorAssignSlowCase(const APInt& RHS) {
 252   tcXor(U.pVal, RHS.U.pVal, getNumWords());
 253 }
 254
 255 APInt& APInt::operator*=(const APInt& RHS) {
 256   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 257   *this = *this * RHS;
 258   return *this;
 259 }
 260
 261 APInt& APInt::operator*=(uint64_t RHS) {
 262   if (isSingleWord()) {
 263     U.VAL *= RHS;
 264   } else {
 265     unsigned NumWords = getNumWords();
 266     tcMultiplyPart(U.pVal, U.pVal, RHS, 0, NumWords, NumWords, false);
 267   }
 268   return clearUnusedBits();
 269 }
 270
 271 bool APInt::EqualSlowCase(const APInt& RHS) const {
 272   return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal);
 273 }
 274
 275 int APInt::compare(const APInt& RHS) const {
 276   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
 277   if (isSingleWord())
 278     return U.VAL < RHS.U.VAL ? -1 : U.VAL > RHS.U.VAL;
 279
 280   return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
 281 }
 282
 283 int APInt::compareSigned(const APInt& RHS) const {
 284   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
 285   if (isSingleWord()) {
 286     int64_t lhsSext = SignExtend64(U.VAL, BitWidth);
 287     int64_t rhsSext = SignExtend64(RHS.U.VAL, BitWidth);
 288     return lhsSext < rhsSext ? -1 : lhsSext > rhsSext;
 289   }
 290
 291   bool lhsNeg = isNegative();
 292   bool rhsNeg = RHS.isNegative();
 293
 294   // If the sign bits don't match, then (LHS < RHS) if LHS is negative
 295   if (lhsNeg != rhsNeg)
 296     return lhsNeg ? -1 : 1;
 297
 298   // Otherwise we can just use an unsigned comparison, because even negative
 299   // numbers compare correctly this way if both have the same signed-ness.
 300   return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
 301 }
 302
 303 void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
 304   unsigned loWord = whichWord(loBit);
 305   unsigned hiWord = whichWord(hiBit);
 306
 307   // Create an initial mask for the low word with zeros below loBit.
 308   uint64_t loMask = WORDTYPE_MAX << whichBit(loBit);
 309
 310   // If hiBit is not aligned, we need a high mask.
 311   unsigned hiShiftAmt = whichBit(hiBit);
 312   if (hiShiftAmt != 0) {
 313     // Create a high mask with zeros above hiBit.
 314     uint64_t hiMask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt);
 315     // If loWord and hiWord are equal, then we combine the masks. Otherwise,
 316     // set the bits in hiWord.
 317     if (hiWord == loWord)
 318       loMask &= hiMask;
 319     else
 320       U.pVal[hiWord] |= hiMask;
 321   }
 322   // Apply the mask to the low word.
 323   U.pVal[loWord] |= loMask;
 324
 325   // Fill any words between loWord and hiWord with all ones.
 326   for (unsigned word = loWord + 1; word < hiWord; ++word)
 327     U.pVal[word] = WORDTYPE_MAX;
 328 }
 329
 330 /// Toggle every bit to its opposite value.
 331 void APInt::flipAllBitsSlowCase() {
 332   tcComplement(U.pVal, getNumWords());
 333   clearUnusedBits();
 334 }
 335
 336 /// Toggle a given bit to its opposite value whose position is given
 337 /// as "bitPosition".
 338 /// Toggles a given bit to its opposite value.
 339 void APInt::flipBit(unsigned bitPosition) {
 340   assert(bitPosition < BitWidth && "Out of the bit-width range!");
 341   if ((*this)[bitPosition]) clearBit(bitPosition);
 342   else setBit(bitPosition);
 343 }
 344
 345 void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
 346   unsigned subBitWidth = subBits.getBitWidth();
 347   assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth &&
 348          "Illegal bit insertion");
 349
 350   // Insertion is a direct copy.
 351   if (subBitWidth == BitWidth) {
 352     *this = subBits;
 353     return;
 354   }
 355
 356   // Single word result can be done as a direct bitmask.
 357   if (isSingleWord()) {
 358     uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
 359     U.VAL &= ~(mask << bitPosition);
 360     U.VAL |= (subBits.U.VAL << bitPosition);
 361     return;
 362   }
 363
 364   unsigned loBit = whichBit(bitPosition);
 365   unsigned loWord = whichWord(bitPosition);
 366   unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1);
 367
 368   // Insertion within a single word can be done as a direct bitmask.
 369   if (loWord == hi1Word) {
 370     uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
 371     U.pVal[loWord] &= ~(mask << loBit);
 372     U.pVal[loWord] |= (subBits.U.VAL << loBit);
 373     return;
 374   }
 375
 376   // Insert on word boundaries.
 377   if (loBit == 0) {
 378     // Direct copy whole words.
 379     unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD;
 380     memcpy(U.pVal + loWord, subBits.getRawData(),
 381            numWholeSubWords * APINT_WORD_SIZE);
 382
 383     // Mask+insert remaining bits.
 384     unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD;
 385     if (remainingBits != 0) {
 386       uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - remainingBits);
 387       U.pVal[hi1Word] &= ~mask;
 388       U.pVal[hi1Word] |= subBits.getWord(subBitWidth - 1);
 389     }
 390     return;
 391   }
 392
 393   // General case - set/clear individual bits in dst based on src.
 394   // TODO - there is scope for optimization here, but at the moment this code
 395   // path is barely used so prefer readability over performance.
 396   for (unsigned i = 0; i != subBitWidth; ++i) {
 397     if (subBits[i])
 398       setBit(bitPosition + i);
 399     else
 400       clearBit(bitPosition + i);
 401   }
 402 }
 403
 404 void APInt::insertBits(uint64_t subBits, unsigned bitPosition, unsigned numBits) {
 405   uint64_t maskBits = maskTrailingOnes<uint64_t>(numBits);
 406   subBits &= maskBits;
 407   if (isSingleWord()) {
 408     U.VAL &= ~(maskBits << bitPosition);
 409     U.VAL |= subBits << bitPosition;
 410     return;
 411   }
 412
 413   unsigned loBit = whichBit(bitPosition);
 414   unsigned loWord = whichWord(bitPosition);
 415   unsigned hiWord = whichWord(bitPosition + numBits - 1);
 416   if (loWord == hiWord) {
 417     U.pVal[loWord] &= ~(maskBits << loBit);
 418     U.pVal[loWord] |= subBits << loBit;
 419     return;
 420   }
 421
 422   static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected");
 423   unsigned wordBits = 8 * sizeof(WordType);
 424   U.pVal[loWord] &= ~(maskBits << loBit);
 425   U.pVal[loWord] |= subBits << loBit;
 426
 427   U.pVal[hiWord] &= ~(maskBits >> (wordBits - loBit));
 428   U.pVal[hiWord] |= subBits >> (wordBits - loBit);
 429 }
 430
 431 APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const {
 432   assert(numBits > 0 && "Can't extract zero bits");
 433   assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
 434          "Illegal bit extraction");
 435
 436   if (isSingleWord())
 437     return APInt(numBits, U.VAL >> bitPosition);
 438
 439   unsigned loBit = whichBit(bitPosition);
 440   unsigned loWord = whichWord(bitPosition);
 441   unsigned hiWord = whichWord(bitPosition + numBits - 1);
 442
 443   // Single word result extracting bits from a single word source.
 444   if (loWord == hiWord)
 445     return APInt(numBits, U.pVal[loWord] >> loBit);
 446
 447   // Extracting bits that start on a source word boundary can be done
 448   // as a fast memory copy.
 449   if (loBit == 0)
 450     return APInt(numBits, makeArrayRef(U.pVal + loWord, 1 + hiWord - loWord));
 451
 452   // General case - shift + copy source words directly into place.
 453   APInt Result(numBits, 0);
 454   unsigned NumSrcWords = getNumWords();
 455   unsigned NumDstWords = Result.getNumWords();
 456
 457   uint64_t *DestPtr = Result.isSingleWord() ? &Result.U.VAL : Result.U.pVal;
 458   for (unsigned word = 0; word < NumDstWords; ++word) {
 459     uint64_t w0 = U.pVal[loWord + word];
 460     uint64_t w1 =
 461         (loWord + word + 1) < NumSrcWords ? U.pVal[loWord + word + 1] : 0;
 462     DestPtr[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit));
 463   }
 464
 465   return Result.clearUnusedBits();
 466 }
 467
 468 uint64_t APInt::extractBitsAsZExtValue(unsigned numBits,
 469                                        unsigned bitPosition) const {
 470   assert(numBits > 0 && "Can't extract zero bits");
 471   assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
 472          "Illegal bit extraction");
 473   assert(numBits <= 64 && "Illegal bit extraction");
 474
 475   uint64_t maskBits = maskTrailingOnes<uint64_t>(numBits);
 476   if (isSingleWord())
 477     return (U.VAL >> bitPosition) & maskBits;
 478
 479   unsigned loBit = whichBit(bitPosition);
 480   unsigned loWord = whichWord(bitPosition);
 481   unsigned hiWord = whichWord(bitPosition + numBits - 1);
 482   if (loWord == hiWord)
 483     return (U.pVal[loWord] >> loBit) & maskBits;
 484
 485   static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected");
 486   unsigned wordBits = 8 * sizeof(WordType);
 487   uint64_t retBits = U.pVal[loWord] >> loBit;
 488   retBits |= U.pVal[hiWord] << (wordBits - loBit);
 489   retBits &= maskBits;
 490   return retBits;
 491 }
 492
 493 unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
 494   assert(!str.empty() && "Invalid string length");
 495   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
 496           radix == 36) &&
 497          "Radix should be 2, 8, 10, 16, or 36!");
 498
 499   size_t slen = str.size();
 500
 501   // Each computation below needs to know if it's negative.
 502   StringRef::iterator p = str.begin();
 503   unsigned isNegative = *p == '-';
 504   if (*p == '-' || *p == '+') {
 505     p++;
 506     slen--;
 507     assert(slen && "String is only a sign, needs a value.");
 508   }
 509
 510   // For radixes of power-of-two values, the bits required is accurately and
 511   // easily computed
 512   if (radix == 2)
 513     return slen + isNegative;
 514   if (radix == 8)
 515     return slen * 3 + isNegative;
 516   if (radix == 16)
 517     return slen * 4 + isNegative;
 518
 519   // FIXME: base 36
 520
 521   // This is grossly inefficient but accurate. We could probably do something
 522   // with a computation of roughly slen*64/20 and then adjust by the value of
 523   // the first few digits. But, I'm not sure how accurate that could be.
 524
 525   // Compute a sufficient number of bits that is always large enough but might
 526   // be too large. This avoids the assertion in the constructor. This
 527   // calculation doesn't work appropriately for the numbers 0-9, so just use 4
 528   // bits in that case.
 529   unsigned sufficient
 530     = radix == 10? (slen == 1 ? 4 : slen * 64/18)
 531                  : (slen == 1 ? 7 : slen * 16/3);
 532
 533   // Convert to the actual binary value.
 534   APInt tmp(sufficient, StringRef(p, slen), radix);
 535
 536   // Compute how many bits are required. If the log is infinite, assume we need
 537   // just bit. If the log is exact and value is negative, then the value is
 538   // MinSignedValue with (log + 1) bits.
 539   unsigned log = tmp.logBase2();
 540   if (log == (unsigned)-1) {
 541     return isNegative + 1;
 542   } else if (isNegative && tmp.isPowerOf2()) {
 543     return isNegative + log;
 544   } else {
 545     return isNegative + log + 1;
 546   }
 547 }
 548
 549 hash_code llvm::hash_value(const APInt &Arg) {
 550   if (Arg.isSingleWord())
 551     return hash_combine(Arg.U.VAL);
 552
 553   return hash_combine_range(Arg.U.pVal, Arg.U.pVal + Arg.getNumWords());
 554 }
 555
 556 bool APInt::isSplat(unsigned SplatSizeInBits) const {
 557   assert(getBitWidth() % SplatSizeInBits == 0 &&
 558          "SplatSizeInBits must divide width!");
 559   // We can check that all parts of an integer are equal by making use of a
 560   // little trick: rotate and check if it's still the same value.
 561   return *this == rotl(SplatSizeInBits);
 562 }
 563
 564 /// This function returns the high "numBits" bits of this APInt.
 565 APInt APInt::getHiBits(unsigned numBits) const {
 566   return this->lshr(BitWidth - numBits);
 567 }
 568
 569 /// This function returns the low "numBits" bits of this APInt.
 570 APInt APInt::getLoBits(unsigned numBits) const {
 571   APInt Result(getLowBitsSet(BitWidth, numBits));
 572   Result &= *this;
 573   return Result;
 574 }
 575
 576 /// Return a value containing V broadcasted over NewLen bits.
 577 APInt APInt::getSplat(unsigned NewLen, const APInt &V) {
 578   assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!");
 579
 580   APInt Val = V.zextOrSelf(NewLen);
 581   for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1)
 582     Val |= Val << I;
 583
 584   return Val;
 585 }
 586
 587 unsigned APInt::countLeadingZerosSlowCase() const {
 588   unsigned Count = 0;
 589   for (int i = getNumWords()-1; i >= 0; --i) {
 590     uint64_t V = U.pVal[i];
 591     if (V == 0)
 592       Count += APINT_BITS_PER_WORD;
 593     else {
 594       Count += llvm::countLeadingZeros(V);
 595       break;
 596     }
 597   }
 598   // Adjust for unused bits in the most significant word (they are zero).
 599   unsigned Mod = BitWidth % APINT_BITS_PER_WORD;
 600   Count -= Mod > 0 ? APINT_BITS_PER_WORD - Mod : 0;
 601   return Count;
 602 }
 603
 604 unsigned APInt::countLeadingOnesSlowCase() const {
 605   unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
 606   unsigned shift;
 607   if (!highWordBits) {
 608     highWordBits = APINT_BITS_PER_WORD;
 609     shift = 0;
 610   } else {
 611     shift = APINT_BITS_PER_WORD - highWordBits;
 612   }
 613   int i = getNumWords() - 1;
 614   unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift);
 615   if (Count == highWordBits) {
 616     for (i--; i >= 0; --i) {
 617       if (U.pVal[i] == WORDTYPE_MAX)
 618         Count += APINT_BITS_PER_WORD;
 619       else {
 620         Count += llvm::countLeadingOnes(U.pVal[i]);
 621         break;
 622       }
 623     }
 624   }
 625   return Count;
 626 }
 627
 628 unsigned APInt::countTrailingZerosSlowCase() const {
 629   unsigned Count = 0;
 630   unsigned i = 0;
 631   for (; i < getNumWords() && U.pVal[i] == 0; ++i)
 632     Count += APINT_BITS_PER_WORD;
 633   if (i < getNumWords())
 634     Count += llvm::countTrailingZeros(U.pVal[i]);
 635   return std::min(Count, BitWidth);
 636 }
 637
 638 unsigned APInt::countTrailingOnesSlowCase() const {
 639   unsigned Count = 0;
 640   unsigned i = 0;
 641   for (; i < getNumWords() && U.pVal[i] == WORDTYPE_MAX; ++i)
 642     Count += APINT_BITS_PER_WORD;
 643   if (i < getNumWords())
 644     Count += llvm::countTrailingOnes(U.pVal[i]);
 645   assert(Count <= BitWidth);
 646   return Count;
 647 }
 648
 649 unsigned APInt::countPopulationSlowCase() const {
 650   unsigned Count = 0;
 651   for (unsigned i = 0; i < getNumWords(); ++i)
 652     Count += llvm::countPopulation(U.pVal[i]);
 653   return Count;
 654 }
 655
 656 bool APInt::intersectsSlowCase(const APInt &RHS) const {
 657   for (unsigned i = 0, e = getNumWords(); i != e; ++i)
 658     if ((U.pVal[i] & RHS.U.pVal[i]) != 0)
 659       return true;
 660
 661   return false;
 662 }
 663
 664 bool APInt::isSubsetOfSlowCase(const APInt &RHS) const {
 665   for (unsigned i = 0, e = getNumWords(); i != e; ++i)
 666     if ((U.pVal[i] & ~RHS.U.pVal[i]) != 0)
 667       return false;
 668
 669   return true;
 670 }
 671
 672 APInt APInt::byteSwap() const {
 673   assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
 674   if (BitWidth == 16)
 675     return APInt(BitWidth, ByteSwap_16(uint16_t(U.VAL)));
 676   if (BitWidth == 32)
 677     return APInt(BitWidth, ByteSwap_32(unsigned(U.VAL)));
 678   if (BitWidth == 48) {
 679     unsigned Tmp1 = unsigned(U.VAL >> 16);
 680     Tmp1 = ByteSwap_32(Tmp1);
 681     uint16_t Tmp2 = uint16_t(U.VAL);
 682     Tmp2 = ByteSwap_16(Tmp2);
 683     return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1);
 684   }
 685   if (BitWidth == 64)
 686     return APInt(BitWidth, ByteSwap_64(U.VAL));
 687
 688   APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0);
 689   for (unsigned I = 0, N = getNumWords(); I != N; ++I)
 690     Result.U.pVal[I] = ByteSwap_64(U.pVal[N - I - 1]);
 691   if (Result.BitWidth != BitWidth) {
 692     Result.lshrInPlace(Result.BitWidth - BitWidth);
 693     Result.BitWidth = BitWidth;
 694   }
 695   return Result;
 696 }
 697
 698 APInt APInt::reverseBits() const {
 699   switch (BitWidth) {
 700   case 64:
 701     return APInt(BitWidth, llvm::reverseBits<uint64_t>(U.VAL));
 702   case 32:
 703     return APInt(BitWidth, llvm::reverseBits<uint32_t>(U.VAL));
 704   case 16:
 705     return APInt(BitWidth, llvm::reverseBits<uint16_t>(U.VAL));
 706   case 8:
 707     return APInt(BitWidth, llvm::reverseBits<uint8_t>(U.VAL));
 708   default:
 709     break;
 710   }
 711
 712   APInt Val(*this);
 713   APInt Reversed(BitWidth, 0);
 714   unsigned S = BitWidth;
 715
 716   for (; Val != 0; Val.lshrInPlace(1)) {
 717     Reversed <<= 1;
 718     Reversed |= Val[0];
 719     --S;
 720   }
 721
 722   Reversed <<= S;
 723   return Reversed;
 724 }
 725
 726 APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) {
 727   // Fast-path a common case.
 728   if (A == B) return A;
 729
 730   // Corner cases: if either operand is zero, the other is the gcd.
 731   if (!A) return B;
 732   if (!B) return A;
 733
 734   // Count common powers of 2 and remove all other powers of 2.
 735   unsigned Pow2;
 736   {
 737     unsigned Pow2_A = A.countTrailingZeros();
 738     unsigned Pow2_B = B.countTrailingZeros();
 739     if (Pow2_A > Pow2_B) {
 740       A.lshrInPlace(Pow2_A - Pow2_B);
 741       Pow2 = Pow2_B;
 742     } else if (Pow2_B > Pow2_A) {
 743       B.lshrInPlace(Pow2_B - Pow2_A);
 744       Pow2 = Pow2_A;
 745     } else {
 746       Pow2 = Pow2_A;
 747     }
 748   }
 749
 750   // Both operands are odd multiples of 2^Pow_2:
 751   //
 752   //   gcd(a, b) = gcd(|a - b| / 2^i, min(a, b))
 753   //
 754   // This is a modified version of Stein's algorithm, taking advantage of
 755   // efficient countTrailingZeros().
 756   while (A != B) {
 757     if (A.ugt(B)) {
 758       A -= B;
 759       A.lshrInPlace(A.countTrailingZeros() - Pow2);
 760     } else {
 761       B -= A;
 762       B.lshrInPlace(B.countTrailingZeros() - Pow2);
 763     }
 764   }
 765
 766   return A;
 767 }
 768
 769 APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
 770   uint64_t I = bit_cast<uint64_t>(Double);
 771
 772   // Get the sign bit from the highest order bit
 773   bool isNeg = I >> 63;
 774
 775   // Get the 11-bit exponent and adjust for the 1023 bit bias
 776   int64_t exp = ((I >> 52) & 0x7ff) - 1023;
 777
 778   // If the exponent is negative, the value is < 0 so just return 0.
 779   if (exp < 0)
 780     return APInt(width, 0u);
 781
 782   // Extract the mantissa by clearing the top 12 bits (sign + exponent).
 783   uint64_t mantissa = (I & (~0ULL >> 12)) | 1ULL << 52;
 784
 785   // If the exponent doesn't shift all bits out of the mantissa
 786   if (exp < 52)
 787     return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
 788                     APInt(width, mantissa >> (52 - exp));
 789
 790   // If the client didn't provide enough bits for us to shift the mantissa into
 791   // then the result is undefined, just return 0
 792   if (width <= exp - 52)
 793     return APInt(width, 0);
 794
 795   // Otherwise, we have to shift the mantissa bits up to the right location
 796   APInt Tmp(width, mantissa);
 797   Tmp <<= (unsigned)exp - 52;
 798   return isNeg ? -Tmp : Tmp;
 799 }
 800
 801 /// This function converts this APInt to a double.
 802 /// The layout for double is as following (IEEE Standard 754):
 803 ///  --------------------------------------
 804 /// |  Sign    Exponent    Fraction    Bias |
 805 /// |-------------------------------------- |
 806 /// |  1[63]   11[62-52]   52[51-00]   1023 |
 807 ///  --------------------------------------
 808 double APInt::roundToDouble(bool isSigned) const {
 809
 810   // Handle the simple case where the value is contained in one uint64_t.
 811   // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
 812   if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
 813     if (isSigned) {
 814       int64_t sext = SignExtend64(getWord(0), BitWidth);
 815       return double(sext);
 816     } else
 817       return double(getWord(0));
 818   }
 819
 820   // Determine if the value is negative.
 821   bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
 822
 823   // Construct the absolute value if we're negative.
 824   APInt Tmp(isNeg ? -(*this) : (*this));
 825
 826   // Figure out how many bits we're using.
 827   unsigned n = Tmp.getActiveBits();
 828
 829   // The exponent (without bias normalization) is just the number of bits
 830   // we are using. Note that the sign bit is gone since we constructed the
 831   // absolute value.
 832   uint64_t exp = n;
 833
 834   // Return infinity for exponent overflow
 835   if (exp > 1023) {
 836     if (!isSigned || !isNeg)
 837       return std::numeric_limits<double>::infinity();
 838     else
 839       return -std::numeric_limits<double>::infinity();
 840   }
 841   exp += 1023; // Increment for 1023 bias
 842
 843   // Number of bits in mantissa is 52. To obtain the mantissa value, we must
 844   // extract the high 52 bits from the correct words in pVal.
 845   uint64_t mantissa;
 846   unsigned hiWord = whichWord(n-1);
 847   if (hiWord == 0) {
 848     mantissa = Tmp.U.pVal[0];
 849     if (n > 52)
 850       mantissa >>= n - 52; // shift down, we want the top 52 bits.
 851   } else {
 852     assert(hiWord > 0 && "huh?");
 853     uint64_t hibits = Tmp.U.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
 854     uint64_t lobits = Tmp.U.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
 855     mantissa = hibits | lobits;
 856   }
 857
 858   // The leading bit of mantissa is implicit, so get rid of it.
 859   uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
 860   uint64_t I = sign | (exp << 52) | mantissa;
 861   return bit_cast<double>(I);
 862 }
 863
 864 // Truncate to new width.
 865 APInt APInt::trunc(unsigned width) const {
 866   assert(width < BitWidth && "Invalid APInt Truncate request");
 867   assert(width && "Can't truncate to 0 bits");
 868
 869   if (width <= APINT_BITS_PER_WORD)
 870     return APInt(width, getRawData()[0]);
 871
 872   APInt Result(getMemory(getNumWords(width)), width);
 873
 874   // Copy full words.
 875   unsigned i;
 876   for (i = 0; i != width / APINT_BITS_PER_WORD; i++)
 877     Result.U.pVal[i] = U.pVal[i];
 878
 879   // Truncate and copy any partial word.
 880   unsigned bits = (0 - width) % APINT_BITS_PER_WORD;
 881   if (bits != 0)
 882     Result.U.pVal[i] = U.pVal[i] << bits >> bits;
 883
 884   return Result;
 885 }
 886
 887 // Sign extend to a new width.
 888 APInt APInt::sext(unsigned Width) const {
 889   assert(Width > BitWidth && "Invalid APInt SignExtend request");
 890
 891   if (Width <= APINT_BITS_PER_WORD)
 892     return APInt(Width, SignExtend64(U.VAL, BitWidth));
 893
 894   APInt Result(getMemory(getNumWords(Width)), Width);
 895
 896   // Copy words.
 897   std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
 898
 899   // Sign extend the last word since there may be unused bits in the input.
 900   Result.U.pVal[getNumWords() - 1] =
 901       SignExtend64(Result.U.pVal[getNumWords() - 1],
 902                    ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
 903
 904   // Fill with sign bits.
 905   std::memset(Result.U.pVal + getNumWords(), isNegative() ? -1 : 0,
 906               (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
 907   Result.clearUnusedBits();
 908   return Result;
 909 }
 910
 911 //  Zero extend to a new width.
 912 APInt APInt::zext(unsigned width) const {
 913   assert(width > BitWidth && "Invalid APInt ZeroExtend request");
 914
 915   if (width <= APINT_BITS_PER_WORD)
 916     return APInt(width, U.VAL);
 917
 918   APInt Result(getMemory(getNumWords(width)), width);
 919
 920   // Copy words.
 921   std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
 922
 923   // Zero remaining words.
 924   std::memset(Result.U.pVal + getNumWords(), 0,
 925               (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
 926
 927   return Result;
 928 }
 929
 930 APInt APInt::zextOrTrunc(unsigned width) const {
 931   if (BitWidth < width)
 932     return zext(width);
 933   if (BitWidth > width)
 934     return trunc(width);
 935   return *this;
 936 }
 937
 938 APInt APInt::sextOrTrunc(unsigned width) const {
 939   if (BitWidth < width)
 940     return sext(width);
 941   if (BitWidth > width)
 942     return trunc(width);
 943   return *this;
 944 }
 945
 946 APInt APInt::zextOrSelf(unsigned width) const {
 947   if (BitWidth < width)
 948     return zext(width);
 949   return *this;
 950 }
 951
 952 APInt APInt::sextOrSelf(unsigned width) const {
 953   if (BitWidth < width)
 954     return sext(width);
 955   return *this;
 956 }
 957
 958 /// Arithmetic right-shift this APInt by shiftAmt.
 959 /// Arithmetic right-shift function.
 960 void APInt::ashrInPlace(const APInt &shiftAmt) {
 961   ashrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
 962 }
 963
 964 /// Arithmetic right-shift this APInt by shiftAmt.
 965 /// Arithmetic right-shift function.
 966 void APInt::ashrSlowCase(unsigned ShiftAmt) {
 967   // Don't bother performing a no-op shift.
 968   if (!ShiftAmt)
 969     return;
 970
 971   // Save the original sign bit for later.
 972   bool Negative = isNegative();
 973
 974   // WordShift is the inter-part shift; BitShift is intra-part shift.
 975   unsigned WordShift = ShiftAmt / APINT_BITS_PER_WORD;
 976   unsigned BitShift = ShiftAmt % APINT_BITS_PER_WORD;
 977
 978   unsigned WordsToMove = getNumWords() - WordShift;
 979   if (WordsToMove != 0) {
 980     // Sign extend the last word to fill in the unused bits.
 981     U.pVal[getNumWords() - 1] = SignExtend64(
 982         U.pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
 983
 984     // Fastpath for moving by whole words.
 985     if (BitShift == 0) {
 986       std::memmove(U.pVal, U.pVal + WordShift, WordsToMove * APINT_WORD_SIZE);
 987     } else {
 988       // Move the words containing significant bits.
 989       for (unsigned i = 0; i != WordsToMove - 1; ++i)
 990         U.pVal[i] = (U.pVal[i + WordShift] >> BitShift) |
 991                     (U.pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift));
 992
 993       // Handle the last word which has no high bits to copy.
 994       U.pVal[WordsToMove - 1] = U.pVal[WordShift + WordsToMove - 1] >> BitShift;
 995       // Sign extend one more time.
 996       U.pVal[WordsToMove - 1] =
 997           SignExtend64(U.pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift);
 998     }
 999   }
1000
1001   // Fill in the remainder based on the original sign.
1002   std::memset(U.pVal + WordsToMove, Negative ? -1 : 0,
1003               WordShift * APINT_WORD_SIZE);
1004   clearUnusedBits();
1005 }
1006
1007 /// Logical right-shift this APInt by shiftAmt.
1008 /// Logical right-shift function.
1009 void APInt::lshrInPlace(const APInt &shiftAmt) {
1010   lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
1011 }
1012
1013 /// Logical right-shift this APInt by shiftAmt.
1014 /// Logical right-shift function.
1015 void APInt::lshrSlowCase(unsigned ShiftAmt) {
1016   tcShiftRight(U.pVal, getNumWords(), ShiftAmt);
1017 }
1018
1019 /// Left-shift this APInt by shiftAmt.
1020 /// Left-shift function.
1021 APInt &APInt::operator<<=(const APInt &shiftAmt) {
1022   // It's undefined behavior in C to shift by BitWidth or greater.
1023   *this <<= (unsigned)shiftAmt.getLimitedValue(BitWidth);
1024   return *this;
1025 }
1026
1027 void APInt::shlSlowCase(unsigned ShiftAmt) {
1028   tcShiftLeft(U.pVal, getNumWords(), ShiftAmt);
1029   clearUnusedBits();
1030 }
1031
1032 // Calculate the rotate amount modulo the bit width.
1033 static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) {
1034   unsigned rotBitWidth = rotateAmt.getBitWidth();
1035   APInt rot = rotateAmt;
1036   if (rotBitWidth < BitWidth) {
1037     // Extend the rotate APInt, so that the urem doesn't divide by 0.
1038     // e.g. APInt(1, 32) would give APInt(1, 0).
1039     rot = rotateAmt.zext(BitWidth);
1040   }
1041   rot = rot.urem(APInt(rot.getBitWidth(), BitWidth));
1042   return rot.getLimitedValue(BitWidth);
1043 }
1044
1045 APInt APInt::rotl(const APInt &rotateAmt) const {
1046   return rotl(rotateModulo(BitWidth, rotateAmt));
1047 }
1048
1049 APInt APInt::rotl(unsigned rotateAmt) const {
1050   rotateAmt %= BitWidth;
1051   if (rotateAmt == 0)
1052     return *this;
1053   return shl(rotateAmt) | lshr(BitWidth - rotateAmt);
1054 }
1055
1056 APInt APInt::rotr(const APInt &rotateAmt) const {
1057   return rotr(rotateModulo(BitWidth, rotateAmt));
1058 }
1059
1060 APInt APInt::rotr(unsigned rotateAmt) const {
1061   rotateAmt %= BitWidth;
1062   if (rotateAmt == 0)
1063     return *this;
1064   return lshr(rotateAmt) | shl(BitWidth - rotateAmt);
1065 }
1066
1067 // Square Root - this method computes and returns the square root of "this".
1068 // Three mechanisms are used for computation. For small values (<= 5 bits),
1069 // a table lookup is done. This gets some performance for common cases. For
1070 // values using less than 52 bits, the value is converted to double and then
1071 // the libc sqrt function is called. The result is rounded and then converted
1072 // back to a uint64_t which is then used to construct the result. Finally,
1073 // the Babylonian method for computing square roots is used.
1074 APInt APInt::sqrt() const {
1075
1076   // Determine the magnitude of the value.
1077   unsigned magnitude = getActiveBits();
1078
1079   // Use a fast table for some small values. This also gets rid of some
1080   // rounding errors in libc sqrt for small values.
1081   if (magnitude <= 5) {
1082     static const uint8_t results[32] = {
1083       /*     0 */ 0,
1084       /*  1- 2 */ 1, 1,
1085       /*  3- 6 */ 2, 2, 2, 2,
1086       /*  7-12 */ 3, 3, 3, 3, 3, 3,
1087       /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1088       /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1089       /*    31 */ 6
1090     };
1091     return APInt(BitWidth, results[ (isSingleWord() ? U.VAL : U.pVal[0]) ]);
1092   }
1093
1094   // If the magnitude of the value fits in less than 52 bits (the precision of
1095   // an IEEE double precision floating point value), then we can use the
1096   // libc sqrt function which will probably use a hardware sqrt computation.
1097   // This should be faster than the algorithm below.
1098   if (magnitude < 52) {
1099     return APInt(BitWidth,
1100                  uint64_t(::round(::sqrt(double(isSingleWord() ? U.VAL
1101                                                                : U.pVal[0])))));
1102   }
1103
1104   // Okay, all the short cuts are exhausted. We must compute it. The following
1105   // is a classical Babylonian method for computing the square root. This code
1106   // was adapted to APInt from a wikipedia article on such computations.
1107   // See http://www.wikipedia.org/ and go to the page named
1108   // Calculate_an_integer_square_root.
1109   unsigned nbits = BitWidth, i = 4;
1110   APInt testy(BitWidth, 16);
1111   APInt x_old(BitWidth, 1);
1112   APInt x_new(BitWidth, 0);
1113   APInt two(BitWidth, 2);
1114
1115   // Select a good starting value using binary logarithms.
1116   for (;; i += 2, testy = testy.shl(2))
1117     if (i >= nbits || this->ule(testy)) {
1118       x_old = x_old.shl(i / 2);
1119       break;
1120     }
1121
1122   // Use the Babylonian method to arrive at the integer square root:
1123   for (;;) {
1124     x_new = (this->udiv(x_old) + x_old).udiv(two);
1125     if (x_old.ule(x_new))
1126       break;
1127     x_old = x_new;
1128   }
1129
1130   // Make sure we return the closest approximation
1131   // NOTE: The rounding calculation below is correct. It will produce an
1132   // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1133   // determined to be a rounding issue with pari/gp as it begins to use a
1134   // floating point representation after 192 bits. There are no discrepancies
1135   // between this algorithm and pari/gp for bit widths < 192 bits.
1136   APInt square(x_old * x_old);
1137   APInt nextSquare((x_old + 1) * (x_old +1));
1138   if (this->ult(square))
1139     return x_old;
1140   assert(this->ule(nextSquare) && "Error in APInt::sqrt computation");
1141   APInt midpoint((nextSquare - square).udiv(two));
1142   APInt offset(*this - square);
1143   if (offset.ult(midpoint))
1144     return x_old;
1145   return x_old + 1;
1146 }
1147
1148 /// Computes the multiplicative inverse of this APInt for a given modulo. The
1149 /// iterative extended Euclidean algorithm is used to solve for this value,
1150 /// however we simplify it to speed up calculating only the inverse, and take
1151 /// advantage of div+rem calculations. We also use some tricks to avoid copying
1152 /// (potentially large) APInts around.
1153 /// WARNING: a value of '0' may be returned,
1154 ///          signifying that no multiplicative inverse exists!
1155 APInt APInt::multiplicativeInverse(const APInt& modulo) const {
1156   assert(ult(modulo) && "This APInt must be smaller than the modulo");
1157
1158   // Using the properties listed at the following web page (accessed 06/21/08):
1159   //   http://www.numbertheory.org/php/euclid.html
1160   // (especially the properties numbered 3, 4 and 9) it can be proved that
1161   // BitWidth bits suffice for all the computations in the algorithm implemented
1162   // below. More precisely, this number of bits suffice if the multiplicative
1163   // inverse exists, but may not suffice for the general extended Euclidean
1164   // algorithm.
1165
1166   APInt r[2] = { modulo, *this };
1167   APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
1168   APInt q(BitWidth, 0);
1169
1170   unsigned i;
1171   for (i = 0; r[i^1] != 0; i ^= 1) {
1172     // An overview of the math without the confusing bit-flipping:
1173     // q = r[i-2] / r[i-1]
1174     // r[i] = r[i-2] % r[i-1]
1175     // t[i] = t[i-2] - t[i-1] * q
1176     udivrem(r[i], r[i^1], q, r[i]);
1177     t[i] -= t[i^1] * q;
1178   }
1179
1180   // If this APInt and the modulo are not coprime, there is no multiplicative
1181   // inverse, so return 0. We check this by looking at the next-to-last
1182   // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1183   // algorithm.
1184   if (r[i] != 1)
1185     return APInt(BitWidth, 0);
1186
1187   // The next-to-last t is the multiplicative inverse.  However, we are
1188   // interested in a positive inverse. Calculate a positive one from a negative
1189   // one if necessary. A simple addition of the modulo suffices because
1190   // abs(t[i]) is known to be less than *this/2 (see the link above).
1191   if (t[i].isNegative())
1192     t[i] += modulo;
1193
1194   return std::move(t[i]);
1195 }
1196
1197 /// Calculate the magic numbers required to implement a signed integer division
1198 /// by a constant as a sequence of multiplies, adds and shifts.  Requires that
1199 /// the divisor not be 0, 1, or -1.  Taken from "Hacker's Delight", Henry S.
1200 /// Warren, Jr., chapter 10.
1201 APInt::ms APInt::magic() const {
1202   const APInt& d = *this;
1203   unsigned p;
1204   APInt ad, anc, delta, q1, r1, q2, r2, t;
1205   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1206   struct ms mag;
1207
1208   ad = d.abs();
1209   t = signedMin + (d.lshr(d.getBitWidth() - 1));
1210   anc = t - 1 - t.urem(ad);   // absolute value of nc
1211   p = d.getBitWidth() - 1;    // initialize p
1212   q1 = signedMin.udiv(anc);   // initialize q1 = 2p/abs(nc)
1213   r1 = signedMin - q1*anc;    // initialize r1 = rem(2p,abs(nc))
1214   q2 = signedMin.udiv(ad);    // initialize q2 = 2p/abs(d)
1215   r2 = signedMin - q2*ad;     // initialize r2 = rem(2p,abs(d))
1216   do {
1217     p = p + 1;
1218     q1 = q1<<1;          // update q1 = 2p/abs(nc)
1219     r1 = r1<<1;          // update r1 = rem(2p/abs(nc))
1220     if (r1.uge(anc)) {  // must be unsigned comparison
1221       q1 = q1 + 1;
1222       r1 = r1 - anc;
1223     }
1224     q2 = q2<<1;          // update q2 = 2p/abs(d)
1225     r2 = r2<<1;          // update r2 = rem(2p/abs(d))
1226     if (r2.uge(ad)) {   // must be unsigned comparison
1227       q2 = q2 + 1;
1228       r2 = r2 - ad;
1229     }
1230     delta = ad - r2;
1231   } while (q1.ult(delta) || (q1 == delta && r1 == 0));
1232
1233   mag.m = q2 + 1;
1234   if (d.isNegative()) mag.m = -mag.m;   // resulting magic number
1235   mag.s = p - d.getBitWidth();          // resulting shift
1236   return mag;
1237 }
1238
1239 /// Calculate the magic numbers required to implement an unsigned integer
1240 /// division by a constant as a sequence of multiplies, adds and shifts.
1241 /// Requires that the divisor not be 0.  Taken from "Hacker's Delight", Henry
1242 /// S. Warren, Jr., chapter 10.
1243 /// LeadingZeros can be used to simplify the calculation if the upper bits
1244 /// of the divided value are known zero.
1245 APInt::mu APInt::magicu(unsigned LeadingZeros) const {
1246   const APInt& d = *this;
1247   unsigned p;
1248   APInt nc, delta, q1, r1, q2, r2;
1249   struct mu magu;
1250   magu.a = 0;               // initialize "add" indicator
1251   APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros);
1252   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1253   APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
1254
1255   nc = allOnes - (allOnes - d).urem(d);
1256   p = d.getBitWidth() - 1;  // initialize p
1257   q1 = signedMin.udiv(nc);  // initialize q1 = 2p/nc
1258   r1 = signedMin - q1*nc;   // initialize r1 = rem(2p,nc)
1259   q2 = signedMax.udiv(d);   // initialize q2 = (2p-1)/d
1260   r2 = signedMax - q2*d;    // initialize r2 = rem((2p-1),d)
1261   do {
1262     p = p + 1;
1263     if (r1.uge(nc - r1)) {
1264       q1 = q1 + q1 + 1;  // update q1
1265       r1 = r1 + r1 - nc; // update r1
1266     }
1267     else {
1268       q1 = q1+q1; // update q1
1269       r1 = r1+r1; // update r1
1270     }
1271     if ((r2 + 1).uge(d - r2)) {
1272       if (q2.uge(signedMax)) magu.a = 1;
1273       q2 = q2+q2 + 1;     // update q2
1274       r2 = r2+r2 + 1 - d; // update r2
1275     }
1276     else {
1277       if (q2.uge(signedMin)) magu.a = 1;
1278       q2 = q2+q2;     // update q2
1279       r2 = r2+r2 + 1; // update r2
1280     }
1281     delta = d - 1 - r2;
1282   } while (p < d.getBitWidth()*2 &&
1283            (q1.ult(delta) || (q1 == delta && r1 == 0)));
1284   magu.m = q2 + 1; // resulting magic number
1285   magu.s = p - d.getBitWidth();  // resulting shift
1286   return magu;
1287 }
1288
/// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
/// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
/// variables here have the same names as in the algorithm. Comments explain
/// the algorithm and any deviation from it.
///
/// All numbers are arrays of 32-bit digits, least significant digit first.
///
/// \param u Dividend. Digits u[0..m+n-1] hold the value; u[m+n] is written as
///          the spill digit of normalization, so the array must have m+n+1
///          elements. Overwritten during the computation.
/// \param v Divisor of n digits. It is shifted left in place during
///          normalization and is not shifted back afterwards.
/// \param q Receives the quotient digits q[0..m].
/// \param r If non-null, receives the n digits of the remainder.
/// \param m Number of digits by which the dividend exceeds the divisor.
/// \param n Number of digits in the divisor; must be greater than 1.
static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
                     unsigned m, unsigned n) {
  assert(u && "Must provide dividend");
  assert(v && "Must provide divisor");
  assert(q && "Must provide quotient");
  assert(u != v && u != q && v != q && "Must use different memory");
  assert(n>1 && "n must be > 1");

  // b denotes the base of the number system. In our case b is 2^32.
  const uint64_t b = uint64_t(1) << 32;

// The DEBUG macros here tend to be spam in the debug output if you're not
// debugging this code. Disable them unless KNUTH_DEBUG is defined.
#ifdef KNUTH_DEBUG
#define DEBUG_KNUTH(X) LLVM_DEBUG(X)
#else
#define DEBUG_KNUTH(X) do {} while(false)
#endif

  DEBUG_KNUTH(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
  DEBUG_KNUTH(dbgs() << "KnuthDiv: original:");
  DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
  DEBUG_KNUTH(dbgs() << " by");
  DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
  DEBUG_KNUTH(dbgs() << '\n');
  // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
  // u and v by d. Note that we have taken Knuth's advice here to use a power
  // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
  // 2 allows us to shift instead of multiply and it is easy to determine the
  // shift amount from the leading zeros.  We are basically normalizing the u
  // and v so that its high bits are shifted to the top of v's range without
  // overflow. Note that this can require an extra word in u so that u must
  // be of length m+n+1.
  unsigned shift = countLeadingZeros(v[n-1]);
  uint32_t v_carry = 0;
  uint32_t u_carry = 0;
  // The shift is skipped entirely when it is zero, which also avoids a right
  // shift by 32 bits in the carry computations below.
  if (shift) {
    for (unsigned i = 0; i < m+n; ++i) {
      uint32_t u_tmp = u[i] >> (32 - shift);
      u[i] = (u[i] << shift) | u_carry;
      u_carry = u_tmp;
    }
    for (unsigned i = 0; i < n; ++i) {
      uint32_t v_tmp = v[i] >> (32 - shift);
      v[i] = (v[i] << shift) | v_carry;
      v_carry = v_tmp;
    }
  }
  // The bits shifted out of the top dividend digit land in the spill digit.
  u[m+n] = u_carry;

  DEBUG_KNUTH(dbgs() << "KnuthDiv:   normal:");
  DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
  DEBUG_KNUTH(dbgs() << " by");
  DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
  DEBUG_KNUTH(dbgs() << '\n');

  // D2. [Initialize j.]  Set j to m. This is the loop counter over the places.
  int j = m;
  do {
    DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
    // D3. [Calculate q'.].
    //     Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
    //     Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
    // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
    // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test
    // on v[n-2] determines at high speed most of the cases in which the trial
    // value qp is one too large, and it eliminates all cases where qp is two
    // too large.
    uint64_t dividend = Make_64(u[j+n], u[j+n-1]);
    DEBUG_KNUTH(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
    uint64_t qp = dividend / v[n-1];
    uint64_t rp = dividend % v[n-1];
    if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
      qp--;
      rp += v[n-1];
      // The test is repeated at most once, written out instead of looping.
      if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
        qp--;
    }
    DEBUG_KNUTH(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');

    // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
    // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
    // consists of a simple multiplication by a one-place number, combined with
    // a subtraction.
    // The digits (u[j+n]...u[j]) should be kept positive; if the result of
    // this step is actually negative, (u[j+n]...u[j]) should be left as the
    // true value plus b**(n+1), namely as the b's complement of
    // the true value, and a "borrow" to the left should be remembered.
    int64_t borrow = 0;
    for (unsigned i = 0; i < n; ++i) {
      // p is the 64-bit product of the trial digit and one divisor digit. Its
      // low half is subtracted from the current dividend digit; its high half
      // is folded into the borrow carried to the next digit.
      uint64_t p = uint64_t(qp) * uint64_t(v[i]);
      int64_t subres = int64_t(u[j+i]) - borrow - Lo_32(p);
      u[j+i] = Lo_32(subres);
      borrow = Hi_32(p) - Hi_32(subres);
      DEBUG_KNUTH(dbgs() << "KnuthDiv: u[j+i] = " << u[j + i]
                        << ", borrow = " << borrow << '\n');
    }
    // The step went negative if the final borrow exceeds the top digit. This
    // comparison is done in int64_t because borrow is an int64_t.
    bool isNeg = u[j+n] < borrow;
    u[j+n] -= Lo_32(borrow);

    DEBUG_KNUTH(dbgs() << "KnuthDiv: after subtraction:");
    DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
    DEBUG_KNUTH(dbgs() << '\n');

    // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
    // negative, go to step D6; otherwise go on to step D7.
    q[j] = Lo_32(qp);
    if (isNeg) {
      // D6. [Add back]. The probability that this step is necessary is very
      // small, on the order of only 2/b. Make sure that test data accounts for
      // this possibility. Decrease q[j] by 1
      q[j]--;
      // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
      // A carry will occur to the left of u[j+n], and it should be ignored
      // since it cancels with the borrow that occurred in D4.
      bool carry = false;
      for (unsigned i = 0; i < n; i++) {
        // A 32-bit sum wrapped around iff it ends up below the smaller
        // operand, or equal to it when a carry was also added in.
        uint32_t limit = std::min(u[j+i],v[i]);
        u[j+i] += v[i] + carry;
        carry = u[j+i] < limit || (carry && u[j+i] == limit);
      }
      u[j+n] += carry;
    }
    DEBUG_KNUTH(dbgs() << "KnuthDiv: after correction:");
    DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
    DEBUG_KNUTH(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');

    // D7. [Loop on j.]  Decrease j by one. Now if j >= 0, go back to D3.
  } while (--j >= 0);

  DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient:");
  DEBUG_KNUTH(for (int i = m; i >= 0; i--) dbgs() << " " << q[i]);
  DEBUG_KNUTH(dbgs() << '\n');

  // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
  // remainder may be obtained by dividing u[...] by d. If r is non-null we
  // compute the remainder (urem uses this).
  if (r) {
    // The value d is expressed by the "shift" value above since we avoided
    // multiplication by d by using a shift left. So, all we have to do is
    // shift right here.
    if (shift) {
      uint32_t carry = 0;
      DEBUG_KNUTH(dbgs() << "KnuthDiv: remainder:");
      // Walk from the most significant digit down so that the bits shifted
      // out of one digit can be carried into the digit below it.
      for (int i = n-1; i >= 0; i--) {
        r[i] = (u[i] >> shift) | carry;
        carry = u[i] << (32 - shift);
        DEBUG_KNUTH(dbgs() << " " << r[i]);
      }
    } else {
      // No normalization shift was applied, so the low n digits of u already
      // are the remainder.
      for (int i = n-1; i >= 0; i--) {
        r[i] = u[i];
        DEBUG_KNUTH(dbgs() << " " << r[i]);
      }
    }
    DEBUG_KNUTH(dbgs() << '\n');
  }
  DEBUG_KNUTH(dbgs() << '\n');
}
1452
/// Unsigned division of one word array by another, producing the quotient
/// and/or the remainder.
///
/// \param LHS       Dividend, lhsWords 64-bit words.
/// \param lhsWords  Number of words read from LHS; must be >= rhsWords.
/// \param RHS       Divisor, rhsWords 64-bit words; must not be zero.
/// \param rhsWords  Number of words read from RHS.
/// \param Quotient  If non-null, lhsWords words of quotient are written here.
/// \param Remainder If non-null, rhsWords words of remainder are written here.
void APInt::divide(const WordType *LHS, unsigned lhsWords, const WordType *RHS,
                   unsigned rhsWords, WordType *Quotient, WordType *Remainder) {
  assert(lhsWords >= rhsWords && "Fractional result");

  // First, compose the values into an array of 32-bit words instead of
  // 64-bit words. This is a necessity of both the "short division" algorithm
  // and the Knuth "classical algorithm" which requires there to be native
  // operations for +, -, and * on an m bit value with an m*2 bit result. We
  // can't use 64-bit operands here because we don't have native results of
  // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
  // work on big-endian machines.
  unsigned n = rhsWords * 2;
  unsigned m = (lhsWords * 2) - n;

  // Allocate space for the temporary values we need either on the stack, if
  // it will fit, or on the heap if it won't.
  // The buffers are U (m+n+1 digits), V (n digits), Q (m+n digits) and, only
  // when a remainder is requested, R (n digits).
  uint32_t SPACE[128];
  uint32_t *U = nullptr;
  uint32_t *V = nullptr;
  uint32_t *Q = nullptr;
  uint32_t *R = nullptr;
  if ((Remainder?4:3)*n+2*m+1 <= 128) {
    // Everything fits: carve the four buffers out of SPACE back to back.
    U = &SPACE[0];
    V = &SPACE[m+n+1];
    Q = &SPACE[(m+n+1) + n];
    if (Remainder)
      R = &SPACE[(m+n+1) + n + (m+n)];
  } else {
    U = new uint32_t[m + n + 1];
    V = new uint32_t[n];
    Q = new uint32_t[m+n];
    if (Remainder)
      R = new uint32_t[n];
  }

  // Initialize the dividend
  memset(U, 0, (m+n+1)*sizeof(uint32_t));
  for (unsigned i = 0; i < lhsWords; ++i) {
    // Each 64-bit word becomes two 32-bit digits, low half first.
    uint64_t tmp = LHS[i];
    U[i * 2] = Lo_32(tmp);
    U[i * 2 + 1] = Hi_32(tmp);
  }
  U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.

  // Initialize the divisor
  memset(V, 0, (n)*sizeof(uint32_t));
  for (unsigned i = 0; i < rhsWords; ++i) {
    uint64_t tmp = RHS[i];
    V[i * 2] = Lo_32(tmp);
    V[i * 2 + 1] = Hi_32(tmp);
  }

  // initialize the quotient and remainder
  memset(Q, 0, (m+n) * sizeof(uint32_t));
  if (Remainder)
    memset(R, 0, n * sizeof(uint32_t));

  // Now, adjust m and n for the Knuth division. n is the number of words in
  // the divisor. m is the number of words by which the dividend exceeds the
  // divisor (i.e. m+n is the length of the dividend). These sizes must not
  // contain any zero words or the Knuth algorithm fails.
  // Dropping a leading zero digit of the divisor shrinks n and grows m, so
  // m+n (the dividend length) is unchanged by the first loop.
  for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
    n--;
    m++;
  }
  // Dropping a leading zero digit of the dividend shrinks m only.
  for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
    m--;

  // If we're left with only a single word for the divisor, Knuth doesn't work
  // so we implement the short division algorithm here. This is much simpler
  // and faster because we are certain that we can divide a 64-bit quantity
  // by a 32-bit quantity at hardware speed and short division is simply a
  // series of such operations. This is just like doing short division but we
  // are using base 2^32 instead of base 10.
  assert(n != 0 && "Divide by zero?");
  if (n == 1) {
    uint32_t divisor = V[0];
    uint32_t remainder = 0;
    // Process dividend digits from most to least significant, carrying the
    // running remainder into the high half of the next partial dividend.
    for (int i = m; i >= 0; i--) {
      uint64_t partial_dividend = Make_64(remainder, U[i]);
      if (partial_dividend == 0) {
        Q[i] = 0;
        remainder = 0;
      } else if (partial_dividend < divisor) {
        Q[i] = 0;
        remainder = Lo_32(partial_dividend);
      } else if (partial_dividend == divisor) {
        Q[i] = 1;
        remainder = 0;
      } else {
        Q[i] = Lo_32(partial_dividend / divisor);
        // Only the low 32 bits of the difference are kept, so the 32-bit
        // product Q[i] * divisor is sufficient here.
        remainder = Lo_32(partial_dividend - (Q[i] * divisor));
      }
    }
    if (R)
      R[0] = remainder;
  } else {
    // Now we're ready to invoke the Knuth classical divide algorithm. In this
    // case n > 1.
    KnuthDiv(U, V, Q, R, m, n);
  }

  // If the caller wants the quotient
  if (Quotient) {
    // Reassemble pairs of 32-bit digits into 64-bit words.
    for (unsigned i = 0; i < lhsWords; ++i)
      Quotient[i] = Make_64(Q[i*2+1], Q[i*2]);
  }

  // If the caller wants the remainder
  if (Remainder) {
    for (unsigned i = 0; i < rhsWords; ++i)
      Remainder[i] = Make_64(R[i*2+1], R[i*2]);
  }

  // Clean up the memory we allocated.
  // U points into SPACE exactly when the stack buffers were used; otherwise
  // all buffers came from new[] (R may be null, which delete[] accepts).
  if (U != &SPACE[0]) {
    delete [] U;
    delete [] V;
    delete [] Q;
    delete [] R;
  }
}
1575
1576 APInt APInt::udiv(const APInt &RHS) const {
1577   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1578
1579   // First, deal with the easy case
1580   if (isSingleWord()) {
1581     assert(RHS.U.VAL != 0 && "Divide by zero?");
1582     return APInt(BitWidth, U.VAL / RHS.U.VAL);
1583   }
1584
1585   // Get some facts about the LHS and RHS number of bits and words
1586   unsigned lhsWords = getNumWords(getActiveBits());
1587   unsigned rhsBits  = RHS.getActiveBits();
1588   unsigned rhsWords = getNumWords(rhsBits);
1589   assert(rhsWords && "Divided by zero???");
1590
1591   // Deal with some degenerate cases
1592   if (!lhsWords)
1593     // 0 / X ===> 0
1594     return APInt(BitWidth, 0);
1595   if (rhsBits == 1)
1596     // X / 1 ===> X
1597     return *this;
1598   if (lhsWords < rhsWords || this->ult(RHS))
1599     // X / Y ===> 0, iff X < Y
1600     return APInt(BitWidth, 0);
1601   if (*this == RHS)
1602     // X / X ===> 1
1603     return APInt(BitWidth, 1);
1604   if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1605     // All high words are zero, just use native divide
1606     return APInt(BitWidth, this->U.pVal[0] / RHS.U.pVal[0]);
1607
1608   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1609   APInt Quotient(BitWidth, 0); // to hold result.
1610   divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, nullptr);
1611   return Quotient;
1612 }
1613
1614 APInt APInt::udiv(uint64_t RHS) const {
1615   assert(RHS != 0 && "Divide by zero?");
1616
1617   // First, deal with the easy case
1618   if (isSingleWord())
1619     return APInt(BitWidth, U.VAL / RHS);
1620
1621   // Get some facts about the LHS words.
1622   unsigned lhsWords = getNumWords(getActiveBits());
1623
1624   // Deal with some degenerate cases
1625   if (!lhsWords)
1626     // 0 / X ===> 0
1627     return APInt(BitWidth, 0);
1628   if (RHS == 1)
1629     // X / 1 ===> X
1630     return *this;
1631   if (this->ult(RHS))
1632     // X / Y ===> 0, iff X < Y
1633     return APInt(BitWidth, 0);
1634   if (*this == RHS)
1635     // X / X ===> 1
1636     return APInt(BitWidth, 1);
1637   if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1638     // All high words are zero, just use native divide
1639     return APInt(BitWidth, this->U.pVal[0] / RHS);
1640
1641   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1642   APInt Quotient(BitWidth, 0); // to hold result.
1643   divide(U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, nullptr);
1644   return Quotient;
1645 }
1646
1647 APInt APInt::sdiv(const APInt &RHS) const {
1648   if (isNegative()) {
1649     if (RHS.isNegative())
1650       return (-(*this)).udiv(-RHS);
1651     return -((-(*this)).udiv(RHS));
1652   }
1653   if (RHS.isNegative())
1654     return -(this->udiv(-RHS));
1655   return this->udiv(RHS);
1656 }
1657
1658 APInt APInt::sdiv(int64_t RHS) const {
1659   if (isNegative()) {
1660     if (RHS < 0)
1661       return (-(*this)).udiv(-RHS);
1662     return -((-(*this)).udiv(RHS));
1663   }
1664   if (RHS < 0)
1665     return -(this->udiv(-RHS));
1666   return this->udiv(RHS);
1667 }
1668
1669 APInt APInt::urem(const APInt &RHS) const {
1670   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1671   if (isSingleWord()) {
1672     assert(RHS.U.VAL != 0 && "Remainder by zero?");
1673     return APInt(BitWidth, U.VAL % RHS.U.VAL);
1674   }
1675
1676   // Get some facts about the LHS
1677   unsigned lhsWords = getNumWords(getActiveBits());
1678
1679   // Get some facts about the RHS
1680   unsigned rhsBits = RHS.getActiveBits();
1681   unsigned rhsWords = getNumWords(rhsBits);
1682   assert(rhsWords && "Performing remainder operation by zero ???");
1683
1684   // Check the degenerate cases
1685   if (lhsWords == 0)
1686     // 0 % Y ===> 0
1687     return APInt(BitWidth, 0);
1688   if (rhsBits == 1)
1689     // X % 1 ===> 0
1690     return APInt(BitWidth, 0);
1691   if (lhsWords < rhsWords || this->ult(RHS))
1692     // X % Y ===> X, iff X < Y
1693     return *this;
1694   if (*this == RHS)
1695     // X % X == 0;
1696     return APInt(BitWidth, 0);
1697   if (lhsWords == 1)
1698     // All high words are zero, just use native remainder
1699     return APInt(BitWidth, U.pVal[0] % RHS.U.pVal[0]);
1700
1701   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1702   APInt Remainder(BitWidth, 0);
1703   divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, nullptr, Remainder.U.pVal);
1704   return Remainder;
1705 }
1706
1707 uint64_t APInt::urem(uint64_t RHS) const {
1708   assert(RHS != 0 && "Remainder by zero?");
1709
1710   if (isSingleWord())
1711     return U.VAL % RHS;
1712
1713   // Get some facts about the LHS
1714   unsigned lhsWords = getNumWords(getActiveBits());
1715
1716   // Check the degenerate cases
1717   if (lhsWords == 0)
1718     // 0 % Y ===> 0
1719     return 0;
1720   if (RHS == 1)
1721     // X % 1 ===> 0
1722     return 0;
1723   if (this->ult(RHS))
1724     // X % Y ===> X, iff X < Y
1725     return getZExtValue();
1726   if (*this == RHS)
1727     // X % X == 0;
1728     return 0;
1729   if (lhsWords == 1)
1730     // All high words are zero, just use native remainder
1731     return U.pVal[0] % RHS;
1732
1733   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1734   uint64_t Remainder;
1735   divide(U.pVal, lhsWords, &RHS, 1, nullptr, &Remainder);
1736   return Remainder;
1737 }
1738
1739 APInt APInt::srem(const APInt &RHS) const {
1740   if (isNegative()) {
1741     if (RHS.isNegative())
1742       return -((-(*this)).urem(-RHS));
1743     return -((-(*this)).urem(RHS));
1744   }
1745   if (RHS.isNegative())
1746     return this->urem(-RHS);
1747   return this->urem(RHS);
1748 }
1749
1750 int64_t APInt::srem(int64_t RHS) const {
1751   if (isNegative()) {
1752     if (RHS < 0)
1753       return -((-(*this)).urem(-RHS));
1754     return -((-(*this)).urem(RHS));
1755   }
1756   if (RHS < 0)
1757     return this->urem(-RHS);
1758   return this->urem(RHS);
1759 }
1760
1761 void APInt::udivrem(const APInt &LHS, const APInt &RHS,
1762                     APInt &Quotient, APInt &Remainder) {
1763   assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same");
1764   unsigned BitWidth = LHS.BitWidth;
1765
1766   // First, deal with the easy case
1767   if (LHS.isSingleWord()) {
1768     assert(RHS.U.VAL != 0 && "Divide by zero?");
1769     uint64_t QuotVal = LHS.U.VAL / RHS.U.VAL;
1770     uint64_t RemVal = LHS.U.VAL % RHS.U.VAL;
1771     Quotient = APInt(BitWidth, QuotVal);
1772     Remainder = APInt(BitWidth, RemVal);
1773     return;
1774   }
1775
1776   // Get some size facts about the dividend and divisor
1777   unsigned lhsWords = getNumWords(LHS.getActiveBits());
1778   unsigned rhsBits  = RHS.getActiveBits();
1779   unsigned rhsWords = getNumWords(rhsBits);
1780   assert(rhsWords && "Performing divrem operation by zero ???");
1781
1782   // Check the degenerate cases
1783   if (lhsWords == 0) {
1784     Quotient = APInt(BitWidth, 0);    // 0 / Y ===> 0
1785     Remainder = APInt(BitWidth, 0);   // 0 % Y ===> 0
1786     return;
1787   }
1788
1789   if (rhsBits == 1) {
1790     Quotient = LHS;                   // X / 1 ===> X
1791     Remainder = APInt(BitWidth, 0);   // X % 1 ===> 0
1792   }
1793
1794   if (lhsWords < rhsWords || LHS.ult(RHS)) {
1795     Remainder = LHS;                  // X % Y ===> X, iff X < Y
1796     Quotient = APInt(BitWidth, 0);    // X / Y ===> 0, iff X < Y
1797     return;
1798   }
1799
1800   if (LHS == RHS) {
1801     Quotient  = APInt(BitWidth, 1);   // X / X ===> 1
1802     Remainder = APInt(BitWidth, 0);   // X % X ===> 0;
1803     return;
1804   }
1805
1806   // Make sure there is enough space to hold the results.
1807   // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1808   // change the size. This is necessary if Quotient or Remainder is aliased
1809   // with LHS or RHS.
1810   Quotient.reallocate(BitWidth);
1811   Remainder.reallocate(BitWidth);
1812
1813   if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1814     // There is only one word to consider so use the native versions.
1815     uint64_t lhsValue = LHS.U.pVal[0];
1816     uint64_t rhsValue = RHS.U.pVal[0];
1817     Quotient = lhsValue / rhsValue;
1818     Remainder = lhsValue % rhsValue;
1819     return;
1820   }
1821
1822   // Okay, lets do it the long way
1823   divide(LHS.U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal,
1824          Remainder.U.pVal);
1825   // Clear the rest of the Quotient and Remainder.
1826   std::memset(Quotient.U.pVal + lhsWords, 0,
1827               (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1828   std::memset(Remainder.U.pVal + rhsWords, 0,
1829               (getNumWords(BitWidth) - rhsWords) * APINT_WORD_SIZE);
1830 }
1831
/// Unsigned division of LHS by a 64-bit divisor: sets Quotient to LHS / RHS
/// and Remainder to LHS % RHS.  RHS must be non-zero (checked by an
/// assertion).  Quotient may be aliased with LHS, which is why each path
/// finishes reading LHS before it overwrites Quotient.
void APInt::udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient,
                    uint64_t &Remainder) {
  assert(RHS != 0 && "Divide by zero?");
  unsigned BitWidth = LHS.BitWidth;

  // First, deal with the easy case
  if (LHS.isSingleWord()) {
    // Both results are derived from LHS.U.VAL before Quotient is assigned.
    uint64_t QuotVal = LHS.U.VAL / RHS;
    Remainder = LHS.U.VAL % RHS;
    Quotient = APInt(BitWidth, QuotVal);
    return;
  }

  // Get some size facts about the dividend and divisor
  unsigned lhsWords = getNumWords(LHS.getActiveBits());

  // Check the degenerate cases
  if (lhsWords == 0) {
    Quotient = APInt(BitWidth, 0);    // 0 / Y ===> 0
    Remainder = 0;                    // 0 % Y ===> 0
    return;
  }

  if (RHS == 1) {
    Quotient = LHS;                   // X / 1 ===> X
    Remainder = 0;                    // X % 1 ===> 0
    return;
  }

  if (LHS.ult(RHS)) {
    Remainder = LHS.getZExtValue();   // X % Y ===> X, iff X < Y
    Quotient = APInt(BitWidth, 0);    // X / Y ===> 0, iff X < Y
    return;
  }

  if (LHS == RHS) {
    Quotient  = APInt(BitWidth, 1);   // X / X ===> 1
    Remainder = 0;                    // X % X ===> 0;
    return;
  }

  // Make sure there is enough space to hold the results.
  // NOTE: This assumes that reallocate won't affect any bits if it doesn't
  // change the size. This is necessary if Quotient is aliased with LHS.
  Quotient.reallocate(BitWidth);

  if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
    // There is only one word to consider so use the native versions.
    // The low word is copied out first since Quotient may be LHS itself.
    uint64_t lhsValue = LHS.U.pVal[0];
    Quotient = lhsValue / RHS;
    Remainder = lhsValue % RHS;
    return;
  }

  // Okay, lets do it the long way
  // RHS is passed as a one-word divisor and Remainder as a one-word output.
  divide(LHS.U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, &Remainder);
  // Clear the rest of the Quotient (the words from index lhsWords upward).
  std::memset(Quotient.U.pVal + lhsWords, 0,
              (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
}
1892
1893 void APInt::sdivrem(const APInt &LHS, const APInt &RHS,
1894                     APInt &Quotient, APInt &Remainder) {
1895   if (LHS.isNegative()) {
1896     if (RHS.isNegative())
1897       APInt::udivrem(-LHS, -RHS, Quotient, Remainder);
1898     else {
1899       APInt::udivrem(-LHS, RHS, Quotient, Remainder);
1900       Quotient.negate();
1901     }
1902     Remainder.negate();
1903   } else if (RHS.isNegative()) {
1904     APInt::udivrem(LHS, -RHS, Quotient, Remainder);
1905     Quotient.negate();
1906   } else {
1907     APInt::udivrem(LHS, RHS, Quotient, Remainder);
1908   }
1909 }
1910
1911 void APInt::sdivrem(const APInt &LHS, int64_t RHS,
1912                     APInt &Quotient, int64_t &Remainder) {
1913   uint64_t R = Remainder;
1914   if (LHS.isNegative()) {
1915     if (RHS < 0)
1916       APInt::udivrem(-LHS, -RHS, Quotient, R);
1917     else {
1918       APInt::udivrem(-LHS, RHS, Quotient, R);
1919       Quotient.negate();
1920     }
1921     R = -R;
1922   } else if (RHS < 0) {
1923     APInt::udivrem(LHS, -RHS, Quotient, R);
1924     Quotient.negate();
1925   } else {
1926     APInt::udivrem(LHS, RHS, Quotient, R);
1927   }
1928   Remainder = R;
1929 }
1930
1931 APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const {
1932   APInt Res = *this+RHS;
1933   Overflow = isNonNegative() == RHS.isNonNegative() &&
1934              Res.isNonNegative() != isNonNegative();
1935   return Res;
1936 }
1937
1938 APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const {
1939   APInt Res = *this+RHS;
1940   Overflow = Res.ult(RHS);
1941   return Res;
1942 }
1943
1944 APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const {
1945   APInt Res = *this - RHS;
1946   Overflow = isNonNegative() != RHS.isNonNegative() &&
1947              Res.isNonNegative() != isNonNegative();
1948   return Res;
1949 }
1950
1951 APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const {
1952   APInt Res = *this-RHS;
1953   Overflow = Res.ugt(*this);
1954   return Res;
1955 }
1956
1957 APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {
1958   // MININT/-1  -->  overflow.
1959   Overflow = isMinSignedValue() && RHS.isAllOnesValue();
1960   return sdiv(RHS);
1961 }
1962
1963 APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
1964   APInt Res = *this * RHS;
1965
1966   if (*this != 0 && RHS != 0)
1967     Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS;
1968   else
1969     Overflow = false;
1970   return Res;
1971 }
1972
1973 APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
1974   if (countLeadingZeros() + RHS.countLeadingZeros() + 2 <= BitWidth) {
1975     Overflow = true;
1976     return *this * RHS;
1977   }
1978
1979   APInt Res = lshr(1) * RHS;
1980   Overflow = Res.isNegative();
1981   Res <<= 1;
1982   if ((*this)[0]) {
1983     Res += RHS;
1984     if (Res.ult(RHS))
1985       Overflow = true;
1986   }
1987   return Res;
1988 }
1989
1990 APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const {
1991   Overflow = ShAmt.uge(getBitWidth());
1992   if (Overflow)
1993     return APInt(BitWidth, 0);
1994
1995   if (isNonNegative()) // Don't allow sign change.
1996     Overflow = ShAmt.uge(countLeadingZeros());
1997   else
1998     Overflow = ShAmt.uge(countLeadingOnes());
1999
2000   return *this << ShAmt;
2001 }
2002
2003 APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const {
2004   Overflow = ShAmt.uge(getBitWidth());
2005   if (Overflow)
2006     return APInt(BitWidth, 0);
2007
2008   Overflow = ShAmt.ugt(countLeadingZeros());
2009
2010   return *this << ShAmt;
2011 }
2012
2013 APInt APInt::sadd_sat(const APInt &RHS) const {
2014   bool Overflow;
2015   APInt Res = sadd_ov(RHS, Overflow);
2016   if (!Overflow)
2017     return Res;
2018
2019   return isNegative() ? APInt::getSignedMinValue(BitWidth)
2020                       : APInt::getSignedMaxValue(BitWidth);
2021 }
2022
2023 APInt APInt::uadd_sat(const APInt &RHS) const {
2024   bool Overflow;
2025   APInt Res = uadd_ov(RHS, Overflow);
2026   if (!Overflow)
2027     return Res;
2028
2029   return APInt::getMaxValue(BitWidth);
2030 }
2031
2032 APInt APInt::ssub_sat(const APInt &RHS) const {
2033   bool Overflow;
2034   APInt Res = ssub_ov(RHS, Overflow);
2035   if (!Overflow)
2036     return Res;
2037
2038   return isNegative() ? APInt::getSignedMinValue(BitWidth)
2039                       : APInt::getSignedMaxValue(BitWidth);
2040 }
2041
2042 APInt APInt::usub_sat(const APInt &RHS) const {
2043   bool Overflow;
2044   APInt Res = usub_ov(RHS, Overflow);
2045   if (!Overflow)
2046     return Res;
2047
2048   return APInt(BitWidth, 0);
2049 }
2050
2051
2052 void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
2053   // Check our assumptions here
2054   assert(!str.empty() && "Invalid string length");
2055   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
2056           radix == 36) &&
2057          "Radix should be 2, 8, 10, 16, or 36!");
2058
2059   StringRef::iterator p = str.begin();
2060   size_t slen = str.size();
2061   bool isNeg = *p == '-';
2062   if (*p == '-' || *p == '+') {
2063     p++;
2064     slen--;
2065     assert(slen && "String is only a sign, needs a value.");
2066   }
2067   assert((slen <= numbits || radix != 2) && "Insufficient bit width");
2068   assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
2069   assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
2070   assert((((slen-1)*64)/22 <= numbits || radix != 10) &&
2071          "Insufficient bit width");
2072
2073   // Allocate memory if needed
2074   if (isSingleWord())
2075     U.VAL = 0;
2076   else
2077     U.pVal = getClearedMemory(getNumWords());
2078
2079   // Figure out if we can shift instead of multiply
2080   unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
2081
2082   // Enter digit traversal loop
2083   for (StringRef::iterator e = str.end(); p != e; ++p) {
2084     unsigned digit = getDigit(*p, radix);
2085     assert(digit < radix && "Invalid character in digit string");
2086
2087     // Shift or multiply the value by the radix
2088     if (slen > 1) {
2089       if (shift)
2090         *this <<= shift;
2091       else
2092         *this *= radix;
2093     }
2094
2095     // Add in the digit we just interpreted
2096     *this += digit;
2097   }
2098   // If its negative, put it in two's complement form
2099   if (isNeg)
2100     this->negate();
2101 }
2102
/// Appends the textual form of this value to Str.
///
/// \param Str               buffer the characters are appended to.
/// \param Radix             2, 8, 10, 16 or 36 (checked by an assertion).
/// \param Signed            when true, a negative value is emitted as '-'
///                          followed by its magnitude.
/// \param formatAsCLiteral  when true, a "0b", "0" or "0x" prefix is emitted
///                          for radix 2, 8 and 16 respectively (none for 10;
///                          radix 36 is rejected as unreachable).
void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
                     bool Signed, bool formatAsCLiteral) const {
  assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 ||
          Radix == 36) &&
         "Radix should be 2, 8, 10, 16, or 36!");

  const char *Prefix = "";
  if (formatAsCLiteral) {
    switch (Radix) {
      case 2:
        // Binary literals are a non-standard extension added in gcc 4.3:
        // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
        Prefix = "0b";
        break;
      case 8:
        Prefix = "0";
        break;
      case 10:
        break; // No prefix
      case 16:
        Prefix = "0x";
        break;
      default:
        llvm_unreachable("Invalid radix!");
    }
  }

  // First, check for a zero value and just short circuit the logic below.
  if (*this == 0) {
    while (*Prefix) {
      Str.push_back(*Prefix);
      ++Prefix;
    };
    Str.push_back('0');
    return;
  }

  // Digit characters indexed by digit value; 36 entries cover radix 36.
  static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";

  if (isSingleWord()) {
    // Digits are written from the end of Buffer toward the front, so they
    // come out in the right order without a reversal.
    char Buffer[65];
    char *BufPtr = std::end(Buffer);

    // N receives the magnitude to print.
    uint64_t N;
    if (!Signed) {
      N = getZExtValue();
    } else {
      int64_t I = getSExtValue();
      if (I >= 0) {
        N = I;
      } else {
        Str.push_back('-');
        // Negate in unsigned arithmetic.
        N = -(uint64_t)I;
      }
    }

    // The prefix goes after any sign and before the digits.
    while (*Prefix) {
      Str.push_back(*Prefix);
      ++Prefix;
    };

    while (N) {
      *--BufPtr = Digits[N % Radix];
      N /= Radix;
    }
    Str.append(BufPtr, std::end(Buffer));
    return;
  }

  // Multi-word path: work on a copy that is consumed digit by digit.
  APInt Tmp(*this);

  if (Signed && isNegative()) {
    // They want to print the signed version and it is a negative value
    // Flip the bits and add one to turn it into the equivalent positive
    // value and put a '-' in the result.
    Tmp.negate();
    Str.push_back('-');
  }

  while (*Prefix) {
    Str.push_back(*Prefix);
    ++Prefix;
  };

  // We insert the digits backward, then reverse them to get the right order.
  unsigned StartDig = Str.size();

  // For the radix 2, 8 and 16 cases, we can just shift instead of divide
  // because the number of bits per digit (1, 3 and 4 respectively) divides
  // equally.  We just shift until the value is zero.
  if (Radix == 2 || Radix == 8 || Radix == 16) {
    // Just shift tmp right for each digit width until it becomes zero
    unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
    unsigned MaskAmt = Radix - 1;

    while (Tmp.getBoolValue()) {
      unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
      Str.push_back(Digits[Digit]);
      Tmp.lshrInPlace(ShiftAmt);
    }
  } else {
    while (Tmp.getBoolValue()) {
      uint64_t Digit;
      // Tmp is both dividend and quotient: each step divides it by Radix in
      // place and yields the next least-significant digit.
      udivrem(Tmp, Radix, Tmp, Digit);
      assert(Digit < Radix && "divide failed");
      Str.push_back(Digits[Digit]);
    }
  }

  // Reverse the digits before returning.
  std::reverse(Str.begin()+StartDig, Str.end());
}
2215
2216 /// Returns the APInt as a std::string. Note that this is an inefficient method.
2217 /// It is better to pass in a SmallVector/SmallString to the methods above.
2218 std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
2219   SmallString<40> S;
2220   toString(S, Radix, Signed, /* formatAsCLiteral = */false);
2221   return S.str();
2222 }
2223
2224 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2225 LLVM_DUMP_METHOD void APInt::dump() const {
2226   SmallString<40> S, U;
2227   this->toStringUnsigned(U);
2228   this->toStringSigned(S);
2229   dbgs() << "APInt(" << BitWidth << "b, "
2230          << U << "u " << S << "s)\n";
2231 }
2232 #endif
2233
2234 void APInt::print(raw_ostream &OS, bool isSigned) const {
2235   SmallString<40> S;
2236   this->toString(S, 10, isSigned, /* formatAsCLiteral = */false);
2237   OS << S;
2238 }
2239
2240 // This implements a variety of operations on a representation of
2241 // arbitrary precision, two's-complement, bignum integer values.
2242
// lowHalf and highHalf below split a word at APINT_BITS_PER_WORD / 2, so the
// word width must be even.  The original note also lists partMSB and partLSB
// as relying on this.  A fairly safe and unrestricting assumption.
static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0,
              "Part width must be divisible by 2!");
2247
2248 /* Some handy functions local to this file.  */
2249
2250 /* Returns the integer part with the least significant BITS set.
2251    BITS cannot be zero.  */
2252 static inline APInt::WordType lowBitMask(unsigned bits) {
2253   assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD);
2254
2255   return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits);
2256 }
2257
2258 /* Returns the value of the lower half of PART.  */
2259 static inline APInt::WordType lowHalf(APInt::WordType part) {
2260   return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2);
2261 }
2262
2263 /* Returns the value of the upper half of PART.  */
2264 static inline APInt::WordType highHalf(APInt::WordType part) {
2265   return part >> (APInt::APINT_BITS_PER_WORD / 2);
2266 }
2267
2268 /* Returns the bit number of the most significant set bit of a part.
2269    If the input number has no bits set -1U is returned.  */
2270 static unsigned partMSB(APInt::WordType value) {
2271   return findLastSet(value, ZB_Max);
2272 }
2273
2274 /* Returns the bit number of the least significant set bit of a
2275    part.  If the input number has no bits set -1U is returned.  */
2276 static unsigned partLSB(APInt::WordType value) {
2277   return findFirstSet(value, ZB_Max);
2278 }
2279
2280 /* Sets the least significant part of a bignum to the input value, and
2281    zeroes out higher parts.  */
2282 void APInt::tcSet(WordType *dst, WordType part, unsigned parts) {
2283   assert(parts > 0);
2284
2285   dst[0] = part;
2286   for (unsigned i = 1; i < parts; i++)
2287     dst[i] = 0;
2288 }
2289
2290 /* Assign one bignum to another.  */
2291 void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) {
2292   for (unsigned i = 0; i < parts; i++)
2293     dst[i] = src[i];
2294 }
2295
2296 /* Returns true if a bignum is zero, false otherwise.  */
2297 bool APInt::tcIsZero(const WordType *src, unsigned parts) {
2298   for (unsigned i = 0; i < parts; i++)
2299     if (src[i])
2300       return false;
2301
2302   return true;
2303 }
2304
2305 /* Extract the given bit of a bignum; returns 0 or 1.  */
2306 int APInt::tcExtractBit(const WordType *parts, unsigned bit) {
2307   return (parts[whichWord(bit)] & maskBit(bit)) != 0;
2308 }
2309
2310 /* Set the given bit of a bignum. */
2311 void APInt::tcSetBit(WordType *parts, unsigned bit) {
2312   parts[whichWord(bit)] |= maskBit(bit);
2313 }
2314
2315 /* Clears the given bit of a bignum. */
2316 void APInt::tcClearBit(WordType *parts, unsigned bit) {
2317   parts[whichWord(bit)] &= ~maskBit(bit);
2318 }
2319
2320 /* Returns the bit number of the least significant set bit of a
2321    number.  If the input number has no bits set -1U is returned.  */
2322 unsigned APInt::tcLSB(const WordType *parts, unsigned n) {
2323   for (unsigned i = 0; i < n; i++) {
2324     if (parts[i] != 0) {
2325       unsigned lsb = partLSB(parts[i]);
2326
2327       return lsb + i * APINT_BITS_PER_WORD;
2328     }
2329   }
2330
2331   return -1U;
2332 }
2333
2334 /* Returns the bit number of the most significant set bit of a number.
2335    If the input number has no bits set -1U is returned.  */
2336 unsigned APInt::tcMSB(const WordType *parts, unsigned n) {
2337   do {
2338     --n;
2339
2340     if (parts[n] != 0) {
2341       unsigned msb = partMSB(parts[n]);
2342
2343       return msb + n * APINT_BITS_PER_WORD;
2344     }
2345   } while (n);
2346
2347   return -1U;
2348 }
2349
/* Copy the bit vector of width srcBITS from SRC, starting at bit
   srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
   the least significant bit of DST.  All high bits above srcBITS in
   DST are zero-filled.  dstCOUNT must be large enough to hold srcBITS
   bits (checked by an assertion).  */
void
APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src,
                 unsigned srcBits, unsigned srcLSB) {
  /* Number of destination parts needed for srcBits bits, rounded up.  */
  unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
  assert(dstParts <= dstCount);

  /* Copy whole parts starting at the one that contains bit srcLSB.  */
  unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD;
  tcAssign (dst, src + firstSrcPart, dstParts);

  /* Then shift so that bit srcLSB lands at bit 0 of DST.  */
  unsigned shift = srcLSB % APINT_BITS_PER_WORD;
  tcShiftRight (dst, dstParts, shift);

  /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC
     in DST.  If this is less than srcBits, append the rest, else
     clear the high bits.  */
  unsigned n = dstParts * APINT_BITS_PER_WORD - shift;
  if (n < srcBits) {
    /* The missing bits come from the next source part.  */
    WordType mask = lowBitMask (srcBits - n);
    dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
                          << n % APINT_BITS_PER_WORD);
  } else if (n > srcBits) {
    /* Mask the top part only when srcBits is not a whole number of
       parts.  */
    if (srcBits % APINT_BITS_PER_WORD)
      dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD);
  }

  /* Clear high parts.  */
  while (dstParts < dstCount)
    dst[dstParts++] = 0;
}
2383
2384 /* DST += RHS + C where C is zero or one.  Returns the carry flag.  */
2385 APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs,
2386                              WordType c, unsigned parts) {
2387   assert(c <= 1);
2388
2389   for (unsigned i = 0; i < parts; i++) {
2390     WordType l = dst[i];
2391     if (c) {
2392       dst[i] += rhs[i] + 1;
2393       c = (dst[i] <= l);
2394     } else {
2395       dst[i] += rhs[i];
2396       c = (dst[i] < l);
2397     }
2398   }
2399
2400   return c;
2401 }
2402
2403 /// This function adds a single "word" integer, src, to the multiple
2404 /// "word" integer array, dst[]. dst[] is modified to reflect the addition and
2405 /// 1 is returned if there is a carry out, otherwise 0 is returned.
2406 /// @returns the carry of the addition.
2407 APInt::WordType APInt::tcAddPart(WordType *dst, WordType src,
2408                                  unsigned parts) {
2409   for (unsigned i = 0; i < parts; ++i) {
2410     dst[i] += src;
2411     if (dst[i] >= src)
2412       return 0; // No need to carry so exit early.
2413     src = 1; // Carry one to next digit.
2414   }
2415
2416   return 1;
2417 }
2418
2419 /* DST -= RHS + C where C is zero or one.  Returns the carry flag.  */
2420 APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs,
2421                                   WordType c, unsigned parts) {
2422   assert(c <= 1);
2423
2424   for (unsigned i = 0; i < parts; i++) {
2425     WordType l = dst[i];
2426     if (c) {
2427       dst[i] -= rhs[i] + 1;
2428       c = (dst[i] >= l);
2429     } else {
2430       dst[i] -= rhs[i];
2431       c = (dst[i] > l);
2432     }
2433   }
2434
2435   return c;
2436 }
2437
2438 /// This function subtracts a single "word" (64-bit word), src, from
2439 /// the multi-word integer array, dst[], propagating the borrowed 1 value until
2440 /// no further borrowing is needed or it runs out of "words" in dst.  The result
2441 /// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not
2442 /// exhausted. In other words, if src > dst then this function returns 1,
2443 /// otherwise 0.
2444 /// @returns the borrow out of the subtraction
2445 APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src,
2446                                       unsigned parts) {
2447   for (unsigned i = 0; i < parts; ++i) {
2448     WordType Dst = dst[i];
2449     dst[i] -= src;
2450     if (src <= Dst)
2451       return 0; // No need to borrow so exit early.
2452     src = 1; // We have to "borrow 1" from next "word"
2453   }
2454
2455   return 1;
2456 }
2457
/* Negate a bignum of PARTS words in-place by calling tcComplement and then
   tcIncrement on it.  */
void APInt::tcNegate(WordType *dst, unsigned parts) {
  tcComplement(dst, parts);
  tcIncrement(dst, parts);
}
2463
/*  DST += SRC * MULTIPLIER + CARRY   if add is true
    DST  = SRC * MULTIPLIER + CARRY   if add is false

    Requires 0 <= DSTPARTS <= SRCPARTS + 1.  If DST overlaps SRC
    they must start at the same point, i.e. DST == SRC.

    If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
    returned.  Otherwise DST is filled with the least significant
    DSTPARTS parts of the result, and if all of the omitted higher
    parts were zero return zero, otherwise overflow occurred and
    return one.  */
int APInt::tcMultiplyPart(WordType *dst, const WordType *src,
                          WordType multiplier, WordType carry,
                          unsigned srcParts, unsigned dstParts,
                          bool add) {
  /* Otherwise our writes of DST kill our later reads of SRC.  */
  assert(dst <= src || dst >= src + srcParts);
  assert(dstParts <= srcParts + 1);

  /* N loops; minimum of dstParts and srcParts.  */
  unsigned n = std::min(dstParts, srcParts);

  for (unsigned i = 0; i < n; i++) {
    WordType low, mid, high, srcPart;

    /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.

       This cannot overflow, because

       (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)

       which is less than n^2.  */

    srcPart = src[i];

    if (multiplier == 0 || srcPart == 0) {
      /* The product is zero, so only the incoming carry remains.  */
      low = carry;
      high = 0;
    } else {
      /* Build the double-word product from four half-word products:
         low*low and high*high first, then the two cross terms.  */
      low = lowHalf(srcPart) * lowHalf(multiplier);
      high = highHalf(srcPart) * highHalf(multiplier);

      /* First cross term: its upper half adds to HIGH, its lower half is
         shifted up and added to LOW (with carry into HIGH on wrap).  */
      mid = lowHalf(srcPart) * highHalf(multiplier);
      high += highHalf(mid);
      mid <<= APINT_BITS_PER_WORD / 2;
      if (low + mid < low)
        high++;
      low += mid;

      /* Second cross term, handled the same way.  */
      mid = highHalf(srcPart) * lowHalf(multiplier);
      high += highHalf(mid);
      mid <<= APINT_BITS_PER_WORD / 2;
      if (low + mid < low)
        high++;
      low += mid;

      /* Now add carry.  */
      if (low + carry < low)
        high++;
      low += carry;
    }

    if (add) {
      /* And now DST[i], and store the new low part there.  */
      if (low + dst[i] < low)
        high++;
      dst[i] += low;
    } else
      dst[i] = low;

    /* The high word becomes the carry into the next part.  */
    carry = high;
  }

  if (srcParts < dstParts) {
    /* Full multiplication, there is no overflow.  */
    assert(srcParts + 1 == dstParts);
    dst[srcParts] = carry;
    return 0;
  }

  /* We overflowed if there is carry.  */
  if (carry)
    return 1;

  /* We would overflow if any significant unwritten parts would be
     non-zero.  This is true if any remaining src parts are non-zero
     and the multiplier is non-zero.  */
  if (multiplier)
    for (unsigned i = dstParts; i < srcParts; i++)
      if (src[i])
        return 1;

  /* We fitted in the narrow destination.  */
  return 0;
}
2559
2560 /* DST = LHS * RHS, where DST has the same width as the operands and
2561    is filled with the least significant parts of the result.  Returns
2562    one if overflow occurred, otherwise zero.  DST must be disjoint
2563    from both operands.  */
2564 int APInt::tcMultiply(WordType *dst, const WordType *lhs,
2565                       const WordType *rhs, unsigned parts) {
2566   assert(dst != lhs && dst != rhs);
2567
2568   int overflow = 0;
2569   tcSet(dst, 0, parts);
2570
2571   for (unsigned i = 0; i < parts; i++)
2572     overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
2573                                parts - i, true);
2574
2575   return overflow;
2576 }
2577
2578 /// DST = LHS * RHS, where DST has width the sum of the widths of the
2579 /// operands. No overflow occurs. DST must be disjoint from both operands.
2580 void APInt::tcFullMultiply(WordType *dst, const WordType *lhs,
2581                            const WordType *rhs, unsigned lhsParts,
2582                            unsigned rhsParts) {
2583   /* Put the narrower number on the LHS for less loops below.  */
2584   if (lhsParts > rhsParts)
2585     return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
2586
2587   assert(dst != lhs && dst != rhs);
2588
2589   tcSet(dst, 0, rhsParts);
2590
2591   for (unsigned i = 0; i < lhsParts; i++)
2592     tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true);
2593 }
2594
/* If RHS is zero LHS and REMAINDER are left unchanged, return one.
   Otherwise set LHS to LHS / RHS with the fractional part discarded,
   set REMAINDER to the remainder, return zero.  i.e.

   OLD_LHS = RHS * LHS + REMAINDER

   SCRATCH (the SRHS parameter) is a bignum of the same size as the
   operands and result for use by the routine; its contents need not be
   initialized and are destroyed.  LHS, REMAINDER and SCRATCH must be
   distinct.
*/
int APInt::tcDivide(WordType *lhs, const WordType *rhs,
                    WordType *remainder, WordType *srhs,
                    unsigned parts) {
  assert(lhs != remainder && lhs != srhs && remainder != srhs);

  /* tcMSB returns -1U for a zero RHS, so adding one gives zero.  */
  unsigned shiftCount = tcMSB(rhs, parts) + 1;
  if (shiftCount == 0)
    return true;

  /* shiftCount becomes the distance that moves the top set bit of RHS to
     the top bit of the bignum.  N and MASK name the quotient bit that
     corresponds to that shift.  */
  shiftCount = parts * APINT_BITS_PER_WORD - shiftCount;
  unsigned n = shiftCount / APINT_BITS_PER_WORD;
  WordType mask = (WordType) 1 << (shiftCount % APINT_BITS_PER_WORD);

  /* SRHS holds the shifted divisor, REMAINDER starts as the dividend and
     LHS is cleared to receive the quotient bits.  */
  tcAssign(srhs, rhs, parts);
  tcShiftLeft(srhs, parts, shiftCount);
  tcAssign(remainder, lhs, parts);
  tcSet(lhs, 0, parts);

  /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
     the total.  */
  for (;;) {
    int compare = tcCompare(remainder, srhs, parts);
    if (compare >= 0) {
      tcSubtract(remainder, srhs, 0, parts);
      lhs[n] |= mask;
    }

    if (shiftCount == 0)
      break;
    /* Step down to the next lower quotient bit.  */
    shiftCount--;
    tcShiftRight(srhs, parts, 1);
    if ((mask >>= 1) == 0) {
      /* MASK ran off the bottom of the word: continue at the top bit of
         the next lower word.  */
      mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1);
      n--;
    }
  }

  return false;
}
2644
2645 /// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are
2646 /// no restrictions on Count.
2647 void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) {
2648   // Don't bother performing a no-op shift.
2649   if (!Count)
2650     return;
2651
2652   // WordShift is the inter-part shift; BitShift is the intra-part shift.
2653   unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2654   unsigned BitShift = Count % APINT_BITS_PER_WORD;
2655
2656   // Fastpath for moving by whole words.
2657   if (BitShift == 0) {
2658     std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE);
2659   } else {
2660     while (Words-- > WordShift) {
2661       Dst[Words] = Dst[Words - WordShift] << BitShift;
2662       if (Words > WordShift)
2663         Dst[Words] |=
2664           Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift);
2665     }
2666   }
2667
2668   // Fill in the remainder with 0s.
2669   std::memset(Dst, 0, WordShift * APINT_WORD_SIZE);
2670 }
2671
2672 /// Shift a bignum right Count bits in-place. Shifted in bits are zero. There
2673 /// are no restrictions on Count.
2674 void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) {
2675   // Don't bother performing a no-op shift.
2676   if (!Count)
2677     return;
2678
2679   // WordShift is the inter-part shift; BitShift is the intra-part shift.
2680   unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2681   unsigned BitShift = Count % APINT_BITS_PER_WORD;
2682
2683   unsigned WordsToMove = Words - WordShift;
2684   // Fastpath for moving by whole words.
2685   if (BitShift == 0) {
2686     std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE);
2687   } else {
2688     for (unsigned i = 0; i != WordsToMove; ++i) {
2689       Dst[i] = Dst[i + WordShift] >> BitShift;
2690       if (i + 1 != WordsToMove)
2691         Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift);
2692     }
2693   }
2694
2695   // Fill in the remainder with 0s.
2696   std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE);
2697 }
2698
2699 /* Bitwise and of two bignums.  */
2700 void APInt::tcAnd(WordType *dst, const WordType *rhs, unsigned parts) {
2701   for (unsigned i = 0; i < parts; i++)
2702     dst[i] &= rhs[i];
2703 }
2704
2705 /* Bitwise inclusive or of two bignums.  */
2706 void APInt::tcOr(WordType *dst, const WordType *rhs, unsigned parts) {
2707   for (unsigned i = 0; i < parts; i++)
2708     dst[i] |= rhs[i];
2709 }
2710
2711 /* Bitwise exclusive or of two bignums.  */
2712 void APInt::tcXor(WordType *dst, const WordType *rhs, unsigned parts) {
2713   for (unsigned i = 0; i < parts; i++)
2714     dst[i] ^= rhs[i];
2715 }
2716
2717 /* Complement a bignum in-place.  */
2718 void APInt::tcComplement(WordType *dst, unsigned parts) {
2719   for (unsigned i = 0; i < parts; i++)
2720     dst[i] = ~dst[i];
2721 }
2722
2723 /* Comparison (unsigned) of two bignums.  */
2724 int APInt::tcCompare(const WordType *lhs, const WordType *rhs,
2725                      unsigned parts) {
2726   while (parts) {
2727     parts--;
2728     if (lhs[parts] != rhs[parts])
2729       return (lhs[parts] > rhs[parts]) ? 1 : -1;
2730   }
2731
2732   return 0;
2733 }
2734
2735 /* Set the least significant BITS bits of a bignum, clear the
2736    rest.  */
2737 void APInt::tcSetLeastSignificantBits(WordType *dst, unsigned parts,
2738                                       unsigned bits) {
2739   unsigned i = 0;
2740   while (bits > APINT_BITS_PER_WORD) {
2741     dst[i++] = ~(WordType) 0;
2742     bits -= APINT_BITS_PER_WORD;
2743   }
2744
2745   if (bits)
2746     dst[i++] = ~(WordType) 0 >> (APINT_BITS_PER_WORD - bits);
2747
2748   while (i < parts)
2749     dst[i++] = 0;
2750 }
2751
2752 APInt llvm::APIntOps::RoundingUDiv(const APInt &A, const APInt &B,
2753                                    APInt::Rounding RM) {
2754   // Currently udivrem always rounds down.
2755   switch (RM) {
2756   case APInt::Rounding::DOWN:
2757   case APInt::Rounding::TOWARD_ZERO:
2758     return A.udiv(B);
2759   case APInt::Rounding::UP: {
2760     APInt Quo, Rem;
2761     APInt::udivrem(A, B, Quo, Rem);
2762     if (Rem == 0)
2763       return Quo;
2764     return Quo + 1;
2765   }
2766   }
2767   llvm_unreachable("Unknown APInt::Rounding enum");
2768 }
2769
2770 APInt llvm::APIntOps::RoundingSDiv(const APInt &A, const APInt &B,
2771                                    APInt::Rounding RM) {
2772   switch (RM) {
2773   case APInt::Rounding::DOWN:
2774   case APInt::Rounding::UP: {
2775     APInt Quo, Rem;
2776     APInt::sdivrem(A, B, Quo, Rem);
2777     if (Rem == 0)
2778       return Quo;
2779     // This algorithm deals with arbitrary rounding mode used by sdivrem.
2780     // We want to check whether the non-integer part of the mathematical value
2781     // is negative or not. If the non-integer part is negative, we need to round
2782     // down from Quo; otherwise, if it's positive or 0, we return Quo, as it's
2783     // already rounded down.
2784     if (RM == APInt::Rounding::DOWN) {
2785       if (Rem.isNegative() != B.isNegative())
2786         return Quo - 1;
2787       return Quo;
2788     }
2789     if (Rem.isNegative() != B.isNegative())
2790       return Quo;
2791     return Quo + 1;
2792   }
2793   // Currently sdiv rounds twards zero.
2794   case APInt::Rounding::TOWARD_ZERO:
2795     return A.sdiv(B);
2796   }
2797   llvm_unreachable("Unknown APInt::Rounding enum");
2798 }
2799
2800 Optional<APInt>
2801 llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
2802                                            unsigned RangeWidth) {
2803   unsigned CoeffWidth = A.getBitWidth();
2804   assert(CoeffWidth == B.getBitWidth() && CoeffWidth == C.getBitWidth());
2805   assert(RangeWidth <= CoeffWidth &&
2806          "Value range width should be less than coefficient width");
2807   assert(RangeWidth > 1 && "Value range bit width should be > 1");
2808
2809   LLVM_DEBUG(dbgs() << __func__ << ": solving " << A << "x^2 + " << B
2810                     << "x + " << C << ", rw:" << RangeWidth << '\n');
2811
2812   // Identify 0 as a (non)solution immediately.
2813   if (C.sextOrTrunc(RangeWidth).isNullValue() ) {
2814     LLVM_DEBUG(dbgs() << __func__ << ": zero solution\n");
2815     return APInt(CoeffWidth, 0);
2816   }
2817
2818   // The result of APInt arithmetic has the same bit width as the operands,
2819   // so it can actually lose high bits. A product of two n-bit integers needs
2820   // 2n-1 bits to represent the full value.
2821   // The operation done below (on quadratic coefficients) that can produce
2822   // the largest value is the evaluation of the equation during bisection,
2823   // which needs 3 times the bitwidth of the coefficient, so the total number
2824   // of required bits is 3n.
2825   //
2826   // The purpose of this extension is to simulate the set Z of all integers,
2827   // where n+1 > n for all n in Z. In Z it makes sense to talk about positive
2828   // and negative numbers (not so much in a modulo arithmetic). The method
2829   // used to solve the equation is based on the standard formula for real
2830   // numbers, and uses the concepts of "positive" and "negative" with their
2831   // usual meanings.
2832   CoeffWidth *= 3;
2833   A = A.sext(CoeffWidth);
2834   B = B.sext(CoeffWidth);
2835   C = C.sext(CoeffWidth);
2836
2837   // Make A > 0 for simplicity. Negate cannot overflow at this point because
2838   // the bit width has increased.
2839   if (A.isNegative()) {
2840     A.negate();
2841     B.negate();
2842     C.negate();
2843   }
2844
2845   // Solving an equation q(x) = 0 with coefficients in modular arithmetic
2846   // is really solving a set of equations q(x) = kR for k = 0, 1, 2, ...,
2847   // and R = 2^BitWidth.
2848   // Since we're trying not only to find exact solutions, but also values
2849   // that "wrap around", such a set will always have a solution, i.e. an x
2850   // that satisfies at least one of the equations, or such that |q(x)|
2851   // exceeds kR, while |q(x-1)| for the same k does not.
2852   //
2853   // We need to find a value k, such that Ax^2 + Bx + C = kR will have a
2854   // positive solution n (in the above sense), and also such that the n
2855   // will be the least among all solutions corresponding to k = 0, 1, ...
2856   // (more precisely, the least element in the set
2857   //   { n(k) | k is such that a solution n(k) exists }).
2858   //
2859   // Consider the parabola (over real numbers) that corresponds to the
2860   // quadratic equation. Since A > 0, the arms of the parabola will point
2861   // up. Picking different values of k will shift it up and down by R.
2862   //
2863   // We want to shift the parabola in such a way as to reduce the problem
2864   // of solving q(x) = kR to solving shifted_q(x) = 0.
2865   // (The interesting solutions are the ceilings of the real number
2866   // solutions.)
2867   APInt R = APInt::getOneBitSet(CoeffWidth, RangeWidth);
2868   APInt TwoA = 2 * A;
2869   APInt SqrB = B * B;
2870   bool PickLow;
2871
2872   auto RoundUp = [] (const APInt &V, const APInt &A) -> APInt {
2873     assert(A.isStrictlyPositive());
2874     APInt T = V.abs().urem(A);
2875     if (T.isNullValue())
2876       return V;
2877     return V.isNegative() ? V+T : V+(A-T);
2878   };
2879
2880   // The vertex of the parabola is at -B/2A, but since A > 0, it's negative
2881   // iff B is positive.
2882   if (B.isNonNegative()) {
2883     // If B >= 0, the vertex it at a negative location (or at 0), so in
2884     // order to have a non-negative solution we need to pick k that makes
2885     // C-kR negative. To satisfy all the requirements for the solution
2886     // that we are looking for, it needs to be closest to 0 of all k.
2887     C = C.srem(R);
2888     if (C.isStrictlyPositive())
2889       C -= R;
2890     // Pick the greater solution.
2891     PickLow = false;
2892   } else {
2893     // If B < 0, the vertex is at a positive location. For any solution
2894     // to exist, the discriminant must be non-negative. This means that
2895     // C-kR <= B^2/4A is a necessary condition for k, i.e. there is a
2896     // lower bound on values of k: kR >= C - B^2/4A.
2897     APInt LowkR = C - SqrB.udiv(2*TwoA); // udiv because all values > 0.
2898     // Round LowkR up (towards +inf) to the nearest kR.
2899     LowkR = RoundUp(LowkR, R);
2900
2901     // If there exists k meeting the condition above, and such that
2902     // C-kR > 0, there will be two positive real number solutions of
2903     // q(x) = kR. Out of all such values of k, pick the one that makes
2904     // C-kR closest to 0, (i.e. pick maximum k such that C-kR > 0).
2905     // In other words, find maximum k such that LowkR <= kR < C.
2906     if (C.sgt(LowkR)) {
2907       // If LowkR < C, then such a k is guaranteed to exist because
2908       // LowkR itself is a multiple of R.
2909       C -= -RoundUp(-C, R);      // C = C - RoundDown(C, R)
2910       // Pick the smaller solution.
2911       PickLow = true;
2912     } else {
2913       // If C-kR < 0 for all potential k's, it means that one solution
2914       // will be negative, while the other will be positive. The positive
2915       // solution will shift towards 0 if the parabola is moved up.
2916       // Pick the kR closest to the lower bound (i.e. make C-kR closest
2917       // to 0, or in other words, out of all parabolas that have solutions,
2918       // pick the one that is the farthest "up").
2919       // Since LowkR is itself a multiple of R, simply take C-LowkR.
2920       C -= LowkR;
2921       // Pick the greater solution.
2922       PickLow = false;
2923     }
2924   }
2925
2926   LLVM_DEBUG(dbgs() << __func__ << ": updated coefficients " << A << "x^2 + "
2927                     << B << "x + " << C << ", rw:" << RangeWidth << '\n');
2928
2929   APInt D = SqrB - 4*A*C;
2930   assert(D.isNonNegative() && "Negative discriminant");
2931   APInt SQ = D.sqrt();
2932
2933   APInt Q = SQ * SQ;
2934   bool InexactSQ = Q != D;
2935   // The calculated SQ may actually be greater than the exact (non-integer)
2936   // value. If that's the case, decremement SQ to get a value that is lower.
2937   if (Q.sgt(D))
2938     SQ -= 1;
2939
2940   APInt X;
2941   APInt Rem;
2942
2943   // SQ is rounded down (i.e SQ * SQ <= D), so the roots may be inexact.
2944   // When using the quadratic formula directly, the calculated low root
2945   // may be greater than the exact one, since we would be subtracting SQ.
2946   // To make sure that the calculated root is not greater than the exact
2947   // one, subtract SQ+1 when calculating the low root (for inexact value
2948   // of SQ).
2949   if (PickLow)
2950     APInt::sdivrem(-B - (SQ+InexactSQ), TwoA, X, Rem);
2951   else
2952     APInt::sdivrem(-B + SQ, TwoA, X, Rem);
2953
2954   // The updated coefficients should be such that the (exact) solution is
2955   // positive. Since APInt division rounds towards 0, the calculated one
2956   // can be 0, but cannot be negative.
2957   assert(X.isNonNegative() && "Solution should be non-negative");
2958
2959   if (!InexactSQ && Rem.isNullValue()) {
2960     LLVM_DEBUG(dbgs() << __func__ << ": solution (root): " << X << '\n');
2961     return X;
2962   }
2963
2964   assert((SQ*SQ).sle(D) && "SQ = |_sqrt(D)_|, so SQ*SQ <= D");
2965   // The exact value of the square root of D should be between SQ and SQ+1.
2966   // This implies that the solution should be between that corresponding to
2967   // SQ (i.e. X) and that corresponding to SQ+1.
2968   //
2969   // The calculated X cannot be greater than the exact (real) solution.
2970   // Actually it must be strictly less than the exact solution, while
2971   // X+1 will be greater than or equal to it.
2972
2973   APInt VX = (A*X + B)*X + C;
2974   APInt VY = VX + TwoA*X + A + B;
2975   bool SignChange = VX.isNegative() != VY.isNegative() ||
2976                     VX.isNullValue() != VY.isNullValue();
2977   // If the sign did not change between X and X+1, X is not a valid solution.
2978   // This could happen when the actual (exact) roots don't have an integer
2979   // between them, so they would both be contained between X and X+1.
2980   if (!SignChange) {
2981     LLVM_DEBUG(dbgs() << __func__ << ": no valid solution\n");
2982     return None;
2983   }
2984
2985   X += 1;
2986   LLVM_DEBUG(dbgs() << __func__ << ": solution (wrap): " << X << '\n');
2987   return X;
2988 }
2989
2990 /// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst
2991 /// with the integer held in IntVal.
2992 void llvm::StoreIntToMemory(const APInt &IntVal, uint8_t *Dst,
2993                             unsigned StoreBytes) {
2994   assert((IntVal.getBitWidth()+7)/8 >= StoreBytes && "Integer too small!");
2995   const uint8_t *Src = (const uint8_t *)IntVal.getRawData();
2996
2997   if (sys::IsLittleEndianHost) {
2998     // Little-endian host - the source is ordered from LSB to MSB.  Order the
2999     // destination from LSB to MSB: Do a straight copy.
3000     memcpy(Dst, Src, StoreBytes);
3001   } else {
3002     // Big-endian host - the source is an array of 64 bit words ordered from
3003     // LSW to MSW.  Each word is ordered from MSB to LSB.  Order the destination
3004     // from MSB to LSB: Reverse the word order, but not the bytes in a word.
3005     while (StoreBytes > sizeof(uint64_t)) {
3006       StoreBytes -= sizeof(uint64_t);
3007       // May not be aligned so use memcpy.
3008       memcpy(Dst + StoreBytes, Src, sizeof(uint64_t));
3009       Src += sizeof(uint64_t);
3010     }
3011
3012     memcpy(Dst, Src + sizeof(uint64_t) - StoreBytes, StoreBytes);
3013   }
3014 }
3015
3016 /// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting
3017 /// from Src into IntVal, which is assumed to be wide enough and to hold zero.
3018 void llvm::LoadIntFromMemory(APInt &IntVal, uint8_t *Src, unsigned LoadBytes) {
3019   assert((IntVal.getBitWidth()+7)/8 >= LoadBytes && "Integer too small!");
3020   uint8_t *Dst = reinterpret_cast<uint8_t *>(
3021                    const_cast<uint64_t *>(IntVal.getRawData()));
3022
3023   if (sys::IsLittleEndianHost)
3024     // Little-endian host - the destination must be ordered from LSB to MSB.
3025     // The source is ordered from LSB to MSB: Do a straight copy.
3026     memcpy(Dst, Src, LoadBytes);
3027   else {
3028     // Big-endian - the destination is an array of 64 bit words ordered from
3029     // LSW to MSW.  Each word must be ordered from MSB to LSB.  The source is
3030     // ordered from MSB to LSB: Reverse the word order, but not the bytes in
3031     // a word.
3032     while (LoadBytes > sizeof(uint64_t)) {
3033       LoadBytes -= sizeof(uint64_t);
3034       // May not be aligned so use memcpy.
3035       memcpy(Dst, Src + LoadBytes, sizeof(uint64_t));
3036       Dst += sizeof(uint64_t);
3037     }
3038
3039     memcpy(Dst + sizeof(uint64_t) - LoadBytes, Src, LoadBytes);
3040   }
3041 }