contrib/llvm/lib/Analysis/ValueTracking.cpp

   1 //===- ValueTracking.cpp - Walk computations to compute properties --------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file contains routines that help analyze properties that chains of
  11 // computations have.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #include "llvm/Analysis/ValueTracking.h"
  16 #include "llvm/ADT/Optional.h"
  17 #include "llvm/ADT/SmallPtrSet.h"
  18 #include "llvm/Analysis/AssumptionCache.h"
  19 #include "llvm/Analysis/InstructionSimplify.h"
  20 #include "llvm/Analysis/MemoryBuiltins.h"
  21 #include "llvm/Analysis/Loads.h"
  22 #include "llvm/Analysis/LoopInfo.h"
  23 #include "llvm/Analysis/OptimizationDiagnosticInfo.h"
  24 #include "llvm/Analysis/VectorUtils.h"
  25 #include "llvm/IR/CallSite.h"
  26 #include "llvm/IR/ConstantRange.h"
  27 #include "llvm/IR/Constants.h"
  28 #include "llvm/IR/DataLayout.h"
  29 #include "llvm/IR/DerivedTypes.h"
  30 #include "llvm/IR/Dominators.h"
  31 #include "llvm/IR/GetElementPtrTypeIterator.h"
  32 #include "llvm/IR/GlobalAlias.h"
  33 #include "llvm/IR/GlobalVariable.h"
  34 #include "llvm/IR/Instructions.h"
  35 #include "llvm/IR/IntrinsicInst.h"
  36 #include "llvm/IR/LLVMContext.h"
  37 #include "llvm/IR/Metadata.h"
  38 #include "llvm/IR/Operator.h"
  39 #include "llvm/IR/PatternMatch.h"
  40 #include "llvm/IR/Statepoint.h"
  41 #include "llvm/Support/Debug.h"
  42 #include "llvm/Support/KnownBits.h"
  43 #include "llvm/Support/MathExtras.h"
  44 #include <algorithm>
  45 #include <array>
  46 #include <cstring>
  47 using namespace llvm;
  48 using namespace llvm::PatternMatch;
  49
// Recursion limit for the analyses in this file: the recursive assume-based
// patterns stop once Depth reaches this value. It is also the capacity of
// Query::Excluded.
const unsigned MaxDepth = 6;

// Controls the number of uses of the value searched for possible
// dominating comparisons.
static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
                                              cl::Hidden, cl::init(20));

// This optimization is known to cause performance regressions in some cases,
// so keep it under a temporary flag for now.
static cl::opt<bool>
DontImproveNonNegativePhiBits("dont-improve-non-negative-phi-bits",
                              cl::Hidden, cl::init(true));
  62
  63 /// Returns the bitwidth of the given scalar or pointer type. For vector types,
  64 /// returns the element type's bitwidth.
  65 static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
  66   if (unsigned BitWidth = Ty->getScalarSizeInBits())
  67     return BitWidth;
  68
  69   return DL.getPointerTypeSizeInBits(Ty);
  70 }
  71
  72 namespace {
  73 // Simplifying using an assume can only be done in a particular control-flow
  74 // context (the context instruction provides that context). If an assume and
  75 // the context instruction are not in the same block then the DT helps in
  76 // figuring out if we can use it.
  77 struct Query {
  78   const DataLayout &DL;
  79   AssumptionCache *AC;
  80   const Instruction *CxtI;
  81   const DominatorTree *DT;
  82   // Unlike the other analyses, this may be a nullptr because not all clients
  83   // provide it currently.
  84   OptimizationRemarkEmitter *ORE;
  85
  86   /// Set of assumptions that should be excluded from further queries.
  87   /// This is because of the potential for mutual recursion to cause
  88   /// computeKnownBits to repeatedly visit the same assume intrinsic. The
  89   /// classic case of this is assume(x = y), which will attempt to determine
  90   /// bits in x from bits in y, which will attempt to determine bits in y from
  91   /// bits in x, etc. Regarding the mutual recursion, computeKnownBits can call
  92   /// isKnownNonZero, which calls computeKnownBits and isKnownToBeAPowerOfTwo
  93   /// (all of which can call computeKnownBits), and so on.
  94   std::array<const Value *, MaxDepth> Excluded;
  95   unsigned NumExcluded;
  96
  97   Query(const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI,
  98         const DominatorTree *DT, OptimizationRemarkEmitter *ORE = nullptr)
  99       : DL(DL), AC(AC), CxtI(CxtI), DT(DT), ORE(ORE), NumExcluded(0) {}
 100
 101   Query(const Query &Q, const Value *NewExcl)
 102       : DL(Q.DL), AC(Q.AC), CxtI(Q.CxtI), DT(Q.DT), ORE(Q.ORE),
 103         NumExcluded(Q.NumExcluded) {
 104     Excluded = Q.Excluded;
 105     Excluded[NumExcluded++] = NewExcl;
 106     assert(NumExcluded <= Excluded.size());
 107   }
 108
 109   bool isExcluded(const Value *Value) const {
 110     if (NumExcluded == 0)
 111       return false;
 112     auto End = Excluded.begin() + NumExcluded;
 113     return std::find(Excluded.begin(), End, Value) != End;
 114   }
 115 };
 116 } // end anonymous namespace
 117
 118 // Given the provided Value and, potentially, a context instruction, return
 119 // the preferred context instruction (if any).
 120 static const Instruction *safeCxtI(const Value *V, const Instruction *CxtI) {
 121   // If we've been provided with a context instruction, then use that (provided
 122   // it has been inserted).
 123   if (CxtI && CxtI->getParent())
 124     return CxtI;
 125
 126   // If the value is really an already-inserted instruction, then use that.
 127   CxtI = dyn_cast<Instruction>(V);
 128   if (CxtI && CxtI->getParent())
 129     return CxtI;
 130
 131   return nullptr;
 132 }
 133
 134 static void computeKnownBits(const Value *V, KnownBits &Known,
 135                              unsigned Depth, const Query &Q);
 136
 137 void llvm::computeKnownBits(const Value *V, KnownBits &Known,
 138                             const DataLayout &DL, unsigned Depth,
 139                             AssumptionCache *AC, const Instruction *CxtI,
 140                             const DominatorTree *DT,
 141                             OptimizationRemarkEmitter *ORE) {
 142   ::computeKnownBits(V, Known, Depth,
 143                      Query(DL, AC, safeCxtI(V, CxtI), DT, ORE));
 144 }
 145
 146 static KnownBits computeKnownBits(const Value *V, unsigned Depth,
 147                                   const Query &Q);
 148
 149 KnownBits llvm::computeKnownBits(const Value *V, const DataLayout &DL,
 150                                  unsigned Depth, AssumptionCache *AC,
 151                                  const Instruction *CxtI,
 152                                  const DominatorTree *DT,
 153                                  OptimizationRemarkEmitter *ORE) {
 154   return ::computeKnownBits(V, Depth,
 155                             Query(DL, AC, safeCxtI(V, CxtI), DT, ORE));
 156 }
 157
 158 bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS,
 159                                const DataLayout &DL,
 160                                AssumptionCache *AC, const Instruction *CxtI,
 161                                const DominatorTree *DT) {
 162   assert(LHS->getType() == RHS->getType() &&
 163          "LHS and RHS should have the same type");
 164   assert(LHS->getType()->isIntOrIntVectorTy() &&
 165          "LHS and RHS should be integers");
 166   IntegerType *IT = cast<IntegerType>(LHS->getType()->getScalarType());
 167   KnownBits LHSKnown(IT->getBitWidth());
 168   KnownBits RHSKnown(IT->getBitWidth());
 169   computeKnownBits(LHS, LHSKnown, DL, 0, AC, CxtI, DT);
 170   computeKnownBits(RHS, RHSKnown, DL, 0, AC, CxtI, DT);
 171   return (LHSKnown.Zero | RHSKnown.Zero).isAllOnesValue();
 172 }
 173
 174
 175 static bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
 176                                    const Query &Q);
 177
 178 bool llvm::isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL,
 179                                   bool OrZero,
 180                                   unsigned Depth, AssumptionCache *AC,
 181                                   const Instruction *CxtI,
 182                                   const DominatorTree *DT) {
 183   return ::isKnownToBeAPowerOfTwo(V, OrZero, Depth,
 184                                   Query(DL, AC, safeCxtI(V, CxtI), DT));
 185 }
 186
 187 static bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q);
 188
 189 bool llvm::isKnownNonZero(const Value *V, const DataLayout &DL, unsigned Depth,
 190                           AssumptionCache *AC, const Instruction *CxtI,
 191                           const DominatorTree *DT) {
 192   return ::isKnownNonZero(V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT));
 193 }
 194
 195 bool llvm::isKnownNonNegative(const Value *V, const DataLayout &DL,
 196                               unsigned Depth,
 197                               AssumptionCache *AC, const Instruction *CxtI,
 198                               const DominatorTree *DT) {
 199   KnownBits Known = computeKnownBits(V, DL, Depth, AC, CxtI, DT);
 200   return Known.isNonNegative();
 201 }
 202
 203 bool llvm::isKnownPositive(const Value *V, const DataLayout &DL, unsigned Depth,
 204                            AssumptionCache *AC, const Instruction *CxtI,
 205                            const DominatorTree *DT) {
 206   if (auto *CI = dyn_cast<ConstantInt>(V))
 207     return CI->getValue().isStrictlyPositive();
 208
 209   // TODO: We'd doing two recursive queries here.  We should factor this such
 210   // that only a single query is needed.
 211   return isKnownNonNegative(V, DL, Depth, AC, CxtI, DT) &&
 212     isKnownNonZero(V, DL, Depth, AC, CxtI, DT);
 213 }
 214
 215 bool llvm::isKnownNegative(const Value *V, const DataLayout &DL, unsigned Depth,
 216                            AssumptionCache *AC, const Instruction *CxtI,
 217                            const DominatorTree *DT) {
 218   KnownBits Known = computeKnownBits(V, DL, Depth, AC, CxtI, DT);
 219   return Known.isNegative();
 220 }
 221
 222 static bool isKnownNonEqual(const Value *V1, const Value *V2, const Query &Q);
 223
 224 bool llvm::isKnownNonEqual(const Value *V1, const Value *V2,
 225                            const DataLayout &DL,
 226                            AssumptionCache *AC, const Instruction *CxtI,
 227                            const DominatorTree *DT) {
 228   return ::isKnownNonEqual(V1, V2, Query(DL, AC,
 229                                          safeCxtI(V1, safeCxtI(V2, CxtI)),
 230                                          DT));
 231 }
 232
 233 static bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth,
 234                               const Query &Q);
 235
 236 bool llvm::MaskedValueIsZero(const Value *V, const APInt &Mask,
 237                              const DataLayout &DL,
 238                              unsigned Depth, AssumptionCache *AC,
 239                              const Instruction *CxtI, const DominatorTree *DT) {
 240   return ::MaskedValueIsZero(V, Mask, Depth,
 241                              Query(DL, AC, safeCxtI(V, CxtI), DT));
 242 }
 243
 244 static unsigned ComputeNumSignBits(const Value *V, unsigned Depth,
 245                                    const Query &Q);
 246
 247 unsigned llvm::ComputeNumSignBits(const Value *V, const DataLayout &DL,
 248                                   unsigned Depth, AssumptionCache *AC,
 249                                   const Instruction *CxtI,
 250                                   const DominatorTree *DT) {
 251   return ::ComputeNumSignBits(V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT));
 252 }
 253
 254 static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1,
 255                                    bool NSW,
 256                                    KnownBits &KnownOut, KnownBits &Known2,
 257                                    unsigned Depth, const Query &Q) {
 258   unsigned BitWidth = KnownOut.getBitWidth();
 259
 260   // If an initial sequence of bits in the result is not needed, the
 261   // corresponding bits in the operands are not needed.
 262   KnownBits LHSKnown(BitWidth);
 263   computeKnownBits(Op0, LHSKnown, Depth + 1, Q);
 264   computeKnownBits(Op1, Known2, Depth + 1, Q);
 265
 266   // Carry in a 1 for a subtract, rather than a 0.
 267   uint64_t CarryIn = 0;
 268   if (!Add) {
 269     // Sum = LHS + ~RHS + 1
 270     std::swap(Known2.Zero, Known2.One);
 271     CarryIn = 1;
 272   }
 273
 274   APInt PossibleSumZero = ~LHSKnown.Zero + ~Known2.Zero + CarryIn;
 275   APInt PossibleSumOne = LHSKnown.One + Known2.One + CarryIn;
 276
 277   // Compute known bits of the carry.
 278   APInt CarryKnownZero = ~(PossibleSumZero ^ LHSKnown.Zero ^ Known2.Zero);
 279   APInt CarryKnownOne = PossibleSumOne ^ LHSKnown.One ^ Known2.One;
 280
 281   // Compute set of known bits (where all three relevant bits are known).
 282   APInt LHSKnownUnion = LHSKnown.Zero | LHSKnown.One;
 283   APInt RHSKnownUnion = Known2.Zero | Known2.One;
 284   APInt CarryKnownUnion = CarryKnownZero | CarryKnownOne;
 285   APInt Known = LHSKnownUnion & RHSKnownUnion & CarryKnownUnion;
 286
 287   assert((PossibleSumZero & Known) == (PossibleSumOne & Known) &&
 288          "known bits of sum differ");
 289
 290   // Compute known bits of the result.
 291   KnownOut.Zero = ~PossibleSumOne & Known;
 292   KnownOut.One = PossibleSumOne & Known;
 293
 294   // Are we still trying to solve for the sign bit?
 295   if (!Known.isSignBitSet()) {
 296     if (NSW) {
 297       // Adding two non-negative numbers, or subtracting a negative number from
 298       // a non-negative one, can't wrap into negative.
 299       if (LHSKnown.isNonNegative() && Known2.isNonNegative())
 300         KnownOut.makeNonNegative();
 301       // Adding two negative numbers, or subtracting a non-negative number from
 302       // a negative one, can't wrap into non-negative.
 303       else if (LHSKnown.isNegative() && Known2.isNegative())
 304         KnownOut.makeNegative();
 305     }
 306   }
 307 }
 308
 309 static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW,
 310                                 KnownBits &Known, KnownBits &Known2,
 311                                 unsigned Depth, const Query &Q) {
 312   unsigned BitWidth = Known.getBitWidth();
 313   computeKnownBits(Op1, Known, Depth + 1, Q);
 314   computeKnownBits(Op0, Known2, Depth + 1, Q);
 315
 316   bool isKnownNegative = false;
 317   bool isKnownNonNegative = false;
 318   // If the multiplication is known not to overflow, compute the sign bit.
 319   if (NSW) {
 320     if (Op0 == Op1) {
 321       // The product of a number with itself is non-negative.
 322       isKnownNonNegative = true;
 323     } else {
 324       bool isKnownNonNegativeOp1 = Known.isNonNegative();
 325       bool isKnownNonNegativeOp0 = Known2.isNonNegative();
 326       bool isKnownNegativeOp1 = Known.isNegative();
 327       bool isKnownNegativeOp0 = Known2.isNegative();
 328       // The product of two numbers with the same sign is non-negative.
 329       isKnownNonNegative = (isKnownNegativeOp1 && isKnownNegativeOp0) ||
 330         (isKnownNonNegativeOp1 && isKnownNonNegativeOp0);
 331       // The product of a negative number and a non-negative number is either
 332       // negative or zero.
 333       if (!isKnownNonNegative)
 334         isKnownNegative = (isKnownNegativeOp1 && isKnownNonNegativeOp0 &&
 335                            isKnownNonZero(Op0, Depth, Q)) ||
 336                           (isKnownNegativeOp0 && isKnownNonNegativeOp1 &&
 337                            isKnownNonZero(Op1, Depth, Q));
 338     }
 339   }
 340
 341   // If low bits are zero in either operand, output low known-0 bits.
 342   // Also compute a conservative estimate for high known-0 bits.
 343   // More trickiness is possible, but this is sufficient for the
 344   // interesting case of alignment computation.
 345   unsigned TrailZ = Known.countMinTrailingZeros() +
 346                     Known2.countMinTrailingZeros();
 347   unsigned LeadZ =  std::max(Known.countMinLeadingZeros() +
 348                              Known2.countMinLeadingZeros(),
 349                              BitWidth) - BitWidth;
 350
 351   TrailZ = std::min(TrailZ, BitWidth);
 352   LeadZ = std::min(LeadZ, BitWidth);
 353   Known.resetAll();
 354   Known.Zero.setLowBits(TrailZ);
 355   Known.Zero.setHighBits(LeadZ);
 356
 357   // Only make use of no-wrap flags if we failed to compute the sign bit
 358   // directly.  This matters if the multiplication always overflows, in
 359   // which case we prefer to follow the result of the direct computation,
 360   // though as the program is invoking undefined behaviour we can choose
 361   // whatever we like here.
 362   if (isKnownNonNegative && !Known.isNegative())
 363     Known.makeNonNegative();
 364   else if (isKnownNegative && !Known.isNonNegative())
 365     Known.makeNegative();
 366 }
 367
 368 void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges,
 369                                              KnownBits &Known) {
 370   unsigned BitWidth = Known.getBitWidth();
 371   unsigned NumRanges = Ranges.getNumOperands() / 2;
 372   assert(NumRanges >= 1);
 373
 374   Known.Zero.setAllBits();
 375   Known.One.setAllBits();
 376
 377   for (unsigned i = 0; i < NumRanges; ++i) {
 378     ConstantInt *Lower =
 379         mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 0));
 380     ConstantInt *Upper =
 381         mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 1));
 382     ConstantRange Range(Lower->getValue(), Upper->getValue());
 383
 384     // The first CommonPrefixBits of all values in Range are equal.
 385     unsigned CommonPrefixBits =
 386         (Range.getUnsignedMax() ^ Range.getUnsignedMin()).countLeadingZeros();
 387
 388     APInt Mask = APInt::getHighBitsSet(BitWidth, CommonPrefixBits);
 389     Known.One &= Range.getUnsignedMax() & Mask;
 390     Known.Zero &= ~Range.getUnsignedMax() & Mask;
 391   }
 392 }
 393
 394 static bool isEphemeralValueOf(const Instruction *I, const Value *E) {
 395   SmallVector<const Value *, 16> WorkSet(1, I);
 396   SmallPtrSet<const Value *, 32> Visited;
 397   SmallPtrSet<const Value *, 16> EphValues;
 398
 399   // The instruction defining an assumption's condition itself is always
 400   // considered ephemeral to that assumption (even if it has other
 401   // non-ephemeral users). See r246696's test case for an example.
 402   if (is_contained(I->operands(), E))
 403     return true;
 404
 405   while (!WorkSet.empty()) {
 406     const Value *V = WorkSet.pop_back_val();
 407     if (!Visited.insert(V).second)
 408       continue;
 409
 410     // If all uses of this value are ephemeral, then so is this value.
 411     if (all_of(V->users(), [&](const User *U) { return EphValues.count(U); })) {
 412       if (V == E)
 413         return true;
 414
 415       EphValues.insert(V);
 416       if (const User *U = dyn_cast<User>(V))
 417         for (User::const_op_iterator J = U->op_begin(), JE = U->op_end();
 418              J != JE; ++J) {
 419           if (isSafeToSpeculativelyExecute(*J))
 420             WorkSet.push_back(*J);
 421         }
 422     }
 423   }
 424
 425   return false;
 426 }
 427
 428 // Is this an intrinsic that cannot be speculated but also cannot trap?
 429 static bool isAssumeLikeIntrinsic(const Instruction *I) {
 430   if (const CallInst *CI = dyn_cast<CallInst>(I))
 431     if (Function *F = CI->getCalledFunction())
 432       switch (F->getIntrinsicID()) {
 433       default: break;
 434       // FIXME: This list is repeated from NoTTI::getIntrinsicCost.
 435       case Intrinsic::assume:
 436       case Intrinsic::dbg_declare:
 437       case Intrinsic::dbg_value:
 438       case Intrinsic::invariant_start:
 439       case Intrinsic::invariant_end:
 440       case Intrinsic::lifetime_start:
 441       case Intrinsic::lifetime_end:
 442       case Intrinsic::objectsize:
 443       case Intrinsic::ptr_annotation:
 444       case Intrinsic::var_annotation:
 445         return true;
 446       }
 447
 448   return false;
 449 }
 450
 451 bool llvm::isValidAssumeForContext(const Instruction *Inv,
 452                                    const Instruction *CxtI,
 453                                    const DominatorTree *DT) {
 454
 455   // There are two restrictions on the use of an assume:
 456   //  1. The assume must dominate the context (or the control flow must
 457   //     reach the assume whenever it reaches the context).
 458   //  2. The context must not be in the assume's set of ephemeral values
 459   //     (otherwise we will use the assume to prove that the condition
 460   //     feeding the assume is trivially true, thus causing the removal of
 461   //     the assume).
 462
 463   if (DT) {
 464     if (DT->dominates(Inv, CxtI))
 465       return true;
 466   } else if (Inv->getParent() == CxtI->getParent()->getSinglePredecessor()) {
 467     // We don't have a DT, but this trivially dominates.
 468     return true;
 469   }
 470
 471   // With or without a DT, the only remaining case we will check is if the
 472   // instructions are in the same BB.  Give up if that is not the case.
 473   if (Inv->getParent() != CxtI->getParent())
 474     return false;
 475
 476   // If we have a dom tree, then we now know that the assume doens't dominate
 477   // the other instruction.  If we don't have a dom tree then we can check if
 478   // the assume is first in the BB.
 479   if (!DT) {
 480     // Search forward from the assume until we reach the context (or the end
 481     // of the block); the common case is that the assume will come first.
 482     for (auto I = std::next(BasicBlock::const_iterator(Inv)),
 483          IE = Inv->getParent()->end(); I != IE; ++I)
 484       if (&*I == CxtI)
 485         return true;
 486   }
 487
 488   // The context comes first, but they're both in the same block. Make sure
 489   // there is nothing in between that might interrupt the control flow.
 490   for (BasicBlock::const_iterator I =
 491          std::next(BasicBlock::const_iterator(CxtI)), IE(Inv);
 492        I != IE; ++I)
 493     if (!isSafeToSpeculativelyExecute(&*I) && !isAssumeLikeIntrinsic(&*I))
 494       return false;
 495
 496   return !isEphemeralValueOf(Inv, CxtI);
 497 }
 498
 499 static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known,
 500                                        unsigned Depth, const Query &Q) {
 501   // Use of assumptions is context-sensitive. If we don't have a context, we
 502   // cannot use them!
 503   if (!Q.AC || !Q.CxtI)
 504     return;
 505
 506   unsigned BitWidth = Known.getBitWidth();
 507
 508   // Note that the patterns below need to be kept in sync with the code
 509   // in AssumptionCache::updateAffectedValues.
 510
 511   for (auto &AssumeVH : Q.AC->assumptionsFor(V)) {
 512     if (!AssumeVH)
 513       continue;
 514     CallInst *I = cast<CallInst>(AssumeVH);
 515     assert(I->getParent()->getParent() == Q.CxtI->getParent()->getParent() &&
 516            "Got assumption for the wrong function!");
 517     if (Q.isExcluded(I))
 518       continue;
 519
 520     // Warning: This loop can end up being somewhat performance sensitive.
 521     // We're running this loop once for each value queried, resulting in a
 522     // runtime of ~O(#assumes * #values).
 523
 524     assert(I->getCalledFunction()->getIntrinsicID() == Intrinsic::assume &&
 525            "must be an assume intrinsic");
 526
 527     Value *Arg = I->getArgOperand(0);
 528
 529     if (Arg == V && isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
 530       assert(BitWidth == 1 && "assume operand is not i1?");
 531       Known.setAllOnes();
 532       return;
 533     }
 534     if (match(Arg, m_Not(m_Specific(V))) &&
 535         isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
 536       assert(BitWidth == 1 && "assume operand is not i1?");
 537       Known.setAllZero();
 538       return;
 539     }
 540
 541     // The remaining tests are all recursive, so bail out if we hit the limit.
 542     if (Depth == MaxDepth)
 543       continue;
 544
 545     Value *A, *B;
 546     auto m_V = m_CombineOr(m_Specific(V),
 547                            m_CombineOr(m_PtrToInt(m_Specific(V)),
 548                            m_BitCast(m_Specific(V))));
 549
 550     CmpInst::Predicate Pred;
 551     ConstantInt *C;
 552     // assume(v = a)
 553     if (match(Arg, m_c_ICmp(Pred, m_V, m_Value(A))) &&
 554         Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
 555       KnownBits RHSKnown(BitWidth);
 556       computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
 557       Known.Zero |= RHSKnown.Zero;
 558       Known.One  |= RHSKnown.One;
 559     // assume(v & b = a)
 560     } else if (match(Arg,
 561                      m_c_ICmp(Pred, m_c_And(m_V, m_Value(B)), m_Value(A))) &&
 562                Pred == ICmpInst::ICMP_EQ &&
 563                isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
 564       KnownBits RHSKnown(BitWidth);
 565       computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
 566       KnownBits MaskKnown(BitWidth);
 567       computeKnownBits(B, MaskKnown, Depth+1, Query(Q, I));
 568
 569       // For those bits in the mask that are known to be one, we can propagate
 570       // known bits from the RHS to V.
 571       Known.Zero |= RHSKnown.Zero & MaskKnown.One;
 572       Known.One  |= RHSKnown.One  & MaskKnown.One;
 573     // assume(~(v & b) = a)
 574     } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_And(m_V, m_Value(B))),
 575                                    m_Value(A))) &&
 576                Pred == ICmpInst::ICMP_EQ &&
 577                isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
 578       KnownBits RHSKnown(BitWidth);
 579       computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
 580       KnownBits MaskKnown(BitWidth);
 581       computeKnownBits(B, MaskKnown, Depth+1, Query(Q, I));
 582
 583       // For those bits in the mask that are known to be one, we can propagate
 584       // inverted known bits from the RHS to V.
 585       Known.Zero |= RHSKnown.One  & MaskKnown.One;
 586       Known.One  |= RHSKnown.Zero & MaskKnown.One;
 587     // assume(v | b = a)
 588     } else if (match(Arg,
 589                      m_c_ICmp(Pred, m_c_Or(m_V, m_Value(B)), m_Value(A))) &&
 590                Pred == ICmpInst::ICMP_EQ &&
 591                isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
 592       KnownBits RHSKnown(BitWidth);
 593       computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
 594       KnownBits BKnown(BitWidth);
 595       computeKnownBits(B, BKnown, Depth+1, Query(Q, I));
 596
 597       // For those bits in B that are known to be zero, we can propagate known
 598       // bits from the RHS to V.
 599       Known.Zero |= RHSKnown.Zero & BKnown.Zero;
 600       Known.One  |= RHSKnown.One  & BKnown.Zero;
 601     // assume(~(v | b) = a)
 602     } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_Or(m_V, m_Value(B))),
 603                                    m_Value(A))) &&
 604                Pred == ICmpInst::ICMP_EQ &&
 605                isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
 606       KnownBits RHSKnown(BitWidth);
 607       computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
 608       KnownBits BKnown(BitWidth);
 609       computeKnownBits(B, BKnown, Depth+1, Query(Q, I));
 610
 611       // For those bits in B that are known to be zero, we can propagate
 612       // inverted known bits from the RHS to V.
 613       Known.Zero |= RHSKnown.One  & BKnown.Zero;
 614       Known.One  |= RHSKnown.Zero & BKnown.Zero;
 615     // assume(v ^ b = a)
 616     } else if (match(Arg,
 617                      m_c_ICmp(Pred, m_c_Xor(m_V, m_Value(B)), m_Value(A))) &&
 618                Pred == ICmpInst::ICMP_EQ &&
 619                isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
 620       KnownBits RHSKnown(BitWidth);
 621       computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
 622       KnownBits BKnown(BitWidth);
 623       computeKnownBits(B, BKnown, Depth+1, Query(Q, I));
 624
 625       // For those bits in B that are known to be zero, we can propagate known
 626       // bits from the RHS to V. For those bits in B that are known to be one,
 627       // we can propagate inverted known bits from the RHS to V.
 628       Known.Zero |= RHSKnown.Zero & BKnown.Zero;
 629       Known.One  |= RHSKnown.One  & BKnown.Zero;
 630       Known.Zero |= RHSKnown.One  & BKnown.One;
 631       Known.One  |= RHSKnown.Zero & BKnown.One;
 632     // assume(~(v ^ b) = a)
 633     } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_Xor(m_V, m_Value(B))),
 634                                    m_Value(A))) &&
 635                Pred == ICmpInst::ICMP_EQ &&
 636                isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
 637       KnownBits RHSKnown(BitWidth);
 638       computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
 639       KnownBits BKnown(BitWidth);
 640       computeKnownBits(B, BKnown, Depth+1, Query(Q, I));
 641
 642       // For those bits in B that are known to be zero, we can propagate
 643       // inverted known bits from the RHS to V. For those bits in B that are
 644       // known to be one, we can propagate known bits from the RHS to V.
 645       Known.Zero |= RHSKnown.One  & BKnown.Zero;
 646       Known.One  |= RHSKnown.Zero & BKnown.Zero;
 647       Known.Zero |= RHSKnown.Zero & BKnown.One;
 648       Known.One  |= RHSKnown.One  & BKnown.One;
 649     // assume(v << c = a)
 650     } else if (match(Arg, m_c_ICmp(Pred, m_Shl(m_V, m_ConstantInt(C)),
 651                                    m_Value(A))) &&
 652                Pred == ICmpInst::ICMP_EQ &&
 653                isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
 654       KnownBits RHSKnown(BitWidth);
 655       computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
 656       // For those bits in RHS that are known, we can propagate them to known
 657       // bits in V shifted to the right by C.
 658       RHSKnown.Zero.lshrInPlace(C->getZExtValue());
 659       Known.Zero |= RHSKnown.Zero;
 660       RHSKnown.One.lshrInPlace(C->getZExtValue());
 661       Known.One  |= RHSKnown.One;
 662     // assume(~(v << c) = a)
 663     } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_Shl(m_V, m_ConstantInt(C))),
 664                                    m_Value(A))) &&
 665                Pred == ICmpInst::ICMP_EQ &&
 666                isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
 667       KnownBits RHSKnown(BitWidth);
 668       computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
 669       // For those bits in RHS that are known, we can propagate them inverted
 670       // to known bits in V shifted to the right by C.
 671       RHSKnown.One.lshrInPlace(C->getZExtValue());
 672       Known.Zero |= RHSKnown.One;
 673       RHSKnown.Zero.lshrInPlace(C->getZExtValue());
 674       Known.One  |= RHSKnown.Zero;
 675     // assume(v >> c = a)
 676     } else if (match(Arg,
 677                      m_c_ICmp(Pred, m_CombineOr(m_LShr(m_V, m_ConstantInt(C)),
 678                                                 m_AShr(m_V, m_ConstantInt(C))),
 679                               m_Value(A))) &&
 680                Pred == ICmpInst::ICMP_EQ &&
 681                isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
 682       KnownBits RHSKnown(BitWidth);
 683       computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
 684       // For those bits in RHS that are known, we can propagate them to known
 685       // bits in V shifted to the right by C.
 686       Known.Zero |= RHSKnown.Zero << C->getZExtValue();
 687       Known.One  |= RHSKnown.One  << C->getZExtValue();
 688     // assume(~(v >> c) = a)
 689     } else if (match(Arg, m_c_ICmp(Pred, m_Not(m_CombineOr(
 690                                              m_LShr(m_V, m_ConstantInt(C)),
 691                                              m_AShr(m_V, m_ConstantInt(C)))),
 692                                    m_Value(A))) &&
 693                Pred == ICmpInst::ICMP_EQ &&
 694                isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
 695       KnownBits RHSKnown(BitWidth);
 696       computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
 697       // For those bits in RHS that are known, we can propagate them inverted
      // to known bits in V shifted to the left by C.
 699       Known.Zero |= RHSKnown.One  << C->getZExtValue();
 700       Known.One  |= RHSKnown.Zero << C->getZExtValue();
 701     // assume(v >=_s c) where c is non-negative
 702     } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
 703                Pred == ICmpInst::ICMP_SGE &&
 704                isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
 705       KnownBits RHSKnown(BitWidth);
 706       computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
 707
 708       if (RHSKnown.isNonNegative()) {
 709         // We know that the sign bit is zero.
 710         Known.makeNonNegative();
 711       }
 712     // assume(v >_s c) where c is at least -1.
 713     } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
 714                Pred == ICmpInst::ICMP_SGT &&
 715                isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
 716       KnownBits RHSKnown(BitWidth);
 717       computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
 718
 719       if (RHSKnown.isAllOnes() || RHSKnown.isNonNegative()) {
 720         // We know that the sign bit is zero.
 721         Known.makeNonNegative();
 722       }
 723     // assume(v <=_s c) where c is negative
 724     } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
 725                Pred == ICmpInst::ICMP_SLE &&
 726                isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
 727       KnownBits RHSKnown(BitWidth);
 728       computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
 729
 730       if (RHSKnown.isNegative()) {
 731         // We know that the sign bit is one.
 732         Known.makeNegative();
 733       }
 734     // assume(v <_s c) where c is non-positive
 735     } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
 736                Pred == ICmpInst::ICMP_SLT &&
 737                isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
 738       KnownBits RHSKnown(BitWidth);
 739       computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
 740
 741       if (RHSKnown.isZero() || RHSKnown.isNegative()) {
 742         // We know that the sign bit is one.
 743         Known.makeNegative();
 744       }
 745     // assume(v <=_u c)
 746     } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
 747                Pred == ICmpInst::ICMP_ULE &&
 748                isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
 749       KnownBits RHSKnown(BitWidth);
 750       computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
 751
 752       // Whatever high bits in c are zero are known to be zero.
 753       Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros());
 754       // assume(v <_u c)
 755     } else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
 756                Pred == ICmpInst::ICMP_ULT &&
 757                isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
 758       KnownBits RHSKnown(BitWidth);
 759       computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
 760
 761       // Whatever high bits in c are zero are known to be zero (if c is a power
 762       // of 2, then one more).
 763       if (isKnownToBeAPowerOfTwo(A, false, Depth + 1, Query(Q, I)))
 764         Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros() + 1);
 765       else
 766         Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros());
 767     }
 768   }
 769
 770   // If assumptions conflict with each other or previous known bits, then we
 771   // have a logical fallacy. It's possible that the assumption is not reachable,
 772   // so this isn't a real bug. On the other hand, the program may have undefined
 773   // behavior, or we might have a bug in the compiler. We can't assert/crash, so
 774   // clear out the known bits, try to warn the user, and hope for the best.
 775   if (Known.Zero.intersects(Known.One)) {
 776     Known.resetAll();
 777
 778     if (Q.ORE) {
 779       auto *CxtI = const_cast<Instruction *>(Q.CxtI);
 780       OptimizationRemarkAnalysis ORA("value-tracking", "BadAssumption", CxtI);
 781       Q.ORE->emit(ORA << "Detected conflicting code assumptions. Program may "
 782                          "have undefined behavior, or compiler may have "
 783                          "internal error.");
 784     }
 785   }
 786 }
 787
 788 // Compute known bits from a shift operator, including those with a
 789 // non-constant shift amount. Known is the outputs of this function. Known2 is a
 790 // pre-allocated temporary with the/ same bit width as Known. KZF and KOF are
 791 // operator-specific functors that, given the known-zero or known-one bits
 792 // respectively, and a shift amount, compute the implied known-zero or known-one
 793 // bits of the shift operator's result respectively for that shift amount. The
 794 // results from calling KZF and KOF are conservatively combined for all
 795 // permitted shift amounts.
 796 static void computeKnownBitsFromShiftOperator(
 797     const Operator *I, KnownBits &Known, KnownBits &Known2,
 798     unsigned Depth, const Query &Q,
 799     function_ref<APInt(const APInt &, unsigned)> KZF,
 800     function_ref<APInt(const APInt &, unsigned)> KOF) {
 801   unsigned BitWidth = Known.getBitWidth();
 802
 803   if (auto *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
 804     unsigned ShiftAmt = SA->getLimitedValue(BitWidth-1);
 805
 806     computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
 807     Known.Zero = KZF(Known.Zero, ShiftAmt);
 808     Known.One  = KOF(Known.One, ShiftAmt);
 809     // If there is conflict between Known.Zero and Known.One, this must be an
 810     // overflowing left shift, so the shift result is undefined. Clear Known
 811     // bits so that other code could propagate this undef.
 812     if ((Known.Zero & Known.One) != 0)
 813       Known.resetAll();
 814
 815     return;
 816   }
 817
 818   computeKnownBits(I->getOperand(1), Known, Depth + 1, Q);
 819
 820   // If the shift amount could be greater than or equal to the bit-width of the LHS, the
 821   // value could be undef, so we don't know anything about it.
 822   if ((~Known.Zero).uge(BitWidth)) {
 823     Known.resetAll();
 824     return;
 825   }
 826
 827   // Note: We cannot use Known.Zero.getLimitedValue() here, because if
 828   // BitWidth > 64 and any upper bits are known, we'll end up returning the
 829   // limit value (which implies all bits are known).
 830   uint64_t ShiftAmtKZ = Known.Zero.zextOrTrunc(64).getZExtValue();
 831   uint64_t ShiftAmtKO = Known.One.zextOrTrunc(64).getZExtValue();
 832
 833   // It would be more-clearly correct to use the two temporaries for this
 834   // calculation. Reusing the APInts here to prevent unnecessary allocations.
 835   Known.resetAll();
 836
 837   // If we know the shifter operand is nonzero, we can sometimes infer more
 838   // known bits. However this is expensive to compute, so be lazy about it and
 839   // only compute it when absolutely necessary.
 840   Optional<bool> ShifterOperandIsNonZero;
 841
 842   // Early exit if we can't constrain any well-defined shift amount.
 843   if (!(ShiftAmtKZ & (BitWidth - 1)) && !(ShiftAmtKO & (BitWidth - 1))) {
 844     ShifterOperandIsNonZero =
 845         isKnownNonZero(I->getOperand(1), Depth + 1, Q);
 846     if (!*ShifterOperandIsNonZero)
 847       return;
 848   }
 849
 850   computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
 851
 852   Known.Zero.setAllBits();
 853   Known.One.setAllBits();
 854   for (unsigned ShiftAmt = 0; ShiftAmt < BitWidth; ++ShiftAmt) {
 855     // Combine the shifted known input bits only for those shift amounts
 856     // compatible with its known constraints.
 857     if ((ShiftAmt & ~ShiftAmtKZ) != ShiftAmt)
 858       continue;
 859     if ((ShiftAmt | ShiftAmtKO) != ShiftAmt)
 860       continue;
 861     // If we know the shifter is nonzero, we may be able to infer more known
 862     // bits. This check is sunk down as far as possible to avoid the expensive
 863     // call to isKnownNonZero if the cheaper checks above fail.
 864     if (ShiftAmt == 0) {
 865       if (!ShifterOperandIsNonZero.hasValue())
 866         ShifterOperandIsNonZero =
 867             isKnownNonZero(I->getOperand(1), Depth + 1, Q);
 868       if (*ShifterOperandIsNonZero)
 869         continue;
 870     }
 871
 872     Known.Zero &= KZF(Known2.Zero, ShiftAmt);
 873     Known.One  &= KOF(Known2.One, ShiftAmt);
 874   }
 875
 876   // If there are no compatible shift amounts, then we've proven that the shift
 877   // amount must be >= the BitWidth, and the result is undefined. We could
 878   // return anything we'd like, but we need to make sure the sets of known bits
 879   // stay disjoint (it should be better for some other code to actually
 880   // propagate the undef than to pick a value here using known bits).
 881   if (Known.Zero.intersects(Known.One))
 882     Known.resetAll();
 883 }
 884
 885 static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
 886                                          unsigned Depth, const Query &Q) {
 887   unsigned BitWidth = Known.getBitWidth();
 888
 889   KnownBits Known2(Known);
 890   switch (I->getOpcode()) {
 891   default: break;
 892   case Instruction::Load:
 893     if (MDNode *MD = cast<LoadInst>(I)->getMetadata(LLVMContext::MD_range))
 894       computeKnownBitsFromRangeMetadata(*MD, Known);
 895     break;
 896   case Instruction::And: {
 897     // If either the LHS or the RHS are Zero, the result is zero.
 898     computeKnownBits(I->getOperand(1), Known, Depth + 1, Q);
 899     computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
 900
 901     // Output known-1 bits are only known if set in both the LHS & RHS.
 902     Known.One &= Known2.One;
 903     // Output known-0 are known to be clear if zero in either the LHS | RHS.
 904     Known.Zero |= Known2.Zero;
 905
 906     // and(x, add (x, -1)) is a common idiom that always clears the low bit;
 907     // here we handle the more general case of adding any odd number by
    // matching the form and(x, add(x, y)) where y is odd.
 909     // TODO: This could be generalized to clearing any bit set in y where the
 910     // following bit is known to be unset in y.
 911     Value *Y = nullptr;
 912     if (!Known.Zero[0] && !Known.One[0] &&
 913         (match(I->getOperand(0), m_Add(m_Specific(I->getOperand(1)),
 914                                        m_Value(Y))) ||
 915          match(I->getOperand(1), m_Add(m_Specific(I->getOperand(0)),
 916                                        m_Value(Y))))) {
 917       Known2.resetAll();
 918       computeKnownBits(Y, Known2, Depth + 1, Q);
 919       if (Known2.countMinTrailingOnes() > 0)
 920         Known.Zero.setBit(0);
 921     }
 922     break;
 923   }
 924   case Instruction::Or: {
 925     computeKnownBits(I->getOperand(1), Known, Depth + 1, Q);
 926     computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
 927
 928     // Output known-0 bits are only known if clear in both the LHS & RHS.
 929     Known.Zero &= Known2.Zero;
 930     // Output known-1 are known to be set if set in either the LHS | RHS.
 931     Known.One |= Known2.One;
 932     break;
 933   }
 934   case Instruction::Xor: {
 935     computeKnownBits(I->getOperand(1), Known, Depth + 1, Q);
 936     computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
 937
 938     // Output known-0 bits are known if clear or set in both the LHS & RHS.
 939     APInt KnownZeroOut = (Known.Zero & Known2.Zero) | (Known.One & Known2.One);
 940     // Output known-1 are known to be set if set in only one of the LHS, RHS.
 941     Known.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero);
 942     Known.Zero = std::move(KnownZeroOut);
 943     break;
 944   }
 945   case Instruction::Mul: {
 946     bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
 947     computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW, Known,
 948                         Known2, Depth, Q);
 949     break;
 950   }
 951   case Instruction::UDiv: {
 952     // For the purposes of computing leading zeros we can conservatively
 953     // treat a udiv as a logical right shift by the power of 2 known to
 954     // be less than the denominator.
 955     computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
 956     unsigned LeadZ = Known2.countMinLeadingZeros();
 957
 958     Known2.resetAll();
 959     computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
 960     unsigned RHSMaxLeadingZeros = Known2.countMaxLeadingZeros();
 961     if (RHSMaxLeadingZeros != BitWidth)
 962       LeadZ = std::min(BitWidth, LeadZ + BitWidth - RHSMaxLeadingZeros - 1);
 963
 964     Known.Zero.setHighBits(LeadZ);
 965     break;
 966   }
 967   case Instruction::Select: {
 968     const Value *LHS, *RHS;
 969     SelectPatternFlavor SPF = matchSelectPattern(I, LHS, RHS).Flavor;
 970     if (SelectPatternResult::isMinOrMax(SPF)) {
 971       computeKnownBits(RHS, Known, Depth + 1, Q);
 972       computeKnownBits(LHS, Known2, Depth + 1, Q);
 973     } else {
 974       computeKnownBits(I->getOperand(2), Known, Depth + 1, Q);
 975       computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
 976     }
 977
 978     unsigned MaxHighOnes = 0;
 979     unsigned MaxHighZeros = 0;
 980     if (SPF == SPF_SMAX) {
 981       // If both sides are negative, the result is negative.
 982       if (Known.isNegative() && Known2.isNegative())
 983         // We can derive a lower bound on the result by taking the max of the
 984         // leading one bits.
 985         MaxHighOnes =
 986             std::max(Known.countMinLeadingOnes(), Known2.countMinLeadingOnes());
 987       // If either side is non-negative, the result is non-negative.
 988       else if (Known.isNonNegative() || Known2.isNonNegative())
 989         MaxHighZeros = 1;
 990     } else if (SPF == SPF_SMIN) {
 991       // If both sides are non-negative, the result is non-negative.
 992       if (Known.isNonNegative() && Known2.isNonNegative())
 993         // We can derive an upper bound on the result by taking the max of the
 994         // leading zero bits.
 995         MaxHighZeros = std::max(Known.countMinLeadingZeros(),
 996                                 Known2.countMinLeadingZeros());
 997       // If either side is negative, the result is negative.
 998       else if (Known.isNegative() || Known2.isNegative())
 999         MaxHighOnes = 1;
1000     } else if (SPF == SPF_UMAX) {
1001       // We can derive a lower bound on the result by taking the max of the
1002       // leading one bits.
1003       MaxHighOnes =
1004           std::max(Known.countMinLeadingOnes(), Known2.countMinLeadingOnes());
1005     } else if (SPF == SPF_UMIN) {
1006       // We can derive an upper bound on the result by taking the max of the
1007       // leading zero bits.
1008       MaxHighZeros =
1009           std::max(Known.countMinLeadingZeros(), Known2.countMinLeadingZeros());
1010     }
1011
1012     // Only known if known in both the LHS and RHS.
1013     Known.One &= Known2.One;
1014     Known.Zero &= Known2.Zero;
1015     if (MaxHighOnes > 0)
1016       Known.One.setHighBits(MaxHighOnes);
1017     if (MaxHighZeros > 0)
1018       Known.Zero.setHighBits(MaxHighZeros);
1019     break;
1020   }
1021   case Instruction::FPTrunc:
1022   case Instruction::FPExt:
1023   case Instruction::FPToUI:
1024   case Instruction::FPToSI:
1025   case Instruction::SIToFP:
1026   case Instruction::UIToFP:
1027     break; // Can't work with floating point.
1028   case Instruction::PtrToInt:
1029   case Instruction::IntToPtr:
1030     // Fall through and handle them the same as zext/trunc.
1031     LLVM_FALLTHROUGH;
1032   case Instruction::ZExt:
1033   case Instruction::Trunc: {
1034     Type *SrcTy = I->getOperand(0)->getType();
1035
1036     unsigned SrcBitWidth;
1037     // Note that we handle pointer operands here because of inttoptr/ptrtoint
1038     // which fall through here.
1039     SrcBitWidth = Q.DL.getTypeSizeInBits(SrcTy->getScalarType());
1040
1041     assert(SrcBitWidth && "SrcBitWidth can't be zero");
1042     Known = Known.zextOrTrunc(SrcBitWidth);
1043     computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1044     Known = Known.zextOrTrunc(BitWidth);
1045     // Any top bits are known to be zero.
1046     if (BitWidth > SrcBitWidth)
1047       Known.Zero.setBitsFrom(SrcBitWidth);
1048     break;
1049   }
1050   case Instruction::BitCast: {
1051     Type *SrcTy = I->getOperand(0)->getType();
1052     if ((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
1053         // TODO: For now, not handling conversions like:
1054         // (bitcast i64 %x to <2 x i32>)
1055         !I->getType()->isVectorTy()) {
1056       computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1057       break;
1058     }
1059     break;
1060   }
1061   case Instruction::SExt: {
1062     // Compute the bits in the result that are not present in the input.
1063     unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();
1064
1065     Known = Known.trunc(SrcBitWidth);
1066     computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1067     // If the sign bit of the input is known set or clear, then we know the
1068     // top bits of the result.
1069     Known = Known.sext(BitWidth);
1070     break;
1071   }
1072   case Instruction::Shl: {
1073     // (shl X, C1) & C2 == 0   iff   (X & C2 >>u C1) == 0
1074     bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
1075     auto KZF = [NSW](const APInt &KnownZero, unsigned ShiftAmt) {
1076       APInt KZResult = KnownZero << ShiftAmt;
1077       KZResult.setLowBits(ShiftAmt); // Low bits known 0.
1078       // If this shift has "nsw" keyword, then the result is either a poison
1079       // value or has the same sign bit as the first operand.
1080       if (NSW && KnownZero.isSignBitSet())
1081         KZResult.setSignBit();
1082       return KZResult;
1083     };
1084
1085     auto KOF = [NSW](const APInt &KnownOne, unsigned ShiftAmt) {
1086       APInt KOResult = KnownOne << ShiftAmt;
1087       if (NSW && KnownOne.isSignBitSet())
1088         KOResult.setSignBit();
1089       return KOResult;
1090     };
1091
1092     computeKnownBitsFromShiftOperator(I, Known, Known2, Depth, Q, KZF, KOF);
1093     break;
1094   }
1095   case Instruction::LShr: {
1096     // (ushr X, C1) & C2 == 0   iff  (-1 >> C1) & C2 == 0
1097     auto KZF = [](const APInt &KnownZero, unsigned ShiftAmt) {
1098       APInt KZResult = KnownZero.lshr(ShiftAmt);
1099       // High bits known zero.
1100       KZResult.setHighBits(ShiftAmt);
1101       return KZResult;
1102     };
1103
1104     auto KOF = [](const APInt &KnownOne, unsigned ShiftAmt) {
1105       return KnownOne.lshr(ShiftAmt);
1106     };
1107
1108     computeKnownBitsFromShiftOperator(I, Known, Known2, Depth, Q, KZF, KOF);
1109     break;
1110   }
1111   case Instruction::AShr: {
1112     // (ashr X, C1) & C2 == 0   iff  (-1 >> C1) & C2 == 0
1113     auto KZF = [](const APInt &KnownZero, unsigned ShiftAmt) {
1114       return KnownZero.ashr(ShiftAmt);
1115     };
1116
1117     auto KOF = [](const APInt &KnownOne, unsigned ShiftAmt) {
1118       return KnownOne.ashr(ShiftAmt);
1119     };
1120
1121     computeKnownBitsFromShiftOperator(I, Known, Known2, Depth, Q, KZF, KOF);
1122     break;
1123   }
1124   case Instruction::Sub: {
1125     bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
1126     computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW,
1127                            Known, Known2, Depth, Q);
1128     break;
1129   }
1130   case Instruction::Add: {
1131     bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
1132     computeKnownBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW,
1133                            Known, Known2, Depth, Q);
1134     break;
1135   }
1136   case Instruction::SRem:
1137     if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
1138       APInt RA = Rem->getValue().abs();
1139       if (RA.isPowerOf2()) {
1140         APInt LowBits = RA - 1;
1141         computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
1142
1143         // The low bits of the first operand are unchanged by the srem.
1144         Known.Zero = Known2.Zero & LowBits;
1145         Known.One = Known2.One & LowBits;
1146
1147         // If the first operand is non-negative or has all low bits zero, then
1148         // the upper bits are all zero.
1149         if (Known2.isNonNegative() || LowBits.isSubsetOf(Known2.Zero))
1150           Known.Zero |= ~LowBits;
1151
1152         // If the first operand is negative and not all low bits are zero, then
1153         // the upper bits are all one.
1154         if (Known2.isNegative() && LowBits.intersects(Known2.One))
1155           Known.One |= ~LowBits;
1156
1157         assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
1158         break;
1159       }
1160     }
1161
1162     // The sign bit is the LHS's sign bit, except when the result of the
1163     // remainder is zero.
1164     computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
1165     // If it's known zero, our sign bit is also zero.
1166     if (Known2.isNonNegative())
1167       Known.makeNonNegative();
1168
1169     break;
1170   case Instruction::URem: {
1171     if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
1172       const APInt &RA = Rem->getValue();
1173       if (RA.isPowerOf2()) {
1174         APInt LowBits = (RA - 1);
1175         computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1176         Known.Zero |= ~LowBits;
1177         Known.One &= LowBits;
1178         break;
1179       }
1180     }
1181
1182     // Since the result is less than or equal to either operand, any leading
1183     // zero bits in either operand must also exist in the result.
1184     computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1185     computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
1186
1187     unsigned Leaders =
1188         std::max(Known.countMinLeadingZeros(), Known2.countMinLeadingZeros());
1189     Known.resetAll();
1190     Known.Zero.setHighBits(Leaders);
1191     break;
1192   }
1193
1194   case Instruction::Alloca: {
1195     const AllocaInst *AI = cast<AllocaInst>(I);
1196     unsigned Align = AI->getAlignment();
1197     if (Align == 0)
1198       Align = Q.DL.getABITypeAlignment(AI->getAllocatedType());
1199
1200     if (Align > 0)
1201       Known.Zero.setLowBits(countTrailingZeros(Align));
1202     break;
1203   }
1204   case Instruction::GetElementPtr: {
1205     // Analyze all of the subscripts of this getelementptr instruction
1206     // to determine if we can prove known low zero bits.
1207     KnownBits LocalKnown(BitWidth);
1208     computeKnownBits(I->getOperand(0), LocalKnown, Depth + 1, Q);
1209     unsigned TrailZ = LocalKnown.countMinTrailingZeros();
1210
1211     gep_type_iterator GTI = gep_type_begin(I);
1212     for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) {
1213       Value *Index = I->getOperand(i);
1214       if (StructType *STy = GTI.getStructTypeOrNull()) {
1215         // Handle struct member offset arithmetic.
1216
1217         // Handle case when index is vector zeroinitializer
1218         Constant *CIndex = cast<Constant>(Index);
1219         if (CIndex->isZeroValue())
1220           continue;
1221
1222         if (CIndex->getType()->isVectorTy())
1223           Index = CIndex->getSplatValue();
1224
1225         unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
1226         const StructLayout *SL = Q.DL.getStructLayout(STy);
1227         uint64_t Offset = SL->getElementOffset(Idx);
1228         TrailZ = std::min<unsigned>(TrailZ,
1229                                     countTrailingZeros(Offset));
1230       } else {
1231         // Handle array index arithmetic.
1232         Type *IndexedTy = GTI.getIndexedType();
1233         if (!IndexedTy->isSized()) {
1234           TrailZ = 0;
1235           break;
1236         }
1237         unsigned GEPOpiBits = Index->getType()->getScalarSizeInBits();
1238         uint64_t TypeSize = Q.DL.getTypeAllocSize(IndexedTy);
1239         LocalKnown.Zero = LocalKnown.One = APInt(GEPOpiBits, 0);
1240         computeKnownBits(Index, LocalKnown, Depth + 1, Q);
1241         TrailZ = std::min(TrailZ,
1242                           unsigned(countTrailingZeros(TypeSize) +
1243                                    LocalKnown.countMinTrailingZeros()));
1244       }
1245     }
1246
1247     Known.Zero.setLowBits(TrailZ);
1248     break;
1249   }
1250   case Instruction::PHI: {
1251     const PHINode *P = cast<PHINode>(I);
1252     // Handle the case of a simple two-predecessor recurrence PHI.
1253     // There's a lot more that could theoretically be done here, but
1254     // this is sufficient to catch some interesting cases.
1255     if (P->getNumIncomingValues() == 2) {
1256       for (unsigned i = 0; i != 2; ++i) {
1257         Value *L = P->getIncomingValue(i);
1258         Value *R = P->getIncomingValue(!i);
1259         Operator *LU = dyn_cast<Operator>(L);
1260         if (!LU)
1261           continue;
1262         unsigned Opcode = LU->getOpcode();
1263         // Check for operations that have the property that if
1264         // both their operands have low zero bits, the result
1265         // will have low zero bits.
1266         if (Opcode == Instruction::Add ||
1267             Opcode == Instruction::Sub ||
1268             Opcode == Instruction::And ||
1269             Opcode == Instruction::Or ||
1270             Opcode == Instruction::Mul) {
1271           Value *LL = LU->getOperand(0);
1272           Value *LR = LU->getOperand(1);
1273           // Find a recurrence.
1274           if (LL == I)
1275             L = LR;
1276           else if (LR == I)
1277             L = LL;
1278           else
1279             break;
1280           // Ok, we have a PHI of the form L op= R. Check for low
1281           // zero bits.
1282           computeKnownBits(R, Known2, Depth + 1, Q);
1283
1284           // We need to take the minimum number of known bits
1285           KnownBits Known3(Known);
1286           computeKnownBits(L, Known3, Depth + 1, Q);
1287
1288           Known.Zero.setLowBits(std::min(Known2.countMinTrailingZeros(),
1289                                          Known3.countMinTrailingZeros()));
1290
1291           if (DontImproveNonNegativePhiBits)
1292             break;
1293
1294           auto *OverflowOp = dyn_cast<OverflowingBinaryOperator>(LU);
1295           if (OverflowOp && OverflowOp->hasNoSignedWrap()) {
1296             // If initial value of recurrence is nonnegative, and we are adding
1297             // a nonnegative number with nsw, the result can only be nonnegative
1298             // or poison value regardless of the number of times we execute the
1299             // add in phi recurrence. If initial value is negative and we are
1300             // adding a negative number with nsw, the result can only be
1301             // negative or poison value. Similar arguments apply to sub and mul.
1302             //
1303             // (add non-negative, non-negative) --> non-negative
1304             // (add negative, negative) --> negative
1305             if (Opcode == Instruction::Add) {
1306               if (Known2.isNonNegative() && Known3.isNonNegative())
1307                 Known.makeNonNegative();
1308               else if (Known2.isNegative() && Known3.isNegative())
1309                 Known.makeNegative();
1310             }
1311
1312             // (sub nsw non-negative, negative) --> non-negative
1313             // (sub nsw negative, non-negative) --> negative
1314             else if (Opcode == Instruction::Sub && LL == I) {
1315               if (Known2.isNonNegative() && Known3.isNegative())
1316                 Known.makeNonNegative();
1317               else if (Known2.isNegative() && Known3.isNonNegative())
1318                 Known.makeNegative();
1319             }
1320
1321             // (mul nsw non-negative, non-negative) --> non-negative
1322             else if (Opcode == Instruction::Mul && Known2.isNonNegative() &&
1323                      Known3.isNonNegative())
1324               Known.makeNonNegative();
1325           }
1326
1327           break;
1328         }
1329       }
1330     }
1331
1332     // Unreachable blocks may have zero-operand PHI nodes.
1333     if (P->getNumIncomingValues() == 0)
1334       break;
1335
1336     // Otherwise take the unions of the known bit sets of the operands,
1337     // taking conservative care to avoid excessive recursion.
1338     if (Depth < MaxDepth - 1 && !Known.Zero && !Known.One) {
1339       // Skip if every incoming value references to ourself.
1340       if (dyn_cast_or_null<UndefValue>(P->hasConstantValue()))
1341         break;
1342
1343       Known.Zero.setAllBits();
1344       Known.One.setAllBits();
1345       for (Value *IncValue : P->incoming_values()) {
1346         // Skip direct self references.
1347         if (IncValue == P) continue;
1348
1349         Known2 = KnownBits(BitWidth);
1350         // Recurse, but cap the recursion to one level, because we don't
1351         // want to waste time spinning around in loops.
1352         computeKnownBits(IncValue, Known2, MaxDepth - 1, Q);
1353         Known.Zero &= Known2.Zero;
1354         Known.One &= Known2.One;
1355         // If all bits have been ruled out, there's no need to check
1356         // more operands.
1357         if (!Known.Zero && !Known.One)
1358           break;
1359       }
1360     }
1361     break;
1362   }
1363   case Instruction::Call:
1364   case Instruction::Invoke:
1365     // If range metadata is attached to this call, set known bits from that,
1366     // and then intersect with known bits based on other properties of the
1367     // function.
1368     if (MDNode *MD = cast<Instruction>(I)->getMetadata(LLVMContext::MD_range))
1369       computeKnownBitsFromRangeMetadata(*MD, Known);
1370     if (const Value *RV = ImmutableCallSite(I).getReturnedArgOperand()) {
1371       computeKnownBits(RV, Known2, Depth + 1, Q);
1372       Known.Zero |= Known2.Zero;
1373       Known.One |= Known2.One;
1374     }
1375     if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
1376       switch (II->getIntrinsicID()) {
1377       default: break;
1378       case Intrinsic::bitreverse:
1379         computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
1380         Known.Zero |= Known2.Zero.reverseBits();
1381         Known.One |= Known2.One.reverseBits();
1382         break;
1383       case Intrinsic::bswap:
1384         computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
1385         Known.Zero |= Known2.Zero.byteSwap();
1386         Known.One |= Known2.One.byteSwap();
1387         break;
1388       case Intrinsic::ctlz: {
1389         computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
1390         // If we have a known 1, its position is our upper bound.
1391         unsigned PossibleLZ = Known2.One.countLeadingZeros();
1392         // If this call is undefined for 0, the result will be less than 2^n.
1393         if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
1394           PossibleLZ = std::min(PossibleLZ, BitWidth - 1);
1395         unsigned LowBits = Log2_32(PossibleLZ)+1;
1396         Known.Zero.setBitsFrom(LowBits);
1397         break;
1398       }
1399       case Intrinsic::cttz: {
1400         computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
1401         // If we have a known 1, its position is our upper bound.
1402         unsigned PossibleTZ = Known2.One.countTrailingZeros();
1403         // If this call is undefined for 0, the result will be less than 2^n.
1404         if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
1405           PossibleTZ = std::min(PossibleTZ, BitWidth - 1);
1406         unsigned LowBits = Log2_32(PossibleTZ)+1;
1407         Known.Zero.setBitsFrom(LowBits);
1408         break;
1409       }
1410       case Intrinsic::ctpop: {
1411         computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
1412         // We can bound the space the count needs.  Also, bits known to be zero
1413         // can't contribute to the population.
1414         unsigned BitsPossiblySet = Known2.countMaxPopulation();
1415         unsigned LowBits = Log2_32(BitsPossiblySet)+1;
1416         Known.Zero.setBitsFrom(LowBits);
1417         // TODO: we could bound KnownOne using the lower bound on the number
1418         // of bits which might be set provided by popcnt KnownOne2.
1419         break;
1420       }
1421       case Intrinsic::x86_sse42_crc32_64_64:
1422         Known.Zero.setBitsFrom(32);
1423         break;
1424       }
1425     }
1426     break;
1427   case Instruction::ExtractElement:
1428     // Look through extract element. At the moment we keep this simple and skip
1429     // tracking the specific element. But at least we might find information
1430     // valid for all elements of the vector (for example if vector is sign
1431     // extended, shifted, etc).
1432     computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1433     break;
1434   case Instruction::ExtractValue:
1435     if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->getOperand(0))) {
1436       const ExtractValueInst *EVI = cast<ExtractValueInst>(I);
1437       if (EVI->getNumIndices() != 1) break;
1438       if (EVI->getIndices()[0] == 0) {
1439         switch (II->getIntrinsicID()) {
1440         default: break;
1441         case Intrinsic::uadd_with_overflow:
1442         case Intrinsic::sadd_with_overflow:
1443           computeKnownBitsAddSub(true, II->getArgOperand(0),
1444                                  II->getArgOperand(1), false, Known, Known2,
1445                                  Depth, Q);
1446           break;
1447         case Intrinsic::usub_with_overflow:
1448         case Intrinsic::ssub_with_overflow:
1449           computeKnownBitsAddSub(false, II->getArgOperand(0),
1450                                  II->getArgOperand(1), false, Known, Known2,
1451                                  Depth, Q);
1452           break;
1453         case Intrinsic::umul_with_overflow:
1454         case Intrinsic::smul_with_overflow:
1455           computeKnownBitsMul(II->getArgOperand(0), II->getArgOperand(1), false,
1456                               Known, Known2, Depth, Q);
1457           break;
1458         }
1459       }
1460     }
1461   }
1462 }
1463
1464 /// Determine which bits of V are known to be either zero or one and return
1465 /// them.
1466 KnownBits computeKnownBits(const Value *V, unsigned Depth, const Query &Q) {
1467   KnownBits Known(getBitWidth(V->getType(), Q.DL));
1468   computeKnownBits(V, Known, Depth, Q);
1469   return Known;
1470 }
1471
1472 /// Determine which bits of V are known to be either zero or one and return
1473 /// them in the Known bit set.
1474 ///
1475 /// NOTE: we cannot consider 'undef' to be "IsZero" here.  The problem is that
1476 /// we cannot optimize based on the assumption that it is zero without changing
1477 /// it to be an explicit zero.  If we don't change it to zero, other code could
1478 /// optimize based on the contradictory assumption that it is non-zero.
1479 /// Because instcombine aggressively folds operations with undef args anyway,
1480 /// this won't lose us code quality.
1481 ///
1482 /// This function is defined on values with integer type, values with pointer
1483 /// type, and vectors of integers.  In the case
1484 /// where V is a vector, known zero, and known one values are the
1485 /// same width as the vector element, and the bit is set only if it is true
1486 /// for all of the elements in the vector.
     ///
     /// Known must already have the bit width of V's scalar type, and Depth must
     /// not exceed MaxDepth; both conditions are asserted on entry.
1487 void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth,
1488                       const Query &Q) {
1489   assert(V && "No Value?");
1490   assert(Depth <= MaxDepth && "Limit Search Depth");
1491   unsigned BitWidth = Known.getBitWidth();
1492
1493   assert((V->getType()->isIntOrIntVectorTy() ||
1494           V->getType()->getScalarType()->isPointerTy()) &&
1495          "Not integer or pointer type!");
1496   assert((Q.DL.getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) &&
1497          (!V->getType()->isIntOrIntVectorTy() ||
1498           V->getType()->getScalarSizeInBits() == BitWidth) &&
1499          "V and Known should have same BitWidth");
       // NOTE(review): BitWidth is also read outside of the asserts further down
       // (when constructing Elt), so this cast looks redundant -- confirm before
       // removing it.
1500   (void)BitWidth;
1501
       // Constants are fully resolved here and return early, before Known is
       // reset and before the depth limit is consulted.
1502   const APInt *C;
1503   if (match(V, m_APInt(C))) {
1504     // We know all of the bits for a scalar constant or a splat vector constant!
1505     Known.One = *C;
1506     Known.Zero = ~Known.One;
1507     return;
1508   }
1509   // Null and aggregate-zero are all-zeros.
1510   if (isa<ConstantPointerNull>(V) || isa<ConstantAggregateZero>(V)) {
1511     Known.setAllZero();
1512     return;
1513   }
1514   // Handle a constant vector by taking the intersection of the known bits of
1515   // each element.
1516   if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(V)) {
1517     // We know that CDS must be a vector of integers. Take the intersection of
1518     // each element.
         // Start from "everything known" and clear whatever an element disproves.
1519     Known.Zero.setAllBits(); Known.One.setAllBits();
1520     APInt Elt(BitWidth, 0);
1521     for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
1522       Elt = CDS->getElementAsInteger(i);
1523       Known.Zero &= ~Elt;
1524       Known.One &= Elt;
1525     }
1526     return;
1527   }
1528
1529   if (const auto *CV = dyn_cast<ConstantVector>(V)) {
1530     // We know that CV must be a vector of integers. Take the intersection of
1531     // each element.
1532     Known.Zero.setAllBits(); Known.One.setAllBits();
1533     APInt Elt(BitWidth, 0);
1534     for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
1535       Constant *Element = CV->getAggregateElement(i);
1536       auto *ElementCI = dyn_cast_or_null<ConstantInt>(Element);
           // A missing element, or one that is not a ConstantInt, makes every bit
           // unknown.
1537       if (!ElementCI) {
1538         Known.resetAll();
1539         return;
1540       }
1541       Elt = ElementCI->getValue();
1542       Known.Zero &= ~Elt;
1543       Known.One &= Elt;
1544     }
1545     return;
1546   }
1547
1548   // Start out not knowing anything.
1549   Known.resetAll();
1550
1551   // We can't imply anything about undefs.
1552   if (isa<UndefValue>(V))
1553     return;
1554
1555   // There's no point in looking through other users of ConstantData for
1556   // assumptions.  Confirm that we've handled them all.
1557   assert(!isa<ConstantData>(V) && "Unhandled constant data!");
1558
1559   // Limit search depth.
1560   // All recursive calls that increase depth must come after this.
1561   if (Depth == MaxDepth)
1562     return;
1563
1564   // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has
1565   // the bits of its aliasee.
1566   if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
1567     if (!GA->isInterposable())
1568       computeKnownBits(GA->getAliasee(), Known, Depth + 1, Q);
1569     return;
1570   }
1571
       // Operator-specific reasoning; Depth is passed through unchanged here.
1572   if (const Operator *I = dyn_cast<Operator>(V))
1573     computeKnownBitsFromOperator(I, Known, Depth, Q);
1574
1575   // Aligned pointers have trailing zeros - refine Known.Zero set
1576   if (V->getType()->isPointerTy()) {
1577     unsigned Align = V->getPointerAlignment(Q.DL);
         // An alignment of zero is skipped, leaving Known.Zero untouched.
1578     if (Align)
1579       Known.Zero.setLowBits(countTrailingZeros(Align));
1580   }
1581
1582   // computeKnownBitsFromAssume strictly refines Known.
1583   // Therefore, we run them after computeKnownBitsFromOperator.
1584
1585   // Check whether a nearby assume intrinsic can determine some known bits.
1586   computeKnownBitsFromAssume(V, Known, Depth, Q);
1587
       // No bit may be claimed as both zero and one.
1588   assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
1589 }
1590
1591 /// Return true if the given value is known to have exactly one
1592 /// bit set when defined. For vectors return true if every element is known to
1593 /// be a power of two when defined. Supports values with integer or pointer
1594 /// types and vectors of integers.
     ///
     /// When OrZero is true, a value that may also be zero is accepted. Depth is
     /// the current recursion depth; once it equals MaxDepth only the
     /// non-recursive checks at the top of the function are performed.
1595 bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
1596                             const Query &Q) {
1597   if (const Constant *C = dyn_cast<Constant>(V)) {
         // A null constant qualifies only when the caller tolerates zero.
1598     if (C->isNullValue())
1599       return OrZero;
1600
1601     const APInt *ConstIntOrConstSplatInt;
1602     if (match(C, m_APInt(ConstIntOrConstSplatInt)))
1603       return ConstIntOrConstSplatInt->isPowerOf2();
         // Any other constant falls through to the generic checks below.
1604   }
1605
1606   // 1 << X is clearly a power of two if the one is not shifted off the end.  If
1607   // it is shifted off the end then the result is undefined.
1608   if (match(V, m_Shl(m_One(), m_Value())))
1609     return true;
1610
1611   // (signmask) >>l X is clearly a power of two if the one is not shifted off
1612   // the bottom.  If it is shifted off the bottom then the result is undefined.
1613   if (match(V, m_LShr(m_SignMask(), m_Value())))
1614     return true;
1615
1616   // The remaining tests are all recursive, so bail out if we hit the limit.
       // Depth is post-incremented, so every query below runs one level deeper.
1617   if (Depth++ == MaxDepth)
1618     return false;
1619
1620   Value *X = nullptr, *Y = nullptr;
1621   // A shift left or a logical shift right of a power of two is a power of two
1622   // or zero.
1623   if (OrZero && (match(V, m_Shl(m_Value(X), m_Value())) ||
1624                  match(V, m_LShr(m_Value(X), m_Value()))))
1625     return isKnownToBeAPowerOfTwo(X, /*OrZero*/ true, Depth, Q);
1626
       // Look through zext: the answer for the operand is returned unchanged.
1627   if (const ZExtInst *ZI = dyn_cast<ZExtInst>(V))
1628     return isKnownToBeAPowerOfTwo(ZI->getOperand(0), OrZero, Depth, Q);
1629
       // A select qualifies only if both of its arms do.
1630   if (const SelectInst *SI = dyn_cast<SelectInst>(V))
1631     return isKnownToBeAPowerOfTwo(SI->getTrueValue(), OrZero, Depth, Q) &&
1632            isKnownToBeAPowerOfTwo(SI->getFalseValue(), OrZero, Depth, Q);
1633
1634   if (OrZero && match(V, m_And(m_Value(X), m_Value(Y)))) {
1635     // A power of two and'd with anything is a power of two or zero.
1636     if (isKnownToBeAPowerOfTwo(X, /*OrZero*/ true, Depth, Q) ||
1637         isKnownToBeAPowerOfTwo(Y, /*OrZero*/ true, Depth, Q))
1638       return true;
1639     // X & (-X) is always a power of two or zero.
1640     if (match(X, m_Neg(m_Specific(Y))) || match(Y, m_Neg(m_Specific(X))))
1641       return true;
1642     return false;
1643   }
1644
1645   // Adding a power-of-two or zero to the same power-of-two or zero yields
1646   // either the original power-of-two, a larger power-of-two or zero.
1647   if (match(V, m_Add(m_Value(X), m_Value(Y)))) {
1648     const OverflowingBinaryOperator *VOBO = cast<OverflowingBinaryOperator>(V);
         // Without OrZero the add must carry nuw or nsw for the reasoning below.
1649     if (OrZero || VOBO->hasNoUnsignedWrap() || VOBO->hasNoSignedWrap()) {
           // Pattern (Y & _) + Y, in either operand order of the 'and'.
1650       if (match(X, m_And(m_Specific(Y), m_Value())) ||
1651           match(X, m_And(m_Value(), m_Specific(Y))))
1652         if (isKnownToBeAPowerOfTwo(Y, OrZero, Depth, Q))
1653           return true;
           // The mirrored pattern X + (X & _).
1654       if (match(Y, m_And(m_Specific(X), m_Value())) ||
1655           match(Y, m_And(m_Value(), m_Specific(X))))
1656         if (isKnownToBeAPowerOfTwo(X, OrZero, Depth, Q))
1657           return true;
1658
1659       unsigned BitWidth = V->getType()->getScalarSizeInBits();
1660       KnownBits LHSBits(BitWidth);
1661       computeKnownBits(X, LHSBits, Depth, Q);
1662
1663       KnownBits RHSBits(BitWidth);
1664       computeKnownBits(Y, RHSBits, Depth, Q);
1665       // If i8 V is a power of two or zero:
1666       //  ZeroBits: 1 1 1 0 1 1 1 1
1667       // ~ZeroBits: 0 0 0 1 0 0 0 0
1668       if ((~(LHSBits.Zero & RHSBits.Zero)).isPowerOf2())
1669         // If OrZero isn't set, we cannot give back a zero result.
1670         // Make sure either the LHS or RHS has a bit set.
1671         if (OrZero || RHSBits.One.getBoolValue() || LHSBits.One.getBoolValue())
1672           return true;
1673     }
1674   }
1675
1676   // An exact divide or right shift can only shift off zero bits, so the result
1677   // is a power of two only if the first operand is a power of two and not
1678   // copying a sign bit (sdiv int_min, 2).
1679   if (match(V, m_Exact(m_LShr(m_Value(), m_Value()))) ||
1680       match(V, m_Exact(m_UDiv(m_Value(), m_Value())))) {
1681     return isKnownToBeAPowerOfTwo(cast<Operator>(V)->getOperand(0), OrZero,
1682                                   Depth, Q);
1683   }
1684
       // Nothing matched: conservatively report "not known".
1685   return false;
1686 }
1687
1688 /// \brief Test whether a GEP's result is known to be non-null.
1689 ///
1690 /// Uses properties inherent in a GEP to try to determine whether it is known
1691 /// to be non-null.
1692 ///
1693 /// Currently this routine does not support vector GEPs.
1694 static bool isGEPKnownNonNull(const GEPOperator *GEP, unsigned Depth,
1695                               const Query &Q) {
1696   if (!GEP->isInBounds() || GEP->getPointerAddressSpace() != 0)
1697     return false;
1698
1699   // FIXME: Support vector-GEPs.
1700   assert(GEP->getType()->isPointerTy() && "We only support plain pointer GEP");
1701
1702   // If the base pointer is non-null, we cannot walk to a null address with an
1703   // inbounds GEP in address space zero.
1704   if (isKnownNonZero(GEP->getPointerOperand(), Depth, Q))
1705     return true;
1706
1707   // Walk the GEP operands and see if any operand introduces a non-zero offset.
1708   // If so, then the GEP cannot produce a null pointer, as doing so would
1709   // inherently violate the inbounds contract within address space zero.
1710   for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
1711        GTI != GTE; ++GTI) {
1712     // Struct types are easy -- they must always be indexed by a constant.
1713     if (StructType *STy = GTI.getStructTypeOrNull()) {
1714       ConstantInt *OpC = cast<ConstantInt>(GTI.getOperand());
1715       unsigned ElementIdx = OpC->getZExtValue();
1716       const StructLayout *SL = Q.DL.getStructLayout(STy);
1717       uint64_t ElementOffset = SL->getElementOffset(ElementIdx);
1718       if (ElementOffset > 0)
1719         return true;
1720       continue;
1721     }
1722
1723     // If we have a zero-sized type, the index doesn't matter. Keep looping.
1724     if (Q.DL.getTypeAllocSize(GTI.getIndexedType()) == 0)
1725       continue;
1726
1727     // Fast path the constant operand case both for efficiency and so we don't
1728     // increment Depth when just zipping down an all-constant GEP.
1729     if (ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand())) {
1730       if (!OpC->isZero())
1731         return true;
1732       continue;
1733     }
1734
1735     // We post-increment Depth here because while isKnownNonZero increments it
1736     // as well, when we pop back up that increment won't persist. We don't want
1737     // to recurse 10k times just because we have 10k GEP operands. We don't
1738     // bail completely out because we want to handle constant GEPs regardless
1739     // of depth.
1740     if (Depth++ >= MaxDepth)
1741       continue;
1742
1743     if (isKnownNonZero(GTI.getOperand(), Depth, Q))
1744       return true;
1745   }
1746
1747   return false;
1748 }
1749
1750 /// Does the 'Range' metadata (which must be a valid MD_range operand list)
1751 /// ensure that the value it's attached to is never Value?  'RangeType' is
1752 /// is the type of the value described by the range.
1753 static bool rangeMetadataExcludesValue(const MDNode* Ranges, const APInt& Value) {
1754   const unsigned NumRanges = Ranges->getNumOperands() / 2;
1755   assert(NumRanges >= 1);
1756   for (unsigned i = 0; i < NumRanges; ++i) {
1757     ConstantInt *Lower =
1758         mdconst::extract<ConstantInt>(Ranges->getOperand(2 * i + 0));
1759     ConstantInt *Upper =
1760         mdconst::extract<ConstantInt>(Ranges->getOperand(2 * i + 1));
1761     ConstantRange Range(Lower->getValue(), Upper->getValue());
1762     if (Range.contains(Value))
1763       return false;
1764   }
1765   return true;
1766 }
1767
1768 /// Return true if the given value is known to be non-zero when defined. For
1769 /// vectors, return true if every element is known to be non-zero when
1770 /// defined. For pointers, if the context instruction and dominator tree are
1771 /// specified, perform context-sensitive analysis and return true if the
1772 /// pointer couldn't possibly be null at the specified instruction.
1773 /// Supports values with integer or pointer type and vectors of integers.
     ///
     /// Depth is the current recursion depth. The constant and range-metadata
     /// checks run regardless of Depth; everything after them is skipped once
     /// Depth has reached MaxDepth.
1774 bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
1775   if (auto *C = dyn_cast<Constant>(V)) {
1776     if (C->isNullValue())
1777       return false;
1778     if (isa<ConstantInt>(C))
1779       // Must be non-zero due to null test above.
1780       return true;
1781
1782     // For constant vectors, check that all elements are undefined or known
1783     // non-zero to determine that the whole vector is known non-zero.
1784     if (auto *VecTy = dyn_cast<VectorType>(C->getType())) {
1785       for (unsigned i = 0, e = VecTy->getNumElements(); i != e; ++i) {
1786         Constant *Elt = C->getAggregateElement(i);
1787         if (!Elt || Elt->isNullValue())
1788           return false;
             // Only undef and ConstantInt elements are accepted.
1789         if (!isa<UndefValue>(Elt) && !isa<ConstantInt>(Elt))
1790           return false;
1791       }
1792       return true;
1793     }
1794
         // Any other kind of constant is conservatively "not known non-zero".
1795     return false;
1796   }
1797
1798   if (auto *I = dyn_cast<Instruction>(V)) {
1799     if (MDNode *Ranges = I->getMetadata(LLVMContext::MD_range)) {
1800       // If the possible ranges don't contain zero, then the value is
1801       // definitely non-zero.
1802       if (auto *Ty = dyn_cast<IntegerType>(V->getType())) {
1803         const APInt ZeroValue(Ty->getBitWidth(), 0);
1804         if (rangeMetadataExcludesValue(Ranges, ZeroValue))
1805           return true;
1806       }
1807     }
1808   }
1809
1810   // The remaining tests are all recursive, so bail out if we hit the limit.
       // Depth is post-incremented, so every query below runs one level deeper.
1811   if (Depth++ >= MaxDepth)
1812     return false;
1813
1814   // Check for pointer simplifications.
1815   if (V->getType()->isPointerTy()) {
1816     if (isKnownNonNullAt(V, Q.CxtI, Q.DT))
1817       return true;
1818     if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V))
1819       if (isGEPKnownNonNull(GEP, Depth, Q))
1820         return true;
1821   }
1822
1823   unsigned BitWidth = getBitWidth(V->getType()->getScalarType(), Q.DL);
1824
1825   // X | Y != 0 if X != 0 or Y != 0.
1826   Value *X = nullptr, *Y = nullptr;
1827   if (match(V, m_Or(m_Value(X), m_Value(Y))))
1828     return isKnownNonZero(X, Depth, Q) || isKnownNonZero(Y, Depth, Q);
1829
1830   // ext X != 0 if X != 0.
1831   if (isa<SExtInst>(V) || isa<ZExtInst>(V))
1832     return isKnownNonZero(cast<Instruction>(V)->getOperand(0), Depth, Q);
1833
       // NOTE: the branches from here on form a single if/else-if chain. A branch
       // that does not return falls through to the known-bits fallback at the end
       // of the function.
1834   // shl X, Y != 0 if X is odd.  Note that the value of the shift is undefined
1835   // if the lowest bit is shifted off the end.
1836   if (match(V, m_Shl(m_Value(X), m_Value(Y)))) {
1837     // shl nuw can't remove any non-zero bits.
1838     const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V);
1839     if (BO->hasNoUnsignedWrap())
1840       return isKnownNonZero(X, Depth, Q);
1841
1842     KnownBits Known(BitWidth);
1843     computeKnownBits(X, Known, Depth, Q);
         // Bit 0 known to be one means X is odd.
1844     if (Known.One[0])
1845       return true;
1846   }
1847   // shr X, Y != 0 if X is negative.  Note that the value of the shift is not
1848   // defined if the sign bit is shifted off the end.
1849   else if (match(V, m_Shr(m_Value(X), m_Value(Y)))) {
1850     // shr exact can only shift out zero bits.
1851     const PossiblyExactOperator *BO = cast<PossiblyExactOperator>(V);
1852     if (BO->isExact())
1853       return isKnownNonZero(X, Depth, Q);
1854
1855     KnownBits Known = computeKnownBits(X, Depth, Q);
1856     if (Known.isNegative())
1857       return true;
1858
1859     // If the shifter operand is a constant, and all of the bits shifted
1860     // out are known to be zero, and X is known non-zero then at least one
1861     // non-zero bit must remain.
1862     if (ConstantInt *Shift = dyn_cast<ConstantInt>(Y)) {
           // The shift amount is clamped to BitWidth - 1.
1863       auto ShiftVal = Shift->getLimitedValue(BitWidth - 1);
1864       // Is there a known one in the portion not shifted out?
1865       if (Known.countMaxLeadingZeros() < BitWidth - ShiftVal)
1866         return true;
1867       // Are all the bits to be shifted out known zero?
1868       if (Known.countMinTrailingZeros() >= ShiftVal)
1869         return isKnownNonZero(X, Depth, Q);
1870     }
1871   }
1872   // div exact can only produce a zero if the dividend is zero.
1873   else if (match(V, m_Exact(m_IDiv(m_Value(X), m_Value())))) {
1874     return isKnownNonZero(X, Depth, Q);
1875   }
1876   // X + Y.
1877   else if (match(V, m_Add(m_Value(X), m_Value(Y)))) {
1878     KnownBits XKnown = computeKnownBits(X, Depth, Q);
1879     KnownBits YKnown = computeKnownBits(Y, Depth, Q);
1880
1881     // If X and Y are both non-negative (as signed values) then their sum is not
1882     // zero unless both X and Y are zero.
1883     if (XKnown.isNonNegative() && YKnown.isNonNegative())
1884       if (isKnownNonZero(X, Depth, Q) || isKnownNonZero(Y, Depth, Q))
1885         return true;
1886
1887     // If X and Y are both negative (as signed values) then their sum is not
1888     // zero unless both X and Y equal INT_MIN.
1889     if (XKnown.isNegative() && YKnown.isNegative()) {
           // Mask selects every bit except the sign bit.
1890       APInt Mask = APInt::getSignedMaxValue(BitWidth);
1891       // The sign bit of X is set.  If some other bit is set then X is not equal
1892       // to INT_MIN.
1893       if (XKnown.One.intersects(Mask))
1894         return true;
1895       // The sign bit of Y is set.  If some other bit is set then Y is not equal
1896       // to INT_MIN.
1897       if (YKnown.One.intersects(Mask))
1898         return true;
1899     }
1900
1901     // The sum of a non-negative number and a power of two is not zero.
1902     if (XKnown.isNonNegative() &&
1903         isKnownToBeAPowerOfTwo(Y, /*OrZero*/ false, Depth, Q))
1904       return true;
1905     if (YKnown.isNonNegative() &&
1906         isKnownToBeAPowerOfTwo(X, /*OrZero*/ false, Depth, Q))
1907       return true;
1908   }
1909   // X * Y.
1910   else if (match(V, m_Mul(m_Value(X), m_Value(Y)))) {
1911     const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V);
1912     // If X and Y are non-zero then so is X * Y as long as the multiplication
1913     // does not overflow.
1914     if ((BO->hasNoSignedWrap() || BO->hasNoUnsignedWrap()) &&
1915         isKnownNonZero(X, Depth, Q) && isKnownNonZero(Y, Depth, Q))
1916       return true;
1917   }
1918   // (C ? X : Y) != 0 if X != 0 and Y != 0.
1919   else if (const SelectInst *SI = dyn_cast<SelectInst>(V)) {
1920     if (isKnownNonZero(SI->getTrueValue(), Depth, Q) &&
1921         isKnownNonZero(SI->getFalseValue(), Depth, Q))
1922       return true;
1923   }
1924   // PHI
1925   else if (const PHINode *PN = dyn_cast<PHINode>(V)) {
1926     // Try and detect a recurrence that monotonically increases from a
1927     // starting value, as these are common as induction variables.
1928     if (PN->getNumIncomingValues() == 2) {
1929       Value *Start = PN->getIncomingValue(0);
1930       Value *Induction = PN->getIncomingValue(1);
           // Normalise so that a lone ConstantInt incoming value ends up in Start.
1931       if (isa<ConstantInt>(Induction) && !isa<ConstantInt>(Start))
1932         std::swap(Start, Induction);
1933       if (ConstantInt *C = dyn_cast<ConstantInt>(Start)) {
1934         if (!C->isZero() && !C->isNegative()) {
               // This X is a separate ConstantInt that shadows the outer Value *X.
1935           ConstantInt *X;
1936           if ((match(Induction, m_NSWAdd(m_Specific(PN), m_ConstantInt(X))) ||
1937                match(Induction, m_NUWAdd(m_Specific(PN), m_ConstantInt(X)))) &&
1938               !X->isNegative())
1939             return true;
1940         }
1941       }
1942     }
1943     // Check if all incoming values are non-zero constant.
1944     bool AllNonZeroConstants = all_of(PN->operands(), [](Value *V) {
1945       return isa<ConstantInt>(V) && !cast<ConstantInt>(V)->isZeroValue();
1946     });
1947     if (AllNonZeroConstants)
1948       return true;
1949   }
1950
       // Last resort: any bit known to be one proves that V is non-zero.
1951   KnownBits Known(BitWidth);
1952   computeKnownBits(V, Known, Depth, Q);
1953   return Known.One != 0;
1954 }
1955
1956 /// Return true if V2 == V1 + X, where X is known non-zero.
1957 static bool isAddOfNonZero(const Value *V1, const Value *V2, const Query &Q) {
1958   const BinaryOperator *BO = dyn_cast<BinaryOperator>(V1);
1959   if (!BO || BO->getOpcode() != Instruction::Add)
1960     return false;
1961   Value *Op = nullptr;
1962   if (V2 == BO->getOperand(0))
1963     Op = BO->getOperand(1);
1964   else if (V2 == BO->getOperand(1))
1965     Op = BO->getOperand(0);
1966   else
1967     return false;
1968   return isKnownNonZero(Op, 0, Q);
1969 }
1970
1971 /// Return true if it is known that V1 != V2.
1972 static bool isKnownNonEqual(const Value *V1, const Value *V2, const Query &Q) {
1973   if (V1->getType()->isVectorTy() || V1 == V2)
1974     return false;
1975   if (V1->getType() != V2->getType())
1976     // We can't look through casts yet.
1977     return false;
1978   if (isAddOfNonZero(V1, V2, Q) || isAddOfNonZero(V2, V1, Q))
1979     return true;
1980
1981   if (IntegerType *Ty = dyn_cast<IntegerType>(V1->getType())) {
1982     // Are any known bits in V1 contradictory to known bits in V2? If V1
1983     // has a known zero where V2 has a known one, they must not be equal.
1984     auto BitWidth = Ty->getBitWidth();
1985     KnownBits Known1(BitWidth);
1986     computeKnownBits(V1, Known1, 0, Q);
1987     KnownBits Known2(BitWidth);
1988     computeKnownBits(V2, Known2, 0, Q);
1989
1990     APInt OppositeBits = (Known1.Zero & Known2.One) |
1991                          (Known2.Zero & Known1.One);
1992     if (OppositeBits.getBoolValue())
1993       return true;
1994   }
1995   return false;
1996 }
1997
1998 /// Return true if 'V & Mask' is known to be zero.  We use this predicate to
1999 /// simplify operations downstream. Mask is known to be zero for bits that V
2000 /// cannot have.
2001 ///
2002 /// This function is defined on values with integer type, values with pointer
2003 /// type, and vectors of integers.  In the case
2004 /// where V is a vector, the mask, known zero, and known one values are the
2005 /// same width as the vector element, and the bit is set only if it is true
2006 /// for all of the elements in the vector.
2007 bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth,
2008                        const Query &Q) {
2009   KnownBits Known(Mask.getBitWidth());
2010   computeKnownBits(V, Known, Depth, Q);
2011   return Mask.isSubsetOf(Known.Zero);
2012 }
2013
2014 /// For vector constants, loop over the elements and find the constant with the
2015 /// minimum number of sign bits. Return 0 if the value is not a vector constant
2016 /// or if any element was not analyzed; otherwise, return the count for the
2017 /// element with the minimum number of sign bits.
2018 static unsigned computeNumSignBitsVectorConstant(const Value *V,
2019                                                  unsigned TyBits) {
2020   const auto *CV = dyn_cast<Constant>(V);
2021   if (!CV || !CV->getType()->isVectorTy())
2022     return 0;
2023
2024   unsigned MinSignBits = TyBits;
2025   unsigned NumElts = CV->getType()->getVectorNumElements();
2026   for (unsigned i = 0; i != NumElts; ++i) {
2027     // If we find a non-ConstantInt, bail out.
2028     auto *Elt = dyn_cast_or_null<ConstantInt>(CV->getAggregateElement(i));
2029     if (!Elt)
2030       return 0;
2031
2032     // If the sign bit is 1, flip the bits, so we always count leading zeros.
2033     APInt EltVal = Elt->getValue();
2034     if (EltVal.isNegative())
2035       EltVal = ~EltVal;
2036     MinSignBits = std::min(MinSignBits, EltVal.countLeadingZeros());
2037   }
2038
2039   return MinSignBits;
2040 }
2041
2042 static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth,
2043                                        const Query &Q);
2044
2045 static unsigned ComputeNumSignBits(const Value *V, unsigned Depth,
2046                                    const Query &Q) {
2047   unsigned Result = ComputeNumSignBitsImpl(V, Depth, Q);
2048   assert(Result > 0 && "At least one sign bit needs to be present!");
2049   return Result;
2050 }
2051
2052 /// Return the number of times the sign bit of the register is replicated into
2053 /// the other bits. We know that at least 1 bit is always equal to the sign bit
2054 /// (itself), but other cases can give us information. For example, immediately
2055 /// after an "ashr X, 2", we know that the top 3 bits are all equal to each
2056 /// other, so we return 3. For vectors, return the number of sign bits for the
2057 /// vector element with the minimum number of known sign bits.
2058 static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth,
2059                                        const Query &Q) {
2060
2061   // We return the minimum number of sign bits that are guaranteed to be present
2062   // in V, so for undef we have to conservatively return 1.  We don't have the
2063   // same behavior for poison though -- that's a FIXME today.
2064
2065   unsigned TyBits = Q.DL.getTypeSizeInBits(V->getType()->getScalarType());
2066   unsigned Tmp, Tmp2;
2067   unsigned FirstAnswer = 1;
2068
2069   // Note that ConstantInt is handled by the general computeKnownBits case
2070   // below.
2071
2072   if (Depth == MaxDepth)
2073     return 1;  // Limit search depth.
2074
2075   const Operator *U = dyn_cast<Operator>(V);
2076   switch (Operator::getOpcode(V)) {
2077   default: break;
2078   case Instruction::SExt:
2079     Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits();
2080     return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q) + Tmp;
2081
2082   case Instruction::SDiv: {
2083     const APInt *Denominator;
2084     // sdiv X, C -> adds log(C) sign bits.
2085     if (match(U->getOperand(1), m_APInt(Denominator))) {
2086
2087       // Ignore non-positive denominator.
2088       if (!Denominator->isStrictlyPositive())
2089         break;
2090
2091       // Calculate the incoming numerator bits.
2092       unsigned NumBits = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
2093
2094       // Add floor(log(C)) bits to the numerator bits.
2095       return std::min(TyBits, NumBits + Denominator->logBase2());
2096     }
2097     break;
2098   }
2099
2100   case Instruction::SRem: {
2101     const APInt *Denominator;
2102     // srem X, C -> we know that the result is within [-C+1,C) when C is a
2103     // positive constant.  This let us put a lower bound on the number of sign
2104     // bits.
2105     if (match(U->getOperand(1), m_APInt(Denominator))) {
2106
2107       // Ignore non-positive denominator.
2108       if (!Denominator->isStrictlyPositive())
2109         break;
2110
2111       // Calculate the incoming numerator bits. SRem by a positive constant
2112       // can't lower the number of sign bits.
2113       unsigned NumrBits =
2114           ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
2115
2116       // Calculate the leading sign bit constraints by examining the
2117       // denominator.  Given that the denominator is positive, there are two
2118       // cases:
2119       //
2120       //  1. the numerator is positive.  The result range is [0,C) and [0,C) u<
2121       //     (1 << ceilLogBase2(C)).
2122       //
2123       //  2. the numerator is negative.  Then the result range is (-C,0] and
2124       //     integers in (-C,0] are either 0 or >u (-1 << ceilLogBase2(C)).
2125       //
2126       // Thus a lower bound on the number of sign bits is `TyBits -
2127       // ceilLogBase2(C)`.
2128
2129       unsigned ResBits = TyBits - Denominator->ceilLogBase2();
2130       return std::max(NumrBits, ResBits);
2131     }
2132     break;
2133   }
2134
2135   case Instruction::AShr: {
2136     Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
2137     // ashr X, C   -> adds C sign bits.  Vectors too.
2138     const APInt *ShAmt;
2139     if (match(U->getOperand(1), m_APInt(ShAmt))) {
2140       unsigned ShAmtLimited = ShAmt->getZExtValue();
2141       if (ShAmtLimited >= TyBits)
2142         break;  // Bad shift.
2143       Tmp += ShAmtLimited;
2144       if (Tmp > TyBits) Tmp = TyBits;
2145     }
2146     return Tmp;
2147   }
2148   case Instruction::Shl: {
2149     const APInt *ShAmt;
2150     if (match(U->getOperand(1), m_APInt(ShAmt))) {
2151       // shl destroys sign bits.
2152       Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
2153       Tmp2 = ShAmt->getZExtValue();
2154       if (Tmp2 >= TyBits ||      // Bad shift.
2155           Tmp2 >= Tmp) break;    // Shifted all sign bits out.
2156       return Tmp - Tmp2;
2157     }
2158     break;
2159   }
2160   case Instruction::And:
2161   case Instruction::Or:
2162   case Instruction::Xor:    // NOT is handled here.
2163     // Logical binary ops preserve the number of sign bits at the worst.
2164     Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
2165     if (Tmp != 1) {
2166       Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
2167       FirstAnswer = std::min(Tmp, Tmp2);
2168       // We computed what we know about the sign bits as our first
2169       // answer. Now proceed to the generic code that uses
2170       // computeKnownBits, and pick whichever answer is better.
2171     }
2172     break;
2173
2174   case Instruction::Select:
2175     Tmp = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
2176     if (Tmp == 1) return 1;  // Early out.
2177     Tmp2 = ComputeNumSignBits(U->getOperand(2), Depth + 1, Q);
2178     return std::min(Tmp, Tmp2);
2179
2180   case Instruction::Add:
2181     // Add can have at most one carry bit.  Thus we know that the output
2182     // is, at worst, one more bit than the inputs.
2183     Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
2184     if (Tmp == 1) return 1;  // Early out.
2185
2186     // Special case decrementing a value (ADD X, -1):
2187     if (const auto *CRHS = dyn_cast<Constant>(U->getOperand(1)))
2188       if (CRHS->isAllOnesValue()) {
2189         KnownBits Known(TyBits);
2190         computeKnownBits(U->getOperand(0), Known, Depth + 1, Q);
2191
2192         // If the input is known to be 0 or 1, the output is 0/-1, which is all
2193         // sign bits set.
2194         if ((Known.Zero | 1).isAllOnesValue())
2195           return TyBits;
2196
2197         // If we are subtracting one from a positive number, there is no carry
2198         // out of the result.
2199         if (Known.isNonNegative())
2200           return Tmp;
2201       }
2202
2203     Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
2204     if (Tmp2 == 1) return 1;
2205     return std::min(Tmp, Tmp2)-1;
2206
2207   case Instruction::Sub:
2208     Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
2209     if (Tmp2 == 1) return 1;
2210
2211     // Handle NEG.
2212     if (const auto *CLHS = dyn_cast<Constant>(U->getOperand(0)))
2213       if (CLHS->isNullValue()) {
2214         KnownBits Known(TyBits);
2215         computeKnownBits(U->getOperand(1), Known, Depth + 1, Q);
2216         // If the input is known to be 0 or 1, the output is 0/-1, which is all
2217         // sign bits set.
2218         if ((Known.Zero | 1).isAllOnesValue())
2219           return TyBits;
2220
2221         // If the input is known to be positive (the sign bit is known clear),
2222         // the output of the NEG has the same number of sign bits as the input.
2223         if (Known.isNonNegative())
2224           return Tmp2;
2225
2226         // Otherwise, we treat this like a SUB.
2227       }
2228
2229     // Sub can have at most one carry bit.  Thus we know that the output
2230     // is, at worst, one more bit than the inputs.
2231     Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
2232     if (Tmp == 1) return 1;  // Early out.
2233     return std::min(Tmp, Tmp2)-1;
2234
2235   case Instruction::PHI: {
2236     const PHINode *PN = cast<PHINode>(U);
2237     unsigned NumIncomingValues = PN->getNumIncomingValues();
2238     // Don't analyze large in-degree PHIs.
2239     if (NumIncomingValues > 4) break;
2240     // Unreachable blocks may have zero-operand PHI nodes.
2241     if (NumIncomingValues == 0) break;
2242
2243     // Take the minimum of all incoming values.  This can't infinitely loop
2244     // because of our depth threshold.
2245     Tmp = ComputeNumSignBits(PN->getIncomingValue(0), Depth + 1, Q);
2246     for (unsigned i = 1, e = NumIncomingValues; i != e; ++i) {
2247       if (Tmp == 1) return Tmp;
2248       Tmp = std::min(
2249           Tmp, ComputeNumSignBits(PN->getIncomingValue(i), Depth + 1, Q));
2250     }
2251     return Tmp;
2252   }
2253
2254   case Instruction::Trunc:
2255     // FIXME: it's tricky to do anything useful for this, but it is an important
2256     // case for targets like X86.
2257     break;
2258
2259   case Instruction::ExtractElement:
2260     // Look through extract element. At the moment we keep this simple and skip
2261     // tracking the specific element. But at least we might find information
2262     // valid for all elements of the vector (for example if vector is sign
2263     // extended, shifted, etc).
2264     return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
2265   }
2266
2267   // Finally, if we can prove that the top bits of the result are 0's or 1's,
2268   // use this information.
2269
2270   // If we can examine all elements of a vector constant successfully, we're
2271   // done (we can't do any better than that). If not, keep trying.
2272   if (unsigned VecSignBits = computeNumSignBitsVectorConstant(V, TyBits))
2273     return VecSignBits;
2274
2275   KnownBits Known(TyBits);
2276   computeKnownBits(V, Known, Depth, Q);
2277
2278   // If we know that the sign bit is either zero or one, determine the number of
2279   // identical bits in the top of the input value.
2280   return std::max(FirstAnswer, Known.countMinSignBits());
2281 }
2282
2283 /// This function computes the integer multiple of Base that equals V.
2284 /// If successful, it returns true and returns the multiple in
2285 /// Multiple. If unsuccessful, it returns false. It looks
2286 /// through SExt instructions only if LookThroughSExt is true.
2287 bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
2288                            bool LookThroughSExt, unsigned Depth) {
2289   const unsigned MaxDepth = 6;
2290
2291   assert(V && "No Value?");
2292   assert(Depth <= MaxDepth && "Limit Search Depth");
2293   assert(V->getType()->isIntegerTy() && "Not integer or pointer type!");
2294
2295   Type *T = V->getType();
2296
2297   ConstantInt *CI = dyn_cast<ConstantInt>(V);
2298
2299   if (Base == 0)
2300     return false;
2301
2302   if (Base == 1) {
2303     Multiple = V;
2304     return true;
2305   }
2306
2307   ConstantExpr *CO = dyn_cast<ConstantExpr>(V);
2308   Constant *BaseVal = ConstantInt::get(T, Base);
2309   if (CO && CO == BaseVal) {
2310     // Multiple is 1.
2311     Multiple = ConstantInt::get(T, 1);
2312     return true;
2313   }
2314
2315   if (CI && CI->getZExtValue() % Base == 0) {
2316     Multiple = ConstantInt::get(T, CI->getZExtValue() / Base);
2317     return true;
2318   }
2319
2320   if (Depth == MaxDepth) return false;  // Limit search depth.
2321
2322   Operator *I = dyn_cast<Operator>(V);
2323   if (!I) return false;
2324
2325   switch (I->getOpcode()) {
2326   default: break;
2327   case Instruction::SExt:
2328     if (!LookThroughSExt) return false;
2329     // otherwise fall through to ZExt
2330   case Instruction::ZExt:
2331     return ComputeMultiple(I->getOperand(0), Base, Multiple,
2332                            LookThroughSExt, Depth+1);
2333   case Instruction::Shl:
2334   case Instruction::Mul: {
2335     Value *Op0 = I->getOperand(0);
2336     Value *Op1 = I->getOperand(1);
2337
2338     if (I->getOpcode() == Instruction::Shl) {
2339       ConstantInt *Op1CI = dyn_cast<ConstantInt>(Op1);
2340       if (!Op1CI) return false;
2341       // Turn Op0 << Op1 into Op0 * 2^Op1
2342       APInt Op1Int = Op1CI->getValue();
2343       uint64_t BitToSet = Op1Int.getLimitedValue(Op1Int.getBitWidth() - 1);
2344       APInt API(Op1Int.getBitWidth(), 0);
2345       API.setBit(BitToSet);
2346       Op1 = ConstantInt::get(V->getContext(), API);
2347     }
2348
2349     Value *Mul0 = nullptr;
2350     if (ComputeMultiple(Op0, Base, Mul0, LookThroughSExt, Depth+1)) {
2351       if (Constant *Op1C = dyn_cast<Constant>(Op1))
2352         if (Constant *MulC = dyn_cast<Constant>(Mul0)) {
2353           if (Op1C->getType()->getPrimitiveSizeInBits() <
2354               MulC->getType()->getPrimitiveSizeInBits())
2355             Op1C = ConstantExpr::getZExt(Op1C, MulC->getType());
2356           if (Op1C->getType()->getPrimitiveSizeInBits() >
2357               MulC->getType()->getPrimitiveSizeInBits())
2358             MulC = ConstantExpr::getZExt(MulC, Op1C->getType());
2359
2360           // V == Base * (Mul0 * Op1), so return (Mul0 * Op1)
2361           Multiple = ConstantExpr::getMul(MulC, Op1C);
2362           return true;
2363         }
2364
2365       if (ConstantInt *Mul0CI = dyn_cast<ConstantInt>(Mul0))
2366         if (Mul0CI->getValue() == 1) {
2367           // V == Base * Op1, so return Op1
2368           Multiple = Op1;
2369           return true;
2370         }
2371     }
2372
2373     Value *Mul1 = nullptr;
2374     if (ComputeMultiple(Op1, Base, Mul1, LookThroughSExt, Depth+1)) {
2375       if (Constant *Op0C = dyn_cast<Constant>(Op0))
2376         if (Constant *MulC = dyn_cast<Constant>(Mul1)) {
2377           if (Op0C->getType()->getPrimitiveSizeInBits() <
2378               MulC->getType()->getPrimitiveSizeInBits())
2379             Op0C = ConstantExpr::getZExt(Op0C, MulC->getType());
2380           if (Op0C->getType()->getPrimitiveSizeInBits() >
2381               MulC->getType()->getPrimitiveSizeInBits())
2382             MulC = ConstantExpr::getZExt(MulC, Op0C->getType());
2383
2384           // V == Base * (Mul1 * Op0), so return (Mul1 * Op0)
2385           Multiple = ConstantExpr::getMul(MulC, Op0C);
2386           return true;
2387         }
2388
2389       if (ConstantInt *Mul1CI = dyn_cast<ConstantInt>(Mul1))
2390         if (Mul1CI->getValue() == 1) {
2391           // V == Base * Op0, so return Op0
2392           Multiple = Op0;
2393           return true;
2394         }
2395     }
2396   }
2397   }
2398
2399   // We could not determine if V is a multiple of Base.
2400   return false;
2401 }
2402
2403 Intrinsic::ID llvm::getIntrinsicForCallSite(ImmutableCallSite ICS,
2404                                             const TargetLibraryInfo *TLI) {
2405   const Function *F = ICS.getCalledFunction();
2406   if (!F)
2407     return Intrinsic::not_intrinsic;
2408
2409   if (F->isIntrinsic())
2410     return F->getIntrinsicID();
2411
2412   if (!TLI)
2413     return Intrinsic::not_intrinsic;
2414
2415   LibFunc Func;
2416   // We're going to make assumptions on the semantics of the functions, check
2417   // that the target knows that it's available in this environment and it does
2418   // not have local linkage.
2419   if (!F || F->hasLocalLinkage() || !TLI->getLibFunc(*F, Func))
2420     return Intrinsic::not_intrinsic;
2421
2422   if (!ICS.onlyReadsMemory())
2423     return Intrinsic::not_intrinsic;
2424
2425   // Otherwise check if we have a call to a function that can be turned into a
2426   // vector intrinsic.
2427   switch (Func) {
2428   default:
2429     break;
2430   case LibFunc_sin:
2431   case LibFunc_sinf:
2432   case LibFunc_sinl:
2433     return Intrinsic::sin;
2434   case LibFunc_cos:
2435   case LibFunc_cosf:
2436   case LibFunc_cosl:
2437     return Intrinsic::cos;
2438   case LibFunc_exp:
2439   case LibFunc_expf:
2440   case LibFunc_expl:
2441     return Intrinsic::exp;
2442   case LibFunc_exp2:
2443   case LibFunc_exp2f:
2444   case LibFunc_exp2l:
2445     return Intrinsic::exp2;
2446   case LibFunc_log:
2447   case LibFunc_logf:
2448   case LibFunc_logl:
2449     return Intrinsic::log;
2450   case LibFunc_log10:
2451   case LibFunc_log10f:
2452   case LibFunc_log10l:
2453     return Intrinsic::log10;
2454   case LibFunc_log2:
2455   case LibFunc_log2f:
2456   case LibFunc_log2l:
2457     return Intrinsic::log2;
2458   case LibFunc_fabs:
2459   case LibFunc_fabsf:
2460   case LibFunc_fabsl:
2461     return Intrinsic::fabs;
2462   case LibFunc_fmin:
2463   case LibFunc_fminf:
2464   case LibFunc_fminl:
2465     return Intrinsic::minnum;
2466   case LibFunc_fmax:
2467   case LibFunc_fmaxf:
2468   case LibFunc_fmaxl:
2469     return Intrinsic::maxnum;
2470   case LibFunc_copysign:
2471   case LibFunc_copysignf:
2472   case LibFunc_copysignl:
2473     return Intrinsic::copysign;
2474   case LibFunc_floor:
2475   case LibFunc_floorf:
2476   case LibFunc_floorl:
2477     return Intrinsic::floor;
2478   case LibFunc_ceil:
2479   case LibFunc_ceilf:
2480   case LibFunc_ceill:
2481     return Intrinsic::ceil;
2482   case LibFunc_trunc:
2483   case LibFunc_truncf:
2484   case LibFunc_truncl:
2485     return Intrinsic::trunc;
2486   case LibFunc_rint:
2487   case LibFunc_rintf:
2488   case LibFunc_rintl:
2489     return Intrinsic::rint;
2490   case LibFunc_nearbyint:
2491   case LibFunc_nearbyintf:
2492   case LibFunc_nearbyintl:
2493     return Intrinsic::nearbyint;
2494   case LibFunc_round:
2495   case LibFunc_roundf:
2496   case LibFunc_roundl:
2497     return Intrinsic::round;
2498   case LibFunc_pow:
2499   case LibFunc_powf:
2500   case LibFunc_powl:
2501     return Intrinsic::pow;
2502   case LibFunc_sqrt:
2503   case LibFunc_sqrtf:
2504   case LibFunc_sqrtl:
2505     if (ICS->hasNoNaNs())
2506       return Intrinsic::sqrt;
2507     return Intrinsic::not_intrinsic;
2508   }
2509
2510   return Intrinsic::not_intrinsic;
2511 }
2512
2513 /// Return true if we can prove that the specified FP value is never equal to
2514 /// -0.0.
2515 ///
2516 /// NOTE: this function will need to be revisited when we support non-default
2517 /// rounding modes!
2518 ///
2519 bool llvm::CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI,
2520                                 unsigned Depth) {
2521   if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V))
2522     return !CFP->getValueAPF().isNegZero();
2523
2524   if (Depth == MaxDepth)
2525     return false;  // Limit search depth.
2526
2527   const Operator *I = dyn_cast<Operator>(V);
2528   if (!I) return false;
2529
2530   // Check if the nsz fast-math flag is set
2531   if (const FPMathOperator *FPO = dyn_cast<FPMathOperator>(I))
2532     if (FPO->hasNoSignedZeros())
2533       return true;
2534
2535   // (add x, 0.0) is guaranteed to return +0.0, not -0.0.
2536   if (I->getOpcode() == Instruction::FAdd)
2537     if (ConstantFP *CFP = dyn_cast<ConstantFP>(I->getOperand(1)))
2538       if (CFP->isNullValue())
2539         return true;
2540
2541   // sitofp and uitofp turn into +0.0 for zero.
2542   if (isa<SIToFPInst>(I) || isa<UIToFPInst>(I))
2543     return true;
2544
2545   if (const CallInst *CI = dyn_cast<CallInst>(I)) {
2546     Intrinsic::ID IID = getIntrinsicForCallSite(CI, TLI);
2547     switch (IID) {
2548     default:
2549       break;
2550     // sqrt(-0.0) = -0.0, no other negative results are possible.
2551     case Intrinsic::sqrt:
2552       return CannotBeNegativeZero(CI->getArgOperand(0), TLI, Depth + 1);
2553     // fabs(x) != -0.0
2554     case Intrinsic::fabs:
2555       return true;
2556     }
2557   }
2558
2559   return false;
2560 }
2561
2562 /// If \p SignBitOnly is true, test for a known 0 sign bit rather than a
2563 /// standard ordered compare. e.g. make -0.0 olt 0.0 be true because of the sign
2564 /// bit despite comparing equal.
2565 static bool cannotBeOrderedLessThanZeroImpl(const Value *V,
2566                                             const TargetLibraryInfo *TLI,
2567                                             bool SignBitOnly,
2568                                             unsigned Depth) {
2569   // TODO: This function does not do the right thing when SignBitOnly is true
2570   // and we're lowering to a hypothetical IEEE 754-compliant-but-evil platform
2571   // which flips the sign bits of NaNs.  See
2572   // https://llvm.org/bugs/show_bug.cgi?id=31702.
2573
2574   if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
2575     return !CFP->getValueAPF().isNegative() ||
2576            (!SignBitOnly && CFP->getValueAPF().isZero());
2577   }
2578
2579   if (Depth == MaxDepth)
2580     return false; // Limit search depth.
2581
2582   const Operator *I = dyn_cast<Operator>(V);
2583   if (!I)
2584     return false;
2585
2586   switch (I->getOpcode()) {
2587   default:
2588     break;
2589   // Unsigned integers are always nonnegative.
2590   case Instruction::UIToFP:
2591     return true;
2592   case Instruction::FMul:
2593     // x*x is always non-negative or a NaN.
2594     if (I->getOperand(0) == I->getOperand(1) &&
2595         (!SignBitOnly || cast<FPMathOperator>(I)->hasNoNaNs()))
2596       return true;
2597
2598     LLVM_FALLTHROUGH;
2599   case Instruction::FAdd:
2600   case Instruction::FDiv:
2601   case Instruction::FRem:
2602     return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
2603                                            Depth + 1) &&
2604            cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
2605                                            Depth + 1);
2606   case Instruction::Select:
2607     return cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
2608                                            Depth + 1) &&
2609            cannotBeOrderedLessThanZeroImpl(I->getOperand(2), TLI, SignBitOnly,
2610                                            Depth + 1);
2611   case Instruction::FPExt:
2612   case Instruction::FPTrunc:
2613     // Widening/narrowing never change sign.
2614     return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
2615                                            Depth + 1);
2616   case Instruction::Call:
2617     const auto *CI = cast<CallInst>(I);
2618     Intrinsic::ID IID = getIntrinsicForCallSite(CI, TLI);
2619     switch (IID) {
2620     default:
2621       break;
2622     case Intrinsic::maxnum:
2623       return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
2624                                              Depth + 1) ||
2625              cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
2626                                              Depth + 1);
2627     case Intrinsic::minnum:
2628       return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
2629                                              Depth + 1) &&
2630              cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
2631                                              Depth + 1);
2632     case Intrinsic::exp:
2633     case Intrinsic::exp2:
2634     case Intrinsic::fabs:
2635       return true;
2636
2637     case Intrinsic::sqrt:
2638       // sqrt(x) is always >= -0 or NaN.  Moreover, sqrt(x) == -0 iff x == -0.
2639       if (!SignBitOnly)
2640         return true;
2641       return CI->hasNoNaNs() && (CI->hasNoSignedZeros() ||
2642                                  CannotBeNegativeZero(CI->getOperand(0), TLI));
2643
2644     case Intrinsic::powi:
2645       if (ConstantInt *Exponent = dyn_cast<ConstantInt>(I->getOperand(1))) {
2646         // powi(x,n) is non-negative if n is even.
2647         if (Exponent->getBitWidth() <= 64 && Exponent->getSExtValue() % 2u == 0)
2648           return true;
2649       }
2650       // TODO: This is not correct.  Given that exp is an integer, here are the
2651       // ways that pow can return a negative value:
2652       //
2653       //   pow(x, exp)    --> negative if exp is odd and x is negative.
2654       //   pow(-0, exp)   --> -inf if exp is negative odd.
2655       //   pow(-0, exp)   --> -0 if exp is positive odd.
2656       //   pow(-inf, exp) --> -0 if exp is negative odd.
2657       //   pow(-inf, exp) --> -inf if exp is positive odd.
2658       //
2659       // Therefore, if !SignBitOnly, we can return true if x >= +0 or x is NaN,
2660       // but we must return false if x == -0.  Unfortunately we do not currently
2661       // have a way of expressing this constraint.  See details in
2662       // https://llvm.org/bugs/show_bug.cgi?id=31702.
2663       return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
2664                                              Depth + 1);
2665
2666     case Intrinsic::fma:
2667     case Intrinsic::fmuladd:
2668       // x*x+y is non-negative if y is non-negative.
2669       return I->getOperand(0) == I->getOperand(1) &&
2670              (!SignBitOnly || cast<FPMathOperator>(I)->hasNoNaNs()) &&
2671              cannotBeOrderedLessThanZeroImpl(I->getOperand(2), TLI, SignBitOnly,
2672                                              Depth + 1);
2673     }
2674     break;
2675   }
2676   return false;
2677 }
2678
2679 bool llvm::CannotBeOrderedLessThanZero(const Value *V,
2680                                        const TargetLibraryInfo *TLI) {
2681   return cannotBeOrderedLessThanZeroImpl(V, TLI, false, 0);
2682 }
2683
2684 bool llvm::SignBitMustBeZero(const Value *V, const TargetLibraryInfo *TLI) {
2685   return cannotBeOrderedLessThanZeroImpl(V, TLI, true, 0);
2686 }
2687
2688 /// If the specified value can be set by repeating the same byte in memory,
2689 /// return the i8 value that it is represented with.  This is
2690 /// true for all i8 values obviously, but is also true for i32 0, i32 -1,
2691 /// i16 0xF0F0, double 0.0 etc.  If the value can't be handled with a repeated
2692 /// byte store (e.g. i16 0x1234), return null.
2693 Value *llvm::isBytewiseValue(Value *V) {
2694   // All byte-wide stores are splatable, even of arbitrary variables.
2695   if (V->getType()->isIntegerTy(8)) return V;
2696
2697   // Handle 'null' ConstantArrayZero etc.
2698   if (Constant *C = dyn_cast<Constant>(V))
2699     if (C->isNullValue())
2700       return Constant::getNullValue(Type::getInt8Ty(V->getContext()));
2701
2702   // Constant float and double values can be handled as integer values if the
2703   // corresponding integer value is "byteable".  An important case is 0.0.
2704   if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
2705     if (CFP->getType()->isFloatTy())
2706       V = ConstantExpr::getBitCast(CFP, Type::getInt32Ty(V->getContext()));
2707     if (CFP->getType()->isDoubleTy())
2708       V = ConstantExpr::getBitCast(CFP, Type::getInt64Ty(V->getContext()));
2709     // Don't handle long double formats, which have strange constraints.
2710   }
2711
2712   // We can handle constant integers that are multiple of 8 bits.
2713   if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
2714     if (CI->getBitWidth() % 8 == 0) {
2715       assert(CI->getBitWidth() > 8 && "8 bits should be handled above!");
2716
2717       if (!CI->getValue().isSplat(8))
2718         return nullptr;
2719       return ConstantInt::get(V->getContext(), CI->getValue().trunc(8));
2720     }
2721   }
2722
2723   // A ConstantDataArray/Vector is splatable if all its members are equal and
2724   // also splatable.
2725   if (ConstantDataSequential *CA = dyn_cast<ConstantDataSequential>(V)) {
2726     Value *Elt = CA->getElementAsConstant(0);
2727     Value *Val = isBytewiseValue(Elt);
2728     if (!Val)
2729       return nullptr;
2730
2731     for (unsigned I = 1, E = CA->getNumElements(); I != E; ++I)
2732       if (CA->getElementAsConstant(I) != Elt)
2733         return nullptr;
2734
2735     return Val;
2736   }
2737
2738   // Conceptually, we could handle things like:
2739   //   %a = zext i8 %X to i16
2740   //   %b = shl i16 %a, 8
2741   //   %c = or i16 %a, %b
2742   // but until there is an example that actually needs this, it doesn't seem
2743   // worth worrying about.
2744   return nullptr;
2745 }
2746
2747
2748 // This is the recursive version of BuildSubAggregate. It takes a few different
2749 // arguments. Idxs is the index within the nested struct From that we are
2750 // looking at now (which is of type IndexedType). IdxSkip is the number of
2751 // indices from Idxs that should be left out when inserting into the resulting
2752 // struct. To is the result struct built so far, new insertvalue instructions
2753 // build on that.
2754 static Value *BuildSubAggregate(Value *From, Value* To, Type *IndexedType,
2755                                 SmallVectorImpl<unsigned> &Idxs,
2756                                 unsigned IdxSkip,
2757                                 Instruction *InsertBefore) {
2758   llvm::StructType *STy = dyn_cast<llvm::StructType>(IndexedType);
2759   if (STy) {
2760     // Save the original To argument so we can modify it
2761     Value *OrigTo = To;
2762     // General case, the type indexed by Idxs is a struct
2763     for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
2764       // Process each struct element recursively
2765       Idxs.push_back(i);
2766       Value *PrevTo = To;
2767       To = BuildSubAggregate(From, To, STy->getElementType(i), Idxs, IdxSkip,
2768                              InsertBefore);
2769       Idxs.pop_back();
2770       if (!To) {
2771         // Couldn't find any inserted value for this index? Cleanup
2772         while (PrevTo != OrigTo) {
2773           InsertValueInst* Del = cast<InsertValueInst>(PrevTo);
2774           PrevTo = Del->getAggregateOperand();
2775           Del->eraseFromParent();
2776         }
2777         // Stop processing elements
2778         break;
2779       }
2780     }
2781     // If we successfully found a value for each of our subaggregates
2782     if (To)
2783       return To;
2784   }
2785   // Base case, the type indexed by SourceIdxs is not a struct, or not all of
2786   // the struct's elements had a value that was inserted directly. In the latter
2787   // case, perhaps we can't determine each of the subelements individually, but
2788   // we might be able to find the complete struct somewhere.
2789
2790   // Find the value that is at that particular spot
2791   Value *V = FindInsertedValue(From, Idxs);
2792
2793   if (!V)
2794     return nullptr;
2795
2796   // Insert the value in the new (sub) aggregrate
2797   return llvm::InsertValueInst::Create(To, V, makeArrayRef(Idxs).slice(IdxSkip),
2798                                        "tmp", InsertBefore);
2799 }
2800
2801 // This helper takes a nested struct and extracts a part of it (which is again a
2802 // struct) into a new value. For example, given the struct:
2803 // { a, { b, { c, d }, e } }
2804 // and the indices "1, 1" this returns
2805 // { c, d }.
2806 //
2807 // It does this by inserting an insertvalue for each element in the resulting
2808 // struct, as opposed to just inserting a single struct. This will only work if
2809 // each of the elements of the substruct are known (ie, inserted into From by an
2810 // insertvalue instruction somewhere).
2811 //
2812 // All inserted insertvalue instructions are inserted before InsertBefore
2813 static Value *BuildSubAggregate(Value *From, ArrayRef<unsigned> idx_range,
2814                                 Instruction *InsertBefore) {
2815   assert(InsertBefore && "Must have someplace to insert!");
2816   Type *IndexedType = ExtractValueInst::getIndexedType(From->getType(),
2817                                                              idx_range);
2818   Value *To = UndefValue::get(IndexedType);
2819   SmallVector<unsigned, 10> Idxs(idx_range.begin(), idx_range.end());
2820   unsigned IdxSkip = Idxs.size();
2821
2822   return BuildSubAggregate(From, To, IndexedType, Idxs, IdxSkip, InsertBefore);
2823 }
2824
2825 /// Given an aggregrate and an sequence of indices, see if
2826 /// the scalar value indexed is already around as a register, for example if it
2827 /// were inserted directly into the aggregrate.
2828 ///
2829 /// If InsertBefore is not null, this function will duplicate (modified)
2830 /// insertvalues when a part of a nested struct is extracted.
2831 Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
2832                                Instruction *InsertBefore) {
2833   // Nothing to index? Just return V then (this is useful at the end of our
2834   // recursion).
2835   if (idx_range.empty())
2836     return V;
2837   // We have indices, so V should have an indexable type.
2838   assert((V->getType()->isStructTy() || V->getType()->isArrayTy()) &&
2839          "Not looking at a struct or array?");
2840   assert(ExtractValueInst::getIndexedType(V->getType(), idx_range) &&
2841          "Invalid indices for type?");
2842
2843   if (Constant *C = dyn_cast<Constant>(V)) {
2844     C = C->getAggregateElement(idx_range[0]);
2845     if (!C) return nullptr;
2846     return FindInsertedValue(C, idx_range.slice(1), InsertBefore);
2847   }
2848
2849   if (InsertValueInst *I = dyn_cast<InsertValueInst>(V)) {
2850     // Loop the indices for the insertvalue instruction in parallel with the
2851     // requested indices
2852     const unsigned *req_idx = idx_range.begin();
2853     for (const unsigned *i = I->idx_begin(), *e = I->idx_end();
2854          i != e; ++i, ++req_idx) {
2855       if (req_idx == idx_range.end()) {
2856         // We can't handle this without inserting insertvalues
2857         if (!InsertBefore)
2858           return nullptr;
2859
2860         // The requested index identifies a part of a nested aggregate. Handle
2861         // this specially. For example,
2862         // %A = insertvalue { i32, {i32, i32 } } undef, i32 10, 1, 0
2863         // %B = insertvalue { i32, {i32, i32 } } %A, i32 11, 1, 1
2864         // %C = extractvalue {i32, { i32, i32 } } %B, 1
2865         // This can be changed into
2866         // %A = insertvalue {i32, i32 } undef, i32 10, 0
2867         // %C = insertvalue {i32, i32 } %A, i32 11, 1
2868         // which allows the unused 0,0 element from the nested struct to be
2869         // removed.
2870         return BuildSubAggregate(V, makeArrayRef(idx_range.begin(), req_idx),
2871                                  InsertBefore);
2872       }
2873
2874       // This insert value inserts something else than what we are looking for.
2875       // See if the (aggregate) value inserted into has the value we are
2876       // looking for, then.
2877       if (*req_idx != *i)
2878         return FindInsertedValue(I->getAggregateOperand(), idx_range,
2879                                  InsertBefore);
2880     }
2881     // If we end up here, the indices of the insertvalue match with those
2882     // requested (though possibly only partially). Now we recursively look at
2883     // the inserted value, passing any remaining indices.
2884     return FindInsertedValue(I->getInsertedValueOperand(),
2885                              makeArrayRef(req_idx, idx_range.end()),
2886                              InsertBefore);
2887   }
2888
2889   if (ExtractValueInst *I = dyn_cast<ExtractValueInst>(V)) {
2890     // If we're extracting a value from an aggregate that was extracted from
2891     // something else, we can extract from that something else directly instead.
2892     // However, we will need to chain I's indices with the requested indices.
2893
2894     // Calculate the number of indices required
2895     unsigned size = I->getNumIndices() + idx_range.size();
2896     // Allocate some space to put the new indices in
2897     SmallVector<unsigned, 5> Idxs;
2898     Idxs.reserve(size);
2899     // Add indices from the extract value instruction
2900     Idxs.append(I->idx_begin(), I->idx_end());
2901
2902     // Add requested indices
2903     Idxs.append(idx_range.begin(), idx_range.end());
2904
2905     assert(Idxs.size() == size
2906            && "Number of indices added not correct?");
2907
2908     return FindInsertedValue(I->getAggregateOperand(), Idxs, InsertBefore);
2909   }
2910   // Otherwise, we don't know (such as, extracting from a function return value
2911   // or load instruction)
2912   return nullptr;
2913 }
2914
2915 /// Analyze the specified pointer to see if it can be expressed as a base
2916 /// pointer plus a constant offset. Return the base and offset to the caller.
2917 Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
2918                                               const DataLayout &DL) {
2919   unsigned BitWidth = DL.getPointerTypeSizeInBits(Ptr->getType());
2920   APInt ByteOffset(BitWidth, 0);
2921
2922   // We walk up the defs but use a visited set to handle unreachable code. In
2923   // that case, we stop after accumulating the cycle once (not that it
2924   // matters).
2925   SmallPtrSet<Value *, 16> Visited;
2926   while (Visited.insert(Ptr).second) {
2927     if (Ptr->getType()->isVectorTy())
2928       break;
2929
2930     if (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {
2931       // If one of the values we have visited is an addrspacecast, then
2932       // the pointer type of this GEP may be different from the type
2933       // of the Ptr parameter which was passed to this function.  This
2934       // means when we construct GEPOffset, we need to use the size
2935       // of GEP's pointer type rather than the size of the original
2936       // pointer type.
2937       APInt GEPOffset(DL.getPointerTypeSizeInBits(Ptr->getType()), 0);
2938       if (!GEP->accumulateConstantOffset(DL, GEPOffset))
2939         break;
2940
2941       ByteOffset += GEPOffset.getSExtValue();
2942
2943       Ptr = GEP->getPointerOperand();
2944     } else if (Operator::getOpcode(Ptr) == Instruction::BitCast ||
2945                Operator::getOpcode(Ptr) == Instruction::AddrSpaceCast) {
2946       Ptr = cast<Operator>(Ptr)->getOperand(0);
2947     } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(Ptr)) {
2948       if (GA->isInterposable())
2949         break;
2950       Ptr = GA->getAliasee();
2951     } else {
2952       break;
2953     }
2954   }
2955   Offset = ByteOffset.getSExtValue();
2956   return Ptr;
2957 }
2958
2959 bool llvm::isGEPBasedOnPointerToString(const GEPOperator *GEP,
2960                                        unsigned CharSize) {
2961   // Make sure the GEP has exactly three arguments.
2962   if (GEP->getNumOperands() != 3)
2963     return false;
2964
2965   // Make sure the index-ee is a pointer to array of \p CharSize integers.
2966   // CharSize.
2967   ArrayType *AT = dyn_cast<ArrayType>(GEP->getSourceElementType());
2968   if (!AT || !AT->getElementType()->isIntegerTy(CharSize))
2969     return false;
2970
2971   // Check to make sure that the first operand of the GEP is an integer and
2972   // has value 0 so that we are sure we're indexing into the initializer.
2973   const ConstantInt *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1));
2974   if (!FirstIdx || !FirstIdx->isZero())
2975     return false;
2976
2977   return true;
2978 }
2979
2980 bool llvm::getConstantDataArrayInfo(const Value *V,
2981                                     ConstantDataArraySlice &Slice,
2982                                     unsigned ElementSize, uint64_t Offset) {
2983   assert(V);
2984
2985   // Look through bitcast instructions and geps.
2986   V = V->stripPointerCasts();
2987
2988   // If the value is a GEP instruction or constant expression, treat it as an
2989   // offset.
2990   if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
2991     // The GEP operator should be based on a pointer to string constant, and is
2992     // indexing into the string constant.
2993     if (!isGEPBasedOnPointerToString(GEP, ElementSize))
2994       return false;
2995
2996     // If the second index isn't a ConstantInt, then this is a variable index
2997     // into the array.  If this occurs, we can't say anything meaningful about
2998     // the string.
2999     uint64_t StartIdx = 0;
3000     if (const ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(2)))
3001       StartIdx = CI->getZExtValue();
3002     else
3003       return false;
3004     return getConstantDataArrayInfo(GEP->getOperand(0), Slice, ElementSize,
3005                                     StartIdx + Offset);
3006   }
3007
3008   // The GEP instruction, constant or instruction, must reference a global
3009   // variable that is a constant and is initialized. The referenced constant
3010   // initializer is the array that we'll use for optimization.
3011   const GlobalVariable *GV = dyn_cast<GlobalVariable>(V);
3012   if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
3013     return false;
3014
3015   const ConstantDataArray *Array;
3016   ArrayType *ArrayTy;
3017   if (GV->getInitializer()->isNullValue()) {
3018     Type *GVTy = GV->getValueType();
3019     if ( (ArrayTy = dyn_cast<ArrayType>(GVTy)) ) {
3020       // A zeroinitializer for the array; There is no ConstantDataArray.
3021       Array = nullptr;
3022     } else {
3023       const DataLayout &DL = GV->getParent()->getDataLayout();
3024       uint64_t SizeInBytes = DL.getTypeStoreSize(GVTy);
3025       uint64_t Length = SizeInBytes / (ElementSize / 8);
3026       if (Length <= Offset)
3027         return false;
3028
3029       Slice.Array = nullptr;
3030       Slice.Offset = 0;
3031       Slice.Length = Length - Offset;
3032       return true;
3033     }
3034   } else {
3035     // This must be a ConstantDataArray.
3036     Array = dyn_cast<ConstantDataArray>(GV->getInitializer());
3037     if (!Array)
3038       return false;
3039     ArrayTy = Array->getType();
3040   }
3041   if (!ArrayTy->getElementType()->isIntegerTy(ElementSize))
3042     return false;
3043
3044   uint64_t NumElts = ArrayTy->getArrayNumElements();
3045   if (Offset > NumElts)
3046     return false;
3047
3048   Slice.Array = Array;
3049   Slice.Offset = Offset;
3050   Slice.Length = NumElts - Offset;
3051   return true;
3052 }
3053
3054 /// This function computes the length of a null-terminated C string pointed to
3055 /// by V. If successful, it returns true and returns the string in Str.
3056 /// If unsuccessful, it returns false.
3057 bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
3058                                  uint64_t Offset, bool TrimAtNul) {
3059   ConstantDataArraySlice Slice;
3060   if (!getConstantDataArrayInfo(V, Slice, 8, Offset))
3061     return false;
3062
3063   if (Slice.Array == nullptr) {
3064     if (TrimAtNul) {
3065       Str = StringRef();
3066       return true;
3067     }
3068     if (Slice.Length == 1) {
3069       Str = StringRef("", 1);
3070       return true;
3071     }
3072     // We cannot instantiate a StringRef as we do not have an apropriate string
3073     // of 0s at hand.
3074     return false;
3075   }
3076
3077   // Start out with the entire array in the StringRef.
3078   Str = Slice.Array->getAsString();
3079   // Skip over 'offset' bytes.
3080   Str = Str.substr(Slice.Offset);
3081
3082   if (TrimAtNul) {
3083     // Trim off the \0 and anything after it.  If the array is not nul
3084     // terminated, we just return the whole end of string.  The client may know
3085     // some other way that the string is length-bound.
3086     Str = Str.substr(0, Str.find('\0'));
3087   }
3088   return true;
3089 }
3090
3091 // These next two are very similar to the above, but also look through PHI
3092 // nodes.
3093 // TODO: See if we can integrate these two together.
3094
3095 /// If we can compute the length of the string pointed to by
3096 /// the specified pointer, return 'len+1'.  If we can't, return 0.
3097 static uint64_t GetStringLengthH(const Value *V,
3098                                  SmallPtrSetImpl<const PHINode*> &PHIs,
3099                                  unsigned CharSize) {
3100   // Look through noop bitcast instructions.
3101   V = V->stripPointerCasts();
3102
3103   // If this is a PHI node, there are two cases: either we have already seen it
3104   // or we haven't.
3105   if (const PHINode *PN = dyn_cast<PHINode>(V)) {
3106     if (!PHIs.insert(PN).second)
3107       return ~0ULL;  // already in the set.
3108
3109     // If it was new, see if all the input strings are the same length.
3110     uint64_t LenSoFar = ~0ULL;
3111     for (Value *IncValue : PN->incoming_values()) {
3112       uint64_t Len = GetStringLengthH(IncValue, PHIs, CharSize);
3113       if (Len == 0) return 0; // Unknown length -> unknown.
3114
3115       if (Len == ~0ULL) continue;
3116
3117       if (Len != LenSoFar && LenSoFar != ~0ULL)
3118         return 0;    // Disagree -> unknown.
3119       LenSoFar = Len;
3120     }
3121
3122     // Success, all agree.
3123     return LenSoFar;
3124   }
3125
3126   // strlen(select(c,x,y)) -> strlen(x) ^ strlen(y)
3127   if (const SelectInst *SI = dyn_cast<SelectInst>(V)) {
3128     uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs, CharSize);
3129     if (Len1 == 0) return 0;
3130     uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs, CharSize);
3131     if (Len2 == 0) return 0;
3132     if (Len1 == ~0ULL) return Len2;
3133     if (Len2 == ~0ULL) return Len1;
3134     if (Len1 != Len2) return 0;
3135     return Len1;
3136   }
3137
3138   // Otherwise, see if we can read the string.
3139   ConstantDataArraySlice Slice;
3140   if (!getConstantDataArrayInfo(V, Slice, CharSize))
3141     return 0;
3142
3143   if (Slice.Array == nullptr)
3144     return 1;
3145
3146   // Search for nul characters
3147   unsigned NullIndex = 0;
3148   for (unsigned E = Slice.Length; NullIndex < E; ++NullIndex) {
3149     if (Slice.Array->getElementAsInteger(Slice.Offset + NullIndex) == 0)
3150       break;
3151   }
3152
3153   return NullIndex + 1;
3154 }
3155
3156 /// If we can compute the length of the string pointed to by
3157 /// the specified pointer, return 'len+1'.  If we can't, return 0.
3158 uint64_t llvm::GetStringLength(const Value *V, unsigned CharSize) {
3159   if (!V->getType()->isPointerTy()) return 0;
3160
3161   SmallPtrSet<const PHINode*, 32> PHIs;
3162   uint64_t Len = GetStringLengthH(V, PHIs, CharSize);
3163   // If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so return
3164   // an empty string as a length.
3165   return Len == ~0ULL ? 1 : Len;
3166 }
3167
3168 /// \brief \p PN defines a loop-variant pointer to an object.  Check if the
3169 /// previous iteration of the loop was referring to the same object as \p PN.
3170 static bool isSameUnderlyingObjectInLoop(const PHINode *PN,
3171                                          const LoopInfo *LI) {
3172   // Find the loop-defined value.
3173   Loop *L = LI->getLoopFor(PN->getParent());
3174   if (PN->getNumIncomingValues() != 2)
3175     return true;
3176
3177   // Find the value from previous iteration.
3178   auto *PrevValue = dyn_cast<Instruction>(PN->getIncomingValue(0));
3179   if (!PrevValue || LI->getLoopFor(PrevValue->getParent()) != L)
3180     PrevValue = dyn_cast<Instruction>(PN->getIncomingValue(1));
3181   if (!PrevValue || LI->getLoopFor(PrevValue->getParent()) != L)
3182     return true;
3183
3184   // If a new pointer is loaded in the loop, the pointer references a different
3185   // object in every iteration.  E.g.:
3186   //    for (i)
3187   //       int *p = a[i];
3188   //       ...
3189   if (auto *Load = dyn_cast<LoadInst>(PrevValue))
3190     if (!L->isLoopInvariant(Load->getPointerOperand()))
3191       return false;
3192   return true;
3193 }
3194
3195 Value *llvm::GetUnderlyingObject(Value *V, const DataLayout &DL,
3196                                  unsigned MaxLookup) {
3197   if (!V->getType()->isPointerTy())
3198     return V;
3199   for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) {
3200     if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
3201       V = GEP->getPointerOperand();
3202     } else if (Operator::getOpcode(V) == Instruction::BitCast ||
3203                Operator::getOpcode(V) == Instruction::AddrSpaceCast) {
3204       V = cast<Operator>(V)->getOperand(0);
3205     } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
3206       if (GA->isInterposable())
3207         return V;
3208       V = GA->getAliasee();
3209     } else if (isa<AllocaInst>(V)) {
3210       // An alloca can't be further simplified.
3211       return V;
3212     } else {
3213       if (auto CS = CallSite(V))
3214         if (Value *RV = CS.getReturnedArgOperand()) {
3215           V = RV;
3216           continue;
3217         }
3218
3219       // See if InstructionSimplify knows any relevant tricks.
3220       if (Instruction *I = dyn_cast<Instruction>(V))
3221         // TODO: Acquire a DominatorTree and AssumptionCache and use them.
3222         if (Value *Simplified = SimplifyInstruction(I, {DL, I})) {
3223           V = Simplified;
3224           continue;
3225         }
3226
3227       return V;
3228     }
3229     assert(V->getType()->isPointerTy() && "Unexpected operand type!");
3230   }
3231   return V;
3232 }
3233
3234 void llvm::GetUnderlyingObjects(Value *V, SmallVectorImpl<Value *> &Objects,
3235                                 const DataLayout &DL, LoopInfo *LI,
3236                                 unsigned MaxLookup) {
3237   SmallPtrSet<Value *, 4> Visited;
3238   SmallVector<Value *, 4> Worklist;
3239   Worklist.push_back(V);
3240   do {
3241     Value *P = Worklist.pop_back_val();
3242     P = GetUnderlyingObject(P, DL, MaxLookup);
3243
3244     if (!Visited.insert(P).second)
3245       continue;
3246
3247     if (SelectInst *SI = dyn_cast<SelectInst>(P)) {
3248       Worklist.push_back(SI->getTrueValue());
3249       Worklist.push_back(SI->getFalseValue());
3250       continue;
3251     }
3252
3253     if (PHINode *PN = dyn_cast<PHINode>(P)) {
3254       // If this PHI changes the underlying object in every iteration of the
3255       // loop, don't look through it.  Consider:
3256       //   int **A;
3257       //   for (i) {
3258       //     Prev = Curr;     // Prev = PHI (Prev_0, Curr)
3259       //     Curr = A[i];
3260       //     *Prev, *Curr;
3261       //
3262       // Prev is tracking Curr one iteration behind so they refer to different
3263       // underlying objects.
3264       if (!LI || !LI->isLoopHeader(PN->getParent()) ||
3265           isSameUnderlyingObjectInLoop(PN, LI))
3266         for (Value *IncValue : PN->incoming_values())
3267           Worklist.push_back(IncValue);
3268       continue;
3269     }
3270
3271     Objects.push_back(P);
3272   } while (!Worklist.empty());
3273 }
3274
3275 /// Return true if the only users of this pointer are lifetime markers.
3276 bool llvm::onlyUsedByLifetimeMarkers(const Value *V) {
3277   for (const User *U : V->users()) {
3278     const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
3279     if (!II) return false;
3280
3281     if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
3282         II->getIntrinsicID() != Intrinsic::lifetime_end)
3283       return false;
3284   }
3285   return true;
3286 }
3287
3288 bool llvm::isSafeToSpeculativelyExecute(const Value *V,
3289                                         const Instruction *CtxI,
3290                                         const DominatorTree *DT) {
3291   const Operator *Inst = dyn_cast<Operator>(V);
3292   if (!Inst)
3293     return false;
3294
3295   for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i)
3296     if (Constant *C = dyn_cast<Constant>(Inst->getOperand(i)))
3297       if (C->canTrap())
3298         return false;
3299
3300   switch (Inst->getOpcode()) {
3301   default:
3302     return true;
3303   case Instruction::UDiv:
3304   case Instruction::URem: {
3305     // x / y is undefined if y == 0.
3306     const APInt *V;
3307     if (match(Inst->getOperand(1), m_APInt(V)))
3308       return *V != 0;
3309     return false;
3310   }
3311   case Instruction::SDiv:
3312   case Instruction::SRem: {
3313     // x / y is undefined if y == 0 or x == INT_MIN and y == -1
3314     const APInt *Numerator, *Denominator;
3315     if (!match(Inst->getOperand(1), m_APInt(Denominator)))
3316       return false;
3317     // We cannot hoist this division if the denominator is 0.
3318     if (*Denominator == 0)
3319       return false;
3320     // It's safe to hoist if the denominator is not 0 or -1.
3321     if (*Denominator != -1)
3322       return true;
3323     // At this point we know that the denominator is -1.  It is safe to hoist as
3324     // long we know that the numerator is not INT_MIN.
3325     if (match(Inst->getOperand(0), m_APInt(Numerator)))
3326       return !Numerator->isMinSignedValue();
3327     // The numerator *might* be MinSignedValue.
3328     return false;
3329   }
3330   case Instruction::Load: {
3331     const LoadInst *LI = cast<LoadInst>(Inst);
3332     if (!LI->isUnordered() ||
3333         // Speculative load may create a race that did not exist in the source.
3334         LI->getFunction()->hasFnAttribute(Attribute::SanitizeThread) ||
3335         // Speculative load may load data from dirty regions.
3336         LI->getFunction()->hasFnAttribute(Attribute::SanitizeAddress))
3337       return false;
3338     const DataLayout &DL = LI->getModule()->getDataLayout();
3339     return isDereferenceableAndAlignedPointer(LI->getPointerOperand(),
3340                                               LI->getAlignment(), DL, CtxI, DT);
3341   }
3342   case Instruction::Call: {
3343     auto *CI = cast<const CallInst>(Inst);
3344     const Function *Callee = CI->getCalledFunction();
3345
3346     // The called function could have undefined behavior or side-effects, even
3347     // if marked readnone nounwind.
3348     return Callee && Callee->isSpeculatable();
3349   }
3350   case Instruction::VAArg:
3351   case Instruction::Alloca:
3352   case Instruction::Invoke:
3353   case Instruction::PHI:
3354   case Instruction::Store:
3355   case Instruction::Ret:
3356   case Instruction::Br:
3357   case Instruction::IndirectBr:
3358   case Instruction::Switch:
3359   case Instruction::Unreachable:
3360   case Instruction::Fence:
3361   case Instruction::AtomicRMW:
3362   case Instruction::AtomicCmpXchg:
3363   case Instruction::LandingPad:
3364   case Instruction::Resume:
3365   case Instruction::CatchSwitch:
3366   case Instruction::CatchPad:
3367   case Instruction::CatchRet:
3368   case Instruction::CleanupPad:
3369   case Instruction::CleanupRet:
3370     return false; // Misc instructions which have effects
3371   }
3372 }
3373
3374 bool llvm::mayBeMemoryDependent(const Instruction &I) {
3375   return I.mayReadOrWriteMemory() || !isSafeToSpeculativelyExecute(&I);
3376 }
3377
3378 /// Return true if we know that the specified value is never null.
3379 bool llvm::isKnownNonNull(const Value *V) {
3380   assert(V->getType()->isPointerTy() && "V must be pointer type");
3381
3382   // Alloca never returns null, malloc might.
3383   if (isa<AllocaInst>(V)) return true;
3384
3385   // A byval, inalloca, or nonnull argument is never null.
3386   if (const Argument *A = dyn_cast<Argument>(V))
3387     return A->hasByValOrInAllocaAttr() || A->hasNonNullAttr();
3388
3389   // A global variable in address space 0 is non null unless extern weak
3390   // or an absolute symbol reference. Other address spaces may have null as a
3391   // valid address for a global, so we can't assume anything.
3392   if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
3393     return !GV->isAbsoluteSymbolRef() && !GV->hasExternalWeakLinkage() &&
3394            GV->getType()->getAddressSpace() == 0;
3395
3396   // A Load tagged with nonnull metadata is never null.
3397   if (const LoadInst *LI = dyn_cast<LoadInst>(V))
3398     return LI->getMetadata(LLVMContext::MD_nonnull);
3399
3400   if (auto CS = ImmutableCallSite(V))
3401     if (CS.isReturnNonNull())
3402       return true;
3403
3404   return false;
3405 }
3406
3407 static bool isKnownNonNullFromDominatingCondition(const Value *V,
3408                                                   const Instruction *CtxI,
3409                                                   const DominatorTree *DT) {
3410   assert(V->getType()->isPointerTy() && "V must be pointer type");
3411   assert(!isa<ConstantData>(V) && "Did not expect ConstantPointerNull");
3412   assert(CtxI && "Context instruction required for analysis");
3413   assert(DT && "Dominator tree required for analysis");
3414
3415   unsigned NumUsesExplored = 0;
3416   for (auto *U : V->users()) {
3417     // Avoid massive lists
3418     if (NumUsesExplored >= DomConditionsMaxUses)
3419       break;
3420     NumUsesExplored++;
3421
3422     // If the value is used as an argument to a call or invoke, then argument
3423     // attributes may provide an answer about null-ness.
3424     if (auto CS = ImmutableCallSite(U))
3425       if (auto *CalledFunc = CS.getCalledFunction())
3426         for (const Argument &Arg : CalledFunc->args())
3427           if (CS.getArgOperand(Arg.getArgNo()) == V &&
3428               Arg.hasNonNullAttr() && DT->dominates(CS.getInstruction(), CtxI))
3429             return true;
3430
3431     // Consider only compare instructions uniquely controlling a branch
3432     CmpInst::Predicate Pred;
3433     if (!match(const_cast<User *>(U),
3434                m_c_ICmp(Pred, m_Specific(V), m_Zero())) ||
3435         (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE))
3436       continue;
3437
3438     for (auto *CmpU : U->users()) {
3439       if (const BranchInst *BI = dyn_cast<BranchInst>(CmpU)) {
3440         assert(BI->isConditional() && "uses a comparison!");
3441
3442         BasicBlock *NonNullSuccessor =
3443             BI->getSuccessor(Pred == ICmpInst::ICMP_EQ ? 1 : 0);
3444         BasicBlockEdge Edge(BI->getParent(), NonNullSuccessor);
3445         if (Edge.isSingleEdge() && DT->dominates(Edge, CtxI->getParent()))
3446           return true;
3447       } else if (Pred == ICmpInst::ICMP_NE &&
3448                  match(CmpU, m_Intrinsic<Intrinsic::experimental_guard>()) &&
3449                  DT->dominates(cast<Instruction>(CmpU), CtxI)) {
3450         return true;
3451       }
3452     }
3453   }
3454
3455   return false;
3456 }
3457
3458 bool llvm::isKnownNonNullAt(const Value *V, const Instruction *CtxI,
3459                             const DominatorTree *DT) {
3460   if (isa<ConstantPointerNull>(V) || isa<UndefValue>(V))
3461     return false;
3462
3463   if (isKnownNonNull(V))
3464     return true;
3465
3466   if (!CtxI || !DT)
3467     return false;
3468
3469   return ::isKnownNonNullFromDominatingCondition(V, CtxI, DT);
3470 }
3471
3472 OverflowResult llvm::computeOverflowForUnsignedMul(const Value *LHS,
3473                                                    const Value *RHS,
3474                                                    const DataLayout &DL,
3475                                                    AssumptionCache *AC,
3476                                                    const Instruction *CxtI,
3477                                                    const DominatorTree *DT) {
3478   // Multiplying n * m significant bits yields a result of n + m significant
3479   // bits. If the total number of significant bits does not exceed the
3480   // result bit width (minus 1), there is no overflow.
3481   // This means if we have enough leading zero bits in the operands
3482   // we can guarantee that the result does not overflow.
3483   // Ref: "Hacker's Delight" by Henry Warren
3484   unsigned BitWidth = LHS->getType()->getScalarSizeInBits();
3485   KnownBits LHSKnown(BitWidth);
3486   KnownBits RHSKnown(BitWidth);
3487   computeKnownBits(LHS, LHSKnown, DL, /*Depth=*/0, AC, CxtI, DT);
3488   computeKnownBits(RHS, RHSKnown, DL, /*Depth=*/0, AC, CxtI, DT);
3489   // Note that underestimating the number of zero bits gives a more
3490   // conservative answer.
3491   unsigned ZeroBits = LHSKnown.countMinLeadingZeros() +
3492                       RHSKnown.countMinLeadingZeros();
3493   // First handle the easy case: if we have enough zero bits there's
3494   // definitely no overflow.
3495   if (ZeroBits >= BitWidth)
3496     return OverflowResult::NeverOverflows;
3497
3498   // Get the largest possible values for each operand.
3499   APInt LHSMax = ~LHSKnown.Zero;
3500   APInt RHSMax = ~RHSKnown.Zero;
3501
3502   // We know the multiply operation doesn't overflow if the maximum values for
3503   // each operand will not overflow after we multiply them together.
3504   bool MaxOverflow;
3505   (void)LHSMax.umul_ov(RHSMax, MaxOverflow);
3506   if (!MaxOverflow)
3507     return OverflowResult::NeverOverflows;
3508
3509   // We know it always overflows if multiplying the smallest possible values for
3510   // the operands also results in overflow.
3511   bool MinOverflow;
3512   (void)LHSKnown.One.umul_ov(RHSKnown.One, MinOverflow);
3513   if (MinOverflow)
3514     return OverflowResult::AlwaysOverflows;
3515
3516   return OverflowResult::MayOverflow;
3517 }
3518
3519 OverflowResult llvm::computeOverflowForUnsignedAdd(const Value *LHS,
3520                                                    const Value *RHS,
3521                                                    const DataLayout &DL,
3522                                                    AssumptionCache *AC,
3523                                                    const Instruction *CxtI,
3524                                                    const DominatorTree *DT) {
3525   KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT);
3526   if (LHSKnown.isNonNegative() || LHSKnown.isNegative()) {
3527     KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT);
3528
3529     if (LHSKnown.isNegative() && RHSKnown.isNegative()) {
3530       // The sign bit is set in both cases: this MUST overflow.
3531       // Create a simple add instruction, and insert it into the struct.
3532       return OverflowResult::AlwaysOverflows;
3533     }
3534
3535     if (LHSKnown.isNonNegative() && RHSKnown.isNonNegative()) {
3536       // The sign bit is clear in both cases: this CANNOT overflow.
3537       // Create a simple add instruction, and insert it into the struct.
3538       return OverflowResult::NeverOverflows;
3539     }
3540   }
3541
3542   return OverflowResult::MayOverflow;
3543 }
3544
3545 /// \brief Return true if we can prove that adding the two values of the
3546 /// knownbits will not overflow.
3547 /// Otherwise return false.
3548 static bool checkRippleForSignedAdd(const KnownBits &LHSKnown,
3549                                     const KnownBits &RHSKnown) {
3550   // Addition of two 2's complement numbers having opposite signs will never
3551   // overflow.
3552   if ((LHSKnown.isNegative() && RHSKnown.isNonNegative()) ||
3553       (LHSKnown.isNonNegative() && RHSKnown.isNegative()))
3554     return true;
3555
3556   // If either of the values is known to be non-negative, adding them can only
3557   // overflow if the second is also non-negative, so we can assume that.
3558   // Two non-negative numbers will only overflow if there is a carry to the
3559   // sign bit, so we can check if even when the values are as big as possible
3560   // there is no overflow to the sign bit.
3561   if (LHSKnown.isNonNegative() || RHSKnown.isNonNegative()) {
3562     APInt MaxLHS = ~LHSKnown.Zero;
3563     MaxLHS.clearSignBit();
3564     APInt MaxRHS = ~RHSKnown.Zero;
3565     MaxRHS.clearSignBit();
3566     APInt Result = std::move(MaxLHS) + std::move(MaxRHS);
3567     return Result.isSignBitClear();
3568   }
3569
3570   // If either of the values is known to be negative, adding them can only
3571   // overflow if the second is also negative, so we can assume that.
3572   // Two negative number will only overflow if there is no carry to the sign
3573   // bit, so we can check if even when the values are as small as possible
3574   // there is overflow to the sign bit.
3575   if (LHSKnown.isNegative() || RHSKnown.isNegative()) {
3576     APInt MinLHS = LHSKnown.One;
3577     MinLHS.clearSignBit();
3578     APInt MinRHS = RHSKnown.One;
3579     MinRHS.clearSignBit();
3580     APInt Result = std::move(MinLHS) + std::move(MinRHS);
3581     return Result.isSignBitSet();
3582   }
3583
3584   // If we reached here it means that we know nothing about the sign bits.
3585   // In this case we can't know if there will be an overflow, since by
3586   // changing the sign bits any two values can be made to overflow.
3587   return false;
3588 }
3589
3590 static OverflowResult computeOverflowForSignedAdd(const Value *LHS,
3591                                                   const Value *RHS,
3592                                                   const AddOperator *Add,
3593                                                   const DataLayout &DL,
3594                                                   AssumptionCache *AC,
3595                                                   const Instruction *CxtI,
3596                                                   const DominatorTree *DT) {
3597   if (Add && Add->hasNoSignedWrap()) {
3598     return OverflowResult::NeverOverflows;
3599   }
3600
3601   // If LHS and RHS each have at least two sign bits, the addition will look
3602   // like
3603   //
3604   // XX..... +
3605   // YY.....
3606   //
3607   // If the carry into the most significant position is 0, X and Y can't both
3608   // be 1 and therefore the carry out of the addition is also 0.
3609   //
3610   // If the carry into the most significant position is 1, X and Y can't both
3611   // be 0 and therefore the carry out of the addition is also 1.
3612   //
3613   // Since the carry into the most significant position is always equal to
3614   // the carry out of the addition, there is no signed overflow.
3615   if (ComputeNumSignBits(LHS, DL, 0, AC, CxtI, DT) > 1 &&
3616       ComputeNumSignBits(RHS, DL, 0, AC, CxtI, DT) > 1)
3617     return OverflowResult::NeverOverflows;
3618
3619   KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT);
3620   KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT);
3621
3622   if (checkRippleForSignedAdd(LHSKnown, RHSKnown))
3623     return OverflowResult::NeverOverflows;
3624
3625   // The remaining code needs Add to be available. Early returns if not so.
3626   if (!Add)
3627     return OverflowResult::MayOverflow;
3628
3629   // If the sign of Add is the same as at least one of the operands, this add
3630   // CANNOT overflow. This is particularly useful when the sum is
3631   // @llvm.assume'ed non-negative rather than proved so from analyzing its
3632   // operands.
3633   bool LHSOrRHSKnownNonNegative =
3634       (LHSKnown.isNonNegative() || RHSKnown.isNonNegative());
3635   bool LHSOrRHSKnownNegative =
3636       (LHSKnown.isNegative() || RHSKnown.isNegative());
3637   if (LHSOrRHSKnownNonNegative || LHSOrRHSKnownNegative) {
3638     KnownBits AddKnown = computeKnownBits(Add, DL, /*Depth=*/0, AC, CxtI, DT);
3639     if ((AddKnown.isNonNegative() && LHSOrRHSKnownNonNegative) ||
3640         (AddKnown.isNegative() && LHSOrRHSKnownNegative)) {
3641       return OverflowResult::NeverOverflows;
3642     }
3643   }
3644
3645   return OverflowResult::MayOverflow;
3646 }
3647
3648 bool llvm::isOverflowIntrinsicNoWrap(const IntrinsicInst *II,
3649                                      const DominatorTree &DT) {
3650 #ifndef NDEBUG
3651   auto IID = II->getIntrinsicID();
3652   assert((IID == Intrinsic::sadd_with_overflow ||
3653           IID == Intrinsic::uadd_with_overflow ||
3654           IID == Intrinsic::ssub_with_overflow ||
3655           IID == Intrinsic::usub_with_overflow ||
3656           IID == Intrinsic::smul_with_overflow ||
3657           IID == Intrinsic::umul_with_overflow) &&
3658          "Not an overflow intrinsic!");
3659 #endif
3660
3661   SmallVector<const BranchInst *, 2> GuardingBranches;
3662   SmallVector<const ExtractValueInst *, 2> Results;
3663
3664   for (const User *U : II->users()) {
3665     if (const auto *EVI = dyn_cast<ExtractValueInst>(U)) {
3666       assert(EVI->getNumIndices() == 1 && "Obvious from CI's type");
3667
3668       if (EVI->getIndices()[0] == 0)
3669         Results.push_back(EVI);
3670       else {
3671         assert(EVI->getIndices()[0] == 1 && "Obvious from CI's type");
3672
3673         for (const auto *U : EVI->users())
3674           if (const auto *B = dyn_cast<BranchInst>(U)) {
3675             assert(B->isConditional() && "How else is it using an i1?");
3676             GuardingBranches.push_back(B);
3677           }
3678       }
3679     } else {
3680       // We are using the aggregate directly in a way we don't want to analyze
3681       // here (storing it to a global, say).
3682       return false;
3683     }
3684   }
3685
3686   auto AllUsesGuardedByBranch = [&](const BranchInst *BI) {
3687     BasicBlockEdge NoWrapEdge(BI->getParent(), BI->getSuccessor(1));
3688     if (!NoWrapEdge.isSingleEdge())
3689       return false;
3690
3691     // Check if all users of the add are provably no-wrap.
3692     for (const auto *Result : Results) {
3693       // If the extractvalue itself is not executed on overflow, the we don't
3694       // need to check each use separately, since domination is transitive.
3695       if (DT.dominates(NoWrapEdge, Result->getParent()))
3696         continue;
3697
3698       for (auto &RU : Result->uses())
3699         if (!DT.dominates(NoWrapEdge, RU))
3700           return false;
3701     }
3702
3703     return true;
3704   };
3705
3706   return any_of(GuardingBranches, AllUsesGuardedByBranch);
3707 }
3708
3709
3710 OverflowResult llvm::computeOverflowForSignedAdd(const AddOperator *Add,
3711                                                  const DataLayout &DL,
3712                                                  AssumptionCache *AC,
3713                                                  const Instruction *CxtI,
3714                                                  const DominatorTree *DT) {
3715   return ::computeOverflowForSignedAdd(Add->getOperand(0), Add->getOperand(1),
3716                                        Add, DL, AC, CxtI, DT);
3717 }
3718
3719 OverflowResult llvm::computeOverflowForSignedAdd(const Value *LHS,
3720                                                  const Value *RHS,
3721                                                  const DataLayout &DL,
3722                                                  AssumptionCache *AC,
3723                                                  const Instruction *CxtI,
3724                                                  const DominatorTree *DT) {
3725   return ::computeOverflowForSignedAdd(LHS, RHS, nullptr, DL, AC, CxtI, DT);
3726 }
3727
3728 bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) {
3729   // A memory operation returns normally if it isn't volatile. A volatile
3730   // operation is allowed to trap.
3731   //
3732   // An atomic operation isn't guaranteed to return in a reasonable amount of
3733   // time because it's possible for another thread to interfere with it for an
3734   // arbitrary length of time, but programs aren't allowed to rely on that.
3735   if (const LoadInst *LI = dyn_cast<LoadInst>(I))
3736     return !LI->isVolatile();
3737   if (const StoreInst *SI = dyn_cast<StoreInst>(I))
3738     return !SI->isVolatile();
3739   if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(I))
3740     return !CXI->isVolatile();
3741   if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I))
3742     return !RMWI->isVolatile();
3743   if (const MemIntrinsic *MII = dyn_cast<MemIntrinsic>(I))
3744     return !MII->isVolatile();
3745
3746   // If there is no successor, then execution can't transfer to it.
3747   if (const auto *CRI = dyn_cast<CleanupReturnInst>(I))
3748     return !CRI->unwindsToCaller();
3749   if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I))
3750     return !CatchSwitch->unwindsToCaller();
3751   if (isa<ResumeInst>(I))
3752     return false;
3753   if (isa<ReturnInst>(I))
3754     return false;
3755   if (isa<UnreachableInst>(I))
3756     return false;
3757
3758   // Calls can throw, or contain an infinite loop, or kill the process.
3759   if (auto CS = ImmutableCallSite(I)) {
3760     // Call sites that throw have implicit non-local control flow.
3761     if (!CS.doesNotThrow())
3762       return false;
3763
3764     // Non-throwing call sites can loop infinitely, call exit/pthread_exit
3765     // etc. and thus not return.  However, LLVM already assumes that
3766     //
3767     //  - Thread exiting actions are modeled as writes to memory invisible to
3768     //    the program.
3769     //
3770     //  - Loops that don't have side effects (side effects are volatile/atomic
3771     //    stores and IO) always terminate (see http://llvm.org/PR965).
3772     //    Furthermore IO itself is also modeled as writes to memory invisible to
3773     //    the program.
3774     //
3775     // We rely on those assumptions here, and use the memory effects of the call
3776     // target as a proxy for checking that it always returns.
3777
3778     // FIXME: This isn't aggressive enough; a call which only writes to a global
3779     // is guaranteed to return.
3780     return CS.onlyReadsMemory() || CS.onlyAccessesArgMemory() ||
3781            match(I, m_Intrinsic<Intrinsic::assume>());
3782   }
3783
3784   // Other instructions return normally.
3785   return true;
3786 }
3787
3788 bool llvm::isGuaranteedToExecuteForEveryIteration(const Instruction *I,
3789                                                   const Loop *L) {
3790   // The loop header is guaranteed to be executed for every iteration.
3791   //
3792   // FIXME: Relax this constraint to cover all basic blocks that are
3793   // guaranteed to be executed at every iteration.
3794   if (I->getParent() != L->getHeader()) return false;
3795
3796   for (const Instruction &LI : *L->getHeader()) {
3797     if (&LI == I) return true;
3798     if (!isGuaranteedToTransferExecutionToSuccessor(&LI)) return false;
3799   }
3800   llvm_unreachable("Instruction not contained in its own parent basic block.");
3801 }
3802
3803 bool llvm::propagatesFullPoison(const Instruction *I) {
3804   switch (I->getOpcode()) {
3805   case Instruction::Add:
3806   case Instruction::Sub:
3807   case Instruction::Xor:
3808   case Instruction::Trunc:
3809   case Instruction::BitCast:
3810   case Instruction::AddrSpaceCast:
3811   case Instruction::Mul:
3812   case Instruction::Shl:
3813   case Instruction::GetElementPtr:
3814     // These operations all propagate poison unconditionally. Note that poison
3815     // is not any particular value, so xor or subtraction of poison with
3816     // itself still yields poison, not zero.
3817     return true;
3818
3819   case Instruction::AShr:
3820   case Instruction::SExt:
3821     // For these operations, one bit of the input is replicated across
3822     // multiple output bits. A replicated poison bit is still poison.
3823     return true;
3824
3825   case Instruction::ICmp:
3826     // Comparing poison with any value yields poison.  This is why, for
3827     // instance, x s< (x +nsw 1) can be folded to true.
3828     return true;
3829
3830   default:
3831     return false;
3832   }
3833 }
3834
3835 const Value *llvm::getGuaranteedNonFullPoisonOp(const Instruction *I) {
3836   switch (I->getOpcode()) {
3837     case Instruction::Store:
3838       return cast<StoreInst>(I)->getPointerOperand();
3839
3840     case Instruction::Load:
3841       return cast<LoadInst>(I)->getPointerOperand();
3842
3843     case Instruction::AtomicCmpXchg:
3844       return cast<AtomicCmpXchgInst>(I)->getPointerOperand();
3845
3846     case Instruction::AtomicRMW:
3847       return cast<AtomicRMWInst>(I)->getPointerOperand();
3848
3849     case Instruction::UDiv:
3850     case Instruction::SDiv:
3851     case Instruction::URem:
3852     case Instruction::SRem:
3853       return I->getOperand(1);
3854
3855     default:
3856       return nullptr;
3857   }
3858 }
3859
3860 bool llvm::programUndefinedIfFullPoison(const Instruction *PoisonI) {
3861   // We currently only look for uses of poison values within the same basic
3862   // block, as that makes it easier to guarantee that the uses will be
3863   // executed given that PoisonI is executed.
3864   //
3865   // FIXME: Expand this to consider uses beyond the same basic block. To do
3866   // this, look out for the distinction between post-dominance and strong
3867   // post-dominance.
3868   const BasicBlock *BB = PoisonI->getParent();
3869
3870   // Set of instructions that we have proved will yield poison if PoisonI
3871   // does.
3872   SmallSet<const Value *, 16> YieldsPoison;
3873   SmallSet<const BasicBlock *, 4> Visited;
3874   YieldsPoison.insert(PoisonI);
3875   Visited.insert(PoisonI->getParent());
3876
3877   BasicBlock::const_iterator Begin = PoisonI->getIterator(), End = BB->end();
3878
3879   unsigned Iter = 0;
3880   while (Iter++ < MaxDepth) {
3881     for (auto &I : make_range(Begin, End)) {
3882       if (&I != PoisonI) {
3883         const Value *NotPoison = getGuaranteedNonFullPoisonOp(&I);
3884         if (NotPoison != nullptr && YieldsPoison.count(NotPoison))
3885           return true;
3886         if (!isGuaranteedToTransferExecutionToSuccessor(&I))
3887           return false;
3888       }
3889
3890       // Mark poison that propagates from I through uses of I.
3891       if (YieldsPoison.count(&I)) {
3892         for (const User *User : I.users()) {
3893           const Instruction *UserI = cast<Instruction>(User);
3894           if (propagatesFullPoison(UserI))
3895             YieldsPoison.insert(User);
3896         }
3897       }
3898     }
3899
3900     if (auto *NextBB = BB->getSingleSuccessor()) {
3901       if (Visited.insert(NextBB).second) {
3902         BB = NextBB;
3903         Begin = BB->getFirstNonPHI()->getIterator();
3904         End = BB->end();
3905         continue;
3906       }
3907     }
3908
3909     break;
3910   };
3911   return false;
3912 }
3913
3914 static bool isKnownNonNaN(const Value *V, FastMathFlags FMF) {
3915   if (FMF.noNaNs())
3916     return true;
3917
3918   if (auto *C = dyn_cast<ConstantFP>(V))
3919     return !C->isNaN();
3920   return false;
3921 }
3922
3923 static bool isKnownNonZero(const Value *V) {
3924   if (auto *C = dyn_cast<ConstantFP>(V))
3925     return !C->isZero();
3926   return false;
3927 }
3928
3929 /// Match non-obvious integer minimum and maximum sequences.
3930 static SelectPatternResult matchMinMax(CmpInst::Predicate Pred,
3931                                        Value *CmpLHS, Value *CmpRHS,
3932                                        Value *TrueVal, Value *FalseVal,
3933                                        Value *&LHS, Value *&RHS) {
3934   // Assume success. If there's no match, callers should not use these anyway.
3935   LHS = TrueVal;
3936   RHS = FalseVal;
3937
3938   // Recognize variations of:
3939   // CLAMP(v,l,h) ==> ((v) < (l) ? (l) : ((v) > (h) ? (h) : (v)))
3940   const APInt *C1;
3941   if (CmpRHS == TrueVal && match(CmpRHS, m_APInt(C1))) {
3942     const APInt *C2;
3943
3944     // (X <s C1) ? C1 : SMIN(X, C2) ==> SMAX(SMIN(X, C2), C1)
3945     if (match(FalseVal, m_SMin(m_Specific(CmpLHS), m_APInt(C2))) &&
3946         C1->slt(*C2) && Pred == CmpInst::ICMP_SLT)
3947       return {SPF_SMAX, SPNB_NA, false};
3948
3949     // (X >s C1) ? C1 : SMAX(X, C2) ==> SMIN(SMAX(X, C2), C1)
3950     if (match(FalseVal, m_SMax(m_Specific(CmpLHS), m_APInt(C2))) &&
3951         C1->sgt(*C2) && Pred == CmpInst::ICMP_SGT)
3952       return {SPF_SMIN, SPNB_NA, false};
3953
3954     // (X <u C1) ? C1 : UMIN(X, C2) ==> UMAX(UMIN(X, C2), C1)
3955     if (match(FalseVal, m_UMin(m_Specific(CmpLHS), m_APInt(C2))) &&
3956         C1->ult(*C2) && Pred == CmpInst::ICMP_ULT)
3957       return {SPF_UMAX, SPNB_NA, false};
3958
3959     // (X >u C1) ? C1 : UMAX(X, C2) ==> UMIN(UMAX(X, C2), C1)
3960     if (match(FalseVal, m_UMax(m_Specific(CmpLHS), m_APInt(C2))) &&
3961         C1->ugt(*C2) && Pred == CmpInst::ICMP_UGT)
3962       return {SPF_UMIN, SPNB_NA, false};
3963   }
3964
3965   if (Pred != CmpInst::ICMP_SGT && Pred != CmpInst::ICMP_SLT)
3966     return {SPF_UNKNOWN, SPNB_NA, false};
3967
3968   // Z = X -nsw Y
3969   // (X >s Y) ? 0 : Z ==> (Z >s 0) ? 0 : Z ==> SMIN(Z, 0)
3970   // (X <s Y) ? 0 : Z ==> (Z <s 0) ? 0 : Z ==> SMAX(Z, 0)
3971   if (match(TrueVal, m_Zero()) &&
3972       match(FalseVal, m_NSWSub(m_Specific(CmpLHS), m_Specific(CmpRHS))))
3973     return {Pred == CmpInst::ICMP_SGT ? SPF_SMIN : SPF_SMAX, SPNB_NA, false};
3974
3975   // Z = X -nsw Y
3976   // (X >s Y) ? Z : 0 ==> (Z >s 0) ? Z : 0 ==> SMAX(Z, 0)
3977   // (X <s Y) ? Z : 0 ==> (Z <s 0) ? Z : 0 ==> SMIN(Z, 0)
3978   if (match(FalseVal, m_Zero()) &&
3979       match(TrueVal, m_NSWSub(m_Specific(CmpLHS), m_Specific(CmpRHS))))
3980     return {Pred == CmpInst::ICMP_SGT ? SPF_SMAX : SPF_SMIN, SPNB_NA, false};
3981
3982   if (!match(CmpRHS, m_APInt(C1)))
3983     return {SPF_UNKNOWN, SPNB_NA, false};
3984
3985   // An unsigned min/max can be written with a signed compare.
3986   const APInt *C2;
3987   if ((CmpLHS == TrueVal && match(FalseVal, m_APInt(C2))) ||
3988       (CmpLHS == FalseVal && match(TrueVal, m_APInt(C2)))) {
3989     // Is the sign bit set?
3990     // (X <s 0) ? X : MAXVAL ==> (X >u MAXVAL) ? X : MAXVAL ==> UMAX
3991     // (X <s 0) ? MAXVAL : X ==> (X >u MAXVAL) ? MAXVAL : X ==> UMIN
3992     if (Pred == CmpInst::ICMP_SLT && *C1 == 0 && C2->isMaxSignedValue())
3993       return {CmpLHS == TrueVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false};
3994
3995     // Is the sign bit clear?
3996     // (X >s -1) ? MINVAL : X ==> (X <u MINVAL) ? MINVAL : X ==> UMAX
3997     // (X >s -1) ? X : MINVAL ==> (X <u MINVAL) ? X : MINVAL ==> UMIN
3998     if (Pred == CmpInst::ICMP_SGT && C1->isAllOnesValue() &&
3999         C2->isMinSignedValue())
4000       return {CmpLHS == FalseVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false};
4001   }
4002
4003   // Look through 'not' ops to find disguised signed min/max.
4004   // (X >s C) ? ~X : ~C ==> (~X <s ~C) ? ~X : ~C ==> SMIN(~X, ~C)
4005   // (X <s C) ? ~X : ~C ==> (~X >s ~C) ? ~X : ~C ==> SMAX(~X, ~C)
4006   if (match(TrueVal, m_Not(m_Specific(CmpLHS))) &&
4007       match(FalseVal, m_APInt(C2)) && ~(*C1) == *C2)
4008     return {Pred == CmpInst::ICMP_SGT ? SPF_SMIN : SPF_SMAX, SPNB_NA, false};
4009
4010   // (X >s C) ? ~C : ~X ==> (~X <s ~C) ? ~C : ~X ==> SMAX(~C, ~X)
4011   // (X <s C) ? ~C : ~X ==> (~X >s ~C) ? ~C : ~X ==> SMIN(~C, ~X)
4012   if (match(FalseVal, m_Not(m_Specific(CmpLHS))) &&
4013       match(TrueVal, m_APInt(C2)) && ~(*C1) == *C2)
4014     return {Pred == CmpInst::ICMP_SGT ? SPF_SMAX : SPF_SMIN, SPNB_NA, false};
4015
4016   return {SPF_UNKNOWN, SPNB_NA, false};
4017 }
4018
4019 static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred,
4020                                               FastMathFlags FMF,
4021                                               Value *CmpLHS, Value *CmpRHS,
4022                                               Value *TrueVal, Value *FalseVal,
4023                                               Value *&LHS, Value *&RHS) {
4024   LHS = CmpLHS;
4025   RHS = CmpRHS;
4026
4027   // If the predicate is an "or-equal"  (FP) predicate, then signed zeroes may
4028   // return inconsistent results between implementations.
4029   //   (0.0 <= -0.0) ? 0.0 : -0.0 // Returns 0.0
4030   //   minNum(0.0, -0.0)          // May return -0.0 or 0.0 (IEEE 754-2008 5.3.1)
4031   // Therefore we behave conservatively and only proceed if at least one of the
4032   // operands is known to not be zero, or if we don't care about signed zeroes.
4033   switch (Pred) {
4034   default: break;
4035   case CmpInst::FCMP_OGE: case CmpInst::FCMP_OLE:
4036   case CmpInst::FCMP_UGE: case CmpInst::FCMP_ULE:
4037     if (!FMF.noSignedZeros() && !isKnownNonZero(CmpLHS) &&
4038         !isKnownNonZero(CmpRHS))
4039       return {SPF_UNKNOWN, SPNB_NA, false};
4040   }
4041
4042   SelectPatternNaNBehavior NaNBehavior = SPNB_NA;
4043   bool Ordered = false;
4044
4045   // When given one NaN and one non-NaN input:
4046   //   - maxnum/minnum (C99 fmaxf()/fminf()) return the non-NaN input.
4047   //   - A simple C99 (a < b ? a : b) construction will return 'b' (as the
4048   //     ordered comparison fails), which could be NaN or non-NaN.
4049   // so here we discover exactly what NaN behavior is required/accepted.
4050   if (CmpInst::isFPPredicate(Pred)) {
4051     bool LHSSafe = isKnownNonNaN(CmpLHS, FMF);
4052     bool RHSSafe = isKnownNonNaN(CmpRHS, FMF);
4053
4054     if (LHSSafe && RHSSafe) {
4055       // Both operands are known non-NaN.
4056       NaNBehavior = SPNB_RETURNS_ANY;
4057     } else if (CmpInst::isOrdered(Pred)) {
4058       // An ordered comparison will return false when given a NaN, so it
4059       // returns the RHS.
4060       Ordered = true;
4061       if (LHSSafe)
4062         // LHS is non-NaN, so if RHS is NaN then NaN will be returned.
4063         NaNBehavior = SPNB_RETURNS_NAN;
4064       else if (RHSSafe)
4065         NaNBehavior = SPNB_RETURNS_OTHER;
4066       else
4067         // Completely unsafe.
4068         return {SPF_UNKNOWN, SPNB_NA, false};
4069     } else {
4070       Ordered = false;
4071       // An unordered comparison will return true when given a NaN, so it
4072       // returns the LHS.
4073       if (LHSSafe)
4074         // LHS is non-NaN, so if RHS is NaN then non-NaN will be returned.
4075         NaNBehavior = SPNB_RETURNS_OTHER;
4076       else if (RHSSafe)
4077         NaNBehavior = SPNB_RETURNS_NAN;
4078       else
4079         // Completely unsafe.
4080         return {SPF_UNKNOWN, SPNB_NA, false};
4081     }
4082   }
4083
4084   if (TrueVal == CmpRHS && FalseVal == CmpLHS) {
4085     std::swap(CmpLHS, CmpRHS);
4086     Pred = CmpInst::getSwappedPredicate(Pred);
4087     if (NaNBehavior == SPNB_RETURNS_NAN)
4088       NaNBehavior = SPNB_RETURNS_OTHER;
4089     else if (NaNBehavior == SPNB_RETURNS_OTHER)
4090       NaNBehavior = SPNB_RETURNS_NAN;
4091     Ordered = !Ordered;
4092   }
4093
4094   // ([if]cmp X, Y) ? X : Y
4095   if (TrueVal == CmpLHS && FalseVal == CmpRHS) {
4096     switch (Pred) {
4097     default: return {SPF_UNKNOWN, SPNB_NA, false}; // Equality.
4098     case ICmpInst::ICMP_UGT:
4099     case ICmpInst::ICMP_UGE: return {SPF_UMAX, SPNB_NA, false};
4100     case ICmpInst::ICMP_SGT:
4101     case ICmpInst::ICMP_SGE: return {SPF_SMAX, SPNB_NA, false};
4102     case ICmpInst::ICMP_ULT:
4103     case ICmpInst::ICMP_ULE: return {SPF_UMIN, SPNB_NA, false};
4104     case ICmpInst::ICMP_SLT:
4105     case ICmpInst::ICMP_SLE: return {SPF_SMIN, SPNB_NA, false};
4106     case FCmpInst::FCMP_UGT:
4107     case FCmpInst::FCMP_UGE:
4108     case FCmpInst::FCMP_OGT:
4109     case FCmpInst::FCMP_OGE: return {SPF_FMAXNUM, NaNBehavior, Ordered};
4110     case FCmpInst::FCMP_ULT:
4111     case FCmpInst::FCMP_ULE:
4112     case FCmpInst::FCMP_OLT:
4113     case FCmpInst::FCMP_OLE: return {SPF_FMINNUM, NaNBehavior, Ordered};
4114     }
4115   }
4116
4117   const APInt *C1;
4118   if (match(CmpRHS, m_APInt(C1))) {
4119     if ((CmpLHS == TrueVal && match(FalseVal, m_Neg(m_Specific(CmpLHS)))) ||
4120         (CmpLHS == FalseVal && match(TrueVal, m_Neg(m_Specific(CmpLHS))))) {
4121
4122       // ABS(X) ==> (X >s 0) ? X : -X and (X >s -1) ? X : -X
4123       // NABS(X) ==> (X >s 0) ? -X : X and (X >s -1) ? -X : X
4124       if (Pred == ICmpInst::ICMP_SGT && (*C1 == 0 || C1->isAllOnesValue())) {
4125         return {(CmpLHS == TrueVal) ? SPF_ABS : SPF_NABS, SPNB_NA, false};
4126       }
4127
4128       // ABS(X) ==> (X <s 0) ? -X : X and (X <s 1) ? -X : X
4129       // NABS(X) ==> (X <s 0) ? X : -X and (X <s 1) ? X : -X
4130       if (Pred == ICmpInst::ICMP_SLT && (*C1 == 0 || *C1 == 1)) {
4131         return {(CmpLHS == FalseVal) ? SPF_ABS : SPF_NABS, SPNB_NA, false};
4132       }
4133     }
4134   }
4135
4136   return matchMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS);
4137 }
4138
4139 static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2,
4140                               Instruction::CastOps *CastOp) {
4141   auto *Cast1 = dyn_cast<CastInst>(V1);
4142   if (!Cast1)
4143     return nullptr;
4144
4145   *CastOp = Cast1->getOpcode();
4146   Type *SrcTy = Cast1->getSrcTy();
4147   if (auto *Cast2 = dyn_cast<CastInst>(V2)) {
4148     // If V1 and V2 are both the same cast from the same type, look through V1.
4149     if (*CastOp == Cast2->getOpcode() && SrcTy == Cast2->getSrcTy())
4150       return Cast2->getOperand(0);
4151     return nullptr;
4152   }
4153
4154   auto *C = dyn_cast<Constant>(V2);
4155   if (!C)
4156     return nullptr;
4157
4158   Constant *CastedTo = nullptr;
4159   switch (*CastOp) {
4160   case Instruction::ZExt:
4161     if (CmpI->isUnsigned())
4162       CastedTo = ConstantExpr::getTrunc(C, SrcTy);
4163     break;
4164   case Instruction::SExt:
4165     if (CmpI->isSigned())
4166       CastedTo = ConstantExpr::getTrunc(C, SrcTy, true);
4167     break;
4168   case Instruction::Trunc:
4169     CastedTo = ConstantExpr::getIntegerCast(C, SrcTy, CmpI->isSigned());
4170     break;
4171   case Instruction::FPTrunc:
4172     CastedTo = ConstantExpr::getFPExtend(C, SrcTy, true);
4173     break;
4174   case Instruction::FPExt:
4175     CastedTo = ConstantExpr::getFPTrunc(C, SrcTy, true);
4176     break;
4177   case Instruction::FPToUI:
4178     CastedTo = ConstantExpr::getUIToFP(C, SrcTy, true);
4179     break;
4180   case Instruction::FPToSI:
4181     CastedTo = ConstantExpr::getSIToFP(C, SrcTy, true);
4182     break;
4183   case Instruction::UIToFP:
4184     CastedTo = ConstantExpr::getFPToUI(C, SrcTy, true);
4185     break;
4186   case Instruction::SIToFP:
4187     CastedTo = ConstantExpr::getFPToSI(C, SrcTy, true);
4188     break;
4189   default:
4190     break;
4191   }
4192
4193   if (!CastedTo)
4194     return nullptr;
4195
4196   // Make sure the cast doesn't lose any information.
4197   Constant *CastedBack =
4198       ConstantExpr::getCast(*CastOp, CastedTo, C->getType(), true);
4199   if (CastedBack != C)
4200     return nullptr;
4201
4202   return CastedTo;
4203 }
4204
4205 SelectPatternResult llvm::matchSelectPattern(Value *V, Value *&LHS, Value *&RHS,
4206                                              Instruction::CastOps *CastOp) {
4207   SelectInst *SI = dyn_cast<SelectInst>(V);
4208   if (!SI) return {SPF_UNKNOWN, SPNB_NA, false};
4209
4210   CmpInst *CmpI = dyn_cast<CmpInst>(SI->getCondition());
4211   if (!CmpI) return {SPF_UNKNOWN, SPNB_NA, false};
4212
4213   CmpInst::Predicate Pred = CmpI->getPredicate();
4214   Value *CmpLHS = CmpI->getOperand(0);
4215   Value *CmpRHS = CmpI->getOperand(1);
4216   Value *TrueVal = SI->getTrueValue();
4217   Value *FalseVal = SI->getFalseValue();
4218   FastMathFlags FMF;
4219   if (isa<FPMathOperator>(CmpI))
4220     FMF = CmpI->getFastMathFlags();
4221
4222   // Bail out early.
4223   if (CmpI->isEquality())
4224     return {SPF_UNKNOWN, SPNB_NA, false};
4225
4226   // Deal with type mismatches.
4227   if (CastOp && CmpLHS->getType() != TrueVal->getType()) {
4228     if (Value *C = lookThroughCast(CmpI, TrueVal, FalseVal, CastOp))
4229       return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS,
4230                                   cast<CastInst>(TrueVal)->getOperand(0), C,
4231                                   LHS, RHS);
4232     if (Value *C = lookThroughCast(CmpI, FalseVal, TrueVal, CastOp))
4233       return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS,
4234                                   C, cast<CastInst>(FalseVal)->getOperand(0),
4235                                   LHS, RHS);
4236   }
4237   return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS, TrueVal, FalseVal,
4238                               LHS, RHS);
4239 }
4240
4241 /// Return true if "icmp Pred LHS RHS" is always true.
4242 static bool isTruePredicate(CmpInst::Predicate Pred,
4243                             const Value *LHS, const Value *RHS,
4244                             const DataLayout &DL, unsigned Depth,
4245                             AssumptionCache *AC, const Instruction *CxtI,
4246                             const DominatorTree *DT) {
4247   assert(!LHS->getType()->isVectorTy() && "TODO: extend to handle vectors!");
4248   if (ICmpInst::isTrueWhenEqual(Pred) && LHS == RHS)
4249     return true;
4250
4251   switch (Pred) {
4252   default:
4253     return false;
4254
4255   case CmpInst::ICMP_SLE: {
4256     const APInt *C;
4257
4258     // LHS s<= LHS +_{nsw} C   if C >= 0
4259     if (match(RHS, m_NSWAdd(m_Specific(LHS), m_APInt(C))))
4260       return !C->isNegative();
4261     return false;
4262   }
4263
4264   case CmpInst::ICMP_ULE: {
4265     const APInt *C;
4266
4267     // LHS u<= LHS +_{nuw} C   for any C
4268     if (match(RHS, m_NUWAdd(m_Specific(LHS), m_APInt(C))))
4269       return true;
4270
4271     // Match A to (X +_{nuw} CA) and B to (X +_{nuw} CB)
4272     auto MatchNUWAddsToSameValue = [&](const Value *A, const Value *B,
4273                                        const Value *&X,
4274                                        const APInt *&CA, const APInt *&CB) {
4275       if (match(A, m_NUWAdd(m_Value(X), m_APInt(CA))) &&
4276           match(B, m_NUWAdd(m_Specific(X), m_APInt(CB))))
4277         return true;
4278
4279       // If X & C == 0 then (X | C) == X +_{nuw} C
4280       if (match(A, m_Or(m_Value(X), m_APInt(CA))) &&
4281           match(B, m_Or(m_Specific(X), m_APInt(CB)))) {
4282         KnownBits Known(CA->getBitWidth());
4283         computeKnownBits(X, Known, DL, Depth + 1, AC, CxtI, DT);
4284
4285         if (CA->isSubsetOf(Known.Zero) && CB->isSubsetOf(Known.Zero))
4286           return true;
4287       }
4288
4289       return false;
4290     };
4291
4292     const Value *X;
4293     const APInt *CLHS, *CRHS;
4294     if (MatchNUWAddsToSameValue(LHS, RHS, X, CLHS, CRHS))
4295       return CLHS->ule(*CRHS);
4296
4297     return false;
4298   }
4299   }
4300 }
4301
4302 /// Return true if "icmp Pred BLHS BRHS" is true whenever "icmp Pred
4303 /// ALHS ARHS" is true.  Otherwise, return None.
4304 static Optional<bool>
4305 isImpliedCondOperands(CmpInst::Predicate Pred, const Value *ALHS,
4306                       const Value *ARHS, const Value *BLHS,
4307                       const Value *BRHS, const DataLayout &DL,
4308                       unsigned Depth, AssumptionCache *AC,
4309                       const Instruction *CxtI, const DominatorTree *DT) {
4310   switch (Pred) {
4311   default:
4312     return None;
4313
4314   case CmpInst::ICMP_SLT:
4315   case CmpInst::ICMP_SLE:
4316     if (isTruePredicate(CmpInst::ICMP_SLE, BLHS, ALHS, DL, Depth, AC, CxtI,
4317                         DT) &&
4318         isTruePredicate(CmpInst::ICMP_SLE, ARHS, BRHS, DL, Depth, AC, CxtI, DT))
4319       return true;
4320     return None;
4321
4322   case CmpInst::ICMP_ULT:
4323   case CmpInst::ICMP_ULE:
4324     if (isTruePredicate(CmpInst::ICMP_ULE, BLHS, ALHS, DL, Depth, AC, CxtI,
4325                         DT) &&
4326         isTruePredicate(CmpInst::ICMP_ULE, ARHS, BRHS, DL, Depth, AC, CxtI, DT))
4327       return true;
4328     return None;
4329   }
4330 }
4331
4332 /// Return true if the operands of the two compares match.  IsSwappedOps is true
4333 /// when the operands match, but are swapped.
4334 static bool isMatchingOps(const Value *ALHS, const Value *ARHS,
4335                           const Value *BLHS, const Value *BRHS,
4336                           bool &IsSwappedOps) {
4337
4338   bool IsMatchingOps = (ALHS == BLHS && ARHS == BRHS);
4339   IsSwappedOps = (ALHS == BRHS && ARHS == BLHS);
4340   return IsMatchingOps || IsSwappedOps;
4341 }
4342
4343 /// Return true if "icmp1 APred ALHS ARHS" implies "icmp2 BPred BLHS BRHS" is
4344 /// true.  Return false if "icmp1 APred ALHS ARHS" implies "icmp2 BPred BLHS
4345 /// BRHS" is false.  Otherwise, return None if we can't infer anything.
4346 static Optional<bool> isImpliedCondMatchingOperands(CmpInst::Predicate APred,
4347                                                     const Value *ALHS,
4348                                                     const Value *ARHS,
4349                                                     CmpInst::Predicate BPred,
4350                                                     const Value *BLHS,
4351                                                     const Value *BRHS,
4352                                                     bool IsSwappedOps) {
4353   // Canonicalize the operands so they're matching.
4354   if (IsSwappedOps) {
4355     std::swap(BLHS, BRHS);
4356     BPred = ICmpInst::getSwappedPredicate(BPred);
4357   }
4358   if (CmpInst::isImpliedTrueByMatchingCmp(APred, BPred))
4359     return true;
4360   if (CmpInst::isImpliedFalseByMatchingCmp(APred, BPred))
4361     return false;
4362
4363   return None;
4364 }
4365
4366 /// Return true if "icmp1 APred ALHS C1" implies "icmp2 BPred BLHS C2" is
4367 /// true.  Return false if "icmp1 APred ALHS C1" implies "icmp2 BPred BLHS
4368 /// C2" is false.  Otherwise, return None if we can't infer anything.
4369 static Optional<bool>
4370 isImpliedCondMatchingImmOperands(CmpInst::Predicate APred, const Value *ALHS,
4371                                  const ConstantInt *C1,
4372                                  CmpInst::Predicate BPred,
4373                                  const Value *BLHS, const ConstantInt *C2) {
4374   assert(ALHS == BLHS && "LHS operands must match.");
4375   ConstantRange DomCR =
4376       ConstantRange::makeExactICmpRegion(APred, C1->getValue());
4377   ConstantRange CR =
4378       ConstantRange::makeAllowedICmpRegion(BPred, C2->getValue());
4379   ConstantRange Intersection = DomCR.intersectWith(CR);
4380   ConstantRange Difference = DomCR.difference(CR);
4381   if (Intersection.isEmptySet())
4382     return false;
4383   if (Difference.isEmptySet())
4384     return true;
4385   return None;
4386 }
4387
4388 Optional<bool> llvm::isImpliedCondition(const Value *LHS, const Value *RHS,
4389                                         const DataLayout &DL, bool InvertAPred,
4390                                         unsigned Depth, AssumptionCache *AC,
4391                                         const Instruction *CxtI,
4392                                         const DominatorTree *DT) {
4393   // A mismatch occurs when we compare a scalar cmp to a vector cmp, for example.
4394   if (LHS->getType() != RHS->getType())
4395     return None;
4396
4397   Type *OpTy = LHS->getType();
4398   assert(OpTy->getScalarType()->isIntegerTy(1));
4399
4400   // LHS ==> RHS by definition
4401   if (!InvertAPred && LHS == RHS)
4402     return true;
4403
4404   if (OpTy->isVectorTy())
4405     // TODO: extending the code below to handle vectors
4406     return None;
4407   assert(OpTy->isIntegerTy(1) && "implied by above");
4408
4409   ICmpInst::Predicate APred, BPred;
4410   Value *ALHS, *ARHS;
4411   Value *BLHS, *BRHS;
4412
4413   if (!match(LHS, m_ICmp(APred, m_Value(ALHS), m_Value(ARHS))) ||
4414       !match(RHS, m_ICmp(BPred, m_Value(BLHS), m_Value(BRHS))))
4415     return None;
4416
4417   if (InvertAPred)
4418     APred = CmpInst::getInversePredicate(APred);
4419
4420   // Can we infer anything when the two compares have matching operands?
4421   bool IsSwappedOps;
4422   if (isMatchingOps(ALHS, ARHS, BLHS, BRHS, IsSwappedOps)) {
4423     if (Optional<bool> Implication = isImpliedCondMatchingOperands(
4424             APred, ALHS, ARHS, BPred, BLHS, BRHS, IsSwappedOps))
4425       return Implication;
4426     // No amount of additional analysis will infer the second condition, so
4427     // early exit.
4428     return None;
4429   }
4430
4431   // Can we infer anything when the LHS operands match and the RHS operands are
4432   // constants (not necessarily matching)?
4433   if (ALHS == BLHS && isa<ConstantInt>(ARHS) && isa<ConstantInt>(BRHS)) {
4434     if (Optional<bool> Implication = isImpliedCondMatchingImmOperands(
4435             APred, ALHS, cast<ConstantInt>(ARHS), BPred, BLHS,
4436             cast<ConstantInt>(BRHS)))
4437       return Implication;
4438     // No amount of additional analysis will infer the second condition, so
4439     // early exit.
4440     return None;
4441   }
4442
4443   if (APred == BPred)
4444     return isImpliedCondOperands(APred, ALHS, ARHS, BLHS, BRHS, DL, Depth, AC,
4445                                  CxtI, DT);
4446
4447   return None;
4448 }